From 630b1437f0b95d5f7f7a7124308f33dda51dc2f4 Mon Sep 17 00:00:00 2001 From: baozhaoling Date: Mon, 8 Apr 2024 14:07:25 +0800 Subject: [PATCH] Add Hygon's supported patches. Signed-off-by: Zhaoling Bao --- HYGON-0001-arch-support-for-hygon.patch | 2946 +++ HYGON-0002-array-widen-compare.patch | 2092 ++ HYGON-0003-function-attribute-judgement.patch | 595 + ...0004-struct-data-layout-optimization.patch | 15898 ++++++++++++++++ HYGON-0005-basick-block-reorder.patch | 2664 +++ HYGON-0006-coop-vectorize.patch | 1427 ++ HYGON-0007-padding-slp-optimization.patch | 451 + gcc.spec | 23 +- 8 files changed, 26095 insertions(+), 1 deletion(-) create mode 100644 HYGON-0001-arch-support-for-hygon.patch create mode 100644 HYGON-0002-array-widen-compare.patch create mode 100644 HYGON-0003-function-attribute-judgement.patch create mode 100644 HYGON-0004-struct-data-layout-optimization.patch create mode 100644 HYGON-0005-basick-block-reorder.patch create mode 100644 HYGON-0006-coop-vectorize.patch create mode 100644 HYGON-0007-padding-slp-optimization.patch diff --git a/HYGON-0001-arch-support-for-hygon.patch b/HYGON-0001-arch-support-for-hygon.patch new file mode 100644 index 0000000..2f465ac --- /dev/null +++ b/HYGON-0001-arch-support-for-hygon.patch @@ -0,0 +1,2946 @@ +From ba127953bf66f35f75e6c67b89aaea33f19d6e69 Mon Sep 17 00:00:00 2001 +From: baozhaoling +Date: Fri, 3 Mar 2023 14:29:13 +0800 +Subject: [PATCH] [feat][gcc]: Add arch support for hygon. Manage hygon's insn + latency. 
+ +Signed-off-by: baozhaoling +Change-Id: Id8baaf9a7a5ccf8e243a1c0a4e05300915107eaf +--- + gcc/common/config/i386/cpuinfo.h | 14 + + gcc/common/config/i386/i386-common.cc | 17 +- + gcc/common/config/i386/i386-cpuinfo.h | 3 + + gcc/config.gcc | 10 +- + gcc/config/i386/cpuid.h | 4 + + gcc/config/i386/driver-i386.cc | 7 + + gcc/config/i386/hygon.md | 1266 +++++++++++++++++++++++++ + gcc/config/i386/i386-c.cc | 7 + + gcc/config/i386/i386-options.cc | 4 +- + gcc/config/i386/i386.cc | 3 +- + gcc/config/i386/i386.h | 1 + + gcc/config/i386/i386.md | 44 +- + gcc/config/i386/i386.opt | 4 +- + gcc/config/i386/mmx.md | 23 + + gcc/config/i386/sse.md | 68 ++ + gcc/config/i386/x86-tune-costs.h | 151 +++ + gcc/config/i386/x86-tune-sched.cc | 1 + + gcc/config/i386/x86-tune.def | 66 +- + 18 files changed, 1654 insertions(+), 39 deletions(-) + create mode 100644 gcc/config/i386/hygon.md + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index 0333da56ba5..dc2b9750965 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -948,6 +948,20 @@ cpu_indicator_init (struct __processor_model *cpu_model, + get_amd_cpu (cpu_model, cpu_model2, cpu_features2); + cpu_model->__cpu_vendor = VENDOR_AMD; + } ++ else if (vendor == signature_HYGON_ebx) ++ { ++ cpu_model->__cpu_vendor = VENDOR_HYGON; ++ cpu_model2->__cpu_family = family; ++ cpu_model2->__cpu_model = model; ++ ++ /* Find available features. 
*/ ++ get_available_features (cpu_model, cpu_model2, cpu_features2, ++ ecx, edx); ++ ++ cpu_model->__cpu_type = HYGON; ++ CHECK___builtin_cpu_is ("dhyana"); ++ cpu_model->__cpu_subtype = HYGON_DHYANA; ++ } + else if (vendor == signature_CENTAUR_ebx) + cpu_model->__cpu_vendor = VENDOR_CENTAUR; + else if (vendor == signature_CYRIX_ebx) +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index e2594cae4cc..ae8e3c1855c 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1831,7 +1831,8 @@ const char *const processor_names[] = + "znver1", + "znver2", + "znver3", +- "znver4" ++ "znver4", ++ "dhyana" + }; + + /* Guarantee that the array is aligned with enum processor_type. */ +@@ -2079,6 +2080,16 @@ const pta processor_alias_table[] = + | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW + | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT, + M_CPU_TYPE (AMD_BTVER2), P_PROC_BMI}, ++ {"dhyana", PROCESSOR_DHYANA, CPU_DHYANA, ++ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 ++ | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 ++ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 ++ | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW ++ | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE ++ | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED ++ | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES ++ | PTA_SHA | PTA_LZCNT | PTA_POPCNT, ++ M_CPU_SUBTYPE (HYGON_DHYANA), P_PROC_AVX2}, + + {"generic", PROCESSOR_GENERIC, CPU_GENERIC, + PTA_64BIT +@@ -2099,10 +2110,12 @@ const pta processor_alias_table[] = + M_CPU_TYPE (AMDFAM10H_SHANGHAI), P_NONE}, + {"istanbul", PROCESSOR_GENERIC, CPU_GENERIC, 0, + M_CPU_TYPE (AMDFAM10H_ISTANBUL), P_NONE}, ++ {"hygon", PROCESSOR_GENERIC, CPU_GENERIC, 0, ++ M_CPU_TYPE (HYGON), P_NONE}, + }; + + /* NB: processor_alias_table stops at the "generic" entry. 
*/ +-unsigned int const pta_size = ARRAY_SIZE (processor_alias_table) - 7; ++unsigned int const pta_size = ARRAY_SIZE (processor_alias_table) - 8; + unsigned int const num_arch_names = ARRAY_SIZE (processor_alias_table); + + /* Provide valid option values for -march and -mtune options. */ +diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h +index 82996ebb32d..ba3bc7287a9 100644 +--- a/gcc/common/config/i386/i386-cpuinfo.h ++++ b/gcc/common/config/i386/i386-cpuinfo.h +@@ -29,6 +29,7 @@ enum processor_vendor + { + VENDOR_INTEL = 1, + VENDOR_AMD, ++ VENDOR_HYGON, + VENDOR_OTHER, + VENDOR_CENTAUR, + VENDOR_CYRIX, +@@ -58,6 +59,7 @@ enum processor_types + INTEL_GOLDMONT_PLUS, + INTEL_TREMONT, + AMDFAM19H, ++ HYGON, + CPU_TYPE_MAX, + BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX + }; +@@ -92,6 +94,7 @@ enum processor_subtypes + AMDFAM19H_ZNVER3, + INTEL_COREI7_ROCKETLAKE, + AMDFAM19H_ZNVER4, ++ HYGON_DHYANA, + CPU_SUBTYPE_MAX + }; + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 5c378c698ff..ce2600ccc5f 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -669,7 +669,7 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ + silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ + skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \ + sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \ +-nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 native" ++nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 dhyana native" + + # Additional x86 processors supported by --with-cpu=. Each processor + # MUST be separated by exactly one space. 
+@@ -3847,6 +3847,10 @@ case ${target} in + cpu=pentiumpro + arch_without_sse2=yes + ;; ++ dhyana-*) ++ arch=dhyana ++ cpu=dhyana ++ ;; + *) + arch=pentiumpro + cpu=generic +@@ -3929,6 +3933,10 @@ case ${target} in + arch=corei7 + cpu=corei7 + ;; ++ dhyana-*) ++ arch=dhyana ++ cpu=dhyana ++ ;; + *) + arch=x86-64 + cpu=generic +diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h +index 8b3dc2b1dde..dfd387937b4 100644 +--- a/gcc/config/i386/cpuid.h ++++ b/gcc/config/i386/cpuid.h +@@ -164,6 +164,10 @@ + #define signature_CYRIX_ecx 0x64616574 + #define signature_CYRIX_edx 0x736e4978 + ++#define signature_HYGON_ebx 0x6f677948 ++#define signature_HYGON_ecx 0x656e6975 ++#define signature_HYGON_edx 0x6e65476e ++ + #define signature_INTEL_ebx 0x756e6547 + #define signature_INTEL_ecx 0x6c65746e + #define signature_INTEL_edx 0x49656e69 +diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc +index 3b5161aeddc..3504426f9d9 100644 +--- a/gcc/config/i386/driver-i386.cc ++++ b/gcc/config/i386/driver-i386.cc +@@ -495,6 +495,10 @@ const char *host_detect_local_cpu (int argc, const char **argv) + else + processor = PROCESSOR_PENTIUM; + } ++ else if (vendor == VENDOR_HYGON) ++ { ++ processor = PROCESSOR_DHYANA; ++ } + else if (vendor == VENDOR_CENTAUR) + { + processor = PROCESSOR_GENERIC; +@@ -775,6 +779,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) + case PROCESSOR_BTVER2: + cpu = "btver2"; + break; ++ case PROCESSOR_DHYANA: ++ cpu = "dhyana"; ++ break; + + default: + /* Use something reasonable. */ +diff --git a/gcc/config/i386/hygon.md b/gcc/config/i386/hygon.md +new file mode 100644 +index 00000000000..cb03fddaeb2 +--- /dev/null ++++ b/gcc/config/i386/hygon.md +@@ -0,0 +1,1266 @@ ++;; Copyright (C) 2012-2022 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. 
++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++;; ++ ++ ++(define_attr "dhyana_decode" "direct,vector,double" ++ (const_string "direct")) ++ ++(define_attr "hygon_sse_attr" "other,abs,neg,sqrt,maxmin,blend,blendv,rcp,movnt,avg,sign,sadbw,insr" ++ (const_string "other")) ++ ++;; HYGON Scheduling ++;; Modeling automatons for decoders, integer execution pipes, ++;; AGU pipes and floating point execution units. ++(define_automaton "dhyana, dhyana_ieu, dhyana_fp, dhyana_agu") ++ ++;; Decoders unit has 4 decoders and all of them can decode fast path ++;; and vector type instructions. ++(define_cpu_unit "dhyana-decode0" "dhyana") ++(define_cpu_unit "dhyana-decode1" "dhyana") ++(define_cpu_unit "dhyana-decode2" "dhyana") ++(define_cpu_unit "dhyana-decode3" "dhyana") ++ ++;; Currently blocking all decoders for vector path instructions as ++;; they are dispatched separetely as microcode sequence. ++;; Fix me: Need to revisit this. ++(define_reservation "dhyana-vector" "dhyana-decode0+dhyana-decode1+dhyana-decode2+dhyana-decode3") ++ ++;; Direct instructions can be issued to any of the four decoders. ++(define_reservation "dhyana-direct" "dhyana-decode0|dhyana-decode1|dhyana-decode2|dhyana-decode3") ++ ++;; Fix me: Need to revisit this later to simulate fast path double behavior. ++(define_reservation "dhyana-double" "dhyana-direct") ++ ++ ++;; Integer unit 4 ALU pipes. 
++(define_cpu_unit "dhyana-ieu0" "dhyana_ieu") ++(define_cpu_unit "dhyana-ieu1" "dhyana_ieu") ++(define_cpu_unit "dhyana-ieu2" "dhyana_ieu") ++(define_cpu_unit "dhyana-ieu3" "dhyana_ieu") ++(define_reservation "dhyana-ieu" "dhyana-ieu0|dhyana-ieu1|dhyana-ieu2|dhyana-ieu3") ++ ++;; 2 AGU pipes in dhyana ++;; According to CPU diagram last AGU unit is used only for stores. ++(define_cpu_unit "dhyana-agu0" "dhyana_agu") ++(define_cpu_unit "dhyana-agu1" "dhyana_agu") ++(define_reservation "dhyana-agu-reserve" "dhyana-agu0|dhyana-agu1") ++ ++;; Load is 4 cycles. We do not model reservation of load unit. ++;;(define_reservation "dhyana-load" "dhyana-agu-reserve, nothing, nothing, nothing") ++(define_reservation "dhyana-load" "dhyana-agu-reserve") ++(define_reservation "dhyana-store" "dhyana-agu-reserve") ++ ++;; vectorpath (microcoded) instructions are single issue instructions. ++;; So, they occupy all the integer units. ++(define_reservation "dhyana-ivector" "dhyana-ieu0+dhyana-ieu1 ++ +dhyana-ieu2+dhyana-ieu3 ++ +dhyana-agu0+dhyana-agu1") ++ ++;; Floating point unit 4 FP pipes. 
++(define_cpu_unit "dhyana-fp0" "dhyana_fp") ++(define_cpu_unit "dhyana-fp1" "dhyana_fp") ++(define_cpu_unit "dhyana-fp2" "dhyana_fp") ++(define_cpu_unit "dhyana-fp3" "dhyana_fp") ++ ++(define_reservation "dhyana-fpu" "dhyana-fp0|dhyana-fp1|dhyana-fp2|dhyana-fp3") ++ ++(define_reservation "dhyana-fvector" "dhyana-fp0+dhyana-fp1 ++ +dhyana-fp2+dhyana-fp3 ++ +dhyana-agu0+dhyana-agu1") ++ ++;; Call instruction ++(define_insn_reservation "dhyana_call" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (eq_attr "type" "call,callv")) ++ "dhyana-double,dhyana-store,dhyana-ieu0|dhyana-ieu3") ++ ++;; General instructions ++(define_insn_reservation "dhyana_push" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "push") ++ (eq_attr "memory" "store"))) ++ "dhyana-direct,dhyana-store") ++ ++(define_insn_reservation "dhyana_push_load" 4 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "push") ++ (eq_attr "memory" "both"))) ++ "dhyana-direct,dhyana-load,dhyana-store") ++ ++(define_insn_reservation "dhyana_pop" 4 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load") ++ ++(define_insn_reservation "dhyana_pop_mem" 4 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "pop") ++ (eq_attr "memory" "both"))) ++ "dhyana-direct,dhyana-load,dhyana-store") ++ ++;; Leave ++(define_insn_reservation "dhyana_leave" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (eq_attr "type" "leave")) ++ "dhyana-double,dhyana-ieu, dhyana-store") ++ ++;; Integer Instructions or General instructions ++;; Multiplications ++;; Reg operands ++(define_insn_reservation "dhyana_imul" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "imul") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-ieu1") ++ ++(define_insn_reservation "dhyana_imul_mem" 7 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "imul") ++ (eq_attr "memory" "!none"))) ++ "dhyana-direct,dhyana-load, dhyana-ieu1") ++ ++;; Divisions ++;; Reg operands 
++(define_insn_reservation "dhyana_idiv_DI" 41 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-ieu2*41") ++ ++(define_insn_reservation "dhyana_idiv_SI" 25 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "SI") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-ieu2*25") ++ ++(define_insn_reservation "dhyana_idiv_HI" 17 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-ieu2*17") ++ ++(define_insn_reservation "dhyana_idiv_QI" 15 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-ieu2*15") ++ ++;; Mem operands ++(define_insn_reservation "dhyana_idiv_mem_DI" 45 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-load,dhyana-ieu2*41") ++ ++(define_insn_reservation "dhyana_idiv_mem_SI" 29 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "SI") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-load,dhyana-ieu2*25") ++ ++(define_insn_reservation "dhyana_idiv_mem_HI" 21 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-load,dhyana-ieu2*17") ++ ++(define_insn_reservation "dhyana_idiv_mem_QI" 19 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "idiv") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-load,dhyana-ieu2*15") ++ ++;; STR ISHIFT which are micro coded. ++;; Fix me: Latency need to be rechecked. 
++(define_insn_reservation "dhyana_str_ishift" 6 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "str,ishift") ++ (eq_attr "memory" "both,store"))) ++ "dhyana-vector,dhyana-ivector") ++ ++;; MOV - integer moves ++(define_insn_reservation "dhyana_load_imov_double" 2 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "double") ++ (and (eq_attr "type" "imovx") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-ieu|dhyana-ieu") ++ ++(define_insn_reservation "dhyana_load_imov_direct" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-ieu") ++ ++(define_insn_reservation "dhyana_load_imov_double_store" 2 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "double") ++ (and (eq_attr "type" "imovx") ++ (eq_attr "memory" "store")))) ++ "dhyana-double,dhyana-ieu|dhyana-ieu,dhyana-store") ++ ++(define_insn_reservation "dhyana_load_imov_direct_store" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr "memory" "store"))) ++ "dhyana-direct,dhyana-ieu,dhyana-store") ++ ++(define_insn_reservation "dhyana_load_imov_double_load" 5 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "double") ++ (and (eq_attr "type" "imovx") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-ieu|dhyana-ieu") ++ ++(define_insn_reservation "dhyana_load_imov_direct_load" 4 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "imov,imovx") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load") ++ ++;; INTEGER/GENERAL instructions ++;; register/imm operands only: ALU, ICMP, NEG, NOT, ROTATE, ISHIFT, TEST ++(define_insn_reservation "dhyana_insn" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov") ++ (eq_attr "memory" "none,unknown"))) ++ "dhyana-direct,dhyana-ieu") ++ ++(define_insn_reservation "dhyana_insn_load" 5 ++ (and (eq_attr 
"cpu" "dhyana") ++ (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-ieu") ++ ++(define_insn_reservation "dhyana_insn_store" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") ++ (eq_attr "memory" "store"))) ++ "dhyana-direct,dhyana-ieu,dhyana-store") ++ ++(define_insn_reservation "dhyana_insn_both" 5 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") ++ (eq_attr "memory" "both"))) ++ "dhyana-direct,dhyana-load,dhyana-ieu,dhyana-store") ++ ++;; Special latency for multi type. ++(define_insn_reservation "dhyana_fp_fcomp" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "unit" "i387") ++ (eq_attr "type" "multi")))) ++ "dhyana-double,dhyana-fp0") ++ ++;; Fix me: Other vector type insns keeping latency 6 as of now. ++(define_insn_reservation "dhyana_ieu_vector" 6 ++ (and (eq_attr "cpu" "dhyana") ++ (eq_attr "type" "other,str,multi")) ++ "dhyana-vector,dhyana-ivector") ++ ++;; ALU1 register operands. ++(define_insn_reservation "dhyana_alu1_vector" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "vector") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "none,unknown")))) ++ "dhyana-vector,dhyana-ivector") ++ ++(define_insn_reservation "dhyana_alu1_double" 2 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "double") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "none,unknown")))) ++ "dhyana-double,dhyana-ieu") ++ ++(define_insn_reservation "dhyana_alu1_direct" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "direct") ++ (and (eq_attr "type" "alu1") ++ (eq_attr "memory" "none,unknown")))) ++ "dhyana-direct,dhyana-ieu") ++ ++;; Branches : Fix me need to model conditional branches. 
++(define_insn_reservation "dhyana_branch" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct") ++ ++;; Indirect branches check latencies. ++(define_insn_reservation "dhyana_indirect_branch_mem" 6 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "ibr") ++ (eq_attr "memory" "load"))) ++ "dhyana-vector,dhyana-ivector") ++ ++;; LEA executes in ALU units with 1 cycle latency. ++(define_insn_reservation "dhyana_lea" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (eq_attr "type" "lea")) ++ "dhyana-direct,dhyana-ieu") ++ ++;; Other integer instrucions ++(define_insn_reservation "dhyana_idirect" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "unit" "integer,unknown") ++ (eq_attr "memory" "none,unknown"))) ++ "dhyana-direct,dhyana-ieu") ++ ++;; Floating point ++(define_insn_reservation "dhyana_fp_cmov" 6 ++ (and (eq_attr "cpu" "dhyana") ++ (eq_attr "type" "fcmov")) ++ "dhyana-vector,dhyana-fvector") ++ ++(define_insn_reservation "dhyana_fp_mov_direct_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "load")))) ++ "dhyana-direct,dhyana-load,dhyana-fp3|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_fp_mov_direct_store" 5 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "direct") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "store")))) ++ "dhyana-direct,dhyana-fp2|dhyana-fp3,dhyana-store") ++ ++(define_insn_reservation "dhyana_fp_mov_double" 4 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fp3") ++ ++(define_insn_reservation "dhyana_fp_mov_double_load" 12 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "double") ++ (and (eq_attr "type" "fmov") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp3") ++ ++(define_insn_reservation 
"dhyana_fp_mov_direct" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (eq_attr "type" "fmov")) ++ "dhyana-direct,dhyana-fp3") ++ ++;; SQRT ++(define_insn_reservation "dhyana_fp_sqrt" 22 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "fpspc") ++ (eq_attr "hygon_sse_attr" "sqrt"))) ++ "dhyana-direct,dhyana-fp3*22") ++ ++(define_insn_reservation "dhyana_sse_sqrt_sf" 14 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "SF") ++ (and (eq_attr "memory" "none,unknown") ++ (and (eq_attr "hygon_sse_attr" "sqrt") ++ (eq_attr "type" "sse"))))) ++ "dhyana-direct,dhyana-fp3*14") ++ ++(define_insn_reservation "dhyana_sse_sqrt_sf_mem" 19 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "SF") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "hygon_sse_attr" "sqrt") ++ (eq_attr "type" "sse"))))) ++ "dhyana-direct,dhyana-load,dhyana-fp3*14") ++ ++(define_insn_reservation "dhyana_sse_sqrt_df" 20 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V2DF,DF") ++ (and (eq_attr "memory" "none,unknown") ++ (and (eq_attr "hygon_sse_attr" "sqrt") ++ (eq_attr "type" "sse"))))) ++ "dhyana-direct,dhyana-fp3*20") ++ ++(define_insn_reservation "dhyana_sse_sqrt_df_mem" 25 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V2DF,DF") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "hygon_sse_attr" "sqrt") ++ (eq_attr "type" "sse"))))) ++ "dhyana-direct,dhyana-load,dhyana-fp3*20") ++ ++;; RCP ++(define_insn_reservation "dhyana_sse_rcp" 5 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V4SF,V8SF,SF") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "hygon_sse_attr" "rcp") ++ (eq_attr "type" "sse"))))) ++ "dhyana-direct,(dhyana-fp0|dhyana-fp1)*5") ++ ++(define_insn_reservation "dhyana_sse_rcp_mem" 10 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V4SF,V8SF,SF") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "hygon_sse_attr" "rcp") ++ (eq_attr "type" "sse"))))) ++ "dhyana-direct,dhyana-load,(dhyana-fp0|dhyana-fp1)*5") ++ ++;; TODO: AGU? 
++(define_insn_reservation "dhyana_fp_spc_direct" 5 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "fpspc") ++ (eq_attr "memory" "store"))) ++ "dhyana-direct,dhyana-fp3") ++ ++(define_insn_reservation "dhyana_fp_insn_vector" 6 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "dhyana_decode" "vector") ++ (eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov"))) ++ "dhyana-vector,dhyana-fvector") ++ ++;; FABS ++(define_insn_reservation "dhyana_fp_abs" 2 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "fsgn") ++ (eq_attr "hygon_sse_attr" "abs"))) ++ "dhyana-direct,dhyana-fp3*2") ++ ++(define_insn_reservation "dhyana_fp_neg" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "fsgn") ++ (eq_attr "hygon_sse_attr" "neg"))) ++ "dhyana-direct,dhyana-fp3") ++ ++;; FCMP ++(define_insn_reservation "dhyana_fp_fcmp" 2 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "dhyana_decode" "double") ++ (eq_attr "type" "fcmp")))) ++ "dhyana-double,dhyana-fp0,dhyana-fp2") ++ ++(define_insn_reservation "dhyana_fp_fcmp_load" 9 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "dhyana_decode" "double") ++ (eq_attr "type" "fcmp")))) ++ "dhyana-double,dhyana-load, dhyana-fp0,dhyana-fp2") ++ ++;;FADD FSUB FMUL ++(define_insn_reservation "dhyana_fp_op_mul" 5 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "fop,fmul") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0*5") ++ ++(define_insn_reservation "dhyana_fp_op_mul_load" 12 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "fop,fmul") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0*5") ++ ++(define_insn_reservation "dhyana_fp_op_imul_load" 16 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "fop,fmul") ++ (and (eq_attr "fp_int_src" "true") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp3,dhyana-fp0") ++ ++(define_insn_reservation "dhyana_fp_op_div" 15 ++ (and (eq_attr 
"cpu" "dhyana") ++ (and (eq_attr "type" "fdiv") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp3*15") ++ ++(define_insn_reservation "dhyana_fp_op_div_load" 22 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "fdiv") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp3*15") ++ ++(define_insn_reservation "dhyana_fp_op_idiv_load" 27 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "fdiv") ++ (and (eq_attr "fp_int_src" "true") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp3*19") ++ ++;; MMX, SSE, SSEn.n, AVX, AVX2 instructions ++(define_insn_reservation "dhyana_fp_insn" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (eq_attr "type" "mmx")) ++ "dhyana-direct,dhyana-fpu") ++ ++(define_insn_reservation "dhyana_mmx_add" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxadd") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp1|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_mmx_add_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxadd") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp1|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_mmx_hadd" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sseadd1") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0") ++ ++(define_insn_reservation "dhyana_mmx_hadd_load" 10 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sseadd1") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0") ++ ++(define_insn_reservation "dhyana_mmx_cmp" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxcmp") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_mmx_cmp_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxcmp") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_mmx_cvt_pck_shuf" 1 ++ (and 
(eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_mmx_cvt_pck_shuf_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_mmx_shift_move" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxshft,mmxmov") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp2") ++ ++(define_insn_reservation "dhyana_mmx_shift_move_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxshft,mmxmov") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp2") ++ ++(define_insn_reservation "dhyana_mmx_move_store" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxshft,mmxmov") ++ (eq_attr "memory" "store,both"))) ++ "dhyana-direct,dhyana-fp2,dhyana-store") ++ ++(define_insn_reservation "dhyana_mmx_mul" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxmul") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0*3") ++ ++(define_insn_reservation "dhyana_mmx_load" 10 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "mmxmul") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0*3") ++ ++;; sseabs ++(define_insn_reservation "dhyana_sse_abs" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "hygon_sse_attr" "abs") ++ (and (eq_attr "type" "sselog1") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fp0|dhyana-fp1|dhyana-fp3") ++ ++;; TODO ++(define_insn_reservation "dhyana_avx256_log" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "sselog") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_avx256_log_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") 
++ (and (eq_attr "type" "sselog") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_pinsr_reg" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "hygon_sse_attr" "insr") ++ (and (match_operand 2 "register_operand") ++ (eq_attr "memory" "none"))))) ++ "dhyana-direct,dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_pinsr" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sselog") ++ (and (eq_attr "hygon_sse_attr" "insr") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_log" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sselog") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_log_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sselog") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_avx256_log1" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "sselog1") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_avx256_log1_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "sselog1") ++ (eq_attr "memory" "!none")))) ++ "dhyana-double,dhyana-load,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_sign" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "hygon_sse_attr" "sign") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sse_sign_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sselog1") ++ (and (eq_attr "hygon_sse_attr" "sign") ++ (eq_attr "memory" "!none")))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp3") ++ 
++(define_insn_reservation "dhyana_sse_log1" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sselog1") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_log1_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sselog1") ++ (eq_attr "memory" "!none"))) ++ "dhyana-direct,dhyana-load,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_extrq" 2 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sse") ++ (and (eq_attr "memory" "none") ++ (eq_attr "prefix_data16" "1")))) ++ "dhyana-direct,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_alignr" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "prefix_extra" "1")) ++ (and (eq_attr "type" "sseishft") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_ishift" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sseishft") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_ishift_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sseishft") ++ (eq_attr "memory" "!none"))) ++ "dhyana-direct,dhyana-load,dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_insertimm" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sseins") ++ (and (eq_attr "memory" "none") ++ (eq_attr "length_immediate" "2")))) ++ "dhyana-direct,dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_insert" 4 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sseins") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp3,dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_comi" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF") ++ (and (eq_attr "prefix" "!vex") ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "none")))))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp1") ++ 
++(define_insn_reservation "dhyana_sse_comi_load" 8 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "load")))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_comi_double" 2 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "V4SF,V2DF,TI")) ++ (and (eq_attr "prefix" "vex") ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "none"))))) ++ "dhyana-double,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_comi_double_load" 10 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "V4SF,V2DF,TI")) ++ (and (eq_attr "prefix" "vex") ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "load"))))) ++ "dhyana-double,dhyana-load,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_test" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-fp1|dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_test_load" 8 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "type" "ssecomi") ++ (eq_attr "memory" "load")))) ++ "dhyana-direct,dhyana-load,dhyana-fp1|dhyana-fp2") ++ ++;; SSE moves ++;; Fix me: Need to revist this again some of the moves may be restricted ++;; to some fpu pipes. 
++(define_insn_reservation "dhyana_sse_movnt" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "hygon_sse_attr" "movnt") ++ (and (eq_attr "type" "ssemov,mmxmov") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_movnt_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "hygon_sse_attr" "movnt") ++ (and (eq_attr "type" "ssemov,mmxmov") ++ (eq_attr "memory" "load")))) ++ "dhyana-direct,dhyana-load,dhyana-fp2") ++ ++(define_insn_reservation "dhyana_sse_mov" 2 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "SI") ++ (and (eq_attr "isa" "avx") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "none"))))) ++ "dhyana-direct,dhyana-ieu0") ++ ++(define_insn_reservation "dhyana_avx_mov" 2 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "TI") ++ (and (eq_attr "isa" "avx") ++ (and (eq_attr "type" "ssemov") ++ (and (match_operand:SI 1 "register_operand") ++ (eq_attr "memory" "none")))))) ++ "dhyana-direct,dhyana-ieu2") ++ ++(define_insn_reservation "dhyana_sseavx_mov" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-fpu") ++ ++(define_insn_reservation "dhyana_sseavx_blend" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "type" "ssemov,sselog1") ++ (and (eq_attr "hygon_sse_attr" "blend") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sseavx_blendv" 3 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "type" "ssemov") ++ (and (eq_attr "hygon_sse_attr" "blendv") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sseavx_mov_store" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI") ++ (and 
(eq_attr "type" "ssemov") ++ (eq_attr "memory" "store")))) ++ "dhyana-direct,dhyana-fpu,dhyana-store") ++ ++(define_insn_reservation "dhyana_sseavx_mov_load" 8 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fpu") ++ ++(define_insn_reservation "dhyana_avx256_mov" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fpu") ++ ++(define_insn_reservation "dhyana_avx256_mov_store" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "store")))) ++ "dhyana-double,dhyana-fpu,dhyana-store") ++ ++(define_insn_reservation "dhyana_avx256_mov_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "ssemov") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fpu") ++ ++;; SSE max & min ++(define_insn_reservation "dhyana_sse_maxmin" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "memory" "none") ++ (eq_attr "hygon_sse_attr" "maxmin")))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_maxmin_load" 8 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "type" "sseadd") ++ (and (eq_attr "memory" "load") ++ (eq_attr "hygon_sse_attr" "maxmin")))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_pmaxmin" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "type" "mmxadd,sseiadd") ++ (and (eq_attr "memory" "none") ++ (eq_attr "hygon_sse_attr" "maxmin")))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp3") ++ ++(define_insn_reservation 
"dhyana_sse_pmaxmin_load" 8 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "type" "mmxadd,sseiadd") ++ (and (eq_attr "memory" "load") ++ (eq_attr "hygon_sse_attr" "maxmin")))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp3") ++ ++;; SSE avg ++(define_insn_reservation "dhyana_sse_avg" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "hygon_sse_attr" "avg")) ++ (and (eq_attr "type" "sseiadd,mmxshft") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sse_avg_load" 8 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "hygon_sse_attr" "avg")) ++ (and (eq_attr "type" "sseiadd,mmxshft") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp3") ++ ++;;MMX sadbw ++(define_insn_reservation "dhyana_sse_sadbw" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sseiadd,mmxshft") ++ (and (eq_attr "hygon_sse_attr" "sadbw") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,dhyana-fp0") ++ ++(define_insn_reservation "dhyana_sse_sadbw_load" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "sseiadd,mmxshft") ++ (and (eq_attr "hygon_sse_attr" "sadbw") ++ (eq_attr "memory" "load")))) ++ "dhyana-direct,dhyana-load,dhyana-fp0") ++ ++;; SSE add ++(define_insn_reservation "dhyana_sseavx_add" 3 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "type" "sseadd") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp2|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sseavx_add_load" 10 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) ++ (and (eq_attr "type" "sseadd") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp2|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_avx256_add" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "sseadd") ++ (eq_attr "memory" "none")))) ++ 
"dhyana-double,dhyana-fp2|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_avx256_add_load" 10 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "type" "sseadd") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp2|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sseavx_fma" 5 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "type" "ssemuladd") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp1,dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sseavx_fma_load" 12 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "type" "ssemuladd") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp1,dhyana-fp3") ++ ++(define_insn_reservation "dhyana_avx256_fma" 5 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "type" "ssemuladd") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fp0|dhyana-fp1,dhyana-fp3") ++ ++(define_insn_reservation "dhyana_avx256_fma_load" 12 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "type" "ssemuladd") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp0|dhyana-fp1,dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sseavx_iadd" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "DI,TI")) ++ (and (eq_attr "type" "sseiadd") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp1|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sseavx_iadd_load" 8 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "DI,TI")) ++ (and (eq_attr "type" "sseiadd") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp1|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_avx256_iadd" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "sseiadd") ++ (eq_attr "memory" "none")))) ++ 
"dhyana-double,dhyana-fp0|dhyana-fp1|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_avx256_iadd_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "sseiadd") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp0|dhyana-fp1|dhyana-fp3") ++ ++;; SSE conversions. ++(define_insn_reservation "dhyana_ssecvtsf_si_load" 12 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "SI") ++ (and (eq_attr "type" "sseicvt") ++ (and (match_operand:SF 1 "memory_operand") ++ (eq_attr "memory" "load"))))) ++ "dhyana-double,dhyana-load,dhyana-fp3,dhyana-ieu0") ++ ++(define_insn_reservation "dhyana_ssecvtdf_si" 5 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "SI") ++ (and (match_operand:DF 1 "register_operand") ++ (and (eq_attr "type" "sseicvt") ++ (eq_attr "memory" "none"))))) ++ "dhyana-double,dhyana-fp3,dhyana-ieu0") ++ ++(define_insn_reservation "dhyana_ssecvtdf_si_load" 12 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "SI") ++ (and (eq_attr "type" "sseicvt") ++ (and (match_operand:DF 1 "memory_operand") ++ (eq_attr "memory" "load"))))) ++ "dhyana-double,dhyana-load,dhyana-fp3,dhyana-ieu0") ++ ++;; All other used ssecvt fp3 pipes ++;; Check: Need to revisit this again. ++;; Some SSE converts may use different pipe combinations. 
++(define_insn_reservation "dhyana_ssecvt" 4 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "ssecvt") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp3") ++ ++(define_insn_reservation "dhyana_ssecvt_load" 11 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "type" "ssecvt") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp3") ++ ++;; SSE div ++(define_insn_reservation "dhyana_ssediv_ss_ps" 10 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "V4SF,SF")) ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp3*10") ++ ++(define_insn_reservation "dhyana_ssediv_ss_ps_load" 17 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "V4SF,SF")) ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp3*10") ++ ++(define_insn_reservation "dhyana_ssediv_sd_pd" 13 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "V2DF,DF")) ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp3*13") ++ ++(define_insn_reservation "dhyana_ssediv_sd_pd_load" 20 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "V2DF,DF")) ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp3*13") ++ ++(define_insn_reservation "dhyana_ssediv_avx256_ps" 12 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssediv")))) ++ "dhyana-double,dhyana-fp3*12") ++ ++(define_insn_reservation "dhyana_ssediv_avx256_ps_load" 19 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF") ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp3*12") ++ ++(define_insn_reservation "dhyana_ssediv_avx256_pd" 15 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fp3*15") ++ 
++(define_insn_reservation "dhyana_ssediv_avx256_pd_load" 22 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "type" "ssediv") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp3*15") ++;; SSE MUL ++(define_insn_reservation "dhyana_ssemul_ss_ps" 3 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "V4SF,SF")) ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,(dhyana-fp0|dhyana-fp1)*3") ++ ++(define_insn_reservation "dhyana_ssemul_ss_ps_load" 10 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "V4SF,SF")) ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,(dhyana-fp0|dhyana-fp1)*3") ++ ++(define_insn_reservation "dhyana_ssemul_avx256_ps" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,(dhyana-fp0|dhyana-fp1)*3") ++ ++(define_insn_reservation "dhyana_ssemul_avx256_ps_load" 10 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,(dhyana-fp0|dhyana-fp1)*3") ++ ++(define_insn_reservation "dhyana_ssemul_sd_pd" 4 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V2DF,DF") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "none")))) ++ "dhyana-direct,(dhyana-fp0|dhyana-fp1)*4") ++ ++(define_insn_reservation "dhyana_ssemul_sd_pd_load" 11 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V2DF,DF") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "load")))) ++ "dhyana-direct,dhyana-load,(dhyana-fp0|dhyana-fp1)*4") ++ ++(define_insn_reservation "dhyana_ssemul_avx256_pd" 5 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,(dhyana-fp0|dhyana-fp1)*4") ++ ++(define_insn_reservation 
"dhyana_ssemul_avx256_pd_load" 12 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "type" "ssemul") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,(dhyana-fp0|dhyana-fp1)*4") ++ ++;;SSE imul ++(define_insn_reservation "dhyana_sseimul" 3 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "TI")) ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0*3") ++ ++(define_insn_reservation "dhyana_sseimul_avx256" 4 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fp0*4") ++ ++(define_insn_reservation "dhyana_sseimul_load" 10 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "TI")) ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0*3") ++ ++(define_insn_reservation "dhyana_sseimul_avx256_load" 11 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp0*4") ++ ++(define_insn_reservation "dhyana_sseimul_di" 3 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "DI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseimul")))) ++ "dhyana-direct,dhyana-fp0*3") ++ ++(define_insn_reservation "dhyana_sseimul_load_di" 10 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "DI") ++ (and (eq_attr "type" "sseimul") ++ (eq_attr "memory" "load")))) ++ "dhyana-direct,dhyana-load,dhyana-fp0*3") ++ ++;; SSE compares ++(define_insn_reservation "dhyana_sse_cmp" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_cmp_load" 8 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "SF,DF,V4SF,V2DF")) ++ (and (eq_attr "type" "ssecmp") 
++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_cmp_avx256" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_cmp_avx256_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp0|dhyana-fp1") ++ ++(define_insn_reservation "dhyana_sse_icmp" 1 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "QI,HI,SI,DI,TI")) ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "none"))) ++ "dhyana-direct,dhyana-fp0|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sse_icmp_load" 8 ++ (and (and (eq_attr "cpu" "dhyana") ++ (eq_attr "mode" "QI,HI,SI,DI,TI")) ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "load"))) ++ "dhyana-direct,dhyana-load,dhyana-fp0|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sse_icmp_avx256" 1 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "none")))) ++ "dhyana-double,dhyana-fp0|dhyana-fp3") ++ ++(define_insn_reservation "dhyana_sse_icmp_avx256_load" 8 ++ (and (eq_attr "cpu" "dhyana") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "type" "ssecmp") ++ (eq_attr "memory" "load")))) ++ "dhyana-double,dhyana-load,dhyana-fp0|dhyana-fp3") +diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc +index 3fec4c7e245..fdbcfc25a78 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -250,6 +250,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__rocketlake"); + def_or_undef (parse_in, "__rocketlake__"); + break; ++ case PROCESSOR_DHYANA: ++ def_or_undef (parse_in, "__dhyana"); ++ def_or_undef (parse_in, "__dhyana__"); ++ break; + /* use 
PROCESSOR_max to not set/unset the arch macro. */ + case PROCESSOR_max: + break; +@@ -419,6 +423,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + case PROCESSOR_ROCKETLAKE: + def_or_undef (parse_in, "__tune_rocketlake__"); + break; ++ case PROCESSOR_DHYANA: ++ def_or_undef (parse_in, "__tune_dhyana__"); ++ break; + case PROCESSOR_INTEL: + case PROCESSOR_GENERIC: + break; +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 099cec4b610..a1e07600e99 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -160,6 +160,7 @@ along with GCC; see the file COPYING3. If not see + #define m_ZNVER (m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4) + #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \ + | m_ZNVER) ++#define m_DHYANA (HOST_WIDE_INT_1U<" +@@ -1751,6 +1753,7 @@ + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set (attr "enabled") + (if_then_else + (match_test ("SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH")) +@@ -4501,6 +4504,10 @@ + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "double") + (const_string "direct"))) ++ (set (attr "dhyana_decode") ++ (if_then_else (eq_attr "prefix_0f" "0") ++ (const_string "double") ++ (const_string "direct"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") +@@ -5344,6 +5351,7 @@ + [(set_attr "type" "fmov") + (set_attr "mode" "") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "fp_int_src" "true")]) + + (define_insn "floatxf2" +@@ -5354,6 +5362,7 @@ + [(set_attr "type" "fmov") + (set_attr "mode" "XF") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "fp_int_src" "true")]) + + (define_expand "float2" +@@ -5388,6 +5397,7 @@ + (set_attr "amdfam10_decode" "*,vector,double") + (set_attr "bdver1_decode" 
"*,double,direct") + (set_attr "znver1_decode" "double,*,*") ++ (set_attr "dhyana_decode" "double,*,*") + (set_attr "fp_int_src" "true") + (set (attr "enabled") + (if_then_else +@@ -5426,6 +5436,7 @@ + [(set_attr "type" "fmov") + (set_attr "mode" "") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "fp_int_src" "true")]) + + ;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory +@@ -9861,6 +9872,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "mode" "DI")]) + + ;; Turn *anddi_1 into *andsi_1_zext if possible. +@@ -10489,6 +10501,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "mode" "DI")]) + + (define_insn_and_split "*xordi_1_btc" +@@ -10513,6 +10526,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "mode" "DI")]) + + ;; Optimize a ^ ((a ^ b) & mask) to (~mask & a) | (b & mask) +@@ -11460,6 +11474,7 @@ + "TARGET_80387 && reload_completed" + "" + [(set_attr "type" "fsgn") ++ (set_attr "hygon_sse_attr" "") + (set_attr "mode" "")]) + + ;; Copysign instructions +@@ -13743,6 +13758,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "mode" "")]) + + ;; Avoid useless masking of count operand. +@@ -13808,6 +13824,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "mode" "")]) + + ;; Avoid useless masking of count operand. 
+@@ -13946,6 +13963,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "mode" "DI")]) + + (define_insn "*btrq_imm" +@@ -13959,6 +13977,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "mode" "DI")]) + + (define_insn "*btcq_imm" +@@ -13972,6 +13991,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") ++ (set_attr "dhyana_decode" "double") + (set_attr "mode" "DI")]) + + ;; Allow Nocona to avoid these instructions if a register is available. +@@ -15830,6 +15850,7 @@ + (set_attr "prefix_0f" "1") + (set_attr "btver2_decode" "double") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "")]) + + (define_insn_and_split "ctz2" +@@ -15951,6 +15972,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "DI")]) + + (define_insn "bsr_rex64_1" +@@ -15963,6 +15985,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "DI")]) + + (define_insn "bsr_rex64_1_zext" +@@ -15978,6 +16001,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "DI")]) + + (define_insn "bsr" +@@ -15992,6 +16016,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "SI")]) + + (define_insn "bsr_1" +@@ -16004,6 +16029,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "SI")]) + + (define_insn "bsr_zext_1" +@@ 
-16018,6 +16044,7 @@ + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "SI")]) + + ; As bsr is undefined behavior on zero and for other input +@@ -17990,6 +18017,7 @@ + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") ++ (set_attr "hygon_sse_attr" "sqrt") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct")]) +@@ -18157,6 +18185,7 @@ + "fprem" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "fmodxf3" +@@ -18232,6 +18261,7 @@ + "fprem1" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "remainderxf3" +@@ -18309,6 +18339,7 @@ + "f" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "2" +@@ -18340,6 +18371,7 @@ + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "sincos3" +@@ -18373,6 +18405,7 @@ + "fptan" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "tanxf2" +@@ -18415,6 +18448,7 @@ + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "atan23" +@@ -18718,6 +18752,7 @@ + "fyl2x" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "logxf2" +@@ -18815,6 +18850,7 @@ + "fyl2xp1" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" 
"vector") + (set_attr "mode" "XF")]) + + (define_expand "log1pxf2" +@@ -18855,6 +18891,7 @@ + "fxtract" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "logbxf2" +@@ -18935,6 +18972,7 @@ + "f2xm1" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_insn "fscalexf4_i387" +@@ -18950,6 +18988,7 @@ + "fscale" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "expNcorexf3" +@@ -19267,6 +19306,7 @@ + "frndint" + [(set_attr "type" "fpspc") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "XF")]) + + (define_expand "rinthf2" +@@ -20942,6 +20982,7 @@ + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") ++ (set_attr "hygon_sse_attr" "maxmin,*") + (set_attr "type" "sseadd") + (set_attr "mode" "")]) + +@@ -20986,6 +21027,7 @@ + v\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,maybe_evex") ++ (set_attr "hygon_sse_attr" "maxmin,*") + (set_attr "type" "sseadd") + (set_attr "mode" "")]) + +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index a3675e515bc..442dd796a5a 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -597,8 +597,8 @@ computations into a vector ones. + + mdispatch-scheduler + Target RejectNegative Var(flag_dispatch_scheduler) +-Do dispatch scheduling if processor is bdver1, bdver2, bdver3, bdver4 +-or znver1 and Haifa scheduling is selected. ++Do dispatch scheduling if processor is bdver1, bdver2, bdver3, bdver4, ++znver1 or dhyana and Haifa scheduling is selected. 
+ + mprefer-avx128 + Target Alias(mprefer-vector-width=, 128, 256) +diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md +index 197f19e4b1a..71f449ab077 100644 +--- a/gcc/config/i386/mmx.md ++++ b/gcc/config/i386/mmx.md +@@ -543,6 +543,7 @@ + [(set_attr "isa" "*,x64") + (set_attr "mmx_isa" "native,*") + (set_attr "type" "mmxmov,ssemov") ++ (set_attr "hygon_sse_attr" "movnt,*") + (set_attr "mode" "DI")]) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@@ -770,6 +771,7 @@ + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxadd,sseadd,sseadd") + (set_attr "btver2_sse_attr" "*,maxmin,maxmin") ++ (set_attr "hygon_sse_attr" "*,maxmin,maxmin") + (set_attr "prefix_extra" "1,*,*") + (set_attr "prefix" "*,orig,vex") + (set_attr "mode" "V2SF,V4SF,V4SF")]) +@@ -795,6 +797,7 @@ + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxadd,sseadd,sseadd") + (set_attr "btver2_sse_attr" "*,maxmin,maxmin") ++ (set_attr "hygon_sse_attr" "*,maxmin,maxmin") + (set_attr "prefix_extra" "1,*,*") + (set_attr "prefix" "*,orig,vex") + (set_attr "mode" "V2SF,V4SF,V4SF")]) +@@ -1130,6 +1133,7 @@ + vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend,blend,*") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") +@@ -2122,6 +2126,7 @@ + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) +@@ -2150,6 +2155,7 @@ + [(set_attr "isa" "*,sse2_noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxadd,sseiadd,sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "mode" "DI,TI,TI")]) + + (define_expand "v4hi3" +@@ -2171,6 +2177,7 @@ + vpb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + 
(set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) +@@ -2186,6 +2193,7 @@ + vpw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "mode" "TI")]) + + (define_insn "3" +@@ -2200,6 +2208,7 @@ + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) +@@ -2228,6 +2237,7 @@ + [(set_attr "isa" "*,sse2_noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxadd,sseiadd,sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "mode" "DI,TI,TI")]) + + (define_expand "v8qi3" +@@ -2248,6 +2258,7 @@ + vpb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "mode" "TI")]) + + (define_insn "v2hi3" +@@ -2262,6 +2273,7 @@ + vpw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) +@@ -2702,6 +2714,7 @@ + vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend,blend,*") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "orig,orig,vex") +@@ -2722,6 +2735,7 @@ + vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend,blend,*") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "orig,orig,vex") +@@ -3638,6 +3652,7 @@ + vpblendw\t{%3, %2, %1, %0|%0, %1, 
%2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend,blend,*") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,orig,vex") +@@ -3656,6 +3671,7 @@ + vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend,blend,*") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,orig,vex") +@@ -4296,6 +4312,7 @@ + [(set_attr "isa" "*,sse2_noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxshft,sseiadd,sseiadd") ++ (set_attr "hygon_sse_attr" "avg") + (set (attr "prefix_extra") + (if_then_else + (not (ior (match_test "TARGET_SSE") +@@ -4327,6 +4344,7 @@ + [(set_attr "isa" "*,sse2_noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxshft,sseiadd,sseiadd") ++ (set_attr "hygon_sse_attr" "avg") + (set_attr "mode" "DI,TI,TI")]) + + (define_expand "uavg3_ceil" +@@ -4363,6 +4381,7 @@ + vpavgb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "avg") + (set_attr "mode" "TI")]) + + (define_insn "uavgv2qi3_ceil" +@@ -4383,6 +4402,7 @@ + vpavgb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "avg") + (set_attr "mode" "TI")]) + + (define_insn "uavgv2hi3_ceil" +@@ -4403,6 +4423,7 @@ + vpavgw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "avg") + (set_attr "mode" "TI")]) + + (define_insn "mmx_psadbw" +@@ -4419,6 +4440,7 @@ + [(set_attr "isa" "*,sse2_noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxshft,sseiadd,sseiadd") ++ (set_attr "hygon_sse_attr" "sadbw") + (set_attr "mode" "DI,TI,TI")]) + + (define_expand "reduc_plus_scal_v8qi" +@@ -4577,6 +4599,7 @@ + "maskmovq\t{%2, %1|%1, %2}" + 
[(set_attr "type" "mmxcvt") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "DI")]) + + (define_int_iterator EMMS +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index eb767e56ca4..82ee1717bdd 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -1764,6 +1764,7 @@ + "movnti\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") + (set_attr "prefix_data16" "0") ++ (set_attr "hygon_sse_attr" "movnt") + (set_attr "mode" "")]) + + (define_insn "_movnt" +@@ -1774,6 +1775,7 @@ + "TARGET_SSE" + "%vmovnt\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "movnt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -1789,6 +1791,7 @@ + (match_test "TARGET_AVX") + (const_string "*") + (const_string "1"))) ++ (set_attr "hygon_sse_attr" "movnt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -2415,6 +2418,7 @@ + [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") + (set_attr "btver2_sse_attr" "rcp") ++ (set_attr "hygon_sse_attr" "rcp") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -2433,6 +2437,7 @@ + (set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") + (set_attr "btver2_sse_attr" "rcp") ++ (set_attr "hygon_sse_attr" "rcp") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "SF")]) + +@@ -2452,6 +2457,7 @@ + (set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") + (set_attr "btver2_sse_attr" "rcp") ++ (set_attr "hygon_sse_attr" "rcp") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "SF")]) + +@@ -2567,6 +2573,7 @@ + (set_attr "type" "sse") + (set_attr "atom_sse_attr" "sqrt") + (set_attr "btver2_sse_attr" "sqrt") ++ (set_attr "hygon_sse_attr" "sqrt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -2586,6 +2593,7 @@ + (set_attr "atom_sse_attr" "sqrt") + (set_attr "prefix" "") + (set_attr "btver2_sse_attr" "sqrt") ++ (set_attr "hygon_sse_attr" "sqrt") + (set_attr "mode" "")]) + + 
(define_insn "*_vmsqrt2" +@@ -2605,6 +2613,7 @@ + (set_attr "atom_sse_attr" "sqrt") + (set_attr "prefix" "") + (set_attr "btver2_sse_attr" "sqrt") ++ (set_attr "hygon_sse_attr" "sqrt") + (set_attr "mode" "")]) + + (define_expand "rsqrt2" +@@ -2806,6 +2815,7 @@ + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "btver2_sse_attr" "maxmin") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -2830,6 +2840,7 @@ + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "btver2_sse_attr" "maxmin") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -2854,6 +2865,7 @@ + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseadd") + (set_attr "btver2_sse_attr" "maxmin") ++ (set_attr "hygon_sse_attr" "maxmin") + (set (attr "prefix") + (cond [(eq_attr "alternative" "0") + (const_string "orig") +@@ -2881,6 +2893,7 @@ + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sse") + (set_attr "btver2_sse_attr" "maxmin") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix" "") + (set_attr "mode" "")]) + +@@ -7635,6 +7648,7 @@ + (set_attr "bdver1_decode" "double,direct,*") + (set_attr "btver2_decode" "double,double,double") + (set_attr "znver1_decode" "double,double,double") ++ (set_attr "dhyana_decode" "double,double,double") + (set (attr "length_vex") + (if_then_else + (and (match_test "mode == DImode") +@@ -8019,6 +8033,7 @@ + (set_attr "bdver1_decode" "double,direct,*") + (set_attr "btver2_decode" "double,double,double") + (set_attr "znver1_decode" "double,double,double") ++ (set_attr "dhyana_decode" "double,double,double") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "DF")]) + +@@ -10567,6 +10582,10 @@ + (const_string "fmov") + ] + (const_string "ssemov"))) ++ (set (attr "hygon_sse_attr") ++ (if_then_else (eq_attr "alternative" "8,9,10") ++ (const_string "insr") ++ (const_string "*"))) + (set (attr "prefix_extra") + (if_then_else 
(eq_attr "alternative" "8,9,10") + (const_string "1") +@@ -10634,6 +10653,13 @@ + (if_then_else (eq_attr "alternative" "0,1,2,5,6,9") + (const_string "ssemov") + (const_string "sselog"))) ++ (set (attr "hygon_sse_attr") ++ (cond [(eq_attr "alternative" "5,6,9") ++ (const_string "blend") ++ (eq_attr "alternative" "3,4,7,8,10,11") ++ (const_string "insr") ++ ] ++ (const_string "*"))) + (set (attr "prefix_data16") + (if_then_else (eq_attr "alternative" "3,4") + (const_string "1") +@@ -15956,6 +15982,7 @@ + "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) +@@ -15997,6 +16024,7 @@ + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "")]) +@@ -16009,6 +16037,7 @@ + "TARGET_AVX512BW" + "vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +@@ -16107,6 +16136,7 @@ + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) +@@ -16122,6 +16152,7 @@ + vpw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "*,1") + (set_attr "prefix" "orig,vex") +@@ -16191,6 +16222,7 @@ + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_extra" "1,1,*") + (set_attr 
"prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) +@@ -16206,6 +16238,7 @@ + vpb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "maxmin") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "*,1") + (set_attr "prefix" "orig,vex") +@@ -17885,6 +17918,7 @@ + } + [(set_attr "isa" "noavx,noavx,avx,avx,,,avx2") + (set_attr "type" "sselog") ++ (set_attr "hygon_sse_attr" "insr") + (set (attr "prefix_rex") + (if_then_else + (and (not (match_test "TARGET_AVX")) +@@ -19453,6 +19487,10 @@ + (const_string "mmxmov") + ] + (const_string "sselog"))) ++ (set (attr "hygon_sse_attr") ++ (if_then_else (eq_attr "alternative" "0,1,2,3") ++ (const_string "insr") ++ (const_string "*"))) + (set (attr "prefix_extra") + (if_then_else (eq_attr "alternative" "0,1,2,3") + (const_string "1") +@@ -19553,6 +19591,10 @@ + (eq_attr "alternative" "0,1,2,3,4,5") + (const_string "sselog") + (const_string "ssemov"))) ++ (set (attr "hygon_sse_attr") ++ (if_then_else (eq_attr "alternative" "0,1,2,3") ++ (const_string "insr") ++ (const_string "*"))) + (set (attr "prefix_rex") + (if_then_else (eq_attr "alternative" "0,1,2,3") + (const_string "1") +@@ -19754,8 +19796,10 @@ + vpavg\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "avg") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,") ++ + (set_attr "mode" "")]) + + ;; The correct representation for this is absolutely enormous, and +@@ -19772,6 +19816,7 @@ + vpsadbw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sseiadd") ++ (set_attr "hygon_sse_attr" "sadbw") + (set_attr "atom_unit" "simul") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,maybe_evex") +@@ -19785,6 +19830,7 @@ + "TARGET_SSE" + "%vmovmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "movnt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + 
+@@ -19797,6 +19843,7 @@ + "TARGET_64BIT && TARGET_SSE" + "%vmovmsk\t{%1, %k0|%k0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "movnt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "")]) + +@@ -20085,6 +20132,7 @@ + return "%vmaskmovdqu\t{%2, %1|%1, %2}"; + } + [(set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend") + (set_attr "prefix_data16" "1") + (set (attr "length_address") + (symbol_ref ("Pmode != word_mode"))) +@@ -20093,6 +20141,7 @@ + (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) + (set_attr "prefix" "maybe_vex") + (set_attr "znver1_decode" "vector") ++ (set_attr "dhyana_decode" "vector") + (set_attr "mode" "TI")]) + + (define_insn "sse_ldmxcsr" +@@ -20797,6 +20846,7 @@ + vpsign\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog1") ++ (set_attr "hygon_sse_attr" "sign") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "orig,vex") +@@ -20816,6 +20866,7 @@ + [(set_attr "isa" "*,noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "sselog1") ++ (set_attr "hygon_sse_attr" "sign") + (set_attr "prefix_extra" "1") + (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) + (set_attr "mode" "DI,TI,TI")]) +@@ -20944,6 +20995,7 @@ + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_vex") ++ (set_attr "hygon_sse_attr" "abs") + (set_attr "mode" "")]) + + (define_insn "abs2_mask" +@@ -20957,6 +21009,7 @@ + "vpabs\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "sselog1") + (set_attr "prefix" "evex") ++ (set_attr "hygon_sse_attr" "abs") + (set_attr "mode" "")]) + + (define_insn "abs2_mask" +@@ -20970,6 +21023,7 @@ + "vpabs\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" + [(set_attr "type" "sselog1") + (set_attr "prefix" "evex") ++ (set_attr "hygon_sse_attr" "abs") + (set_attr "mode" "")]) + + (define_expand "abs2" +@@ -21001,6 +21055,7 @@ + "TARGET_SSE4A" + "movnt\t{%1, 
%0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "movnt") + (set_attr "mode" "")]) + + (define_insn "sse4a_vmmovnt" +@@ -21013,6 +21068,7 @@ + "TARGET_SSE4A" + "movnt\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "movnt") + (set_attr "mode" "")]) + + (define_insn "sse4a_extrqi" +@@ -21089,6 +21145,7 @@ + vblend\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") +@@ -21109,6 +21166,7 @@ + vblendv\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blendv,blendv,blend") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") +@@ -21141,6 +21199,7 @@ + } + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blendv,blendv,blend") + (set_attr "length_immediate" "1") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") +@@ -21265,6 +21324,7 @@ + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") + (set_attr "znver1_decode" "vector,vector,vector") ++ (set_attr "dhyana_decode" "vector,vector,vector") + (set_attr "mode" "")]) + + ;; Mode attribute used by `vmovntdqa' pattern +@@ -21279,6 +21339,7 @@ + "%vmovntdqa\t{%1, %0|%0, %1}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "movnt") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "")]) +@@ -21302,6 +21363,7 @@ + (set_attr "prefix" "orig,orig,vex") + (set_attr "btver2_decode" "vector,vector,vector") + (set_attr "znver1_decode" "vector,vector,vector") ++ (set_attr "dhyana_decode" "vector,vector,vector") + (set_attr "mode" "")]) + + (define_insn "_packusdw" 
+@@ -21336,6 +21398,7 @@ + vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix" "orig,orig,vex") +@@ -21429,6 +21492,7 @@ + vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,orig,vex") +@@ -21500,6 +21564,7 @@ + return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + } + [(set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") +@@ -21514,6 +21579,7 @@ + "TARGET_AVX2" + "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemov") ++ (set_attr "hygon_sse_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "vex") +@@ -25944,6 +26010,7 @@ + "TARGET_AVX" + "vmaskmov\t{%1, %2, %0|%0, %2, %1}" + [(set_attr "type" "sselog1") ++ (set_attr "hygon_sse_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "btver2_decode" "vector") +@@ -25959,6 +26026,7 @@ + "TARGET_AVX" + "vmaskmov\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog1") ++ (set_attr "hygon_sse_attr" "blend") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "vex") + (set_attr "btver2_decode" "vector") +diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h +index f105d57cae7..1b06def4e95 100644 +--- a/gcc/config/i386/x86-tune-costs.h ++++ b/gcc/config/i386/x86-tune-costs.h +@@ -3467,3 +3467,154 @@ struct processor_costs core_cost = { + "16", /* Func alignment. */ + }; + ++/* DHYANA has optimized REP instruction for medium sized blocks, but for ++ very small blocks it is better to use loop. 
For large blocks, libcall ++ can do nontemporary accesses and beat inline considerably. */ ++static stringop_algs dhyana_memcpy[2] = { ++ /* 32-bit tuning. */ ++ {libcall, {{6, loop, false}, ++ {14, unrolled_loop, false}, ++ {-1, libcall, false}}}, ++ /* 64-bit tuning. */ ++ {libcall, {{16, loop, false}, ++ {128, rep_prefix_8_byte, false}, ++ {-1, libcall, false}}}}; ++static stringop_algs dhyana_memset[2] = { ++ /* 32-bit tuning. */ ++ {libcall, {{8, loop, false}, ++ {24, unrolled_loop, false}, ++ {128, rep_prefix_4_byte, false}, ++ {-1, libcall, false}}}, ++ /* 64-bit tuning. */ ++ {libcall, {{48, unrolled_loop, false}, ++ {128, rep_prefix_8_byte, false}, ++ {-1, libcall, false}}}}; ++struct processor_costs dhyana_cost = { ++ { ++ /* Start of register allocator costs. integer->integer move cost is 2. */ ++ ++ /* reg-reg moves are done by renaming and thus they are even cheaper than ++ 1 cycle. Becuase reg-reg move cost is 2 and the following tables correspond ++ to doubles of latencies, we do not model this correctly. It does not ++ seem to make practical difference to bump prices up even more. */ ++ 6, /* cost for loading QImode using ++ movzbl. */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers. */ ++ 2, /* cost of reg,reg fld/fst. */ ++ {6, 6, 16}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode. */ ++ {8, 8, 16}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode. */ ++ 2, /* cost of moving MMX register. */ ++ {6, 6}, /* cost of loading MMX registers ++ in SImode and DImode. */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode. */ ++ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ ++ {6, 6, 6, 12, 24}, /* cost of loading SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ {8, 8, 8, 16, 32}, /* cost of storing SSE registers ++ in 32,64,128,256 and 512-bit. 
*/ ++ 6, 6, /* SSE->integer and integer->SSE moves. */ ++ 8, 8, /* mask->integer and integer->mask moves */ ++ {6, 6, 6}, /* cost of loading mask register ++ in QImode, HImode, SImode. */ ++ {8, 8, 8}, /* cost if storing mask register ++ in QImode, HImode, SImode. */ ++ 2, /* cost of moving mask register. */ ++ /* End of register allocator costs. */ ++ }, ++ ++ COSTS_N_INSNS (1), /* cost of an add instruction. */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction. */ ++ COSTS_N_INSNS (1), /* variable shift costs. */ ++ COSTS_N_INSNS (1), /* constant shift costs. */ ++ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ ++ COSTS_N_INSNS (3), /* HI. */ ++ COSTS_N_INSNS (3), /* SI. */ ++ COSTS_N_INSNS (3), /* DI. */ ++ COSTS_N_INSNS (3)}, /* other. */ ++ 0, /* cost of multiply per each bit ++ set. */ ++ /* Depending on parameters, idiv can get faster on HYGON. This is upper ++ bound. */ ++ {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */ ++ COSTS_N_INSNS (22), /* HI. */ ++ COSTS_N_INSNS (30), /* SI. */ ++ COSTS_N_INSNS (45), /* DI. */ ++ COSTS_N_INSNS (45)}, /* other. */ ++ COSTS_N_INSNS (1), /* cost of movsx. */ ++ COSTS_N_INSNS (1), /* cost of movzx. */ ++ 8, /* "large" insn. */ ++ 9, /* MOVE_RATIO. */ ++ 6, /* CLEAR_RATIO */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer ++ registers. */ ++ {6, 6, 6, 12, 24}, /* cost of loading SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit */ ++ {8, 8, 8, 16, 32}, /* cost of storing SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit */ ++ {6, 6, 6, 12, 24}, /* cost of unaligned loads. */ ++ {8, 8, 8, 16, 32}, /* cost of unaligned stores. */ ++ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ ++ 6, /* cost of moving SSE register to integer. */ ++ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, ++ throughput 12. 
Approx 9 uops do not depend on vector size and every load ++ is 7 uops. */ ++ 18, 8, /* Gather load static, per_elt. */ ++ 18, 10, /* Gather store static, per_elt. */ ++ 32, /* size of l1 cache. */ ++ 512, /* size of l2 cache. */ ++ 64, /* size of prefetch block. */ ++ /* DHYANA processors never drop prefetches; if they cannot be performed ++ immediately, they are queued. We set number of simultaneous prefetches ++ to a large constant to reflect this (it probably is not a good idea not ++ to limit number of prefetches at all, as their execution also takes some ++ time). */ ++ 100, /* number of parallel prefetches. */ ++ 3, /* Branch cost. */ ++ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (5), /* cost of FMUL instruction. */ ++ /* Latency of fdiv is 8-15. */ ++ COSTS_N_INSNS (15), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ /* Latency of fsqrt is 4-10. */ ++ COSTS_N_INSNS (10), /* cost of FSQRT instruction. */ ++ ++ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ ++ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ ++ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ ++ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ ++ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ ++ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ ++ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ ++ /* 9-13 */ ++ COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ ++ COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ ++ COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ ++ /* DHYANA can execute 4 integer operations per cycle. FP operations take 3 cycles ++ and it can execute 2 integer additions and 2 multiplications thus ++ reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests ++ that 4 works better than 6 probably due to register pressure. 
++ ++ Integer vector operations are taken by FP unit and execute 3 vector ++ plus/minus operations per cycle but only one multiply. This is adjusted ++ in ix86_reassociation_width. */ ++ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ ++ dhyana_memcpy, ++ dhyana_memset, ++ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ ++ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ ++ "16", /* Loop alignment. */ ++ "16", /* Jump alignment. */ ++ "0:0:8", /* Label alignment. */ ++ "16", /* Func alignment. */ ++}; +diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc +index 2ead7ac557b..bccc6bc5451 100644 +--- a/gcc/config/i386/x86-tune-sched.cc ++++ b/gcc/config/i386/x86-tune-sched.cc +@@ -74,6 +74,7 @@ ix86_issue_rate (void) + case PROCESSOR_HASWELL: + case PROCESSOR_TREMONT: + case PROCESSOR_ALDERLAKE: ++ case PROCESSOR_DHYANA: + case PROCESSOR_GENERIC: + return 4; + +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index e6b9e21250f..d69bed78f8b 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -42,7 +42,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", + m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GOLDMONT +- | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming + on modern chips. 
Prefer stores affecting whole integer register +@@ -52,7 +52,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", + m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 + | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL + | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT | m_ALDERLAKE +- | m_GENERIC) ++ | m_DHYANA | m_GENERIC) + + /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store + destinations to be 128bit to allow register renaming on 128bit SSE units, +@@ -62,7 +62,8 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", + that can be partly masked by careful scheduling of moves. */ + DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 +- | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_DHYANA ++ | m_GENERIC) + + /* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids + partial write to the destination in scalar SSE conversion from FP +@@ -70,14 +71,14 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", + DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY, + "sse_partial_reg_fp_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 +- | m_BDVER | m_ZNVER | m_ALDERLAKE | m_GENERIC) ++ | m_BDVER | m_ZNVER | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial + write to the destination in scalar SSE conversion from integer to FP. 
*/ + DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, + "sse_partial_reg_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 +- | m_BDVER | m_ZNVER | m_ALDERLAKE | m_GENERIC) ++ | m_BDVER | m_ZNVER | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_DEST_FALSE_DEP_FOR_GLC: This knob inserts zero-idiom before + several insns to break false dependency on the dest register for GLC +@@ -109,32 +110,32 @@ DEF_TUNE (X86_TUNE_MOVX, "movx", + m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE + | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL + | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE +- | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by + full sized loads. */ + DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", + m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL + | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE +- | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent + conditional jump instruction for 32 bit TARGET. */ + DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", +- m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC) ++ m_CORE_ALL | m_BDVER | m_ZNVER | m_DHYANA | m_GENERIC) + + /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent + conditional jump instruction for TARGET_64BIT. */ + DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER +- | m_ZNVER | m_GENERIC) ++ | m_ZNVER | m_DHYANA | m_GENERIC) + + /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a + subsequent conditional jump instruction when the condition jump + check sign flag (SF) or overflow flag (OF). 
*/ + DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER +- | m_ZNVER | m_GENERIC) ++ | m_ZNVER | m_DHYANA | m_GENERIC) + + /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional + jump instruction when the alu instruction produces the CCFLAG consumed by +@@ -172,14 +173,14 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move", + /* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits. */ + DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", + m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_TREMONT +- | m_ALDERLAKE | m_GENERIC) ++ | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions. + Some chips, like 486 and Pentium works faster with separate load + and push instructions. */ + DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory", + m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE +- | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred + over esp subtraction. */ +@@ -256,7 +257,8 @@ DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", + DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", + ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GOLDMONT +- | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)) ++ | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_DHYANA ++ | m_GENERIC)) + + /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag + will impact LEA instruction selection. 
*/ +@@ -304,14 +306,14 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB, + DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES, + "misaligned_move_string_pro_epilogues", + m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_TREMONT +- | m_ALDERLAKE | m_GENERIC) ++ | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_USE_SAHF: Controls use of SAHF. */ + DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER + | m_BTVER | m_ZNVER | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT +- | m_ALDERLAKE | m_GENERIC) ++ | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. */ + DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", +@@ -322,7 +324,7 @@ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", + DEF_TUNE (X86_TUNE_USE_BT, "use_bt", + m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL + | m_LAKEMONT | m_AMD_MULTIPLE | m_GOLDMONT | m_GOLDMONT_PLUS +- | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency + for bit-manipulation instructions. */ +@@ -343,7 +345,8 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", + + /* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. 
*/ + DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence", +- m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_DHYANA ++ | m_GENERIC) + + /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by + generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) - +@@ -368,10 +371,10 @@ DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", + ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL + | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE + | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE +- | m_GENERIC)) ++ | m_DHYANA | m_GENERIC)) + + /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */ +-DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) ++DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_DHYANA) + + /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */ + DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", +@@ -393,29 +396,30 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", + DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM + | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE +- | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GENERIC) ++ | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_DHYANA | m_GENERIC) + + /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores + instead of a sequence loading registers by parts. 
*/ + DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM + | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS +- | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_DHYANA | m_GENERIC) + + /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single + precision 128bit instructions instead of double where possible. */ + DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal", +- m_BDVER | m_ZNVER) ++ m_BDVER | m_ZNVER | m_DHYANA) + + /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */ + DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", +- m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_ALDERLAKE | m_DHYANA ++ | m_GENERIC) + + /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to + xorps/xorpd and other variants. */ + DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER +- | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_DHYANA | m_GENERIC) + + /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer + to SSE registers. If disabled, the moves will be done by storing +@@ -467,7 +471,7 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", + /* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2 + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_DHYANA | m_GENERIC)) + + /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2 + elements. 
*/ +@@ -477,7 +481,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts", + /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4 + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_DHYANA | m_GENERIC)) + + /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4 + elements. */ +@@ -487,7 +491,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", + /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", +- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_GENERIC)) ++ ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_DHYANA | m_GENERIC)) + + /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more + elements. */ +@@ -496,7 +500,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER, "use_scatter", + + /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or + smaller FMA chain. */ +-DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3) ++DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_DHYANA) + + /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or + smaller FMA chain. */ +@@ -524,16 +528,16 @@ DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL, "256_unaligned_load_optimal", + /* X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL: if false, unaligned stores are + split. */ + DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_store_optimal", +- ~(m_NEHALEM | m_SANDYBRIDGE | m_BDVER | m_ZNVER1)) ++ ~(m_NEHALEM | m_SANDYBRIDGE | m_BDVER | m_ZNVER1 | m_DHYANA)) + + /* X86_TUNE_AVX256_SPLIT_REGS: if true, AVX256 ops are split into two AVX128 ops. 
*/ + DEF_TUNE (X86_TUNE_AVX256_SPLIT_REGS, "avx256_split_regs",m_BDVER | m_BTVER2 +- | m_ZNVER1) ++ | m_ZNVER1 | m_DHYANA) + + /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for + the auto-vectorizer. */ + DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 +- | m_ZNVER1) ++ | m_ZNVER1 | m_DHYANA) + + /* X86_TUNE_AVX256_OPTIMAL: Use 256-bit AVX instructions instead of 512-bit AVX + instructions in the auto-vectorizer. */ +-- +2.22.0 + diff --git a/HYGON-0002-array-widen-compare.patch b/HYGON-0002-array-widen-compare.patch new file mode 100644 index 0000000..3842e28 --- /dev/null +++ b/HYGON-0002-array-widen-compare.patch @@ -0,0 +1,2092 @@ +From 4e00b2ddee6f8c173283fed91273a3cb4e8a3f8a Mon Sep 17 00:00:00 2001 +From: He Dian +Date: Tue, 14 Nov 2023 11:10:46 +0800 +Subject: [PATCH] [feat][gcc]: add feature array-widen-compare + +In the narrow-byte array comparison scenario, the types of pointers +pointing to array are extended so that elements of multiple bytes can +be loaded at a time when a wide type is used to dereference an array, +thereby improving the performance of this comparison scenario. In some +extreme situations this may result in unsafe behavior. +This option may generate better or worse code; results are highly dependent +on the structure of loops within the source code. 
+We can enable this feature by options: -O3 -farray-widen-compare + +Signed-off-by: Dian He +--- + gcc/Makefile.in | 1 + + gcc/common.opt | 5 + + gcc/doc/invoke.texi | 13 +- + gcc/opt-functions.awk | 2 +- + gcc/passes.def | 1 + + .../gcc.dg/tree-ssa/awiden-compare-1.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-2.c | 90 + + .../gcc.dg/tree-ssa/awiden-compare-3.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-4.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-5.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-6.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-7.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-8.c | 24 + + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + gcc/tree-ssa-loop-array-widen-compare.cc | 1647 +++++++++++++++++ + 16 files changed, 1906 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c + create mode 100644 gcc/tree-ssa-loop-array-widen-compare.c + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 31ff95500c9..0aabc6ea3f2 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1653,6 +1653,7 @@ OBJS = \ + tree-ssa-loop-ivopts.o \ + tree-ssa-loop-manip.o \ + tree-ssa-loop-niter.o \ ++ tree-ssa-loop-array-widen-compare.o \ + tree-ssa-loop-prefetch.o \ + tree-ssa-loop-split.o \ + tree-ssa-loop-unswitch.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index 8a0dafc522d..d3541b4e612 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1116,6 +1116,11 @@ fasynchronous-unwind-tables + Common 
Var(flag_asynchronous_unwind_tables) Optimization + Generate unwind tables that are exact at each instruction boundary. + ++farray-widen-compare ++Common Var(flag_array_widen_compare) Optimization ++Extends types for pointers to arrays to improve array comparsion performance. ++In some extreme situations this may result in unsafe behavior. ++ + fauto-inc-dec + Common Var(flag_auto_inc_dec) Init(1) Optimization + Generate auto-inc/dec instructions. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index cb83dd8a1cc..a76a3a3a877 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -507,7 +507,7 @@ Objective-C and Objective-C++ Dialects}. + -falign-loops[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol + -fno-allocation-dce -fallow-store-data-races @gol + -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol +--fauto-inc-dec -fbranch-probabilities @gol ++-farray-widen-compare -fauto-inc-dec -fbranch-probabilities @gol + -fcaller-saves @gol + -fcombine-stack-adjustments -fconserve-stack @gol + -fcompare-elim -fcprop-registers -fcrossjumping @gol +@@ -11386,6 +11386,17 @@ This pass is always skipped on architectures that do not have + instructions to support this. Enabled by default at @option{-O1} and + higher on architectures that support this. + ++@item -farray-widen-compare ++@opindex farray-widen-compare ++In the narrow-byte array comparison scenario, the types of pointers ++pointing to array are extended so that elements of multiple bytes can ++be loaded at a time when a wide type is used to dereference an array, ++thereby improving the performance of this comparison scenario. In some ++extreme situations this may result in unsafe behavior. ++ ++This option may generate better or worse code; results are highly dependent ++on the structure of loops within the source code. ++ + @item -fdce + @opindex fdce + Perform dead code elimination (DCE) on RTL@. 
+diff --git a/gcc/opt-functions.awk b/gcc/opt-functions.awk +index 2aee0b9f1c3..0288fb68adc 100644 +--- a/gcc/opt-functions.awk ++++ b/gcc/opt-functions.awk +@@ -179,7 +179,7 @@ function switch_bit_fields (flags) + flag_init("ToLower", flags) \ + byte_size_flag + +- if (flag_set_p("Report", flags)) ++ if (var_name(flags) != "flag_array_widen_compare" && flag_set_p("Report", flags)) + print "#error Report option property is dropped" + + sub(", $", "", result) +diff --git a/gcc/passes.def b/gcc/passes.def +index 375d3d62d51..8dbb7983e3e 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_dse); + NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */); + NEXT_PASS (pass_phiopt, true /* early_p */); ++ NEXT_PASS (pass_array_widen_compare); + NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_if_to_switch); + NEXT_PASS (pass_convert_switch); +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +new file mode 100644 +index 00000000000..c2498b12518 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +new file mode 100644 +index 00000000000..e5d6738dbd4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +@@ -0,0 +1,90 @@ ++/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define EMPTY_HASH_VALUE 0 ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++#define true 1 ++ ++typedef struct { ++ uint32_t len; ++ uint32_t dist; ++} lzma_match; ++ ++ ++lzma_match * ++func ( ++ const uint32_t len_limit, ++ const uint32_t pos, ++ const uint8_t *const cur, ++ uint32_t cur_match, ++ uint32_t depth, ++ uint32_t *const son, ++ const uint32_t cyclic_pos, ++ const uint32_t cyclic_size, ++ lzma_match *matches, ++ uint32_t len_best) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (true) ++ { ++ const uint32_t delta = pos - cur_match; ++ if (depth-- == 0 || delta >= cyclic_size) ++ { ++ *ptr0 = EMPTY_HASH_VALUE; ++ *ptr1 = EMPTY_HASH_VALUE; ++ return matches; ++ } ++ ++ uint32_t *const pair = son + ((cyclic_pos - delta + (delta > cyclic_pos ? 
cyclic_size : 0)) << 1); ++ ++ const uint8_t *const pb = cur -delta; ++ uint32_t len = my_min(len0, len1); ++ ++ if (pb[len] == cur[len]) ++ { ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ ++ if (len_best < len) ++ { ++ len_best = len; ++ matches->len = len; ++ matches->dist = delta - 1; ++ ++matches; ++ ++ if (len == len_limit) ++ { ++ *ptr1 = pair[0]; ++ *ptr0 = pair[1]; ++ return matches; ++ } ++ } ++ } ++ ++ if (pb[len] < cur[len]) ++ { ++ *ptr1 = cur_match; ++ ptr1 = pair + 1; ++ cur_match = *ptr1; ++ len1 = len; ++ } ++ else ++ { ++ *ptr0 = cur_match; ++ ptr0 = pair; ++ cur_match = *ptr0; ++ len0 = len; ++ } ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +new file mode 100644 +index 00000000000..6d0d36f3133 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ break; ++ len = len + 1; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +new file mode 100644 +index 00000000000..ee923e3ab21 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ break; ++ len = len + 2; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +new file mode 100644 +index 00000000000..ee2340af3f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len-1]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +new file mode 100644 +index 00000000000..57e93695765 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len++ != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +new file mode 100644 +index 00000000000..07af3edbd24 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ len = len + 1; ++ if (pb[len] != cur[len]) ++ break; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +new file mode 100644 +index 00000000000..4054e77dbbc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile { target {{ x86_64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -m64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ { ++ len = len - 1; ++ break; ++ } ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 2dae5e1c760..794b8017d18 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -216,6 +216,7 @@ DEFTIMEVAR (TV_TREE_NRV , "tree NRV optimization") + DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies") + DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier") + DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier") ++DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare") + DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") + DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") + DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 606d1d60b85..55ee2fe7f9e 100644 +--- 
a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -453,6 +453,7 @@ extern gimple_opt_pass *make_pass_cselim (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt); ++extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt); +diff --git a/gcc/tree-ssa-loop-array-widen-compare.cc b/gcc/tree-ssa-loop-array-widen-compare.cc +new file mode 100644 +index 00000000000..83908e385a6 +--- /dev/null ++++ b/gcc/tree-ssa-loop-array-widen-compare.cc +@@ -0,0 +1,1647 @@ ++/* Array widen compare. ++ Copyright (C) 2022-2022 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "gimple-ssa.h" ++#include "tree-pretty-print.h" ++#include "fold-const.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "tree-ssa-loop-manip.h" ++#include "tree-ssa-loop.h" ++#include "ssa.h" ++#include "tree-into-ssa.h" ++#include "cfganal.h" ++#include "cfgloop.h" ++#include "gimple-pretty-print.h" ++#include "tree-cfg.h" ++#include "cgraph.h" ++#include "print-tree.h" ++#include "cfghooks.h" ++#include "gimple-fold.h" ++ ++ ++/* This pass handles scenarios similar to the following: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++ } ++ ++ Features of this type of loop: ++ 1) the loop has two exits; ++ 2) One of the exits comes from the comparison result of the array; ++ ++ From the source code point of view, the pass completes the conversion of the ++ above scenario into: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ // align_loop ++ for (++len; len + sizeof (uint64_t) <= len_limit; len += sizeof (uint64_t)) ++ { ++ uint64_t a = *((uint64_t*)(cur+len)); ++ uint64_t b = *((uint64_t*)(pb+len)); ++ if (a != b) ++ { ++ int lz = __builtin_ctzll (a ^ b); ++ len += lz / 8; ++ return len; ++ } ++ } ++ // epilogue_loop ++ for (;len != len_limit; ++len) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++ } ++ ++ This pass is to complete the conversion of such scenarios from the internal ++ perspective of the compiler: ++ 1) determine_loop_form: The function completes the screening of such ++ scenarios; ++ 2) 
convert_to_new_loop: The function completes the conversion of ++ origin_loop to new loops, and removes origin_loop; ++ 3) origin_loop_info: The structure is used to record important information ++ of origin_loop: such as loop exit, growth step size ++ of loop induction variable, initial value ++ of induction variable, etc; ++ 4) create_new_loops: The function is used as the key content of the pass ++ to complete the creation of new loops. */ ++ ++/* The useful information of origin loop. */ ++struct origin_loop_info ++{ ++ tree base; /* The initial index of the array in the old loop. */ ++ tree limit; /* The limit index of the array in the old loop. */ ++ tree arr1; /* Array 1 in the old loop. */ ++ tree arr2; /* Array 2 in the old loop. */ ++ edge entry_edge; /* The edge into the old loop. */ ++ basic_block exit_bb1; ++ basic_block exit_bb2; ++ edge exit_e1; ++ edge exit_e2; ++ gimple *cond_stmt1; ++ gimple *cond_stmt2; ++ gimple *update_stmt; ++ bool exist_prolog_assgin; ++ /* Whether the marker has an initial value assigned ++ to the array index. */ ++ unsigned HOST_WIDE_INT step; ++ /* The growth step of the loop induction variable. */ ++ tree indvar; ++ tree arr2_off_to_arr1; ++ tree body_indvar; ++ auto_vec arr2_offs_to_arr1; ++ auto_vec arr2_offs_code_to_arr1; ++ auto_vec arr2_offs_to_arr1_without_indvar; ++ basic_block header; ++ basic_block body; ++ basic_block latch; ++}; ++ ++typedef struct origin_loop_info origin_loop_info; ++ ++static origin_loop_info origin_loop; ++hash_map defs_map; ++ ++/* Dump the bb information in a loop. 
*/ ++ ++static void ++dump_loop_bb (struct loop *loop) ++{ ++ basic_block *body = get_loop_body_in_dom_order (loop); ++ basic_block bb = NULL; ++ ++ for (unsigned i = 0; i < loop->num_nodes; i++) ++ { ++ bb = body[i]; ++ if (bb->loop_father != loop) ++ continue; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "===== the %dth bb of loop ==========:\n", i); ++ gimple_dump_bb (dump_file, bb, 0, dump_flags); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ free (body); ++} ++ ++/* Return true if the loop has precisely one backedge. */ ++ ++static bool ++loop_single_backedge_p (class loop *loop) ++{ ++ basic_block header = loop->header; ++ basic_block latch = loop->latch; ++ ++ gphi_iterator gsi=gsi_start_phis (header); ++ gphi *phi=gsi.phi (); ++ if (!phi) ++ return false; ++ if (phi->nargs!=2) ++ return false; ++ ++ return true; ++} ++ ++/* Return true if the loop has precisely one preheader BB. */ ++ ++static bool ++loop_single_preheader_bb (class loop *loop) ++{ ++ basic_block header = loop->header; ++ if (EDGE_COUNT (header->preds) != 2) ++ return false; ++ ++ edge e1 = EDGE_PRED (header, 0); ++ edge e2 = EDGE_PRED (header, 1); ++ ++ if ((e1->src == loop->latch && e2->src->loop_father != loop) ++ || (e2->src == loop->latch && e1->src->loop_father != loop)) ++ return true; ++ ++ return false; ++} ++ ++/* Initialize the origin_loop structure. 
*/ ++static void ++init_origin_loop_structure () ++{ ++ origin_loop.base = NULL; ++ origin_loop.limit = NULL; ++ origin_loop.arr1 = NULL; ++ origin_loop.arr2 = NULL; ++ origin_loop.exit_e1 = NULL; ++ origin_loop.exit_e2 = NULL; ++ origin_loop.exit_bb1 = NULL; ++ origin_loop.exit_bb2 =NULL; ++ origin_loop.entry_edge = NULL; ++ origin_loop.cond_stmt1 = NULL; ++ origin_loop.cond_stmt2 = NULL; ++ origin_loop.update_stmt = NULL; ++ origin_loop.exist_prolog_assgin = false; ++ origin_loop.step = 0; ++ origin_loop.indvar = NULL; ++ origin_loop.header=NULL; ++ origin_loop.body=NULL; ++ origin_loop.latch=NULL; ++ origin_loop.arr2_offs_to_arr1.release (); ++ origin_loop.arr2_offs_code_to_arr1.release (); ++ origin_loop.arr2_offs_to_arr1_without_indvar.release (); ++} ++ ++/* Get the edge that first entered the loop. */ ++ ++static edge ++get_loop_preheader_edge (class loop *loop) ++{ ++ edge e; ++ edge_iterator ei; ++ ++ FOR_EACH_EDGE (e, ei, loop->header->preds) ++ if (e->src != loop->latch) ++ break; ++ ++ if (!e) ++ { ++ gcc_assert (!loop_outer (loop)); ++ return single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ } ++ ++ return e; ++} ++ ++/* Make sure the exit condition stmt satisfies a specific form. */ ++ ++static bool ++check_cond_stmt (gimple *stmt) ++{ ++ if (!stmt) ++ return false; ++ if (gimple_code (stmt) != GIMPLE_COND) ++ return false; ++ ++ if (gimple_cond_code (stmt) != NE_EXPR ++ && gimple_cond_code (stmt) != EQ_EXPR ++ && gimple_cond_code (stmt) != LT_EXPR) ++ return false; ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ ++ /* The parameter that does not support the cond statement is not SSA_NAME. ++ eg: if (len_1 != 100). */ ++ if (TREE_CODE (lhs) != SSA_NAME || TREE_CODE (rhs) != SSA_NAME) ++ return false; ++ ++ return true; ++} ++ ++/* Record the exit information in the original loop including exit edge, ++ exit bb block, exit condition stmt, ++ eg: exit_eX origin_exit_bbX cond_stmtX. 
*/ ++ ++static bool ++record_origin_loop_exit_info (class loop *loop) ++{ ++ bool found = false; ++ edge e = NULL; ++ unsigned i = 0; ++ gimple *stmt; ++ ++ if (origin_loop.exit_e1 != NULL || origin_loop.exit_bb1 != NULL ++ || origin_loop.exit_e2 != NULL || origin_loop.exit_bb2 != NULL ++ || origin_loop.cond_stmt1 != NULL || origin_loop.cond_stmt2 != NULL) ++ return false; ++ ++ auto_vec exit_edges = get_loop_exit_edges (loop); ++ if (exit_edges == vNULL) ++ return false; ++ ++ if (exit_edges.length () != 2) ++ goto fail; ++ ++ FOR_EACH_VEC_ELT (exit_edges, i, e) ++ { ++ if (e->src == loop->header) ++ { ++ origin_loop.exit_e1 = e; ++ origin_loop.exit_bb1 = e->dest; ++ stmt = gsi_stmt (gsi_last_bb (e->src)); ++ if (check_cond_stmt (stmt)) ++ origin_loop.cond_stmt1 = stmt; ++ } ++ else ++ { ++ origin_loop.exit_e2 = e; ++ origin_loop.exit_bb2 = e->dest; ++ stmt = gsi_stmt (gsi_last_bb (e->src)); ++ if (check_cond_stmt (stmt)) ++ origin_loop.cond_stmt2 = stmt; ++ } ++ } ++ ++ if (origin_loop.exit_e1 != NULL && origin_loop.exit_bb1 != NULL ++ && origin_loop.exit_e2 != NULL && origin_loop.exit_bb2 != NULL ++ && origin_loop.cond_stmt1 != NULL && origin_loop.cond_stmt2 != NULL) ++ found = true; ++ ++fail: ++ exit_edges.release (); ++ return found; ++} ++ ++/* Returns true if t is SSA_NAME and user variable exists. */ ++ ++static bool ++ssa_name_var_p (tree t) ++{ ++ if (!t || TREE_CODE (t) != SSA_NAME) ++ return false; ++ if (SSA_NAME_VAR (t)) ++ return true; ++ return false; ++} ++ ++/* Returns true if t1 and t2 are SSA_NAME and belong to the same variable. */ ++ ++static bool ++same_ssa_name_var_p (tree t1, tree t2) ++{ ++ if (!ssa_name_var_p (t1) || !ssa_name_var_p (t2)) ++ return false; ++ if (SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2)) ++ return true; ++ return false; ++} ++ ++/* Get origin loop induction variable upper bound. 
*/ ++ ++static bool ++get_iv_upper_bound (gimple *stmt) ++{ ++ if (origin_loop.limit != NULL) ++ return false; ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ if (!lhs || !rhs) ++ return false; ++ ++ if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE) ++ return false; ++ ++ gimple *gl=SSA_NAME_DEF_STMT (lhs); ++ gimple *gr=SSA_NAME_DEF_STMT (rhs); ++ if (!gl || !gr) ++ return false; ++ ++ tree indvar=origin_loop.indvar; ++ if (same_ssa_name_var_p (lhs, indvar) ++ && !same_ssa_name_var_p (rhs, indvar)) ++ origin_loop.limit = rhs; ++ else if (!same_ssa_name_var_p (lhs, indvar) ++ && same_ssa_name_var_p (rhs, indvar)) ++ origin_loop.limit = lhs; ++ return true; ++} ++ ++/* Returns true only when the expression on the rhs code of stmt is PLUS_EXPR, ++ rhs1 is SSA_NAME with the same var as origin_loop base, and rhs2 is ++ INTEGER_CST. */ ++ ++static bool ++check_update_stmt (gimple *stmt) ++{ ++ if (!stmt) ++ return false; ++ ++ if (gimple_assign_rhs_code (stmt) != PLUS_EXPR) ++ return false; ++ ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (!(TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST)) ++ return false; ++ ++ if (!same_ssa_name_var_p (rhs1, origin_loop.indvar)) ++ return false; ++ ++ if (!tree_fits_uhwi_p (rhs2)) ++ return false; ++ ++ origin_loop.step = tree_to_uhwi (rhs2); ++ if (origin_loop.step != 1) ++ return false; ++ ++ return true; ++} ++ ++/* Get origin loop induction variable initial value. 
*/ ++ ++static bool ++get_iv_base (gimple *stmt) ++{ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ if (origin_loop.base != NULL || origin_loop.update_stmt != NULL) ++ return false; ++ ++ basic_block header = gimple_bb (stmt); ++ ++ gphi_iterator gsi; ++ edge e; ++ edge_iterator ei; ++ tree iv_after; ++ ++ for (gsi = gsi_start_phis (header); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gphi *phi = gsi.phi (); ++ tree res = gimple_phi_result (phi); ++ if (!same_ssa_name_var_p (res, origin_loop.limit==lhs?rhs:lhs)) ++ continue; ++ tree base = PHI_ARG_DEF_FROM_EDGE (phi, origin_loop.entry_edge); ++ origin_loop.base = base; ++ FOR_EACH_EDGE (e, ei, header->preds) ++ { ++ if (e != origin_loop.entry_edge) ++ { ++ iv_after = PHI_ARG_DEF_FROM_EDGE (phi, e); ++ gimple *update = SSA_NAME_DEF_STMT (iv_after); ++ if (!check_update_stmt (update)) ++ return false; ++ origin_loop.update_stmt = update; ++ if (gimple_bb (update) == header && iv_after == lhs) ++ origin_loop.exist_prolog_assgin = true; ++ } ++ } ++ } ++ ++ if (origin_loop.base != NULL && origin_loop.update_stmt != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* Record the upper bound and initial value of the induction variable in the ++ original loop; When prolog_assign is present, make sure loop header is in ++ simple form; And the interpretation of prolog_assign is as follows: ++ eg: while (++len != limit) ++ ...... ++ For such a loop, ++len will be processed before entering header_bb, and the ++ assign is regarded as the prolog_assign of the loop. 
*/ ++ ++static bool ++record_origin_loop_header (class loop *loop) ++{ ++ basic_block header = loop->header; ++ origin_loop.header=header; ++ ++ ++ if (origin_loop.entry_edge || origin_loop.base ++ || origin_loop.update_stmt || origin_loop.limit) ++ return false; ++ origin_loop.entry_edge = get_loop_preheader_edge (loop); ++ ++ gphi_iterator gsi=gsi_start_phis (header); ++ gphi *phi=gsi.phi (); ++ origin_loop.indvar=gimple_phi_result (phi); ++ if (!origin_loop.indvar) ++ return false; ++ ++ for (gimple_stmt_iterator gsi=gsi_last_bb (header); ++ !gsi_end_p (gsi); ++ gsi_prev (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (stmt && is_gimple_debug (stmt)) ++ continue; ++ if (stmt && gimple_code (stmt) == GIMPLE_COND) ++ { ++ if (!get_iv_upper_bound (stmt)) ++ return false; ++ if (!get_iv_base (stmt)) ++ return false; ++ } else if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN) ++ { ++ if (stmt != origin_loop.update_stmt ++ || !origin_loop.exist_prolog_assgin) ++ return false; ++ } else ++ return false; ++ } ++ ++ if (!(origin_loop.entry_edge && origin_loop.base ++ && origin_loop.update_stmt && origin_loop.limit)) ++ return false; ++ ++ return true; ++} ++ ++/* When prolog_assign does not exist, make sure that update_stmt exists in the ++ loop latch, and its form is a specific form, eg: ++ len_2 = len_1 + 1. 
*/ ++ ++static bool ++record_origin_loop_latch (class loop *loop) ++{ ++ basic_block latch = loop->latch; ++ basic_block body = gimple_bb (origin_loop.cond_stmt2); ++ origin_loop.latch=latch; ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ ++ gsi = gsi_start_bb (latch); ++ ++ if (origin_loop.exist_prolog_assgin) ++ { ++ if (gsi_end_p (gsi) && latch != body) ++ return true; ++ else if (latch==body) ++ return true; ++ } else ++ { ++ for (gsi = gsi_start_bb (latch); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt == origin_loop.update_stmt) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* Returns true when the DEF_STMT corresponding to arg0 of the mem_ref tree ++ satisfies the POINTER_PLUS_EXPR type. */ ++ ++static bool ++check_body_mem_ref (tree mem_ref) ++{ ++ tree arg0 = TREE_OPERAND (mem_ref , 0); ++ tree arg1 = TREE_OPERAND (mem_ref , 1); ++ ++ if (TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE ++ && TREE_CODE (arg1) == INTEGER_CST ++ && tree_to_uhwi (arg1) == 0) ++ { ++ gimple *tmp_g = SSA_NAME_DEF_STMT (arg0); ++ if (tmp_g && gimple_assign_rhs_code (tmp_g) == POINTER_PLUS_EXPR) ++ return true; ++ } ++ return false; ++} ++ ++/* Returns true if the rh2 of the current stmt comes from the indvar in the ++ original loop. 
*/ ++ ++static bool ++check_body_pointer_plus (gimple *stmt) ++{ ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (TREE_TYPE (rhs1)) != POINTER_TYPE ++ && gimple_assign_rhs_code (stmt)!=POINTER_PLUS_EXPR) ++ return false; ++ ++ tree body_indvar=origin_loop.body_indvar; ++ basic_block body=origin_loop.body; ++ tree tmp_rhs1=NULL, tmp_rhs2=NULL, tmp_lhs=NULL, off_without_indvar; ++ gimple *g=NULL; ++ for (g=SSA_NAME_DEF_STMT (rhs2); ++ g && gimple_bb (g)==body; ++ g=SSA_NAME_DEF_STMT (tmp_rhs1)) ++ { ++ tmp_rhs1=gimple_assign_rhs1 (g); ++ off_without_indvar=tmp_rhs2=gimple_assign_rhs2 (g); ++ tmp_lhs=gimple_assign_lhs (g); ++ if (tmp_lhs==body_indvar) ++ break; ++ if (gimple_assign_rhs_code (g)!=MINUS_EXPR ++ && gimple_assign_rhs_code (g)!=PLUS_EXPR) ++ return false; ++ if (!tmp_rhs2) ++ return false; ++ origin_loop.arr2_offs_to_arr1.safe_push (tmp_rhs2); ++ origin_loop.arr2_offs_code_to_arr1.safe_push (gimple_assign_rhs_code (g)); ++ ++ g=SSA_NAME_DEF_STMT (tmp_rhs2); ++ if (g && (gimple_bb (g)==origin_loop.header ++ || gimple_bb (g)==origin_loop.body ++ || gimple_bb (g)==origin_loop.latch)) ++ { ++ tmp_rhs1=gimple_assign_rhs1 (g); ++ tmp_rhs2=gimple_assign_rhs2 (g); ++ if (tmp_rhs1 && SSA_NAME_DEF_STMT (tmp_rhs1) ++ && (gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs1))==origin_loop.header ++ || gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs1))==origin_loop.body ++ || gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs1))==origin_loop.latch)) ++ return false; ++ if (tmp_rhs2 && SSA_NAME_DEF_STMT (tmp_rhs2) ++ && (gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs2))==origin_loop.header ++ || gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs2))==origin_loop.body ++ || gimple_bb (SSA_NAME_DEF_STMT (tmp_rhs2))==origin_loop.latch)) ++ return false; ++ origin_loop.arr2_offs_to_arr1_without_indvar.safe_push ( ++ SSA_NAME_DEF_STMT (off_without_indvar)); ++ } else ++ origin_loop.arr2_offs_to_arr1_without_indvar.safe_push ( ++ NULL); ++ } ++ ++ if (!origin_loop.arr1) ++ 
origin_loop.arr1 = rhs1; ++ else if (!origin_loop.arr2) ++ origin_loop.arr2 = rhs1; ++ else ++ return false; ++ ++ return true; ++} ++ ++/* Record the array comparison information in the original loop, while ensuring ++ that there are only statements related to cont_stmt in the loop body. */ ++ ++static bool ++record_origin_loop_body (class loop *loop) ++{ ++ basic_block body = gimple_bb (origin_loop.cond_stmt2); ++ origin_loop.body=body; ++ ++ ++ if (origin_loop.arr1 != NULL || origin_loop.arr2 != NULL) ++ return false; ++ ++ gimple_stmt_iterator gsi = gsi_start_bb (body); ++ gimple *g = NULL; ++ for (g = gsi_stmt (gsi); ++ g && is_gimple_debug (g); ++ gsi_next (&gsi), g=gsi_stmt (gsi)); ++ tree body_indvar=gimple_assign_lhs (gsi_stmt (gsi)); ++ if (!body_indvar) ++ return false; ++ origin_loop.body_indvar=body_indvar; ++ for (; !gsi_end_p (gsi); gsi_next (&gsi)) ++ gimple_set_visited (gsi_stmt (gsi), false); ++ ++ tree cond_lhs = gimple_cond_lhs (origin_loop.cond_stmt2); ++ tree cond_rhs = gimple_cond_rhs (origin_loop.cond_stmt2); ++ if (TREE_CODE (TREE_TYPE (cond_lhs)) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (cond_rhs)) != INTEGER_TYPE) ++ return false; ++ ++ auto_vec stack; ++ stack.safe_push (cond_lhs); ++ stack.safe_push (cond_rhs); ++ gimple_set_visited (origin_loop.cond_stmt2, true); ++ ++ while (!stack.is_empty ()) ++ { ++ tree op = stack.pop (); ++ gimple *g = SSA_NAME_DEF_STMT (op); ++ if (!g || gimple_bb (g) != body || !is_gimple_assign (g)) ++ continue; ++ gimple_set_visited (g, true); ++ if (gimple_assign_rhs_code (g) == MEM_REF) ++ { ++ tree mem_ref = gimple_assign_rhs1 (g); ++ if (!check_body_mem_ref (mem_ref)) ++ return false; ++ stack.safe_push (TREE_OPERAND (mem_ref , 0)); ++ } else if (gimple_assign_rhs_code (g) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (g); ++ if (!check_body_pointer_plus (g)) ++ return false; ++ //stack.safe_push (rhs2); ++ } else if (gimple_assign_rhs_code (g) == NOP_EXPR) ++ { ++ tree rhs = 
gimple_assign_rhs1 (g); ++ if (!same_ssa_name_var_p (rhs, origin_loop.indvar)) ++ return false; ++ stack.safe_push (rhs); ++ } else ++ return false; ++ } ++ if (!origin_loop.arr1 || !origin_loop.arr2) ++ return false; ++ ++ return true; ++} ++ ++/* Dump the original loop information to see if the origin loop ++ form matches. */ ++ ++static void ++dump_origin_loop_info () ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nThe origin loop info:\n"); ++ fprintf (dump_file, "\n the origin_loop.limit is:\n"); ++ print_node (dump_file, "", origin_loop.limit, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.base is:\n"); ++ print_node (dump_file, "", origin_loop.base, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.arr1 is:\n"); ++ print_node (dump_file, "", origin_loop.arr1, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.arr2 is:\n"); ++ print_node (dump_file, "", origin_loop.arr2, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.cond_stmt1 is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.cond_stmt1, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.cond_stmt2 is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.cond_stmt2, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.update_stmt is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.update_stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++} ++ ++/* Returns true only if the exit bb of the original loop is unique and its phi ++ node parameter comes from the same variable. 
*/ ++ ++static bool ++check_exit_bb (class loop *loop) ++{ ++ if (origin_loop.exit_bb1 != origin_loop.exit_bb2 ++ || flow_bb_inside_loop_p (loop, origin_loop.exit_bb1)) ++ return false; ++ ++ gphi_iterator gsi; ++ for (gsi = gsi_start_phis (origin_loop.exit_bb1); ++ !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ gphi *phi = gsi.phi (); ++ tree res = gimple_phi_result (phi); ++ if (same_ssa_name_var_p (res, origin_loop.indvar)) ++ return true; ++ } ++ ++ return false; ++} ++ ++static bool if_limit_related_to_base_indvar (tree base, tree limit, int depth) ++{ ++ if (depth<=0) ++ return false; ++ if (!limit || !base) ++ return false; ++ if (TREE_CODE (limit)!=SSA_NAME || TREE_CODE (base)!=SSA_NAME) ++ return false; ++ gimple *gb=SSA_NAME_DEF_STMT (base); ++ gimple *gl=SSA_NAME_DEF_STMT (limit); ++ if (!gl) ++ return false; ++ tree rhs1=gimple_assign_rhs1 (gl); ++ tree rhs2=gimple_assign_rhs2 (gl); ++ if (rhs1==base || rhs2==base) ++ return true; ++ --depth; ++ return (if_limit_related_to_base_indvar (base, rhs1, depth) ++ || if_limit_related_to_base_indvar (base, rhs2, depth)); ++} ++ ++ ++/* Make sure that the recorded origin_loop information meets the ++ relative requirements. 
*/ ++static bool ++check_origin_loop_info (class loop *loop) ++{ ++ dump_origin_loop_info (); ++ tree arr1_elem_size, arr2_elem_size; ++ ++ if (!check_exit_bb (loop)) ++ return false; ++ ++ if (TREE_CODE (TREE_TYPE (origin_loop.arr1)) != POINTER_TYPE ++ || TREE_CODE (TREE_TYPE (origin_loop.arr2)) != POINTER_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (origin_loop.arr1))) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (origin_loop.arr2))) != INTEGER_TYPE) ++ return false; ++ ++ arr1_elem_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (origin_loop.arr1))); ++ arr2_elem_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (origin_loop.arr2))); ++ ++ if (tree_to_uhwi (arr1_elem_size)>16 || tree_to_uhwi (arr2_elem_size)>16) ++ return false; ++ ++ return true; ++} ++ ++/* Record the useful information of the original loop and judge whether the ++ information meets the specified conditions. */ ++ ++static bool ++check_record_loop_form (class loop *loop) ++{ ++ ++ if (!record_origin_loop_exit_info (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop exit information.\n"); ++ } ++ return false; ++ } ++ ++ if (!record_origin_loop_header (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop header information.\n"); ++ } ++ return false; ++ } ++ ++ if (!record_origin_loop_latch (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop latch information.\n"); ++ } ++ return false; ++ } ++ ++ if (!record_origin_loop_body (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to record loop body information.\n"); ++ } ++ return false; ++ } ++ ++ if (!check_origin_loop_info (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nFailed to check origin loop information.\n"); ++ } ++ return false; ++ } ++ ++ return true; ++} ++ ++/* The main 
entry for judging whether the loop meets some conditions. */ ++ ++static bool ++determine_loop_form (class loop *loop) ++{ ++ if (loop->inner) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, there is inner loop or" ++ "redundant bb.\n"); ++ } ++ return false; ++ } ++ ++ if (single_exit (loop) || !loop->latch) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, only one exit or loop_latch" ++ "does not exist.\n"); ++ } ++ return false; ++ } ++ ++ /* Support loop with only one backedge. */ ++ if (!loop_single_backedge_p (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, loop back edges are not" ++ "unique.\n"); ++ } ++ return false; ++ } ++ ++ /* Support loop with only one preheader BB. */ ++ if (!loop_single_preheader_bb (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nWrong loop form, loop preheader bb are not" ++ "unique.\n"); ++ } ++ return false; ++ } ++ ++ init_origin_loop_structure (); ++ if (!check_record_loop_form (loop)) ++ return false; ++ ++ return true; ++} ++ ++/* Create prolog bb for newly constructed loop; When prolog_assign exists in ++ the original loop, the corresponding assign needs to be added to prolog_bb; ++ eg: ++ len_16 = len_10 + 1 ++ Create simple copy statement when prolog_assign does not exist; ++ eg: ++ len_16 = len_10 ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_prolog_bb (basic_block &prolog_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer, edge entry_edge) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree lhs1; ++ ++ prolog_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (prolog_bb, outer); ++ redirect_edge_and_branch (entry_edge, prolog_bb); ++ set_immediate_dominator (CDI_DOMINATORS, prolog_bb, dominator_bb); ++ gsi = gsi_last_bb (prolog_bb); ++ lhs1 = copy_ssa_name (origin_loop.indvar); ++ ++ if (origin_loop.exist_prolog_assgin) ++ g = gimple_build_assign (lhs1, PLUS_EXPR, origin_loop.base, ++ build_int_cst (TREE_TYPE (origin_loop.base), origin_loop.step)); ++ else ++ g = gimple_build_assign (lhs1, NOP_EXPR, origin_loop.base); ++ gimple_seq_add_stmt (&stmts, g); ++ ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ set_current_def (origin_loop.indvar, lhs1); ++ defs_map.put (prolog_bb, lhs1); ++} ++ ++/* Create preheader bb for new loop; In order to ensure the standard form of ++ the loop, add a preheader_bb before loop_header. */ ++ ++static void ++create_loop_pred_bb (basic_block &loop_pred_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ loop_pred_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (loop_pred_bb, outer); ++ set_immediate_dominator (CDI_DOMINATORS, loop_pred_bb, dominator_bb); ++ defs_map.put (loop_pred_bb, get_current_def (origin_loop.indvar)); ++} ++ ++/* Add phi_arg for bb with phi node. 
*/ ++ ++static void ++rewrite_add_phi_arg (basic_block bb) ++{ ++ edge e; ++ edge_iterator ei; ++ gphi *phi; ++ gphi_iterator gsi; ++ tree res; ++ location_t loc; ++ ++ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ phi = gsi.phi (); ++ res = gimple_phi_result (phi); ++ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ { ++ if (PHI_ARG_DEF_FROM_EDGE (phi, e)) ++ continue; ++ tree var = *(defs_map.get (e->src)); ++ if (!same_ssa_name_var_p (var, res)) ++ continue; ++ if (virtual_operand_p (var)) ++ loc = UNKNOWN_LOCATION; ++ else ++ loc = gimple_location (SSA_NAME_DEF_STMT (var)); ++ add_phi_arg (phi, var, e, loc); ++ } ++ } ++} ++ ++/* Create loop_header BB for align_loop. ++ eg: ++ _18 = (long unsigned int) len_17; ++ _19 = _18 + 8; ++ _20 = (long unsigned int) len_limit_12 (D); ++ if (_19 <= _20) ++ ++ The IR of bb is as above. */ ++ ++static void ++create_align_loop_header (basic_block &align_loop_header, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gcond *cond_stmt; ++ gphi *phi; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.indvar); ++ align_loop_header = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_header, outer); ++ make_single_succ_edge (after_bb, align_loop_header, EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_header, dominator_bb); ++ gsi = gsi_last_bb (align_loop_header); ++ phi = create_phi_node (NULL_TREE, align_loop_header); ++ create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi)); ++ res = gimple_phi_result (phi); ++ ++ tree lhs1 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node, res); ++ tree lhs2 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (lhs1), lhs1, ++ build_int_cst (TREE_TYPE (lhs1), 8)); ++ tree lhs3 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node, ++ origin_loop.limit); ++ cond_stmt = gimple_build_cond (LE_EXPR, lhs2, lhs3, NULL_TREE, NULL_TREE); ++ 
gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ set_current_def (origin_loop.indvar, res); ++ defs_map.put (align_loop_header, res); ++} ++ ++/* Create loop body BB for align_loop. ++ eg: ++ _21 = (sizetype) len_17; ++ _22 = cur_15 (D) + _21; ++ _23 = MEM[(long unsigned int *)_22]; ++ _24 = pb_13 (D) + _21; ++ _25 = MEM[(long unsigned int *)_24]; ++ if (_23 != _25) ++ ++ The IR of bb is as above. */ ++ ++static void ++create_align_loop_body_bb (basic_block &align_loop_body_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gcond *cond_stmt; ++ tree lhs1, lhs2; ++ ++ align_loop_body_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_body_bb, outer); ++ make_edge (after_bb, align_loop_body_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_body_bb, dominator_bb); ++ gsi = gsi_last_bb (align_loop_body_bb); ++ ++ tree indvar = gimple_build (&stmts, NOP_EXPR, sizetype, ++ get_current_def (origin_loop.indvar)); ++ ++ tree var=indvar; ++ for (uint64_t i=0; i ++ len_26 = len_17 + 8; ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_align_loop_latch (basic_block &align_loop_latch, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.indvar); ++ align_loop_latch = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_latch, outer); ++ make_edge (after_bb, align_loop_latch, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_latch, dominator_bb); ++ gsi = gsi_last_bb (align_loop_latch); ++ res = copy_ssa_name (entry_node); ++ g = gimple_build_assign (res, PLUS_EXPR, entry_node, ++ build_int_cst (TREE_TYPE (entry_node), 8)); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (align_loop_latch, res); ++} ++ ++/* Create a new loop and add it to outer_loop and return. */ ++ ++static class loop * ++init_new_loop (class loop *outer_loop, basic_block header, basic_block latch) ++{ ++ class loop *new_loop; ++ new_loop = alloc_loop (); ++ new_loop->header = header; ++ new_loop->latch = latch; ++ add_loop (new_loop, outer_loop); ++ ++ return new_loop; ++} ++ ++/* Create necessary exit BB for align_loop. ++ eg: ++ _27 = _23 ^ _25; ++ _28 = __builtin_ctzll (_27); ++ _29 = _28 >> 3; ++ len_30 = _29 + len_17; ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_align_loop_exit_bb (basic_block &align_loop_exit_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gimple *cond_stmt; ++ tree lhs1, lhs2; ++ tree cond_lhs, cond_rhs; ++ gcall *build_ctzll; ++ ++ tree entry_node = get_current_def (origin_loop.indvar); ++ align_loop_exit_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_exit_bb, outer); ++ make_edge (after_bb, align_loop_exit_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_exit_bb, dominator_bb); ++ gsi = gsi_last_bb (align_loop_exit_bb); ++ ++ cond_stmt = gsi_stmt (gsi_last_bb (after_bb)); ++ cond_lhs = gimple_cond_lhs (cond_stmt); ++ cond_rhs = gimple_cond_rhs (cond_stmt); ++ ++ lhs1 = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (cond_lhs), cond_lhs, ++ cond_rhs); ++ build_ctzll = gimple_build_call (builtin_decl_explicit (BUILT_IN_CTZLL), 1, ++ lhs1); ++ lhs1 = make_ssa_name (integer_type_node); ++ gimple_call_set_lhs (build_ctzll, lhs1); ++ gimple_seq_add_stmt (&stmts, build_ctzll); ++ lhs2 = copy_ssa_name (lhs1); ++ g = gimple_build_assign (lhs2, RSHIFT_EXPR, lhs1, ++ build_int_cst (TREE_TYPE (lhs1), 3)); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs1 = gimple_build (&stmts, NOP_EXPR, TREE_TYPE (entry_node), lhs2); ++ lhs2 = copy_ssa_name (entry_node); ++ g = gimple_build_assign (lhs2, PLUS_EXPR, lhs1, entry_node); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (align_loop_exit_bb, lhs2); ++} ++ ++/* Create loop_header BB for epilogue_loop. ++ eg: ++ # len_31 = PHI ++ if (len_31 != len_limit_12 (D)) ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_epilogue_loop_header (basic_block &epilogue_loop_header, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gcond *cond_stmt; ++ tree res; ++ gphi *phi; ++ ++ tree entry_node = get_current_def (origin_loop.indvar); ++ epilogue_loop_header = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_header, outer); ++ make_single_succ_edge (after_bb, epilogue_loop_header, EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_header, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_header); ++ phi = create_phi_node (NULL_TREE, epilogue_loop_header); ++ create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi)); ++ res = gimple_phi_result (phi); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt1), res, ++ origin_loop.limit, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ set_current_def (origin_loop.indvar, res); ++ defs_map.put (epilogue_loop_header, res); ++} ++ ++/* Create loop body BB for epilogue_loop. ++ eg: ++ _32 = (sizetype) len_31; ++ _33 = pb_13 (D) + _32; ++ _34 = *_33; ++ _35 = cur_15 (D) + _32; ++ _36 = *_35; ++ if (_34 != _36) ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_epilogue_loop_body_bb (basic_block &epilogue_loop_body_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gcond *cond_stmt; ++ tree lhs1, lhs2, lhs3; ++ ++ tree entry_node = get_current_def (origin_loop.indvar); ++ epilogue_loop_body_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_body_bb, outer); ++ make_edge (after_bb, epilogue_loop_body_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_body_bb, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_body_bb); ++ ++ lhs1 = gimple_build (&stmts, NOP_EXPR, sizetype, entry_node); ++ lhs2 = gimple_build (&stmts, POINTER_PLUS_EXPR, ++ TREE_TYPE (origin_loop.arr1), origin_loop.arr1, lhs1); ++ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node), ++ fold_build2 (MEM_REF, unsigned_char_type_node, lhs2, ++ build_int_cst (TREE_TYPE (lhs2), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs2 = gimple_assign_lhs (g); ++ ++ for (uint64_t i=0; i ++ len_37 = len_31 + 1; ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_epilogue_loop_latch (basic_block &epilogue_loop_latch, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.indvar); ++ epilogue_loop_latch = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_latch, outer); ++ make_edge (after_bb, epilogue_loop_latch, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_latch, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_latch); ++ res = copy_ssa_name (entry_node); ++ g = gimple_build_assign (res, PLUS_EXPR, entry_node, ++ build_int_cst (TREE_TYPE (entry_node), origin_loop.step)); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (epilogue_loop_latch, res); ++} ++ ++/* convert_to_new_loop ++ | | ++ | | ++ | | entry_edge ++ | ______ | ++ | / V V ++ | | -----origin_loop_header--- ++ | | | | ++ | | -------------------------\ ++ | | | \ ++ | | V \___ ___ ___ ___ ___ ___ ___ ++ | | -----origin_loop_body----- | ++ | | | | | ++ | | -------------------------\ | ++ | | | \___ ___ ___ ___ | ++ | | V V V ++ | | -----origin_loop_latch---- -----exit_bb------ ++ | | | | | | ++ | | /-------------------------- ------------------ ++ | \ __ / ++ | ++ | | ++ | ====> |entry_edge ++ | V ++ | -------prolog_bb----- ++ | | | ++ | --------------------- ++ | | ++ | V ++ | -----align_loop_header---- ++ | /-----------------> | | ++ |/ -------------------------- ++ || / \ ++ || V V ++ || ---align_loop_body--- ---epilogue_loop_header-- ++ || | | -------| |<---| ++ || --------------------\ / ------------------------- | ++ || | \____ | | | ++ || V | | V | ++ || ---align_loop_latch--- | | ---epilogue_loop_body---- | ++ || | | | | ----| | | ++ || ---------------------- | | / ------------------------- | ++ || / __________/ | | | | ++ || / | | | V | ++ | \ __________/ | | | 
---epilogue_loop_latch--- | ++ | | | | | | | ++ | | | | ------------------------- / ++ | V | | | / ++ | -align_loop_exit_bb- | | \______________/ ++ | | | | | ++ | -------------------- | | ++ | | | | ++ | | V V ++ | | -----exit_bb------ ++ | |---->| | ++ | ------------------ ++ ++ The origin_loop conversion process starts from entry_edge and ends at ++ exit_bb; The execution logic of origin_loop is completely replaced by ++ align_loop + epilogue_loop: ++ 1) align_loop mainly implements the idea of ​​using wide-type dereference ++ and comparison on array elements, so as to achieve the effect of ++ acceleration; For the corresponding source code understanding, please ++ refer to the description of the pass at the beginning; ++ 2) epilogue_loop processes the previous loop remaining array element ++ comparison. */ ++ ++ ++basic_block alh, alb, all, elh, elb, ell; ++ ++static void ++create_new_loops (edge entry_edge) ++{ ++ basic_block prolog_bb; ++ basic_block align_loop_header, align_loop_latch, align_loop_body_bb; ++ basic_block align_pred_bb, align_loop_exit_bb; ++ basic_block epilogue_loop_header, epilogue_loop_latch, epilogue_loop_body_bb; ++ basic_block epilogue_loop_pred_bb; ++ class loop *align_loop; ++ class loop *epilogue_loop; ++ ++ ++ class loop *outer = entry_edge->src->loop_father; ++ ++ create_prolog_bb (prolog_bb, entry_edge->src, entry_edge->src, outer, ++ entry_edge); ++ ++ create_loop_pred_bb (align_pred_bb, prolog_bb, prolog_bb, outer); ++ make_single_succ_edge (prolog_bb, align_pred_bb, EDGE_FALLTHRU); ++ ++ create_align_loop_header (align_loop_header, align_pred_bb, ++ align_pred_bb, outer); ++ ++ create_align_loop_body_bb (align_loop_body_bb, align_loop_header, ++ align_loop_header, outer); ++ ++ create_align_loop_latch (align_loop_latch, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ make_edge (align_loop_latch, align_loop_header, EDGE_FALLTHRU); ++ rewrite_add_phi_arg (align_loop_header); ++ ++ align_loop = init_new_loop (outer, 
align_loop_header, align_loop_latch); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint byte align loop %d:\n", align_loop->num); ++ flow_loop_dump (align_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ create_align_loop_exit_bb (align_loop_exit_bb, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ ++ create_loop_pred_bb (epilogue_loop_pred_bb, align_loop_header, ++ align_loop_header, outer); ++ make_edge (align_loop_header, epilogue_loop_pred_bb, EDGE_FALSE_VALUE); ++ ++ create_epilogue_loop_header (epilogue_loop_header, epilogue_loop_pred_bb, ++ epilogue_loop_pred_bb, outer); ++ ++ create_epilogue_loop_body_bb (epilogue_loop_body_bb, epilogue_loop_header, ++ epilogue_loop_header, outer); ++ ++ create_epilogue_loop_latch (epilogue_loop_latch, epilogue_loop_body_bb, ++ epilogue_loop_body_bb, outer); ++ make_single_succ_edge (epilogue_loop_latch, epilogue_loop_header, ++ EDGE_FALLTHRU); ++ rewrite_add_phi_arg (epilogue_loop_header); ++ ++ epilogue_loop = init_new_loop (outer, epilogue_loop_header, ++ epilogue_loop_latch); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint epilogue loop %d:\n", epilogue_loop->num); ++ flow_loop_dump (epilogue_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ make_single_succ_edge (align_loop_exit_bb, origin_loop.exit_bb1, ++ EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb1, ++ entry_edge->src); ++ make_edge (epilogue_loop_body_bb, origin_loop.exit_bb1, EDGE_TRUE_VALUE); ++ ++ make_edge (epilogue_loop_header, origin_loop.exit_bb2, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb2, ++ entry_edge->src); ++ ++ rewrite_add_phi_arg (origin_loop.exit_bb1); ++ rewrite_add_phi_arg (origin_loop.exit_bb2); ++ ++ remove_edge (origin_loop.exit_e1); ++ remove_edge (origin_loop.exit_e2); ++ ++ alb=align_loop_body_bb; ++ elb=epilogue_loop_body_bb; ++} ++ ++/* Make sure that 
the dominance relationship of the newly inserted cfg ++ is not missing. */ ++ ++static void ++update_loop_dominator (cdi_direction dir) ++{ ++ gcc_assert (dom_info_available_p (dir)); ++ ++ basic_block bb; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ basic_block imm_bb = get_immediate_dominator (dir, bb); ++ if (!imm_bb || bb == origin_loop.exit_bb1) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, bb, ++ recompute_dominator (CDI_DOMINATORS, bb)); ++ continue; ++ } ++ } ++} ++ ++/* Clear information about the original loop. */ ++ ++static void ++remove_origin_loop (class loop *loop) ++{ ++ basic_block *body; ++ ++ body = get_loop_body_in_dom_order (loop); ++ unsigned n = loop->num_nodes; ++ for (unsigned i = 0; i < n; i++) ++ { ++ delete_basic_block (body[i]); ++ } ++ free (body); ++ delete_loop (loop); ++} ++ ++/* Perform the conversion of origin_loop to new_loop. */ ++ ++static void ++convert_to_new_loop (class loop *loop) ++{ ++ create_new_loops (origin_loop.entry_edge); ++ remove_origin_loop (loop); ++ update_loop_dominator (CDI_DOMINATORS); ++ update_ssa (TODO_update_ssa); ++} ++ ++/* The main entry of array-widen-compare optimizes. 
*/ ++ ++static unsigned int ++tree_ssa_array_widen_compare () ++{ ++ unsigned int todo = 0; ++ //class loop *loop; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ flow_loops_dump (dump_file, NULL, 1); ++ fprintf (dump_file, "\nConfirm which loop can be optimized using" ++ " array-widen-compare\n"); ++ } ++ ++ for (auto loop: loops_list (cfun, LI_FROM_INNERMOST)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "======================================\n"); ++ fprintf (dump_file, "Processing loop %d:\n", loop->num); ++ fprintf (dump_file, "======================================\n"); ++ flow_loop_dump (loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ if (determine_loop_form (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "The %dth loop form is success matched," ++ "and the loop can be optimized.\n", ++ loop->num); ++ dump_loop_bb (loop); ++ } ++ convert_to_new_loop (loop); ++ } ++ } ++ todo |= (TODO_update_ssa); ++ return todo; ++} ++ ++/* Array widen compare. */ ++ ++namespace { ++ ++const pass_data pass_data_tree_array_widen_compare = ++{ ++ GIMPLE_PASS, ++ "awiden_compare", ++ OPTGROUP_LOOP, ++ TV_TREE_ARRAY_WIDEN_COMPARE, ++ (PROP_cfg | PROP_ssa), ++ 0, ++ 0, ++ 0, ++ (TODO_update_ssa | TODO_verify_all) ++}; ++ ++class pass_array_widen_compare : public gimple_opt_pass ++{ ++public: ++ pass_array_widen_compare (gcc::context *ctxt) ++ : gimple_opt_pass (pass_data_tree_array_widen_compare, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *); ++ ++}; // class pass_array_widen_compare ++ ++bool ++pass_array_widen_compare::gate (function *) ++{ ++ return (flag_array_widen_compare > 0 && optimize >= 3); ++} ++ ++unsigned int ++pass_array_widen_compare::execute (function *fun) ++{ ++ if (number_of_loops (fun) <= 1) ++ return 0; ++ ++ /* Only supports LP64 data mode. 
*/ ++ if (TYPE_PRECISION (long_integer_type_node) != 64 ++ || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "The current data mode is not supported," ++ "only the LP64 date mode is supported.\n"); ++ return 0; ++ } ++ ++ return tree_ssa_array_widen_compare (); ++} ++ ++} // anon namespace ++ ++gimple_opt_pass * ++make_pass_array_widen_compare (gcc::context *ctxt) ++{ ++ return new pass_array_widen_compare (ctxt); ++} +-- +2.22.0 + diff --git a/HYGON-0003-function-attribute-judgement.patch b/HYGON-0003-function-attribute-judgement.patch new file mode 100644 index 0000000..7e34f6c --- /dev/null +++ b/HYGON-0003-function-attribute-judgement.patch @@ -0,0 +1,595 @@ +From e3084877691d6be4e337f51fd9c214c9baa9bac1 Mon Sep 17 00:00:00 2001 +From: Monama <13440944+monama@user.noreply.gitee.com> +Date: Tue, 21 Nov 2023 02:35:20 +0000 +Subject: [PATCH] !7 Add function attribute judgement for INLINE_HINT_kown_hot + hint & loop-elim. * Introduce redundant loop elimination optimization + controlled * Add function attribute judgement for INLINE_HINT_known_hot hint, + +--- + gcc/common.opt | 4 + + gcc/ipa-inline-analysis.cc | 13 +- + gcc/testsuite/gcc.dg/ipa/inlinehint-6.c | 47 +++ + gcc/tree-ssa-phiopt.cc | 448 ++++++++++++++++++++++++ + 4 files changed, 508 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/ipa/inlinehint-6.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index e51fb27c5e3..6343fad3335 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1240,6 +1240,10 @@ fcompare-elim + Common Var(flag_compare_elim_after_reload) Optimization + Perform comparison elimination after register allocation has finished. + ++floop-elim ++Common Var(flag_loop_elim) Init(0) Optimization ++Perform redundant loop elimination. ++ + fconserve-stack + Common Var(flag_conserve_stack) Optimization + Do not perform optimizations increasing noticeably stack usage. 
+diff --git a/gcc/ipa-inline-analysis.cc b/gcc/ipa-inline-analysis.cc +index 11d8d09ee43..16ac24cfc6a 100644 +--- a/gcc/ipa-inline-analysis.cc ++++ b/gcc/ipa-inline-analysis.cc +@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see + #include "ipa-utils.h" + #include "cfgexpand.h" + #include "gimplify.h" ++#include "attribs.h" + + /* Cached node/edge growths. */ + fast_call_summary *edge_growth_cache = NULL; +@@ -249,15 +250,19 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) + hints = estimates.hints; + } + +- /* When we have profile feedback, we can quite safely identify hot +- edges and for those we disable size limits. Don't do that when +- probability that caller will call the callee is low however, since it ++ /* When we have profile feedback or function attribute, we can quite safely ++ identify hot edges and for those we disable size limits. Don't do that ++ when probability that caller will call the callee is low however, since it + may hurt optimization of the caller's hot path. */ +- if (edge->count.ipa ().initialized_p () && edge->maybe_hot_p () ++ if ((edge->count.ipa ().initialized_p () && edge->maybe_hot_p () + && (edge->count.ipa ().apply_scale (2, 1) + > (edge->caller->inlined_to + ? 
edge->caller->inlined_to->count.ipa () + : edge->caller->count.ipa ()))) ++ || (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)) ++ != NULL ++ && lookup_attribute ("hot", DECL_ATTRIBUTES (edge->callee->decl)) ++ != NULL)) + hints |= INLINE_HINT_known_hot; + + gcc_checking_assert (size >= 0); +diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c +new file mode 100644 +index 00000000000..1f3be641c6d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c +@@ -0,0 +1,47 @@ ++/* { dg-options "-O3 -c -fdump-ipa-inline-details -fno-early-inlining -fno-ipa-cp" } */ ++/* { dg-add-options bind_pic_locally } */ ++ ++#define size_t long long int ++ ++struct A ++{ ++ size_t f1, f2, f3, f4; ++}; ++struct C ++{ ++ struct A a; ++ size_t b; ++}; ++struct C x; ++ ++__attribute__((hot)) struct C callee (struct A *a, struct C *c) ++{ ++ c->a=(*a); ++ ++ if((c->b + 7) & 17) ++ { ++ c->a.f1 = c->a.f2 + c->a.f1; ++ c->a.f2 = c->a.f3 - c->a.f2; ++ c->a.f3 = c->a.f2 + c->a.f3; ++ c->a.f4 = c->a.f2 - c->a.f4; ++ c->b = c->a.f2; ++ ++ } ++ return *c; ++} ++ ++__attribute__((hot)) struct C caller (size_t d, size_t e, size_t f, size_t g, struct C *c) ++{ ++ struct A a; ++ a.f1 = 1 + d; ++ a.f2 = e; ++ a.f3 = 12 + f; ++ a.f4 = 68 + g; ++ if (c->b > 0) ++ return callee (&a, c); ++ else ++ return *c; ++} ++ ++/* { dg-final { scan-ipa-dump "known_hot" "inline" } } */ ++ +diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc +index c56d0b9ff15..cf300d14121 100644 +--- a/gcc/tree-ssa-phiopt.cc ++++ b/gcc/tree-ssa-phiopt.cc +@@ -77,6 +77,7 @@ static hash_set * get_non_trapping (); + static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree); + static void hoist_adjacent_loads (basic_block, basic_block, + basic_block, basic_block); ++static bool do_phiopt_pattern (basic_block, basic_block, basic_block); + static bool gate_hoist_loads (void); + + /* This pass tries to transform conditional stores into unconditional 
+@@ -266,6 +267,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + hoist_adjacent_loads (bb, bb1, bb2, bb3); + continue; + } ++ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2)) ++ { ++ continue; ++ } + else + continue; + +@@ -3767,6 +3772,449 @@ hoist_adjacent_loads (basic_block bb0, basic_block bb1, + } + } + ++static bool check_uses (tree, hash_set *); ++ ++/* Check SSA_NAME is used in ++ if (SSA_NAME == 0) ++ ... ++ or ++ if (SSA_NAME != 0) ++ ... ++*/ ++static bool ++check_uses_cond (const_tree ssa_name, gimple *stmt, ++ hash_set *hset ATTRIBUTE_UNUSED) ++{ ++ tree_code code = gimple_cond_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR) ++ { ++ return false; ++ } ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ if ((lhs == ssa_name && integer_zerop (rhs)) ++ || (rhs == ssa_name && integer_zerop (lhs))) ++ { ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ _tmp = SSA_NAME == 0; ++ or ++ _tmp = SSA_NAME != 0; ++ or ++ _tmp = SSA_NAME | _tmp2; ++*/ ++static bool ++check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ tree_code code = gimple_assign_rhs_code (stmt); ++ tree lhs, rhs1, rhs2; ++ ++ switch (code) ++ { ++ case EQ_EXPR: ++ case NE_EXPR: ++ rhs1 = gimple_assign_rhs1 (stmt); ++ rhs2 = gimple_assign_rhs2 (stmt); ++ if ((rhs1 == ssa_name && integer_zerop (rhs2)) ++ || (rhs2 == ssa_name && integer_zerop (rhs1))) ++ { ++ return true; ++ } ++ break; ++ ++ case BIT_IOR_EXPR: ++ lhs = gimple_assign_lhs (stmt); ++ if (hset->contains (lhs)) ++ { ++ return false; ++ } ++ /* We should check the use of _tmp further. 
*/ ++ return check_uses (lhs, hset); ++ ++ default: ++ break; ++ } ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ # result = PHI ++*/ ++static bool ++check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) ++ { ++ tree arg = gimple_phi_arg_def (stmt, i); ++ if (!integer_zerop (arg) && arg != ssa_name) ++ { ++ return false; ++ } ++ } ++ ++ tree result = gimple_phi_result (stmt); ++ ++ /* It is used to avoid infinite recursion, ++ ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ ... ++ */ ++ if (hset->contains (result)) ++ { ++ return false; ++ } ++ ++ return check_uses (result, hset); ++} ++ ++/* Check the use of SSA_NAME, it should only be used in comparison ++ operation and PHI node. HSET is used to record the ssa_names ++ that have been already checked. */ ++static bool ++check_uses (tree ssa_name, hash_set *hset) ++{ ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ ++ if (TREE_CODE (ssa_name) != SSA_NAME) ++ { ++ return false; ++ } ++ ++ if (SSA_NAME_VAR (ssa_name) ++ && is_global_var (SSA_NAME_VAR (ssa_name))) ++ { ++ return false; ++ } ++ ++ hset->add (ssa_name); ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) ++ { ++ gimple *stmt = USE_STMT (use_p); ++ ++ /* Ignore debug gimple statements. 
*/ ++ if (is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_COND: ++ if (!check_uses_cond (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_ASSIGN: ++ if (!check_uses_assign (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_PHI: ++ if (!check_uses_phi (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool ++check_def_gimple (gimple *def1, gimple *def2, const_tree result) ++{ ++ /* def1 and def2 should be POINTER_PLUS_EXPR. */ ++ if (!is_gimple_assign (def1) || !is_gimple_assign (def2) ++ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR ++ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR) ++ { ++ return false; ++ } ++ ++ tree rhs12 = gimple_assign_rhs2 (def1); ++ ++ tree rhs21 = gimple_assign_rhs1 (def2); ++ tree rhs22 = gimple_assign_rhs2 (def2); ++ ++ if (rhs21 != result) ++ { ++ return false; ++ } ++ ++ /* We should have a positive pointer-plus constant to ensure ++ that the pointer value is continuously increasing. */ ++ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST ++ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool ++check_loop_body (basic_block bb0, basic_block bb2, const_tree result) ++{ ++ gimple *g01 = first_stmt (bb0); ++ if (!g01 || !is_gimple_assign (g01) ++ || gimple_assign_rhs_code (g01) != MEM_REF ++ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result) ++ { ++ return false; ++ } ++ ++ gimple *g02 = g01->next; ++ /* GIMPLE_COND would be the last gimple in a basic block, ++ and have no other side effects on RESULT. */ ++ if (!g02 || gimple_code (g02) != GIMPLE_COND) ++ { ++ return false; ++ } ++ ++ if (first_stmt (bb2) != last_stmt (bb2)) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Pattern is like ++
++   arg1 = base (rhs11) + cst (rhs12); [def1]
++   goto 
++
++   
++   arg2 = result (rhs21) + cst (rhs22); [def2]
++
++   
++   # result = PHI 
++   _v = *result;  [g01]
++   if (_v == 0)   [g02]
++     goto 
++   else
++     goto 
++
++   
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst;      [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++*/
++static bool
++check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
++		gphi *phi_stmt, gimple *&output)
++{
++  /* Start check from PHI node in BB0.  */
++  if (gimple_phi_num_args (phi_stmt) != 2
++      || virtual_operand_p (gimple_phi_result (phi_stmt)))
++    {
++      return false;
++    }
++
++  tree result = gimple_phi_result (phi_stmt);
++  tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
++  tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
++
++  if (TREE_CODE (arg1) != SSA_NAME
++      || TREE_CODE (arg2) != SSA_NAME
++      || SSA_NAME_IS_DEFAULT_DEF (arg1)
++      || SSA_NAME_IS_DEFAULT_DEF (arg2))
++    {
++      return false;
++    }
++
++  gimple *def1 = SSA_NAME_DEF_STMT (arg1);
++  gimple *def2 = SSA_NAME_DEF_STMT (arg2);
++
++  /* Swap bb1 and bb2 if pattern is like
++     if (_v != 0)
++       goto 
++     else
++       goto 
++  */
++  if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
++    {
++      std::swap (bb1, bb2);
++    }
++
++  /* prebb[def1] --> bb0 <-- bb2[def2] */
++  if (!gimple_bb (def1)
++      || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
++      || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
++    {
++      return false;
++    }
++
++  /* Check whether define gimple meets the pattern requirements.  */
++  if (!check_def_gimple (def1, def2, result))
++    {
++      return false;
++    }
++
++  if (!check_loop_body (bb0, bb2, result))
++    {
++      return false;
++    }
++
++  output = def1;
++  return true;
++}
++
++/* Check pattern
++   
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst;      [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++*/
++static bool
++check_gimple_order (basic_block bb1, const_tree base, const_tree cst,
++		    const_tree result, gimple *&output)
++{
++  gimple *g1 = first_stmt (bb1);
++  if (!g1 || !is_gimple_assign (g1)
++      || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
++      || gimple_assign_rhs1 (g1) != result
++      || gimple_assign_rhs2 (g1) != base)
++    {
++      return false;
++    }
++
++  gimple *g2 = g1->next;
++  if (!g2 || !is_gimple_assign (g2)
++      || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
++      || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
++      || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
++    {
++      return false;
++    }
++
++  /* INTEGER_CST cst in gimple def1.  */
++  HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
++  /* INTEGER_CST cst in gimple g2.  */
++  HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
++  /* _2 must be at least a positive number.  */
++  if (num2 == 0 || num1 / num2 <= 0)
++    {
++      return false;
++    }
++
++  gimple *g3 = g2->next;
++  if (!g3 || !is_gimple_assign (g3)
++      || gimple_assign_rhs_code (g3) != NOP_EXPR
++      || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
++      || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
++    {
++      return false;
++    }
++
++  /* _3 should only be used in comparison operation or PHI node.  */
++  hash_set *hset = new hash_set;
++  if (!check_uses (gimple_assign_lhs (g3), hset))
++    {
++      delete hset;
++      return false;
++    }
++  delete hset;
++
++  output = g3;
++  return true;
++}
++
++static bool
++do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
++{
++  gphi_iterator gsi;
++
++  for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gphi *phi_stmt = gsi.phi ();
++      gimple *def1 = NULL;
++      tree base, cst, result;
++
++      if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
++	{
++	  continue;
++	}
++
++      base = gimple_assign_rhs1 (def1);
++      cst = gimple_assign_rhs2 (def1);
++      result = gimple_phi_result (phi_stmt);
++
++      gimple *stmt = NULL;
++      if (!check_gimple_order (bb1, base, cst, result, stmt))
++	{
++	  continue;
++	}
++
++      gcc_assert (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "to\n");
++	}
++
++      /* Rewrite statement
++	   _3 = (unsigned int) _2;
++	 to
++	   _3 = (unsigned int) 1;
++      */
++      tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
++      gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
++      update_stmt (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "\n");
++	}
++
++      return true;
++    }
++  return false;
++}
++
+ /* Determine whether we should attempt to hoist adjacent loads out of
+    diamond patterns in pass_phiopt.  Always hoist loads if
+    -fhoist-adjacent-loads is specified and the target machine has
+-- 
+2.22.0
+
diff --git a/HYGON-0004-struct-data-layout-optimization.patch b/HYGON-0004-struct-data-layout-optimization.patch
new file mode 100644
index 0000000..fe4cdb5
--- /dev/null
+++ b/HYGON-0004-struct-data-layout-optimization.patch
@@ -0,0 +1,15898 @@
+From 7a2cd0cac452ce1d453e92e3094a76ca862dcc48 Mon Sep 17 00:00:00 2001
+From: Kang Mengbo 
+Date: Mon, 27 Nov 2023 14:55:06 +0800
+Subject: [PATCH]         Struct data layout optimization
+
+        Layout the structure memory space to improve the locality of the memory space, including field reordering, stripping, pointer compression, dead field elimination, semi-layout, etc.
+
+        Fix kp-struct-layout patch bug
+        Fix KP-Struct-reorg Patch While merging KP-Struct-reorg Patch from gcc-10.3.0 to gcc-12.3.0:
+            1. Fix Legacy check Funcs
+            2. Fix 'static bool is_a_helper::test(U*) [with U = gcond; T = gimple*]d never defined'
+            3. Modified the vec  to auto_vec  to solve zero callers.length()
+            4. For NULL normal_callers[i]->call_stmt value, use normal_callers[i]->caller->get_untransformed_body() to get call_stmt
+            5. Add bool get_void_ptr_layers(tree input, int &input_layers) Function to update void_ptr layers
+
+        Reference
+            1. https://gcc.gnu.org/git/?p=gcc-old.git;a=commit;h=6e1bd1c900533c627b5e4fbbecb41dcd7974b522
+            2. https://gitee.com/src-openeuler/gcc/blob/openEuler-22.09/0014-Backport-StructReorg-Structure-reorganization-optimi.patch
+
+        Signed-off-by: Kang Mengbo
+---
+ gcc/Makefile.in                               |    1 +
+ gcc/common.opt                                |   14 +-
+ gcc/configure                                 |    2 +-
+ gcc/configure.ac                              |    2 +-
+ gcc/doc/invoke.texi                           |   32 +
+ gcc/ipa-free-lang-data.cc                     |   28 +-
+ gcc/ipa-param-manipulation.cc                 |    3 +-
+ gcc/ipa-param-manipulation.h                  |    3 +-
+ gcc/ipa-struct-reorg/escapes.def              |   65 +
+ gcc/ipa-struct-reorg/ipa-struct-reorg.cc      | 8860 +++++++++++++++++
+ gcc/ipa-struct-reorg/ipa-struct-reorg.h       |  265 +
+ gcc/opts.cc                                   |   16 +
+ gcc/params.opt                                |   11 +
+ gcc/passes.def                                |    2 +
+ gcc/pointer-query.cc                          |    2 +
+ gcc/symbol-summary.h                          |   13 +-
+ .../g++.dg/struct/no-body-function.cpp        |   18 +
+ .../g++.dg/struct/struct-reorg-1.cpp          |   13 +
+ .../g++.dg/struct/struct-reorg-2.cpp          |   17 +
+ .../g++.dg/struct/struct-reorg-3.cpp          |   24 +
+ gcc/testsuite/g++.dg/struct/struct-reorg.exp  |   26 +
+ .../gcc.dg/struct/complete_struct_relayout.c  |   60 +
+ .../gcc.dg/struct/csr_allocation-1.c          |   46 +
+ .../gcc.dg/struct/csr_allocation-2.c          |   59 +
+ .../gcc.dg/struct/csr_allocation-3.c          |   77 +
+ gcc/testsuite/gcc.dg/struct/csr_cast_int.c    |   52 +
+ .../gcc.dg/struct/csr_separate_instance.c     |   48 +
+ .../gcc.dg/struct/csr_skip_void_struct_name.c |   53 +
+ gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c  |   86 +
+ .../gcc.dg/struct/dfe_ele_minus_verify.c      |   60 +
+ .../gcc.dg/struct/dfe_extr_board_init.c       |   77 +
+ gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c   |   77 +
+ gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c |   56 +
+ gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c     |  162 +
+ gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c   |  126 +
+ .../gcc.dg/struct/dfe_extr_mv_udc_core.c      |   82 +
+ .../gcc.dg/struct/dfe_extr_tcp_usrreq.c       |   58 +
+ .../gcc.dg/struct/dfe_extr_ui_main.c          |   61 +
+ .../gcc.dg/struct/dfe_mem_ref_offset.c        |   58 +
+ .../struct/dfe_mul_layer_ptr_record_bug.c     |   30 +
+ gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c    |   71 +
+ .../gcc.dg/struct/dfe_ptr_negate_expr.c       |   55 +
+ gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c     |   55 +
+ gcc/testsuite/gcc.dg/struct/pc_cast_int.c     |   91 +
+ .../gcc.dg/struct/pc_compress_and_decomress.c |   90 +
+ gcc/testsuite/gcc.dg/struct/pc_ptr2void.c     |   87 +
+ .../gcc.dg/struct/pc_simple_rewrite_pc.c      |  112 +
+ .../gcc.dg/struct/pc_skip_void_struct_name.c  |   53 +
+ .../struct/rf_DTE_struct_instance_field.c     |   75 +
+ gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c   |   94 +
+ .../gcc.dg/struct/rf_create_fields_bug.c      |   82 +
+ .../gcc.dg/struct/rf_create_new_func_bug.c    |   56 +
+ .../gcc.dg/struct/rf_ele_minus_verify.c       |   60 +
+ .../gcc.dg/struct/rf_escape_by_base.c         |   83 +
+ gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c |   72 +
+ .../gcc.dg/struct/rf_mem_ref_offset.c         |   58 +
+ .../struct/rf_mul_layer_ptr_record_bug.c      |   30 +
+ .../gcc.dg/struct/rf_pass_conflict.c          |  109 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c |   87 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c     |   71 +
+ .../gcc.dg/struct/rf_ptr_negate_expr.c        |   55 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c   |   34 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c      |   55 +
+ gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c  |   58 +
+ .../gcc.dg/struct/rf_rescusive_type.c         |   57 +
+ .../struct/rf_rewrite_assign_more_cmp.c       |   65 +
+ .../gcc.dg/struct/rf_rewrite_cond_bug.c       |   72 +
+ .../gcc.dg/struct/rf_rewrite_cond_more_cmp.c  |   58 +
+ .../gcc.dg/struct/rf_rewrite_phi_bug.c        |   81 +
+ gcc/testsuite/gcc.dg/struct/rf_shwi.c         |   23 +
+ gcc/testsuite/gcc.dg/struct/rf_visible_func.c |   92 +
+ .../gcc.dg/struct/rf_void_ptr_param_func.c    |   54 +
+ .../gcc.dg/struct/semi_relayout_rewrite.c     |   86 +
+ .../gcc.dg/struct/sr_address_of_field.c       |   37 +
+ gcc/testsuite/gcc.dg/struct/sr_convert_mem.c  |   23 +
+ gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c  |   25 +
+ gcc/testsuite/gcc.dg/struct/sr_pointer_and.c  |   17 +
+ .../gcc.dg/struct/sr_pointer_minus.c          |   33 +
+ gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |  102 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-1.c  |   30 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-2.c  |   29 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-3.c  |   28 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-4.c  |   59 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-5.c  |   31 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-6.c  |   54 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-7.c  |   38 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-8.c  |   25 +
+ gcc/testsuite/gcc.dg/struct/struct_reorg-9.c  |   54 +
+ .../gcc.dg/struct/w_prof_global_array.c       |   29 +
+ .../gcc.dg/struct/w_prof_global_var.c         |   42 +
+ .../gcc.dg/struct/w_prof_local_array.c        |   37 +
+ .../gcc.dg/struct/w_prof_local_var.c          |   40 +
+ .../gcc.dg/struct/w_prof_single_str_global.c  |   31 +
+ gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c |   64 +
+ .../gcc.dg/struct/w_ratio_cold_str.c          |   43 +
+ .../gcc.dg/struct/wo_prof_array_field.c       |   26 +
+ .../struct/wo_prof_array_through_pointer.c    |   38 +
+ .../gcc.dg/struct/wo_prof_double_malloc.c     |   29 +
+ .../gcc.dg/struct/wo_prof_empty_str.c         |   44 +
+ .../struct/wo_prof_escape_arg_to_local.c      |   44 +
+ .../gcc.dg/struct/wo_prof_escape_return-1.c   |   33 +
+ .../gcc.dg/struct/wo_prof_escape_return.c     |   32 +
+ .../gcc.dg/struct/wo_prof_escape_str_init.c   |   31 +
+ .../struct/wo_prof_escape_substr_array.c      |   33 +
+ .../struct/wo_prof_escape_substr_pointer.c    |   48 +
+ .../struct/wo_prof_escape_substr_value.c      |   45 +
+ .../gcc.dg/struct/wo_prof_global_array.c      |   32 +
+ .../gcc.dg/struct/wo_prof_global_var.c        |   45 +
+ .../gcc.dg/struct/wo_prof_local_array.c       |   40 +
+ .../gcc.dg/struct/wo_prof_local_var.c         |   43 +
+ .../gcc.dg/struct/wo_prof_malloc_size_var-1.c |   47 +
+ .../gcc.dg/struct/wo_prof_malloc_size_var.c   |   47 +
+ .../struct/wo_prof_mult_field_peeling.c       |   42 +
+ .../gcc.dg/struct/wo_prof_single_str_global.c |   34 +
+ .../gcc.dg/struct/wo_prof_single_str_local.c  |   34 +
+ .../struct/wo_prof_single_str_pointer.c       |   38 +
+ .../gcc.dg/struct/wo_prof_two_strs.c          |   67 +
+ gcc/timevar.def                               |    1 +
+ gcc/tree-pass.h                               |    1 +
+ 119 files changed, 14757 insertions(+), 14 deletions(-)
+ create mode 100644 gcc/ipa-struct-reorg/escapes.def
+ create mode 100644 gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+ create mode 100644 gcc/ipa-struct-reorg/ipa-struct-reorg.h
+ create mode 100644 gcc/testsuite/g++.dg/struct/no-body-function.cpp
+ create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp
+ create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp
+ create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp
+ create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg.exp
+ create mode 100644 gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-3.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_cast_int.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_separate_instance.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_cast_int.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_ptr2void.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_shwi.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_visible_func.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_address_of_field.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_convert_mem.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_pointer_and.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct-reorg.exp
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-4.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_global_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_global_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_local_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_local_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_pointer.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 0aabc6ea3f2..deceae8a82d 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1451,6 +1451,7 @@ OBJS = \
+ 	incpath.o \
+ 	init-regs.o \
+ 	internal-fn.o \
++	ipa-struct-reorg/ipa-struct-reorg.o \
+ 	ipa-cp.o \
+ 	ipa-sra.o \
+ 	ipa-devirt.o \
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 6343fad3335..c5898e589eb 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1968,9 +1968,19 @@ fipa-matrix-reorg
+ Common Ignore
+ Does nothing. Preserved for backward compatibility.
+ 
++fipa-reorder-fields
++Common Var(flag_ipa_reorder_fields) Init(0) Optimization
++Perform structure fields reorder optimizations.
++
+ fipa-struct-reorg
+-Common Ignore
+-Does nothing. Preserved for backward compatibility.
++Common Var(flag_ipa_struct_reorg) Init(0) Optimization
++Perform structure layout optimizations.
++
++fipa-struct-reorg=
++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 6)
++-fipa-struct-reorg=[0,1,2,3,4,5,6] adding none, struct-reorg, reorder-fields,
++dfe, safe-pointer-compression, unsafe-pointer-compression, semi-relayout
++optimizations.
+ 
+ fipa-vrp
+ Common Var(flag_ipa_vrp) Optimization
+diff --git a/gcc/configure b/gcc/configure
+index c749ace011d..98bbf0f857b 100755
+--- a/gcc/configure
++++ b/gcc/configure
+@@ -34191,7 +34191,7 @@ $as_echo "$as_me: executing $ac_file commands" >&6;}
+     "depdir":C) $SHELL $ac_aux_dir/mkinstalldirs $DEPDIR ;;
+     "gccdepdir":C)
+   ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR
+-  for lang in $subdirs c-family common analyzer rtl-ssa
++  for lang in $subdirs c-family common analyzer rtl-ssa ipa-struct-reorg
+   do
+       ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR
+   done ;;
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index 992a50e7b20..c74f4b55527 100644
+--- a/gcc/configure.ac
++++ b/gcc/configure.ac
+@@ -1340,7 +1340,7 @@ AC_CHECK_HEADERS(ext/hash_map)
+ ZW_CREATE_DEPDIR
+ AC_CONFIG_COMMANDS([gccdepdir],[
+   ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR
+-  for lang in $subdirs c-family common analyzer rtl-ssa
++  for lang in $subdirs c-family common analyzer rtl-ssa ipa-struct-reorg
+   do
+       ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR
+   done], [subdirs="$subdirs" ac_aux_dir=$ac_aux_dir DEPDIR=$DEPDIR])
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index a76a3a3a877..3ff1f8db7d3 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -526,6 +526,8 @@ Objective-C and Objective-C++ Dialects}.
+ -finline-functions  -finline-functions-called-once  -finline-limit=@var{n} @gol
+ -finline-small-functions -fipa-modref -fipa-cp  -fipa-cp-clone @gol
+ -fipa-bit-cp  -fipa-vrp  -fipa-pta  -fipa-profile  -fipa-pure-const @gol
++-fipa-reorder-fields @gol
++-fipa-struct-reorg @gol
+ -fipa-reference  -fipa-reference-addressable @gol
+ -fipa-stack-alignment  -fipa-icf  -fira-algorithm=@var{algorithm} @gol
+ -flive-patching=@var{level} @gol
+@@ -11912,6 +11914,27 @@ Enabled by default at @option{-O1} and higher.
+ Reduce stack alignment on call sites if possible.
+ Enabled by default.
+ 
++@item -fipa-reorder-fields
++@opindex fipa-reorder-fields
++Introduce structure fields reordering optimization that changes the field
++ordering of C-like structures in order to better utilize spatial locality.
++This transformation is effective for programs containing arrays of structures.
++It works only in whole program mode, so it requires @option{-fwhole-program}
++to be enabled.
++
++@item -fipa-struct-reorg
++@opindex fipa-struct-reorg
++Perform structure reorganization optimization, which changes the C-like structure
++layout in order to better utilize spatial locality.  This transformation is
++effective for programs containing arrays of structures.  Available in two
++compilation modes: profile-based (enabled with @option{-fprofile-generate})
++or static (which uses built-in heuristics).  It works only in whole program
++mode, so it requires @option{-fwhole-program} to be
++enabled.  Structures considered @samp{cold} by this transformation are not
++affected (see @option{--param struct-reorg-cold-struct-ratio=@var{value}}).
++
++With this flag, the program debug info reflects a new structure layout.
++
+ @item -fipa-pta
+ @opindex fipa-pta
+ Perform interprocedural pointer analysis and interprocedural modification
+@@ -13782,6 +13805,15 @@ In each case, the @var{value} is an integer.  The following choices
+ of @var{name} are recognized for all targets:
+ 
+ @table @gcctabopt
++@item struct-reorg-cold-struct-ratio
++The threshold ratio (as a percentage) between a structure frequency
++and the frequency of the hottest structure in the program.  This parameter
++is used by struct-reorg optimization enabled by @option{-fipa-struct-reorg}.
++We say that if the ratio of a structure frequency, calculated by profiling,
++to the hottest structure frequency in the program is less than this
++parameter, then structure reorganization is not applied to this structure.
++The default is 10.
++
+ @item predictable-branch-outcome
+ When branch is predicted to be taken with probability lower than this threshold
+ (in percent), then it is considered well predictable.
+diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc
+index a742156858c..01e5cdece1c 100644
+--- a/gcc/ipa-free-lang-data.cc
++++ b/gcc/ipa-free-lang-data.cc
+@@ -49,6 +49,7 @@
+ #include "except.h"
+ #include "ipa-utils.h"
+ 
++extern bool lang_c_p (void);
+ namespace {
+ 
+ /* Data used when collecting DECLs and TYPEs for language data removal.  */
+@@ -102,6 +103,14 @@ fld_worklist_push (tree t, class free_lang_data_d *fld)
+ static tree
+ fld_simplified_type_name (tree type)
+ {
++  /* Simplify type will cause that struct A and struct A within
++     struct B are different type pointers, so skip it in structure
++     optimizations.  */
++  if (flag_ipa_struct_reorg
++      && lang_c_p ()
++      && flag_lto_partition == LTO_PARTITION_ONE)
++    return TYPE_NAME (type);
++
+   if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
+     return TYPE_NAME (type);
+   /* Drop TYPE_DECLs in TYPE_NAME in favor of the identifier in the
+@@ -109,9 +118,9 @@ fld_simplified_type_name (tree type)
+      this must match fld_  */
+   if (type != TYPE_MAIN_VARIANT (type)
+       || (!DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (type))
+-	  && (TREE_CODE (type) != RECORD_TYPE
+-	      || !TYPE_BINFO (type)
+-	      || !BINFO_VTABLE (TYPE_BINFO (type)))))
++    && (TREE_CODE (type) != RECORD_TYPE
++	|| !TYPE_BINFO (type)
++	|| !BINFO_VTABLE (TYPE_BINFO (type)))))
+     return DECL_NAME (TYPE_NAME (type));
+   return TYPE_NAME (type);
+ }
+@@ -340,14 +349,19 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
+ {
+   if (!t)
+     return t;
++  /* Simplify type will cause that struct A and struct A within
++     struct B are different type pointers, so skip it in structure
++     optimizations.  */
++  if (flag_ipa_struct_reorg
++      && lang_c_p ()
++      && flag_lto_partition == LTO_PARTITION_ONE)
++    return t;
+   if (POINTER_TYPE_P (t))
+     return fld_incomplete_type_of (t, fld);
+   /* FIXME: This triggers verification error, see PR88140.  */
+-#if 0
+-  if (TREE_CODE (t) == ARRAY_TYPE)
++  if (TREE_CODE (t) == ARRAY_TYPE && 0)
+     return fld_process_array_type (t, fld_simplified_type (TREE_TYPE (t), fld),
+-				   fld_simplified_types, fld);
+-#endif
++	   fld_simplified_types, fld);
+   return t;
+ }
+ 
+diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc
+index 38328c3e8d0..f9e956008d8 100644
+--- a/gcc/ipa-param-manipulation.cc
++++ b/gcc/ipa-param-manipulation.cc
+@@ -55,7 +55,8 @@ static const char *ipa_param_prefixes[IPA_PARAM_PREFIX_COUNT]
+   = {"SYNTH",
+      "ISRA",
+      "simd",
+-     "mask"};
++     "mask",
++     "struct_reorg"};
+ 
+ /* Names of parameters for dumping.  Keep in sync with enum ipa_parm_op.  */
+ 
+diff --git a/gcc/ipa-param-manipulation.h b/gcc/ipa-param-manipulation.h
+index a9ad2b216be..71f4a0a2f08 100644
+--- a/gcc/ipa-param-manipulation.h
++++ b/gcc/ipa-param-manipulation.h
+@@ -126,6 +126,7 @@ enum ipa_param_name_prefix_indices
+    IPA_PARAM_PREFIX_ISRA,
+    IPA_PARAM_PREFIX_SIMD,
+    IPA_PARAM_PREFIX_MASK,
++   IPA_PARAM_PREFIX_REORG,
+    IPA_PARAM_PREFIX_COUNT
+ };
+ 
+@@ -189,7 +190,7 @@ struct GTY(()) ipa_adjusted_param
+ 
+   /* Index into ipa_param_prefixes specifying a prefix to be used with
+      DECL_NAMEs of newly synthesized parameters.  */
+-  unsigned param_prefix_index : 2;
++  unsigned param_prefix_index : 3;
+ 
+   /* Storage order of the original parameter (for the cases when the new
+      parameter is a component of an original one).  */
+diff --git a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def
+new file mode 100644
+index 00000000000..3663e064c49
+--- /dev/null
++++ b/gcc/ipa-struct-reorg/escapes.def
+@@ -0,0 +1,65 @@
++/* Copyright (C) 2016 Free Software Foundation, Inc.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++.  */
++
++/* Before including this file, you should define a macro:
++   DEF_ESCAPE (ENUM, TEXT)
++
++   This macro will be called once for each escape reason. The
++   ENUM will be of type "escape_type".  The TEXT is describing
++   the reason for the escape.
++*/
++DEF_ESCAPE (escape_marked_as_used, "Type used in variable marked as used")
++DEF_ESCAPE (escape_via_global_var, "Type used via a external visible variable")
++DEF_ESCAPE (escape_via_global_init, "Type used via a global init of a variable")
++DEF_ESCAPE (escape_non_supported_allocator, "Type used by allocation which is not currently supported")
++DEF_ESCAPE (escape_dependent_type_escapes, "Type uses a type which escapes or is used by a type which escapes")
++DEF_ESCAPE (escape_var_arg_function, "Types escapes via a variable argument function")
++DEF_ESCAPE (escape_bitfields, "Types has bitfields")
++DEF_ESCAPE (escape_recusive_type, "Type has a recusive relationship")
++DEF_ESCAPE (escape_variable_sized_array, "Type has a variable sized type")
++DEF_ESCAPE (escape_external_function, "Type escapes via an external function call")
++DEF_ESCAPE (escape_visible_function, "Type escapes via expternally visible function call")
++DEF_ESCAPE (escape_pointer_function, "Type escapes via an function pointer call")
++DEF_ESCAPE (escape_unkown_field, "Type escapes via an unkown field accessed")
++DEF_ESCAPE (escape_union, "Type escapes via an union")
++DEF_ESCAPE (escape_inline_asm, "Type escapes via inline-asm")
++DEF_ESCAPE (escape_non_multiply_size, "Type escapes a pointer plus which is not a multiplicate of the size")
++DEF_ESCAPE (escape_cast_void, "Type escapes a cast to/from void*")
++DEF_ESCAPE (escape_cast_another_ptr, "Type escapes a cast to a different pointer")
++DEF_ESCAPE (escape_cast_int, "Type escapes a cast from/to intergral type")
++DEF_ESCAPE (escape_int_const, "Type escapes via integer constant")
++DEF_ESCAPE (escape_vce, "Type escapes via a VIEW_CONVERT_EXPR")
++DEF_ESCAPE (escape_array_access, "Type escapes via an array access")
++DEF_ESCAPE (escape_noclonable_function, "Type escapes via a non-clonable function")
++DEF_ESCAPE (escape_rescusive_type, "Recusive type")
++DEF_ESCAPE (escape_user_alignment, "Type has an user alignment set")
++DEF_ESCAPE (escape_volatile, "Type has an variable which is volatile")
++DEF_ESCAPE (escape_non_eq, "Type has a comparison other than equals or not equals")
++DEF_ESCAPE (escape_addr, "Type escapes via taking the address of field")
++DEF_ESCAPE (escape_cannot_change_signature, "Type used in a call that cannot change signature")
++DEF_ESCAPE (escape_non_optimize, "Type used by a function which turns off struct reorg")
++DEF_ESCAPE (escape_array, "Type is used in an array [not handled yet]")
++DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]")
++DEF_ESCAPE (escape_return, "Type escapes via a return [not handled yet]")
++DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance")
++DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt")
++DEF_ESCAPE (escape_via_orig_escape, "Type escapes via a original escape type")
++DEF_ESCAPE (escape_instance_field, "Type escapes via a field of instance")
++DEF_ESCAPE (escape_via_empty_no_orig, "Type escapes via empty and no original")
++
++#undef DEF_ESCAPE
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+new file mode 100644
+index 00000000000..0d68389b160
+--- /dev/null
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+@@ -0,0 +1,8860 @@
++/* Struct-reorg optimizations.
++   Copyright (C) 2016-2017 Free Software Foundation, Inc.
++   Contributed by Andrew Pinski  
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++.  */
++
++/* This pass implements the structure reorganization optimization
++   (struct-reorg). Right now it handles just splitting off the hottest fields
++   for a struct of 2 fields: struct s { type1 field1; // Hot field type2 field2;
++   };
++   s *v;
++   into:
++   struct s_hot {
++     type1 field1;
++   };
++   struct c_cold {
++     type2 field2;
++   };
++   s_hot *v_hot;
++   s_cold *v_cold;
++
++   TODO: This pass can be extended to more fields, and other algorithms like
++   reordering.
++
++   This pass operate in four stages:
++    1. All of the field accesses, declarations (struct types and pointers to
++   that type) and struct types are scanned and recorded.  This includes global
++   declarations. Also record all allocation and freeing sites; this is needed
++   for the rewriting phase.
++
++       FIXME: If there is a top-level inline-asm, the pass immediately returns.
++
++    2. Prune out the types which are considered escaping.
++       Examples of types which are considered escaping:
++       1. A declaration has been marked as having the attribute used or has user
++   defined alignment (type too).
++       2. Accesses are via a BIT_FIELD_REF. FIXME: Handle VECTOR_TYPE for this
++   case.
++       3. The "allocation" site is not a known builtin function.
++       4. Casting to/from an integer.
++
++    3. Analyze the types for which optimization to do.
++       a. Split the fields into two different structs.
++      (FIXME: two field case handled only)
++      Look at all structs which contain two fields, if one of the fields is
++   hotter then split it and put it on the rewritting for accesses. Allocations
++   and freeing are marked to split into two functions; all uses of that type
++   will now be considered as two. b. Reorder fields hottest to the coldest.
++   TODO: Implement.
++
++    4. Rewrite each access and allocation and free which is marked as rewriting.
++
++ */
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "tm.h"
++#include "tree.h"
++#include "tree-pass.h"
++#include "cgraph.h"
++#include "diagnostic-core.h"
++#include "function.h"
++#include "basic-block.h"
++#include "gimple.h"
++#include "vec.h"
++#include "tree-pretty-print.h"
++#include "gimple-pretty-print.h"
++#include "gimple-iterator.h"
++#include "gimple-walk.h"
++#include "cfg.h"
++#include "cfghooks.h" /* For split_block.  */
++#include "ssa.h"
++#include "tree-dfa.h"
++#include "fold-const.h"
++#include "tree-inline.h"
++#include "stor-layout.h"
++#include "tree-into-ssa.h"
++#include "tree-cfg.h"
++#include "alloc-pool.h"
++#include "symbol-summary.h"
++#include "alloc-pool.h"
++#include "ipa-prop.h"
++#include "ipa-struct-reorg.h"
++#include "tree-eh.h"
++#include "bitmap.h"
++#include "cfgloop.h"
++#include "langhooks.h"
++#include "ipa-param-manipulation.h"
++#include "tree-ssa-live.h" /* For remove_unused_locals.  */
++#include "gimple-fold.h"
++#include "gimplify-me.h"
++
++#define VOID_POINTER_P(type)                                                   \
++  (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type)))
++
++/* Check whether in C language or LTO with only C language.  */
++bool
++lang_c_p (void)
++{
++  const char *language_string = lang_hooks.name;
++
++  if (!language_string)
++    {
++      return false;
++    }
++
++  if (lang_GNU_C ())
++    {
++      return true;
++    }
++  else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check
++    {
++      unsigned i = 0;
++      tree t = NULL_TREE;
++
++      FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
++	{
++	  language_string = TRANSLATION_UNIT_LANGUAGE (t);
++	  if (language_string == NULL || strncmp (language_string, "GNU C", 5)
++	      || (language_string[5] != '\0'
++		  && !(ISDIGIT (language_string[5]))))
++	    {
++	      return false;
++	    }
++	}
++      return true;
++    }
++  return false;
++}
++
++/* Build a binary operation and gimplify it.  Emit code before GSI.
++   Return the gimple_val holding the result.  */
++
++tree
++gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
++		 tree a, tree b)
++{
++  tree ret;
++
++  ret = fold_build2_loc (gimple_location (gsi_stmt (*gsi)), code, type, a, b);
++  return force_gimple_operand_gsi (gsi, ret, true, NULL, true, GSI_SAME_STMT);
++}
++
++/* Build a unary operation and gimplify it.  Emit code before GSI.
++   Return the gimple_val holding the result.  */
++
++tree
++gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
++		 tree a)
++{
++  tree ret;
++
++  ret = fold_build1_loc (gimple_location (gsi_stmt (*gsi)), code, type, a);
++  return force_gimple_operand_gsi (gsi, ret, true, NULL, true, GSI_SAME_STMT);
++}
++
++namespace {
++
++using namespace struct_reorg;
++using namespace struct_relayout;
++
++static void
++set_var_attributes (tree var)
++{
++  if (!var)
++    return;
++  gcc_assert (TREE_CODE (var) == VAR_DECL);
++
++  DECL_ARTIFICIAL (var) = 1;
++  DECL_EXTERNAL (var) = 0;
++  TREE_STATIC (var) = 1;
++  TREE_PUBLIC (var) = 0;
++  TREE_USED (var) = 1;
++  DECL_CONTEXT (var) = NULL_TREE;
++  TREE_THIS_VOLATILE (var) = 0;
++  TREE_ADDRESSABLE (var) = 0;
++  TREE_READONLY (var) = 0;
++  if (is_global_var (var))
++    set_decl_tls_model (var, TLS_MODEL_NONE);
++}
++
++/* Return true if TYPE is stdarg va_list type.  */
++
++static inline bool
++is_va_list_type (tree type)
++{
++  return TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (va_list_type_node);
++}
++
++static const char *
++get_type_name (tree type)
++{
++  const char *tname = NULL;
++
++  if (type == NULL)
++    {
++      return NULL;
++    }
++
++  if (TYPE_NAME (type) != NULL)
++    {
++      if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE)
++	{
++	  tname = IDENTIFIER_POINTER (TYPE_NAME (type));
++	}
++      else if (DECL_NAME (TYPE_NAME (type)) != NULL)
++	{
++	  tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
++	}
++    }
++  return tname;
++}
++
++/* Return the inner most type for arrays and pointers of TYPE.  */
++
++tree
++inner_type (tree type)
++{
++  while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
++    type = TREE_TYPE (type);
++  return type;
++}
++
++/*  Return true if TYPE is a type which struct reorg should handled.  */
++
++bool
++handled_type (tree type)
++{
++  type = inner_type (type);
++  if (TREE_CODE (type) == RECORD_TYPE)
++    return !is_va_list_type (type);
++  return false;
++}
++
++/* Get the number of pointer layers.  */
++
++int
++get_ptr_layers (tree expr)
++{
++  int layers = 0;
++  while (POINTER_TYPE_P (expr) || TREE_CODE (expr) == ARRAY_TYPE)
++    {
++      layers++;
++      expr = TREE_TYPE (expr);
++    }
++  return layers;
++}
++
++/* Comparison pointer layers.  */
++
++bool
++cmp_ptr_layers (tree a, tree b)
++{
++  return get_ptr_layers (a) == get_ptr_layers (b);
++}
++
++/*  Return true if the ssa_name comes from the void* parameter.  */
++
++bool
++is_from_void_ptr_parm (tree ssa_name)
++{
++  gcc_assert (TREE_CODE (ssa_name) == SSA_NAME);
++  tree var = SSA_NAME_VAR (ssa_name);
++  return (var && TREE_CODE (var) == PARM_DECL
++	  && VOID_POINTER_P (TREE_TYPE (ssa_name)));
++}
++
++/* Enum the struct layout optimize level,
++   which should be the same as the option -fstruct-reorg=.  */
++
++enum struct_layout_opt_level
++{
++  NONE = 0,
++  STRUCT_SPLIT = 1 << 0,
++  COMPLETE_STRUCT_RELAYOUT = 1 << 1,
++  STRUCT_REORDER_FIELDS = 1 << 2,
++  DEAD_FIELD_ELIMINATION = 1 << 3,
++  POINTER_COMPRESSION_SAFE = 1 << 4,
++  POINTER_COMPRESSION_UNSAFE = 1 << 5,
++  SEMI_RELAYOUT = 1 << 6
++};
++
++/* Defines the target pointer size of compressed pointer, which should be 8,
++   16, 32.  */
++
++static int compressed_size = 32;
++
++static bool
++is_result_of_mult (tree arg, tree *num, tree struct_size);
++bool
++isptrptr (tree type);
++void
++get_base (tree &base, tree expr);
++
++static unsigned int current_layout_opt_level;
++
++hash_map replace_type_map;
++hash_map semi_relayout_map;
++
++/* Return true if one of these types is created by struct-reorg.  */
++
++static bool
++is_replace_type (tree type1, tree type2)
++{
++  if (replace_type_map.is_empty ())
++    return false;
++  if (type1 == NULL_TREE || type2 == NULL_TREE)
++    return false;
++  tree *type_value = replace_type_map.get (type1);
++  if (type_value)
++    if (types_compatible_p (*type_value, type2))
++      return true;
++  type_value = replace_type_map.get (type2);
++  if (type_value)
++    if (types_compatible_p (*type_value, type1))
++      return true;
++  return false;
++}
++
++} // namespace
++
++namespace struct_reorg {
++
++hash_map > fields_to_finish;
++
++/* Constructor of srfunction. */
++
++srfunction::srfunction (cgraph_node *n)
++  : node (n), old (NULL), newnode (NULL), newf (NULL), is_safe_func (false)
++{}
++
++/* Add an ARG to the list of arguments for the function. */
++
++void
++srfunction::add_arg (srdecl *arg)
++{
++  args.safe_push (arg);
++}
++
++/* Dump the SRFUNCTION to the file FILE.  */
++
++void
++srfunction::dump (FILE *file)
++{
++  if (node)
++    {
++      fprintf (file, "function : ");
++      print_generic_expr (file, node->decl);
++      fprintf (file, " with arguments: ");
++      for (unsigned i = 0; i < args.length (); i++)
++	{
++	  if (i == 0)
++	    fprintf (file, "\n  ");
++	  else
++	    fprintf (file, "\n,  ");
++	  args[i]->dump (file);
++	}
++
++      fprintf (file, "\nuses globals: ");
++      for (unsigned i = 0; i < globals.length (); i++)
++	{
++	  fprintf (file, "\n  ");
++	  globals[i]->dump (file);
++	}
++
++      fprintf (file, "\ndecls: ");
++    }
++  else
++    fprintf (file, "globals : ");
++
++  for (unsigned i = 0; i < decls.length (); i++)
++    {
++      fprintf (file, "\n  ");
++      decls[i]->dump (file);
++    }
++}
++
++/* Simple dump the SRFUNCTION to the file FILE; used so it is not recursive.  */
++
++void
++srfunction::simple_dump (FILE *file)
++{
++  print_generic_expr (file, node->decl);
++}
++
++/* Constructor of FIELD. */
++
++srfield::srfield (tree field, srtype *base)
++  : offset (int_byte_position (field)), fieldtype (TREE_TYPE (field)),
++    fielddecl (field), base (base), type (NULL), clusternum (0),
++    field_access (EMPTY_FIELD)
++{
++  for (int i = 0; i < max_split; i++)
++    newfield[i] = NULL_TREE;
++}
++
++/* Constructor of TYPE. */
++
++srtype::srtype (tree type)
++  : type (type), chain_type (false), escapes (does_not_escape),
++    pc_gptr (NULL_TREE), visited (false), pc_candidate (false),
++    has_legal_alloc_num (false), has_alloc_array (0), semi_relayout (false),
++    bucket_parts (0)
++{
++  for (int i = 0; i < max_split; i++)
++    newtype[i] = NULL_TREE;
++
++  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
++    {
++      if (TREE_CODE (field) == FIELD_DECL)
++	{
++	  if (DECL_BIT_FIELD (field))
++	    {
++	      escapes = escape_bitfields;
++	      continue;
++	    }
++	  else if (!DECL_SIZE (field)
++		   || TREE_CODE (DECL_SIZE (field)) != INTEGER_CST)
++	    {
++	      escapes = escape_variable_sized_array;
++	      break;
++	    }
++	  srfield *t = new srfield (field, this);
++	  fields.safe_push (t);
++	}
++    }
++}
++
++/* Check it if all fields in the RECORD_TYPE are referenced.  */
++
++bool
++srtype::has_dead_field (void)
++{
++  bool may_dfe = false;
++  srfield *this_field;
++  unsigned i;
++  FOR_EACH_VEC_ELT (fields, i, this_field)
++    {
++      if (!(this_field->field_access & READ_FIELD))
++	{
++	  may_dfe = true;
++	  break;
++	}
++    }
++  return may_dfe;
++}
++
++/* Mark the type as escaping type E at statement STMT. */
++
++void
++srtype::mark_escape (escape_type e, gimple *stmt)
++{
++  /* Once the type has escaped, it should never
++     change back to non escaping. */
++  gcc_assert (e != does_not_escape);
++  if (has_escaped ())
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\nO type: ");
++	  simple_dump (dump_file);
++	  fprintf (dump_file, " has already escaped.");
++	  fprintf (dump_file, " old = \"%s\" ",
++		   escape_type_string[escapes - 1]);
++	  fprintf (dump_file, " new = \"%s\"\n", escape_type_string[e - 1]);
++	  if (stmt)
++	    print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "\n");
++	}
++      return;
++    }
++  escapes = e;
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nN type: ");
++      simple_dump (dump_file);
++      fprintf (dump_file, " new = \"%s\"\n", escape_reason ());
++      if (stmt)
++	print_gimple_stmt (dump_file, stmt, 0);
++      fprintf (dump_file, "\n");
++    }
++}
++
++/* Create a global header for compressed struct.  */
++
++void
++srtype::create_global_ptr_for_pc ()
++{
++  if (!pc_candidate || pc_gptr != NULL_TREE)
++    return;
++
++  const char *type_name = get_type_name (type);
++  gcc_assert (type_name != NULL);
++
++  char *gptr_name = concat (type_name, "_pc", NULL);
++  tree new_name = get_identifier (gptr_name);
++  tree new_type = build_pointer_type (newtype[0]);
++  tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type);
++  set_var_attributes (new_var);
++  pc_gptr = new_var;
++
++  varpool_node::add (pc_gptr);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    fprintf (dump_file,
++	     "\nType: %s has create global header for pointer"
++	     " compression: %s\n",
++	     type_name, gptr_name);
++
++  free (gptr_name);
++}
++
++/* Add FIELD to the list of fields that use this type.  */
++
++void
++srtype::add_field_site (srfield *field)
++{
++  field_sites.safe_push (field);
++}
++
++/* Constructor of DECL. */
++
++srdecl::srdecl (srtype *tp, tree decl, int argnum, tree orig_type)
++  : type (tp), decl (decl), func (NULL_TREE), argumentnum (argnum),
++    visited (false), orig_type (orig_type)
++{
++  if (TREE_CODE (decl) == SSA_NAME)
++    func = current_function_decl;
++  else if (!is_global_var (decl))
++    func = DECL_CONTEXT (decl);
++  for (int i = 0; i < max_split; i++)
++    newdecl[i] = NULL_TREE;
++}
++
++/* Find DECL in the function. */
++
++srdecl *
++srfunction::find_decl (tree decl)
++{
++  for (unsigned i = 0; i < decls.length (); i++)
++    if (decls[i]->decl == decl)
++      return decls[i];
++  return NULL;
++}
++
++/* Record DECL of the TYPE with argument num ARG. */
++
++srdecl *
++srfunction::record_decl (srtype *type, tree decl, int arg, tree orig_type)
++{
++  /* Search for the decl to see if it is already there.  */
++  srdecl *decl1 = find_decl (decl);
++
++  if (decl1)
++    {
++      /* Added the orig_type information.  */
++      if (!decl1->orig_type && orig_type && isptrptr (orig_type))
++	{
++	  decl1->orig_type = orig_type;
++	}
++      return decl1;
++    }
++
++  gcc_assert (type);
++
++  orig_type = isptrptr (TREE_TYPE (decl)) ? TREE_TYPE (decl) : orig_type;
++  decl1 = new srdecl (type, decl, arg, isptrptr (orig_type) ? orig_type : NULL);
++  decls.safe_push (decl1);
++  return decl1;
++}
++
++/* Find the field at OFF offset.  */
++
++srfield *
++srtype::find_field (unsigned HOST_WIDE_INT off)
++{
++  unsigned int i;
++  srfield *field;
++
++  /* FIXME: handle array/struct field inside the current struct. */
++  /* NOTE This does not need to be fixed to handle libquantum */
++  FOR_EACH_VEC_ELT (fields, i, field)
++    {
++      if (off == field->offset)
++	return field;
++    }
++  return NULL;
++}
++
++/* Add the function FN to the list of functions if it
++   is there not already. */
++
++void
++srtype::add_function (srfunction *fn)
++{
++  unsigned decluid;
++  unsigned i;
++  decluid = DECL_UID (fn->node->decl);
++
++  srfunction *fn1;
++  // Search for the decl to see if it is already there.
++  FOR_EACH_VEC_ELT (functions, i, fn1)
++    {
++      if (DECL_UID (fn1->node->decl) == decluid)
++	return;
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    fprintf (dump_file, "Recording new function: %u.\n", decluid);
++
++  functions.safe_push (fn);
++}
++
++/* Dump out the type structure to FILE. */
++
++void
++srtype::dump (FILE *f)
++{
++  unsigned int i;
++  srfield *field;
++  srfunction *fn;
++  sraccess *access;
++
++  if (chain_type)
++    fprintf (f, "chain decl ");
++
++  fprintf (f, "type : ");
++  print_generic_expr (f, type);
++  fprintf (f, "(%d) { ", TYPE_UID (type));
++  if (escapes != does_not_escape)
++    {
++      fprintf (f, "escapes = \"%s\"", escape_reason ());
++    }
++  fprintf (f, "\nfields = {\n");
++  FOR_EACH_VEC_ELT (fields, i, field)
++    {
++      field->dump (f);
++    }
++  fprintf (f, "}\n ");
++
++  fprintf (f, "\naccesses = {\n");
++  FOR_EACH_VEC_ELT (accesses, i, access)
++    {
++      access->dump (f);
++    }
++  fprintf (f, "}\n ");
++
++  fprintf (f, "\nfunctions = {\n");
++  FOR_EACH_VEC_ELT (functions, i, fn)
++    {
++      fn->simple_dump (f);
++    }
++  fprintf (f, "}\n");
++  fprintf (f, "}\n");
++}
++
++/* A simplified dump out the type structure to FILE. */
++
++void
++srtype::simple_dump (FILE *f)
++{
++  print_generic_expr (f, type);
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++    {
++      fprintf (f, "(%d)", TYPE_UID (type));
++    }
++}
++
++/* Analyze the type and decide what to be done with it. */
++
++void
++srtype::analyze (void)
++{
++  /* Chain decl types can't be split
++     so don't try. */
++  if (chain_type)
++    return;
++
++  /* If there is only one field then there is nothing
++     to be done. */
++  if (fields.length () == 1)
++    return;
++
++  /*  For now we unconditionally split only structures with 2 fields
++      into 2 different structures.  In future we intend to add profile
++      info and/or static heuristics to differentiate splitting process.  */
++  if (fields.length () == 2)
++    {
++      for (hash_map::iterator it = replace_type_map.begin ();
++	   it != replace_type_map.end (); ++it)
++	{
++	  if (types_compatible_p ((*it).second, this->type))
++	    return;
++	}
++      fields[1]->clusternum = 1;
++    }
++
++  /* Otherwise we do nothing.  */
++  if (fields.length () >= 3)
++    {
++      return;
++    }
++}
++
++/* Create the new fields for this field. */
++
++void
++srfield::create_new_fields (tree newtype[max_split], tree newfields[max_split],
++			    tree newlast[max_split])
++{
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++    {
++      create_new_optimized_fields (newtype, newfields, newlast);
++      return;
++    }
++
++  tree nt[max_split];
++
++  for (unsigned i = 0; i < max_split; i++)
++    nt[i] = NULL;
++
++  if (type == NULL)
++    nt[0] = fieldtype;
++  else
++    memcpy (nt, type->newtype, sizeof (type->newtype));
++
++  for (unsigned i = 0; i < max_split && nt[i] != NULL; i++)
++    {
++      tree field = make_node (FIELD_DECL);
++      if (nt[1] != NULL && DECL_NAME (fielddecl))
++	{
++	  const char *tname = IDENTIFIER_POINTER (DECL_NAME (fielddecl));
++	  char id[10];
++	  char *name;
++
++	  sprintf (id, "%d", i);
++	  name = concat (tname, ".reorg.", id, NULL);
++	  DECL_NAME (field) = get_identifier (name);
++	  free (name);
++	}
++      else
++	DECL_NAME (field) = DECL_NAME (fielddecl);
++
++      TREE_TYPE (field)
++	= reconstruct_complex_type (TREE_TYPE (fielddecl), nt[i]);
++      DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl);
++      SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl));
++      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl);
++      TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl);
++      DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl);
++      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (fielddecl);
++      DECL_CONTEXT (field) = newtype[clusternum];
++
++      if (newfields[clusternum] == NULL)
++	newfields[clusternum] = newlast[clusternum] = field;
++      else
++	{
++	  DECL_CHAIN (newlast[clusternum]) = field;
++	  newlast[clusternum] = field;
++	}
++      newfield[i] = field;
++    }
++}
++
++/* Reorder fields.  */
++
++void
++srfield::reorder_fields (tree newfields[max_split], tree newlast[max_split],
++			 tree &field)
++{
++  /* Reorder fields in descending.
++     newfields: always stores the first member of the chain
++	and with the largest size.
++     field: indicates the node to be inserted.  */
++  if (newfields[clusternum] == NULL)
++    {
++      newfields[clusternum] = field;
++      newlast[clusternum] = field;
++    }
++  else
++    {
++      tree tmp = newfields[clusternum];
++      if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (field)))
++	  > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tmp))))
++	{
++	  DECL_CHAIN (field) = tmp;
++	  newfields[clusternum] = field;
++	}
++      else
++	{
++	  while (
++	    DECL_CHAIN (tmp)
++	    && (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (field)))
++		<= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (DECL_CHAIN (tmp))))))
++	    {
++	      tmp = DECL_CHAIN (tmp);
++	    }
++
++	  /* now tmp size > field size
++	     insert field: tmp -> xx ==> tmp -> field -> xx.  */
++	  DECL_CHAIN (field) = DECL_CHAIN (tmp); // field -> xx
++	  DECL_CHAIN (tmp) = field;		 // tmp -> field
++	}
++    }
++}
++
++/* Create the new optimized fields for this field.
++   newtype[max_split]: srtype's member variable,
++   newfields[max_split]: created by create_new_type func,
++   newlast[max_split]: created by create_new_type func.  */
++
++void
++srfield::create_new_optimized_fields (tree newtype[max_split],
++				      tree newfields[max_split],
++				      tree newlast[max_split])
++{
++  /* newtype, corresponding to newtype[max_split] in srtype.  */
++  tree nt = NULL_TREE;
++  if (type == NULL)
++    {
++      /* Common var.  */
++      nt = fieldtype;
++    }
++  else
++    {
++      /* RECORD_TYPE var.  */
++      if (type->has_escaped ())
++	{
++	  nt = type->type;
++	}
++      else
++	{
++	  nt = type->newtype[0];
++	}
++    }
++  tree field = make_node (FIELD_DECL);
++
++  /* Used for recursive types.
++     fields_to_finish: hash_map in the format of "type: {fieldA, fieldB}",
++     key : indicates the original type,
++     value: fields that need to be updated to newtype.  */
++  if (nt == NULL)
++    {
++      nt = make_node (RECORD_TYPE);
++      auto_vec<tree> &fields
++	= fields_to_finish.get_or_insert (inner_type (type->type));
++      fields.safe_push (field);
++    }
++
++  if (type == NULL)
++    {
++      DECL_NAME (field) = DECL_NAME (fielddecl);
++      /* Common members do not need to be reconstructed.
++     Otherwise, int* -> int** or void* -> void**.  */
++      TREE_TYPE (field) = nt;
++      SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl));
++    }
++  else if (type->pc_candidate)
++    {
++      const char *old_name = IDENTIFIER_POINTER (DECL_NAME (fielddecl));
++      char *new_name = concat (old_name, "_pc", NULL);
++      DECL_NAME (field) = get_identifier (new_name);
++      free (new_name);
++      TREE_TYPE (field) = make_unsigned_type (compressed_size);
++      SET_DECL_ALIGN (field, compressed_size);
++    }
++  else
++    {
++      DECL_NAME (field) = DECL_NAME (fielddecl);
++      TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt);
++      SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl));
++    }
++
++  DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl);
++  DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl);
++  TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl);
++  DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl);
++  TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (fielddecl);
++  DECL_CONTEXT (field) = newtype[clusternum];
++
++  reorder_fields (newfields, newlast, field);
++
++  /* srfield member variable, which stores the new field decl.  */
++  newfield[0] = field;
++}
++
++/* Given a struct s whose fields has already reordered by size, we try to
++   combine fields less than 8 bytes together to 8 bytes.  Example:
++   struct s {
++     uint64_t a,
++     uint32_t b,
++     uint32_t c,
++     uint32_t d,
++     uint16_t e,
++     uint8_t f
++   }
++
++   We allocate memory for arrays of struct S, before semi-relayout, their
++   layout in memory is shown as below:
++   [a,b,c,d,e,f,padding;a,b,c,d,e,f,padding;...]
++
++   During semi-relayout, we put a number of structs into the same region called
++   bucket.  The number is determined by param relayout-bucket-capacity-level.
++   Using 1024 here as example.  After semi-relayout, the layout in a bucket is
++   shown as below:
++   part1 [a;a;a...]
++   part2 [b,c;b,c;b,c;...]
++   part3 [d,e,f,pad;d,e,f,pad;d,e,f,pad;...]
++
++   In the last bucket, if the amount of rest structs is less than the capacity
++   of a bucket, the rest of the allocated memory will be wasted as padding.  */
++
++unsigned
++srtype::calculate_bucket_size ()
++{
++  unsigned parts = 0;
++  unsigned bit_sum = 0;
++  unsigned relayout_offset = 0;
++  /* Currently, limit each 8 bytes with less than 2 fields.  */
++  unsigned curr_part_num = 0;
++  unsigned field_num = 0;
++  for (tree f = TYPE_FIELDS (newtype[0]); f; f = DECL_CHAIN (f))
++    {
++      unsigned size = TYPE_PRECISION (TREE_TYPE (f));
++      bit_sum += size;
++      field_num++;
++      if (++curr_part_num > 2 || bit_sum > 64)
++	{
++	  bit_sum = size;
++	  parts++;
++	  relayout_offset = relayout_part_size * parts;
++	  curr_part_num = 1;
++	}
++      else
++	{
++	  relayout_offset = relayout_part_size * parts + (bit_sum - size) / 8;
++	}
++      new_field_offsets.put (f, relayout_offset);
++    }
++  /* Do not relayout a struct with only one field after DFE.  */
++  if (field_num == 1)
++    return 0;
++  bucket_parts = ++parts;
++  return parts * relayout_part_size;
++}
++
++/* Create the new TYPE corresponding to THIS type. */
++
++bool
++srtype::create_new_type (void)
++{
++  /* If the type has been visited,
++     then return if a new type was
++     created or not. */
++  if (visited)
++    return has_new_type ();
++
++  visited = true;
++
++  if (escapes != does_not_escape)
++    {
++      newtype[0] = type;
++      return false;
++    }
++
++  bool createnewtype = false;
++  unsigned maxclusters = 0;
++
++  /* Create a new type for each field. */
++  for (unsigned i = 0; i < fields.length (); i++)
++    {
++      srfield *field = fields[i];
++      if (field->type)
++	createnewtype |= field->type->create_new_type ();
++      if (field->clusternum > maxclusters)
++	maxclusters = field->clusternum;
++    }
++
++  /* If the fields' types did have a change or
++     we are not splitting the struct into two clusters,
++     then just return false and don't change the type. */
++  if (!createnewtype && maxclusters == 0
++      && current_layout_opt_level < STRUCT_REORDER_FIELDS)
++    {
++      newtype[0] = type;
++      return false;
++    }
++
++  /* Should have at most max_split clusters.  */
++  gcc_assert (maxclusters < max_split);
++
++  /* Record the first member of the field chain.  */
++  tree newfields[max_split];
++  tree newlast[max_split];
++
++  maxclusters++;
++
++  const char *tname = get_type_name (type);
++
++  for (unsigned i = 0; i < maxclusters; i++)
++    {
++      newfields[i] = NULL_TREE;
++      newlast[i] = NULL_TREE;
++      newtype[i] = make_node (RECORD_TYPE);
++
++      char *name = NULL;
++      char id[10];
++      sprintf (id, "%d", i);
++      if (tname)
++	{
++	  name = concat (tname, ".reorg.", id, NULL);
++	  TYPE_NAME (newtype[i])
++	    = build_decl (UNKNOWN_LOCATION, TYPE_DECL, get_identifier (name),
++			  newtype[i]);
++	  free (name);
++	}
++    }
++
++  for (unsigned i = 0; i < fields.length (); i++)
++    {
++      srfield *f = fields[i];
++      if (current_layout_opt_level & DEAD_FIELD_ELIMINATION
++	  && !(f->field_access & READ_FIELD))
++	continue;
++      f->create_new_fields (newtype, newfields, newlast);
++    }
++
++  /* No reason to warn about these structs since the warning would
++     have happened already.  */
++  int save_warn_padded = warn_padded;
++  warn_padded = 0;
++
++  for (unsigned i = 0; i < maxclusters; i++)
++    {
++      TYPE_FIELDS (newtype[i]) = newfields[i];
++      layout_type (newtype[i]);
++      if (TYPE_NAME (newtype[i]) != NULL)
++	{
++	  layout_decl (TYPE_NAME (newtype[i]), 0);
++	}
++    }
++
++  warn_padded = save_warn_padded;
++
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++      && replace_type_map.get (this->newtype[0]) == NULL)
++    replace_type_map.put (this->newtype[0], this->type);
++  if (dump_file)
++    {
++      if (current_layout_opt_level & DEAD_FIELD_ELIMINATION
++	  && has_dead_field ())
++	fprintf (dump_file, "Dead field elimination.\n");
++    }
++
++  if (pc_candidate && pc_gptr == NULL_TREE)
++    create_global_ptr_for_pc ();
++
++  if (semi_relayout)
++    {
++      bucket_size = calculate_bucket_size ();
++      if (bucket_size == 0)
++	return false;
++      if (semi_relayout_map.get (this->newtype[0]) == NULL)
++	semi_relayout_map.put (this->newtype[0], this->type);
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "Created %d types:\n", maxclusters);
++      for (unsigned i = 0; i < maxclusters; i++)
++	{
++	  print_generic_expr (dump_file, newtype[i]);
++	  fprintf (dump_file, "(%d)", TYPE_UID (newtype[i]));
++	  fprintf (dump_file, "\n");
++	}
++    }
++
++  return true;
++}
++
++/* Helper function to copy some attributes from ORIG_DECL to the NEW_DECL. */
++
++static inline void
++copy_var_attributes (tree new_decl, tree orig_decl)
++{
++  DECL_ARTIFICIAL (new_decl) = 1;
++  DECL_EXTERNAL (new_decl) = DECL_EXTERNAL (orig_decl);
++  TREE_STATIC (new_decl) = TREE_STATIC (orig_decl);
++  TREE_PUBLIC (new_decl) = TREE_PUBLIC (orig_decl);
++  TREE_USED (new_decl) = TREE_USED (orig_decl);
++  DECL_CONTEXT (new_decl) = DECL_CONTEXT (orig_decl);
++  TREE_THIS_VOLATILE (new_decl) = TREE_THIS_VOLATILE (orig_decl);
++  TREE_ADDRESSABLE (new_decl) = TREE_ADDRESSABLE (orig_decl);
++  TREE_READONLY (new_decl) = TREE_READONLY (orig_decl);
++  if (is_global_var (orig_decl))
++    set_decl_tls_model (new_decl, DECL_TLS_MODEL (orig_decl));
++}
++
++/* Create all of the new decls (SSA_NAMES included) for THIS function. */
++
++void
++srfunction::create_new_decls (void)
++{
++  /* If this function has been cloned, we don't need to
++     create the new decls. */
++  if (newnode)
++    return;
++
++  if (node)
++    set_cfun (DECL_STRUCT_FUNCTION (node->decl));
++
++  for (unsigned i = 0; i < decls.length (); i++)
++    {
++      srdecl *decl = decls[i];
++      srtype *type = decl->type;
++      /* If the type of the decl does not change,
++     then don't create a new decl. */
++      if (!type->has_new_type ())
++	{
++	  decl->newdecl[0] = decl->decl;
++	  continue;
++	}
++
++      /* Handle SSA_NAMEs. */
++      if (TREE_CODE (decl->decl) == SSA_NAME)
++	{
++	  tree newtype1[max_split];
++	  tree inner = SSA_NAME_VAR (decl->decl);
++	  tree newinner[max_split];
++	  memset (newinner, 0, sizeof (newinner));
++	  for (unsigned j = 0; j < max_split && type->newtype[j]; j++)
++	    {
++	      newtype1[j]
++		= reconstruct_complex_type (isptrptr (decls[i]->orig_type)
++					      ? decls[i]->orig_type
++					      : TREE_TYPE (decls[i]->decl),
++					    type->newtype[j]);
++	    }
++	  if (inner)
++	    {
++	      srdecl *in = find_decl (inner);
++	      gcc_assert (in);
++	      memcpy (newinner, in->newdecl, sizeof (newinner));
++	    }
++	  tree od = decls[i]->decl;
++	  /* Create the new ssa names and copy some attributes from the old one.
++	   */
++	  for (unsigned j = 0; j < max_split && type->newtype[j]; j++)
++	    {
++	      tree nd = make_ssa_name (newinner[j] ? newinner[j] : newtype1[j]);
++	      decl->newdecl[j] = nd;
++	      /* If the old decl was a default definition, handle it specially. */
++	      if (SSA_NAME_IS_DEFAULT_DEF (od))
++		{
++		  SSA_NAME_IS_DEFAULT_DEF (nd) = true;
++		  SSA_NAME_DEF_STMT (nd) = gimple_build_nop ();
++
++		  /* Set the default definition for the ssaname if needed. */
++		  if (inner)
++		    {
++		      gcc_assert (newinner[j]);
++		      set_ssa_default_def (cfun, newinner[j], nd);
++		    }
++		}
++	      SSA_NAME_OCCURS_IN_ABNORMAL_PHI (nd)
++		= SSA_NAME_OCCURS_IN_ABNORMAL_PHI (od);
++	      statistics_counter_event (cfun, "Create new ssa_name", 1);
++	    }
++	}
++      else if (TREE_CODE (decls[i]->decl) == VAR_DECL)
++	{
++	  tree orig_var = decl->decl;
++	  const char *tname = NULL;
++	  if (DECL_NAME (orig_var))
++	    tname = IDENTIFIER_POINTER (DECL_NAME (orig_var));
++	  for (unsigned j = 0; j < max_split && type->newtype[j]; j++)
++	    {
++	      tree new_name = NULL;
++	      char *name = NULL;
++	      char id[10];
++	      sprintf (id, "%d", j);
++	      if (tname)
++		{
++		  name = concat (tname, ".reorg.", id, NULL);
++		  new_name = get_identifier (name);
++		  free (name);
++		}
++	      tree newtype1 = reconstruct_complex_type (TREE_TYPE (orig_var),
++							type->newtype[j]);
++	      decl->newdecl[j] = build_decl (DECL_SOURCE_LOCATION (orig_var),
++					     VAR_DECL, new_name, newtype1);
++	      copy_var_attributes (decl->newdecl[j], orig_var);
++	      if (!is_global_var (orig_var))
++		add_local_decl (cfun, decl->newdecl[j]);
++	      else
++		varpool_node::add (decl->newdecl[j]);
++	      statistics_counter_event (cfun, "Create new var decl", 1);
++	    }
++	}
++      /* Parameter decls are already handled in create_new_functions. */
++      else if (TREE_CODE (decls[i]->decl) == PARM_DECL)
++	;
++      else
++	internal_error ("Unhandled decl type stored");
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "Created New decls for decl:\n");
++	  decls[i]->dump (dump_file);
++	  fprintf (dump_file, "\n");
++	  for (unsigned j = 0; j < max_split && decls[i]->newdecl[j]; j++)
++	    {
++	      print_generic_expr (dump_file, decls[i]->newdecl[j]);
++	      fprintf (dump_file, "\n");
++	    }
++	  fprintf (dump_file, "\n");
++	}
++    }
++
++  set_cfun (NULL);
++}
++
++/* Dump out the field structure to FILE. */
++
++void
++srfield::dump (FILE *f)
++{
++  fprintf (f, "field (%d) { ", DECL_UID (fielddecl));
++  fprintf (f, "base = ");
++  base->simple_dump (f);
++  fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset);
++  fprintf (f, ", type = ");
++  print_generic_expr (f, fieldtype);
++  fprintf (f, "}\n");
++}
++
++/* A simplified dump of the field structure to FILE. */
++
++void
++srfield::simple_dump (FILE *f)
++{
++  if (fielddecl)
++    {
++      fprintf (f, "field (%d)", DECL_UID (fielddecl));
++    }
++}
++
++/* Dump out the access structure to FILE. */
++
++void
++sraccess::dump (FILE *f)
++{
++  fprintf (f, "access { ");
++  fprintf (f, "type = '(");
++  type->simple_dump (f);
++  fprintf (f, ")'");
++  if (field)
++    {
++      fprintf (f, ", field = '(");
++      field->simple_dump (f);
++      fprintf (f, ")'");
++    }
++  else
++    fprintf (f, ", whole type");
++  fprintf (f, " in function: %s/%d", node->name (), node->order);
++  fprintf (f, ", stmt:\n");
++  print_gimple_stmt (f, stmt, 0);
++  fprintf (f, "}\n");
++}
++
++/* Dump out the decl structure to FILE. */
++
++void
++srdecl::dump (FILE *file)
++{
++  if (!func)
++    fprintf (file, "global ");
++  if (argumentnum != -1)
++    fprintf (file, "argument(%d) ", argumentnum);
++  fprintf (file, "decl: ");
++  print_generic_expr (file, decl);
++  fprintf (file, " type: ");
++  type->simple_dump (file);
++}
++
++} // namespace struct_reorg
++
++namespace struct_relayout {
++
++/* Complete Structure Relayout Optimization.
++   It reorganizes all structure members, and puts same member together.
++   struct s {
++     long a;
++     int b;
++     struct s* c;
++   };
++   Array looks like
++     abcabcabcabc...
++   will be transformed to
++     aaaa...bbbb...cccc...
++*/
++
++#define GPTR_SIZE(i) TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (gptr[i])))
++
++unsigned transformed = 0;
++
++unsigned
++csrtype::calculate_field_num (tree field_offset)
++{
++  if (field_offset == NULL)
++    {
++      return 0;
++    }
++
++  HOST_WIDE_INT off = int_byte_position (field_offset);
++  unsigned i = 1;
++  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
++    {
++      if (off == int_byte_position (field))
++	{
++	  return i;
++	}
++      i++;
++    }
++  return 0;
++}
++
++void
++csrtype::init_type_info (void)
++{
++  if (!type)
++    {
++      return;
++    }
++  new_size = old_size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
++
++  /* Close enough to pad to improve performance.
++     33~63 should pad to 64 but 33~48 (first half) are too far away, and
++     65~127 should pad to 128 but 65~70 (first half) are too far away.  */
++  if (old_size > 48 && old_size < 64)
++    {
++      new_size = 64;
++    }
++  if (old_size > 70 && old_size < 128)
++    {
++      new_size = 128;
++    }
++
++  /* For performance reasons, only allow structure size
++     that is a power of 2 and not too big.  */
++  if (new_size != 1 && new_size != 2 && new_size != 4 && new_size != 8
++      && new_size != 16 && new_size != 32 && new_size != 64 && new_size != 128)
++    {
++      new_size = 0;
++      field_count = 0;
++      return;
++    }
++
++  unsigned i = 0;
++  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
++    {
++      if (TREE_CODE (field) == FIELD_DECL)
++	{
++	  i++;
++	}
++    }
++  field_count = i;
++
++  struct_size = build_int_cstu (TREE_TYPE (TYPE_SIZE_UNIT (type)), new_size);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "Type: ");
++      print_generic_expr (dump_file, type);
++      fprintf (dump_file, " has %d members.\n", field_count);
++      fprintf (dump_file, "Modify struct size from %ld to %ld.\n", old_size,
++	       new_size);
++    }
++}
++
++} // namespace struct_relayout
++
++namespace {
++
++/* Structure definition for ipa_struct_reorg and ipa_struct_relayout.  */
++
++struct ipa_struct_reorg
++{
++public:
++  // Constructors
++  ipa_struct_reorg (void) : current_function (NULL), done_recording (false) {}
++
++  unsigned execute (unsigned int opt);
++  void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL);
++
++  // fields
++  auto_vec_del<srtype> types;
++  auto_vec_del<srfunction> functions;
++  srglobal globals;
++  srfunction *current_function;
++  hash_set<cgraph_node *> safe_functions;
++
++  bool done_recording;
++
++  void dump_types (FILE *f);
++  void dump_newtypes (FILE *f);
++  void dump_types_escaped (FILE *f);
++  void dump_functions (FILE *f);
++  void record_accesses (void);
++  void detect_cycles (void);
++  bool walk_field_for_cycles (srtype *);
++  void prune_escaped_types (void);
++  void propagate_escape (void);
++  void propagate_escape_via_original (void);
++  void propagate_escape_via_empty_with_no_original (void);
++  void analyze_types (void);
++  void clear_visited (void);
++  bool create_new_types (void);
++  void create_new_decls (void);
++  srdecl *find_decl (tree);
++  void create_new_functions (void);
++  void create_new_args (cgraph_node *new_node);
++  unsigned rewrite_functions (void);
++  srdecl *record_var (tree decl, escape_type escapes = does_not_escape,
++		      int arg = -1);
++  void record_safe_func_with_void_ptr_parm (void);
++  srfunction *record_function (cgraph_node *node);
++  srfunction *find_function (cgraph_node *node);
++  void record_field_type (tree field, srtype *base_srtype);
++  void record_struct_field_types (tree base_type, srtype *base_srtype);
++  srtype *record_type (tree type);
++  void process_union (tree type);
++  srtype *find_type (tree type);
++  void maybe_record_stmt (cgraph_node *, gimple *);
++  void maybe_record_assign (cgraph_node *, gassign *);
++  void maybe_record_call (cgraph_node *, gcall *);
++  void maybe_record_allocation_site (cgraph_node *, gimple *);
++  void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt);
++  void mark_expr_escape (tree, escape_type, gimple *stmt);
++  bool handled_allocation_stmt (gimple *stmt);
++  tree allocate_size (srtype *t, srdecl *decl, gimple *stmt);
++
++  void mark_decls_in_as_not_needed (tree fn);
++
++  bool rewrite_stmt (gimple *, gimple_stmt_iterator *);
++  bool rewrite_assign (gassign *, gimple_stmt_iterator *);
++  bool rewrite_call (gcall *, gimple_stmt_iterator *);
++  bool rewrite_cond (gcond *, gimple_stmt_iterator *);
++  bool rewrite_debug (gimple *, gimple_stmt_iterator *);
++  bool rewrite_phi (gphi *);
++  bool rewrite_expr (tree expr, tree newexpr[max_split],
++		     bool ignore_missing_decl = false);
++  bool rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhs[max_split],
++			tree newrhs[max_split]);
++  bool get_type_field (tree expr, tree &base, bool &indirect, srtype *&type,
++		       srfield *&field, bool &realpart, bool &imagpart,
++		       bool &address, bool &escape_from_base,
++		       bool should_create = false, bool can_escape = false);
++  bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t);
++
++  void check_alloc_num (gimple *stmt, srtype *type, bool ptrptr);
++  void check_definition_assign (srdecl *decl, vec<srdecl *> &worklist);
++  void check_definition_call (srdecl *decl, vec<srdecl *> &worklist);
++  void check_definition (srdecl *decl, vec<srdecl *> &);
++  void check_uses (srdecl *decl, vec<srdecl *> &);
++  void check_use (srdecl *decl, gimple *stmt, vec<srdecl *> &);
++  void check_type_and_push (tree newdecl, srdecl *decl,
++			    vec<srdecl *> &worklist, gimple *stmt);
++  void check_other_side (srdecl *decl, tree other, gimple *stmt,
++			 vec<srdecl *> &worklist);
++  void check_ptr_layers (tree a_expr, tree b_expr, gimple *stmt);
++
++  void find_vars (gimple *stmt);
++  void find_var (tree expr, gimple *stmt);
++  void mark_types_asm (gasm *astmt);
++
++  bool has_rewritten_type (srfunction *);
++  void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt);
++  unsigned execute_struct_relayout (void);
++  bool remove_dead_field_stmt (tree lhs);
++
++  // Pointer compression methods:
++  void check_and_prune_struct_for_pointer_compression (void);
++  void try_rewrite_with_pointer_compression (gassign *, gimple_stmt_iterator *,
++					     tree, tree, tree &, tree &);
++  bool safe_void_cmp_p (tree, srtype *);
++  bool pc_candidate_st_type_p (tree);
++  bool pc_candidate_tree_p (tree);
++  bool pc_type_conversion_candidate_p (tree);
++  bool pc_direct_rewrite_chance_p (tree, tree &);
++  bool pc_simplify_chance_for_compress_p (gassign *, tree);
++  bool compress_candidate_without_check (gimple_stmt_iterator *, tree, tree &);
++  bool compress_candidate_with_check (gimple_stmt_iterator *, tree, tree &);
++  bool compress_candidate (gassign *, gimple_stmt_iterator *, tree, tree &);
++  bool decompress_candidate_without_check (gimple_stmt_iterator *, tree, tree,
++					   tree &, tree &);
++  bool decompress_candidate_with_check (gimple_stmt_iterator *, tree, tree &);
++  bool decompress_candidate (gimple_stmt_iterator *, tree, tree, tree &,
++			     tree &);
++  srtype *get_compression_candidate_type (tree);
++  tree compress_ptr_to_offset (tree, srtype *, gimple_stmt_iterator *);
++  tree decompress_offset_to_ptr (tree, srtype *, gimple_stmt_iterator *);
++  basic_block create_bb_for_compress_candidate (basic_block, tree, srtype *,
++						tree &);
++  basic_block create_bb_for_decompress_candidate (basic_block, tree, srtype *,
++						  tree &);
++  basic_block create_bb_for_compress_nullptr (basic_block, tree &);
++  basic_block create_bb_for_decompress_nullptr (basic_block, tree, tree &);
++
++  // Semi-relayout methods:
++  bool is_semi_relayout_candidate (tree);
++  srtype *get_semi_relayout_candidate_type (tree);
++  void check_and_prune_struct_for_semi_relayout (void);
++  tree rewrite_pointer_diff (gimple_stmt_iterator *, tree, tree, srtype *);
++  tree rewrite_pointer_plus_integer (gimple *, gimple_stmt_iterator *, tree,
++				     tree, srtype *);
++  tree build_div_expr (gimple_stmt_iterator *, tree, tree);
++  tree get_true_pointer_base (gimple_stmt_iterator *, tree, srtype *);
++  tree get_real_allocated_ptr (tree, gimple_stmt_iterator *);
++  tree set_ptr_for_use (tree, gimple_stmt_iterator *);
++  void record_allocated_size (tree, gimple_stmt_iterator *, tree);
++  tree read_allocated_size (tree, gimple_stmt_iterator *);
++  gimple *create_aligned_alloc (gimple_stmt_iterator *, srtype *, tree, tree &);
++  void create_memset_zero (tree, gimple_stmt_iterator *, tree);
++  void create_memcpy (tree, tree, tree, gimple_stmt_iterator *);
++  void create_free (tree, gimple_stmt_iterator *);
++  void copy_to_lhs (tree, tree, gimple_stmt_iterator *);
++  srtype *get_relayout_candidate_type (tree);
++  long unsigned int get_true_field_offset (srfield *, srtype *);
++  tree rewrite_address (tree, srfield *, srtype *, gimple_stmt_iterator *);
++  bool check_sr_copy (gimple *);
++  void relayout_field_copy (gimple_stmt_iterator *, gimple *, tree, tree,
++			    tree &, tree &);
++  void do_semi_relayout (gimple_stmt_iterator *, gimple *, tree &, tree &);
++};
++
++struct ipa_struct_relayout
++{
++public:
++  // fields
++  tree gptr[max_relayout_split + 1];
++  csrtype ctype;
++  ipa_struct_reorg *sr;
++  cgraph_node *current_node;
++
++  // Constructors
++  ipa_struct_relayout (tree type, ipa_struct_reorg *sr_)
++  {
++    ctype.type = type;
++    sr = sr_;
++    current_node = NULL;
++    for (int i = 0; i < max_relayout_split + 1; i++)
++      {
++	gptr[i] = NULL;
++      }
++  }
++
++  // Methods
++  tree create_new_vars (tree type, const char *name);
++  void create_global_ptrs (void);
++  unsigned int rewrite (void);
++  void rewrite_stmt_in_function (void);
++  bool rewrite_debug (gimple *stmt, gimple_stmt_iterator *gsi);
++  bool rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi);
++  bool handled_allocation_stmt (gcall *stmt);
++  void init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi);
++  bool check_call_uses (gcall *stmt);
++  bool rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi);
++  tree create_ssa (tree node, gimple_stmt_iterator *gsi);
++  bool is_candidate (tree xhs);
++  tree rewrite_address (tree xhs, gimple_stmt_iterator *gsi);
++  tree rewrite_offset (tree offset, HOST_WIDE_INT num);
++  bool rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi);
++  bool maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi,
++			  HOST_WIDE_INT &times);
++  unsigned int execute (void);
++};
++
++} // namespace
++
++namespace {
++
++/* Methods for ipa_struct_relayout.  */
++
++tree
++ipa_struct_relayout::create_new_vars (tree type, const char *name)
++{
++  gcc_assert (type);
++  tree new_type = build_pointer_type (type);
++
++  tree new_name = NULL;
++  if (name)
++    {
++      new_name = get_identifier (name);
++    }
++
++  tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type);
++
++  /* set new_var's attributes.  */
++  set_var_attributes (new_var);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "Created new var: ");
++      print_generic_expr (dump_file, new_var);
++      fprintf (dump_file, "\n");
++    }
++  return new_var;
++}
++
++void
++ipa_struct_relayout::create_global_ptrs (void)
++{
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "Create global gptrs: {\n");
++    }
++
++  char *gptr0_name = NULL;
++  const char *type_name = get_type_name (ctype.type);
++
++  if (type_name)
++    {
++      gptr0_name = concat (type_name, "_gptr0", NULL);
++    }
++  tree var_gptr0 = create_new_vars (ctype.type, gptr0_name);
++  gptr[0] = var_gptr0;
++  varpool_node::add (var_gptr0);
++
++  unsigned i = 1;
++  for (tree field = TYPE_FIELDS (ctype.type); field; field = DECL_CHAIN (field))
++    {
++      if (TREE_CODE (field) == FIELD_DECL)
++	{
++	  tree type = TREE_TYPE (field);
++
++	  char *name = NULL;
++	  char id[10] = {0};
++	  sprintf (id, "%d", i);
++	  const char *decl_name = IDENTIFIER_POINTER (DECL_NAME (field));
++
++	  if (type_name && decl_name)
++	    {
++	      name = concat (type_name, "_", decl_name, "_gptr", id, NULL);
++	    }
++	  tree var = create_new_vars (type, name);
++
++	  gptr[i] = var;
++	  varpool_node::add (var);
++	  i++;
++	}
++    }
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nTotally create %d gptrs. }\n\n", i);
++    }
++  gcc_assert (ctype.field_count == i - 1);
++}
++
++void
++ipa_struct_relayout::rewrite_stmt_in_function (void)
++{
++  gcc_assert (cfun);
++
++  basic_block bb = NULL;
++  gimple_stmt_iterator si;
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      for (si = gsi_start_bb (bb); !gsi_end_p (si);)
++	{
++	  gimple *stmt = gsi_stmt (si);
++	  if (rewrite_stmt (stmt, &si))
++	    {
++	      gsi_remove (&si, true);
++	    }
++	  else
++	    {
++	      gsi_next (&si);
++	    }
++	}
++    }
++
++  /* Debug statements need to happen after all other statements
++     have changed.  */
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      for (si = gsi_start_bb (bb); !gsi_end_p (si);)
++	{
++	  gimple *stmt = gsi_stmt (si);
++	  if (gimple_code (stmt) == GIMPLE_DEBUG && rewrite_debug (stmt, &si))
++	    {
++	      gsi_remove (&si, true);
++	    }
++	  else
++	    {
++	      gsi_next (&si);
++	    }
++	}
++    }
++}
++
++unsigned int
++ipa_struct_relayout::rewrite (void)
++{
++  cgraph_node *cnode = NULL;
++  function *fn = NULL;
++  FOR_EACH_FUNCTION (cnode)
++    {
++      if (!cnode->real_symbol_p () || !cnode->has_gimple_body_p ())
++	{
++	  continue;
++	}
++      if (cnode->definition)
++	{
++	  fn = DECL_STRUCT_FUNCTION (cnode->decl);
++	  if (fn == NULL)
++	    {
++	      continue;
++	    }
++
++	  current_node = cnode;
++	  push_cfun (fn);
++
++	  rewrite_stmt_in_function ();
++	  update_ssa (TODO_update_ssa_only_virtuals);
++	  if (flag_tree_pta)
++	    {
++	      compute_may_aliases ();
++	    }
++
++	  remove_unused_locals ();
++
++	  cgraph_edge::rebuild_edges ();
++
++	  free_dominance_info (CDI_DOMINATORS);
++
++	  pop_cfun ();
++	  current_node = NULL;
++	}
++    }
++  return TODO_verify_all;
++}
++
++bool
++ipa_struct_relayout::rewrite_debug (gimple *stmt, gimple_stmt_iterator *gsi)
++{
++  /* Delete debug gimple now.  */
++  return true;
++}
++
++bool
++ipa_struct_relayout::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi)
++{
++  switch (gimple_code (stmt))
++    {
++    case GIMPLE_ASSIGN:
++      return rewrite_assign (as_a <gassign *> (stmt), gsi);
++    case GIMPLE_CALL:
++      return rewrite_call (as_a <gcall *> (stmt), gsi);
++    default:
++      break;
++    }
++  return false;
++}
++
++bool
++ipa_struct_relayout::handled_allocation_stmt (gcall *stmt)
++{
++  if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))
++    {
++      return true;
++    }
++  return false;
++}
++
++void
++ipa_struct_relayout::init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi)
++{
++  gcc_assert (handled_allocation_stmt (stmt));
++
++  tree lhs = gimple_call_lhs (stmt);
++
++  /* Case that gimple is at the end of bb.  */
++  if (gsi_one_before_end_p (*gsi))
++    {
++      gassign *gptr0 = gimple_build_assign (gptr[0], lhs);
++      gsi_insert_after (gsi, gptr0, GSI_SAME_STMT);
++    }
++  gsi_next (gsi);
++
++  /* Emit gimple gptr0 = _X and gptr1 = _X.  */
++  gassign *gptr0 = gimple_build_assign (gptr[0], lhs);
++  gsi_insert_before (gsi, gptr0, GSI_SAME_STMT);
++  gassign *gptr1 = gimple_build_assign (gptr[1], lhs);
++  gsi_insert_before (gsi, gptr1, GSI_SAME_STMT);
++
++  /* Emit gimple gptr_[i] = gptr_[i-1] + _Y[gap].  */
++  for (unsigned i = 2; i <= ctype.field_count; i++)
++    {
++      gimple *new_stmt = NULL;
++      tree gptr_i_prev_ssa = create_ssa (gptr[i - 1], gsi);
++      tree gptr_i_ssa = make_ssa_name (TREE_TYPE (gptr[i - 1]));
++
++      /* Emit gimple _Y[gap] = N * sizeof (member).  */
++      tree member_gap
++	= gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node,
++			   gimple_call_arg (stmt, 0), GPTR_SIZE (i - 1));
++
++      new_stmt = gimple_build_assign (gptr_i_ssa, POINTER_PLUS_EXPR,
++				      gptr_i_prev_ssa, member_gap);
++      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
++
++      gassign *gptr_i = gimple_build_assign (gptr[i], gptr_i_ssa);
++      gsi_insert_before (gsi, gptr_i, GSI_SAME_STMT);
++    }
++  gsi_prev (gsi);
++}
++
++bool
++ipa_struct_relayout::check_call_uses (gcall *stmt)
++{
++  gcc_assert (current_node);
++  srfunction *fn = sr->find_function (current_node);
++  tree lhs = gimple_call_lhs (stmt);
++
++  if (fn == NULL)
++    {
++      return false;
++    }
++
++  srdecl *d = fn->find_decl (lhs);
++  if (d == NULL)
++    {
++      return false;
++    }
++  if (types_compatible_p (d->type->type, ctype.type))
++    {
++      return true;
++    }
++
++  return false;
++}
++
++bool
++ipa_struct_relayout::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi)
++{
++  if (handled_allocation_stmt (stmt))
++    {
++      /* Rewrite stmt _X = calloc (N, sizeof (struct)).  */
++      tree size = gimple_call_arg (stmt, 1);
++      if (TREE_CODE (size) != INTEGER_CST)
++	{
++	  return false;
++	}
++      if (tree_to_uhwi (size) != ctype.old_size)
++	{
++	  return false;
++	}
++      if (!check_call_uses (stmt))
++	{
++	  return false;
++	}
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "Rewrite allocation call:\n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "to\n");
++	}
++
++      /* Modify sizeof (struct).  */
++      gimple_call_set_arg (stmt, 1, ctype.struct_size);
++      update_stmt (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "\n");
++	}
++
++      init_global_ptrs (stmt, gsi);
++    }
++  return false;
++}
++
++tree
++ipa_struct_relayout::create_ssa (tree node, gimple_stmt_iterator *gsi)
++{
++  gcc_assert (TREE_CODE (node) == VAR_DECL);
++  tree node_ssa = make_ssa_name (TREE_TYPE (node));
++  gassign *stmt = gimple_build_assign (node_ssa, node);
++  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
++  return node_ssa;
++}
++
++bool
++ipa_struct_relayout::is_candidate (tree xhs)
++{
++  if (TREE_CODE (xhs) != COMPONENT_REF)
++    {
++      return false;
++    }
++  tree mem = TREE_OPERAND (xhs, 0);
++  if (TREE_CODE (mem) == MEM_REF)
++    {
++      tree type = TREE_TYPE (mem);
++      if (types_compatible_p (type, ctype.type))
++	{
++	  return true;
++	}
++    }
++  return false;
++}
++
++tree
++ipa_struct_relayout::rewrite_address (tree xhs, gimple_stmt_iterator *gsi)
++{
++  tree mem_ref = TREE_OPERAND (xhs, 0);
++  tree pointer = TREE_OPERAND (mem_ref, 0);
++  tree pointer_offset = TREE_OPERAND (mem_ref, 1);
++  tree field = TREE_OPERAND (xhs, 1);
++
++  tree pointer_ssa = fold_convert (long_unsigned_type_node, pointer);
++  tree gptr0_ssa = fold_convert (long_unsigned_type_node, gptr[0]);
++
++  /* Emit gimple _X1 = ptr - gptr0.  */
++  tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node,
++				pointer_ssa, gptr0_ssa);
++
++  /* Emit gimple _X2 = _X1 / sizeof (struct).  */
++  tree step2 = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_unsigned_type_node,
++				step1, ctype.struct_size);
++
++  unsigned field_num = ctype.calculate_field_num (field);
++  gcc_assert (field_num > 0 && field_num <= ctype.field_count);
++
++  /* Emit gimple _X3 = _X2 * sizeof (member).  */
++  tree step3 = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, step2,
++				GPTR_SIZE (field_num));
++
++  /* Emit gimple _X4 = gptr[I].  */
++  tree gptr_field_ssa = create_ssa (gptr[field_num], gsi);
++  tree new_address = make_ssa_name (TREE_TYPE (gptr[field_num]));
++  gassign *new_stmt = gimple_build_assign (new_address, POINTER_PLUS_EXPR,
++					   gptr_field_ssa, step3);
++  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
++
++  /* MEM_REF with nonzero offset like
++       MEM[ptr + sizeof (struct)] = 0B
++     should be transformed to
++       MEM[gptr + sizeof (member)] = 0B
++  */
++  HOST_WIDE_INT size
++    = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_address))));
++  tree new_size = rewrite_offset (pointer_offset, size);
++  if (new_size)
++    {
++      TREE_OPERAND (mem_ref, 1) = new_size;
++    }
++
++  /* Update mem_ref pointer.  */
++  TREE_OPERAND (mem_ref, 0) = new_address;
++
++  /* Update mem_ref TREE_TYPE.  */
++  TREE_TYPE (mem_ref) = TREE_TYPE (TREE_TYPE (new_address));
++
++  return mem_ref;
++}
++
++tree
++ipa_struct_relayout::rewrite_offset (tree offset, HOST_WIDE_INT num)
++{
++  if (TREE_CODE (offset) == INTEGER_CST)
++    {
++      bool sign = false;
++      HOST_WIDE_INT off = TREE_INT_CST_LOW (offset);
++      if (off == 0)
++	{
++	  return NULL;
++	}
++      if (off < 0)
++	{
++	  off = -off;
++	  sign = true;
++	}
++      if (off % ctype.old_size == 0)
++	{
++	  HOST_WIDE_INT times = off / ctype.old_size;
++	  times = sign ? -times : times;
++	  return build_int_cst (TREE_TYPE (offset), num * times);
++	}
++    }
++  return NULL;
++}
++
++#define REWRITE_ASSIGN_TREE_IN_STMT(node)                                      \
++  do                                                                           \
++    {                                                                          \
++      tree node = gimple_assign_##node (stmt);                                 \
++      if (node && is_candidate (node))                                         \
++	{                                                                      \
++	  tree mem_ref = rewrite_address (node, gsi);                          \
++	  gimple_assign_set_##node (stmt, mem_ref);                            \
++	  update_stmt (stmt);                                                  \
++	}                                                                      \
++  } while (0)
++
++/*       COMPONENT_REF  = exp  =>     MEM_REF = exp
++      /       \              /     \
++       MEM_REF   field            gptr   offset
++       /    \
++   pointer offset
++*/
++bool
++ipa_struct_relayout::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
++{
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "Maybe rewrite assign:\n");
++      print_gimple_stmt (dump_file, stmt, 0);
++      fprintf (dump_file, "to\n");
++    }
++
++  switch (gimple_num_ops (stmt))
++    {
++    case 4:
++      REWRITE_ASSIGN_TREE_IN_STMT (rhs3); // FALLTHRU
++      case 3: {
++	REWRITE_ASSIGN_TREE_IN_STMT (rhs2);
++	tree rhs2 = gimple_assign_rhs2 (stmt);
++	if (rhs2 && TREE_CODE (rhs2) == INTEGER_CST)
++	  {
++	    /* Handle pointer++ and pointer-- or
++	       factor is equal to struct size.  */
++	    HOST_WIDE_INT times = 1;
++	    if (maybe_rewrite_cst (rhs2, gsi, times))
++	      {
++		tree tmp
++		  = build_int_cst (TREE_TYPE (TYPE_SIZE_UNIT (ctype.type)),
++				   ctype.new_size * times);
++		gimple_assign_set_rhs2 (stmt, tmp);
++		update_stmt (stmt);
++	      }
++	  }
++      } // FALLTHRU
++    case 2:
++      REWRITE_ASSIGN_TREE_IN_STMT (rhs1); // FALLTHRU
++    case 1:
++      REWRITE_ASSIGN_TREE_IN_STMT (lhs); // FALLTHRU
++    case 0:
++      break;
++    default:
++      gcc_unreachable ();
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      print_gimple_stmt (dump_file, stmt, 0);
++      fprintf (dump_file, "\n");
++    }
++  return false;
++}
++
++bool
++ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi,
++					HOST_WIDE_INT &times)
++{
++  bool ret = false;
++  gcc_assert (TREE_CODE (cst) == INTEGER_CST);
++
++  gimple *stmt = gsi_stmt (*gsi);
++  if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
++    {
++      tree lhs = gimple_assign_lhs (stmt);
++      tree rhs1 = gimple_assign_rhs1 (stmt);
++      if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type)
++	  || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type))
++	{
++	  tree num = NULL;
++	  if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type)))
++	    {
++	      times = TREE_INT_CST_LOW (num);
++	      return true;
++	    }
++	}
++    }
++
++  if (gimple_assign_rhs_code (stmt) == MULT_EXPR)
++    {
++      if (gsi_one_before_end_p (*gsi))
++	{
++	  return false;
++	}
++      // Check uses.
++      imm_use_iterator imm_iter_lhs;
++      use_operand_p use_p_lhs;
++      FOR_EACH_IMM_USE_FAST (use_p_lhs, imm_iter_lhs, gimple_assign_lhs (stmt))
++	{
++	  gimple *stmt2 = USE_STMT (use_p_lhs);
++	  if (gimple_code (stmt2) != GIMPLE_ASSIGN)
++	    continue;
++	  if (gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR)
++	    {
++	      tree lhs = gimple_assign_lhs (stmt2);
++	      tree rhs1 = gimple_assign_rhs1 (stmt2);
++	      if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type)
++		  || types_compatible_p (inner_type (TREE_TYPE (lhs)),
++					 ctype.type))
++		{
++		  tree num = NULL;
++		  if (is_result_of_mult (cst, &num,
++					 TYPE_SIZE_UNIT (ctype.type)))
++		    {
++		      times = TREE_INT_CST_LOW (num);
++		      return true;
++		    }
++		}
++	    }
++	  // For pointer compression.
++	  else if (gimple_assign_rhs_code (stmt2) == PLUS_EXPR)
++	    {
++	      // Check uses.
++	      imm_use_iterator imm_iter_cast;
++	      use_operand_p use_p_cast;
++	      FOR_EACH_IMM_USE_FAST (use_p_cast, imm_iter_cast,
++				     gimple_assign_lhs (stmt2))
++		{
++		  gimple *stmt_cast = USE_STMT (use_p_cast);
++		  if (gimple_code (stmt_cast) != GIMPLE_ASSIGN)
++		    continue;
++		  if (gimple_assign_cast_p (stmt_cast))
++		    {
++		      tree lhs_type = inner_type (
++			TREE_TYPE (gimple_assign_lhs (stmt_cast)));
++		      if (types_compatible_p (lhs_type, ctype.type))
++			{
++			  tree num = NULL;
++			  if (is_result_of_mult (cst, &num,
++						 TYPE_SIZE_UNIT (ctype.type)))
++			    {
++			      times = TREE_INT_CST_LOW (num);
++			      return true;
++			    }
++			}
++		    }
++		}
++	    }
++	}
++    }
++  // For pointer compression.
++  if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR)
++    {
++      imm_use_iterator imm_iter;
++      use_operand_p use_p;
++      tree lhs = gimple_assign_lhs (stmt);
++      if (lhs == NULL_TREE)
++	return false;
++      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
++	{
++	  gimple *use_stmt = USE_STMT (use_p);
++	  if (is_gimple_debug (use_stmt))
++	    continue;
++	  if (gimple_code (use_stmt) != GIMPLE_ASSIGN)
++	    continue;
++	  if (gimple_assign_cast_p (use_stmt))
++	    {
++	      tree lhs_type
++		= inner_type (TREE_TYPE (gimple_assign_lhs (use_stmt)));
++	      if (TYPE_UNSIGNED (lhs_type)
++		  && TREE_CODE (lhs_type) == INTEGER_TYPE
++		  && TYPE_PRECISION (lhs_type) == compressed_size)
++		{
++		  tree num = NULL;
++		  if (is_result_of_mult (cst, &num,
++					 TYPE_SIZE_UNIT (ctype.type)))
++		    {
++		      times = TREE_INT_CST_LOW (num);
++		      return true;
++		    }
++		}
++	    }
++	}
++    }
++  return false;
++}
++
++unsigned int
++ipa_struct_relayout::execute (void)
++{
++  ctype.init_type_info ();
++  if (ctype.field_count < min_relayout_split
++      || ctype.field_count > max_relayout_split)
++    {
++      return 0;
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "Complete Struct Relayout Type: ");
++      print_generic_expr (dump_file, ctype.type);
++      fprintf (dump_file, "\n");
++    }
++  transformed++;
++
++  create_global_ptrs ();
++  return rewrite ();
++}
++
++} // namespace
++
++namespace {
++
++/* Methods for ipa_struct_reorg.  */
++
++/* Dump all of the recorded types to file F. */
++
++void
++ipa_struct_reorg::dump_types (FILE *f)
++{
++  unsigned i;
++  srtype *type;
++  FOR_EACH_VEC_ELT (types, i, type)
++    {
++      fprintf (f, "======= the %dth type: ======\n", i);
++      type->dump (f);
++      fprintf (f, "\n");
++    }
++}
++
++/* Dump all of the created newtypes to file F.  */
++
++void
++ipa_struct_reorg::dump_newtypes (FILE *f)
++{
++  unsigned i = 0;
++  srtype *type = NULL;
++  FOR_EACH_VEC_ELT (types, i, type)
++    {
++      if (type->has_escaped ())
++	{
++	  continue;
++	}
++      fprintf (f, "======= the %dth newtype: ======\n", i);
++      fprintf (f, "type : ");
++      print_generic_expr (f, type->newtype[0]);
++      fprintf (f, "(%d) ", TYPE_UID (type->newtype[0]));
++      fprintf (f, "{ ");
++      fprintf (f, "\nfields = {\n");
++
++      for (tree field = TYPE_FIELDS (TYPE_MAIN_VARIANT (type->newtype[0]));
++	   field; field = DECL_CHAIN (field))
++	{
++	  fprintf (f, "field (%d) ", DECL_UID (field));
++	  fprintf (f, "{");
++	  fprintf (f, "type = ");
++	  print_generic_expr (f, TREE_TYPE (field));
++	  fprintf (f, "}\n");
++	}
++      fprintf (f, "}\n ");
++
++      fprintf (f, "\n");
++    }
++}
++
++/* Dump all of the recorded types to file F. */
++
++void
++ipa_struct_reorg::dump_types_escaped (FILE *f)
++{
++  unsigned i;
++  srtype *type;
++  FOR_EACH_VEC_ELT (types, i, type)
++    {
++      if (type->has_escaped ())
++	{
++	  type->simple_dump (f);
++	  fprintf (f, " has escaped: \"%s\"\n", type->escape_reason ());
++	}
++    }
++  fprintf (f, "\n");
++}
++
++/* Dump all of the record functions to file F. */
++
++void
++ipa_struct_reorg::dump_functions (FILE *f)
++{
++  unsigned i;
++  srfunction *fn;
++
++  globals.dump (f);
++  fprintf (f, "\n\n");
++  FOR_EACH_VEC_ELT (functions, i, fn)
++    {
++      fn->dump (f);
++      fprintf (f, "\n");
++    }
++  fprintf (f, "\n\n");
++}
++
++/* Find the recorded srtype corresponding to TYPE.  */
++
++srtype *
++ipa_struct_reorg::find_type (tree type)
++{
++  unsigned i;
++  /* Get the main variant as we are going
++     to find that type only. */
++  type = TYPE_MAIN_VARIANT (type);
++
++  srtype *type1;
++  // Search for the type to see if it is already there.
++  FOR_EACH_VEC_ELT (types, i, type1)
++    {
++      if (types_compatible_p (type1->type, type))
++	return type1;
++    }
++  return NULL;
++}
++
++/* Is TYPE a volatile type or one which points
++   to a volatile type. */
++
++bool
++isvolatile_type (tree type)
++{
++  if (TYPE_VOLATILE (type))
++    return true;
++  while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
++    {
++      type = TREE_TYPE (type);
++      if (TYPE_VOLATILE (type))
++	return true;
++    }
++  return false;
++}
++
++/* Is TYPE an array type or points to an array type. */
++
++bool
++isarraytype (tree type)
++{
++  if (TREE_CODE (type) == ARRAY_TYPE)
++    return true;
++  while (POINTER_TYPE_P (type))
++    {
++      type = TREE_TYPE (type);
++      if (TREE_CODE (type) == ARRAY_TYPE)
++	return true;
++    }
++  return false;
++}
++
++/*  Is TYPE a pointer to another pointer. */
++
++bool
++isptrptr (tree type)
++{
++  if (type == NULL)
++    {
++      return false;
++    }
++  bool firstptr = false;
++  while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
++    {
++      if (POINTER_TYPE_P (type))
++	{
++	  if (firstptr)
++	    return true;
++	  firstptr = true;
++	}
++      type = TREE_TYPE (type);
++    }
++  return false;
++}
++
++/* Adding node to map and stack.  */
++
++bool
++add_node (tree node, int layers, hash_map<tree, int> &map,
++	  auto_vec<tree> &stack)
++{
++  if (TREE_CODE (node) != SSA_NAME)
++    {
++      return false;
++    }
++  if (map.get (node) == NULL)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "    ");
++	  fprintf (dump_file, "add node: \t\t");
++	  print_generic_expr (dump_file, node);
++	  fprintf (dump_file, ",\t\tptr layers: %d: \n", layers);
++	}
++      map.put (node, layers);
++      stack.safe_push (node);
++    }
++  else if (*map.get (node) != layers)
++    {
++      return false;
++    }
++  return true;
++}
++
++/* Check the number of pointer layers of the gimple phi in definition.  */
++
++bool
++check_def_phi (tree def_node, hash_map<tree, int> &ptr_layers)
++{
++  bool res = true;
++  gimple *def_stmt = SSA_NAME_DEF_STMT (def_node);
++  for (unsigned j = 0; j < gimple_phi_num_args (def_stmt); j++)
++    {
++      tree phi_node = gimple_phi_arg_def (def_stmt, j);
++      if (integer_zerop (phi_node))
++	{
++	  continue;
++	}
++      if (ptr_layers.get (phi_node) == NULL)
++	{
++	  return false;
++	}
++      res &= *ptr_layers.get (def_node) == *ptr_layers.get (phi_node);
++    }
++  return res;
++}
++
++/* Check the number of pointer layers of the gimple assign in definition.  */
++
++bool
++check_def_assign (tree def_node, hash_map<tree, int> &ptr_layers)
++{
++  bool res = true;
++  gimple *def_stmt = SSA_NAME_DEF_STMT (def_node);
++  gimple_rhs_class rhs_class = gimple_assign_rhs_class (def_stmt);
++  tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
++  tree rhs1 = gimple_assign_rhs1 (def_stmt);
++  tree rhs1_base = TREE_CODE (rhs1) == MEM_REF ? TREE_OPERAND (rhs1, 0) : rhs1;
++  if (ptr_layers.get (rhs1_base) == NULL)
++    {
++      return false;
++    }
++  if (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS)
++    {
++      if (TREE_CODE (rhs1) == SSA_NAME)
++	{
++	  res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1);
++	}
++      else if (TREE_CODE (rhs1) == MEM_REF)
++	{
++	  res = *ptr_layers.get (def_node)
++		== *ptr_layers.get (TREE_OPERAND (rhs1, 0));
++	}
++      else
++	{
++	  return false;
++	}
++    }
++  else if (rhs_class == GIMPLE_BINARY_RHS)
++    {
++      if (rhs_code == POINTER_PLUS_EXPR)
++	{
++	  res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1);
++	}
++      else if (rhs_code == BIT_AND_EXPR)
++	{
++	  res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1);
++	}
++      else
++	{
++	  return false;
++	}
++    }
++  else
++    {
++      return false;
++    }
++  return res;
++}
++
++/* Check node definition.  */
++
++bool
++check_node_def (hash_map<tree, int> &ptr_layers)
++{
++  bool res = true;
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\n======== check node definition ========\n");
++    }
++  for (unsigned i = 1; i < num_ssa_names; ++i)
++    {
++      tree name = ssa_name (i);
++      if (name && ptr_layers.get (name) != NULL)
++	{
++	  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
++	  if (dump_file && (dump_flags & TDF_DETAILS)
++	      && gimple_code (def_stmt) != GIMPLE_DEBUG)
++	    {
++	      print_gimple_stmt (dump_file, def_stmt, 0);
++	    }
++
++	  if (gimple_code (def_stmt) == GIMPLE_PHI)
++	    {
++	      res = check_def_phi (name, ptr_layers);
++	    }
++	  else if (gimple_code (def_stmt) == GIMPLE_ASSIGN)
++	    {
++	      res = check_def_assign (name, ptr_layers);
++	    }
++	  else if (gimple_code (def_stmt) == GIMPLE_NOP)
++	    {
++	      continue;
++	    }
++	  else
++	    {
++	      return false;
++	    }
++	}
++    }
++  return res;
++}
++
++/* Check pointer usage.  */
++
++bool
++check_record_ptr_usage (gimple *use_stmt, tree &current_node,
++			hash_map<tree, int> &ptr_layers,
++			auto_vec<tree> &ssa_name_stack)
++{
++  gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt);
++  tree rhs1 = gimple_assign_rhs1 (use_stmt);
++  tree lhs = gimple_assign_lhs (use_stmt);
++  if (rhs_class != GIMPLE_SINGLE_RHS
++      || (TREE_CODE (rhs1) != COMPONENT_REF && TREE_CODE (rhs1) != SSA_NAME)
++      || (TREE_CODE (lhs) != MEM_REF && TREE_CODE (lhs) != SSA_NAME))
++    {
++      return false;
++    }
++
++  bool res = true;
++  /* MEM[(long int *)a_1] = _1; (record).
++     If lhs is ssa_name, lhs cannot be the current node.
++     _2 = _1->flow; (No record).  */
++  if (TREE_CODE (rhs1) == SSA_NAME)
++    {
++      tree tmp = (rhs1 != current_node) ? rhs1 : lhs;
++      if (TREE_CODE (tmp) == MEM_REF)
++	{
++	  res = add_node (TREE_OPERAND (tmp, 0),
++			  *ptr_layers.get (current_node) + 1, ptr_layers,
++			  ssa_name_stack);
++	}
++      else
++	{
++	  res = add_node (tmp, *ptr_layers.get (current_node), ptr_layers,
++			  ssa_name_stack);
++	}
++    }
++  else if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == COMPONENT_REF)
++    {
++      res = !(POINTER_TYPE_P (TREE_TYPE (rhs1)));
++    }
++  else
++    {
++      res = false;
++    }
++  return res;
++}
++
++/* Check and record a single node.  */
++
++bool
++check_record_single_node (gimple *use_stmt, tree &current_node,
++			  hash_map<tree, int> &ptr_layers,
++			  auto_vec<tree> &ssa_name_stack)
++{
++  gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt);
++  tree rhs1 = gimple_assign_rhs1 (use_stmt);
++  tree lhs = gimple_assign_lhs (use_stmt);
++  gcc_assert (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS);
++
++  if ((TREE_CODE (rhs1) != SSA_NAME && TREE_CODE (rhs1) != MEM_REF)
++      || (TREE_CODE (lhs) != SSA_NAME && TREE_CODE (lhs) != MEM_REF))
++    {
++      return false;
++    }
++
++  bool res = true;
++  if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == MEM_REF)
++    {
++      /* add such as: _2 = MEM[(struct arc_t * *)_1].  */
++      res = add_node (lhs, *ptr_layers.get (current_node) - 1, ptr_layers,
++		      ssa_name_stack);
++    }
++  else if (TREE_CODE (lhs) == MEM_REF && TREE_CODE (rhs1) == SSA_NAME)
++    {
++      /* add such as: MEM[(long int *)a_1] = _1.  */
++      if (rhs1 == current_node)
++	{
++	  res = add_node (TREE_OPERAND (lhs, 0),
++			  *ptr_layers.get (current_node) + 1, ptr_layers,
++			  ssa_name_stack);
++	}
++      else
++	{
++	  res = add_node (rhs1, *ptr_layers.get (current_node) - 1, ptr_layers,
++			  ssa_name_stack);
++	}
++    }
++  else if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == SSA_NAME)
++    {
++      res = add_node (lhs, *ptr_layers.get (current_node), ptr_layers,
++		      ssa_name_stack);
++    }
++  else
++    {
++      res = false;
++    }
++
++  return res;
++}
++
++/* Check and record multiple nodes.  */
++
++bool
++check_record_mult_node (gimple *use_stmt, tree &current_node,
++			hash_map<tree, int> &ptr_layers,
++			auto_vec<tree> &ssa_name_stack)
++{
++  gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt);
++  tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
++  tree rhs1 = gimple_assign_rhs1 (use_stmt);
++  tree lhs = gimple_assign_lhs (use_stmt);
++  tree rhs2 = gimple_assign_rhs2 (use_stmt);
++  gcc_assert (rhs_class == GIMPLE_BINARY_RHS);
++
++  if ((rhs_code != POINTER_PLUS_EXPR && rhs_code != POINTER_DIFF_EXPR
++       && rhs_code != BIT_AND_EXPR)
++      || (TREE_CODE (lhs) != SSA_NAME && TREE_CODE (rhs1) != SSA_NAME))
++    {
++      return false;
++    }
++
++  bool res = true;
++  if (rhs_code == POINTER_PLUS_EXPR)
++    {
++      res
++	= add_node (lhs == current_node ? rhs1 : lhs,
++		    *ptr_layers.get (current_node), ptr_layers, ssa_name_stack);
++    }
++  else if (rhs_code == POINTER_DIFF_EXPR)
++    {
++      res
++	= add_node (rhs1 != current_node ? rhs1 : rhs2,
++		    *ptr_layers.get (current_node), ptr_layers, ssa_name_stack);
++    }
++  else if (rhs_code == BIT_AND_EXPR)
++    {
++      if (TREE_CODE (rhs2) != INTEGER_CST)
++	{
++	  return false;
++	}
++      res
++	= add_node (lhs == current_node ? rhs1 : lhs,
++		    *ptr_layers.get (current_node), ptr_layers, ssa_name_stack);
++    }
++  return res;
++}
++
++/* Check whether gimple assign is correctly used and record node.  */
++
++bool
++check_record_assign (tree &current_node, gimple *use_stmt,
++		     hash_map<tree, int> &ptr_layers,
++		     auto_vec<tree> &ssa_name_stack)
++{
++  gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt);
++  if (*ptr_layers.get (current_node) == 1)
++    {
++      return check_record_ptr_usage (use_stmt, current_node, ptr_layers,
++				     ssa_name_stack);
++    }
++  else if (*ptr_layers.get (current_node) > 1)
++    {
++      if (rhs_class != GIMPLE_BINARY_RHS && rhs_class != GIMPLE_UNARY_RHS
++	  && rhs_class != GIMPLE_SINGLE_RHS)
++	{
++	  return false;
++	}
++
++      if (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS)
++	{
++	  return check_record_single_node (use_stmt, current_node, ptr_layers,
++					   ssa_name_stack);
++	}
++      else if (rhs_class == GIMPLE_BINARY_RHS)
++	{
++	  return check_record_mult_node (use_stmt, current_node, ptr_layers,
++					 ssa_name_stack);
++	}
++    }
++  else
++    return false;
++
++  return true;
++}
++
++/* Check whether gimple phi is correctly used and record node.  */
++
++bool
++check_record_phi (tree &current_node, gimple *use_stmt,
++		  hash_map<tree, int> &ptr_layers,
++		  auto_vec<tree> &ssa_name_stack)
++{
++  bool res = true;
++  res &= add_node (gimple_phi_result (use_stmt), *ptr_layers.get (current_node),
++		   ptr_layers, ssa_name_stack);
++
++  for (unsigned i = 0; i < gimple_phi_num_args (use_stmt); i++)
++    {
++      if (integer_zerop (gimple_phi_arg_def (use_stmt, i)))
++	{
++	  continue;
++	}
++      res &= add_node (gimple_phi_arg_def (use_stmt, i),
++		       *ptr_layers.get (current_node), ptr_layers,
++		       ssa_name_stack);
++    }
++  return res;
++}
++
++/* Check the use of callee.  */
++
++bool
++check_callee (cgraph_node *node, gimple *stmt, hash_map<tree, int> &ptr_layers,
++	      int input_layers)
++{
++  /* caller main ()
++	{ spec_qsort.constprop (_649, _651); }
++     def    spec_qsort.constprop (void * a, size_t n)
++	{ spec_qsort.constprop (a_1, _139); }  */
++  /* In safe functions, only call itself is allowed.  */
++  if (node->get_edge (stmt)->callee != node)
++    {
++      return false;
++    }
++  tree input_node = gimple_call_arg (stmt, 0);
++  if (ptr_layers.get (input_node) == NULL
++      || *ptr_layers.get (input_node) != input_layers)
++    {
++      return false;
++    }
++  if (SSA_NAME_VAR (input_node) != DECL_ARGUMENTS (node->decl))
++    {
++      return false;
++    }
++
++  for (unsigned i = 1; i < gimple_call_num_args (stmt); i++)
++    {
++      if (ptr_layers.get (gimple_call_arg (stmt, i)) != NULL)
++	{
++	  return false;
++	}
++    }
++  return true;
++}
++
++/* Check the usage of input nodes and related nodes.  */
++
++bool
++check_node_use (cgraph_node *node, tree current_node,
++		hash_map<tree, int> &ptr_layers, auto_vec<tree> &ssa_name_stack,
++		int input_layers)
++{
++  imm_use_iterator imm_iter;
++  gimple *use_stmt = NULL;
++  bool res = true;
++  /* Use FOR_EACH_IMM_USE_STMT as an indirect edge
++     to search for possible related nodes and push to stack.  */
++  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, current_node)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS)
++	  && gimple_code (use_stmt) != GIMPLE_DEBUG)
++	{
++	  fprintf (dump_file, "%*s", 4, "");
++	  print_gimple_stmt (dump_file, use_stmt, 0);
++	}
++      /* For other types of gimple, do not record the node.  */
++      if (res)
++	{
++	  if (gimple_code (use_stmt) == GIMPLE_PHI)
++	    {
++	      res = check_record_phi (current_node, use_stmt, ptr_layers,
++				      ssa_name_stack);
++	    }
++	  else if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
++	    {
++	      res = check_record_assign (current_node, use_stmt, ptr_layers,
++					 ssa_name_stack);
++	    }
++	  else if (gimple_code (use_stmt) == GIMPLE_CALL)
++	    {
++	      res = check_callee (node, use_stmt, ptr_layers, input_layers);
++	    }
++	  else if (gimple_code (use_stmt) == GIMPLE_RETURN)
++	    {
++	      res = false;
++	    }
++	}
++    }
++  return res;
++}
++
++/* Update VOID_POINTER_P Type input layers */
++bool
++get_void_ptr_layers (tree input, int &input_layers)
++{
++  imm_use_iterator imm_iter;
++  gimple *use_stmt = NULL;
++
++  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, input)
++    {
++      if (gimple_code (use_stmt) == GIMPLE_ASSIGN
++	  && gimple_assign_rhs_class (use_stmt) == GIMPLE_SINGLE_RHS)
++	{
++	  tree rhs1 = gimple_assign_rhs1 (use_stmt);
++	  tree lhs = gimple_assign_lhs (use_stmt);
++	  if (TREE_CODE (lhs) == SSA_NAME && handled_type (TREE_TYPE (lhs)))
++	    {
++	      if (TREE_CODE (rhs1) == MEM_REF)
++		input_layers = get_ptr_layers (TREE_TYPE (lhs)) + 1;
++	      return true;
++	    }
++	}
++    }
++  return false;
++}
++
++/* Preparing the First Node for DFS.  */
++
++bool
++set_init_node (cgraph_node *node, cgraph_edge *caller,
++	       hash_map<tree, int> &ptr_layers, auto_vec<tree> &ssa_name_stack,
++	       int &input_layers)
++{
++  /* set input_layer
++     caller spec_qsort.constprop (_649, _651)
++		    |-- Obtains the actual ptr layer
++		    from the input node.  */
++  if (caller->call_stmt == NULL || gimple_call_num_args (caller->call_stmt) == 0
++      || gimple_num_ops (caller->call_stmt) == 0)
++    {
++      return false;
++    }
++  tree input = gimple_call_arg (caller->call_stmt, 0);
++  if (!(POINTER_TYPE_P (TREE_TYPE (input))
++	|| TREE_CODE (TREE_TYPE (input)) == ARRAY_TYPE))
++    {
++      return false;
++    }
++
++  if (handled_type (TREE_TYPE (input)))
++    input_layers = get_ptr_layers (TREE_TYPE (input));
++  else if (VOID_POINTER_P (TREE_TYPE (input)))
++    {
++      if (!get_void_ptr_layers (input, input_layers))
++	return false;
++    }
++
++  /* set initial node
++     def spec_qsort.constprop (void * a, size_t n)
++		      |-- Find the initial ssa_name
++		      from the parameter node.  */
++  tree parm = DECL_ARGUMENTS (node->decl);
++  for (unsigned j = 1; j < num_ssa_names; ++j)
++    {
++      tree name = ssa_name (j);
++      if (!name || has_zero_uses (name) || virtual_operand_p (name))
++	{
++	  continue;
++	}
++      if (SSA_NAME_VAR (name) == parm
++	  && gimple_code (SSA_NAME_DEF_STMT (name)) == GIMPLE_NOP)
++	{
++	  if (!add_node (name, input_layers, ptr_layers, ssa_name_stack))
++	    {
++	      return false;
++	    }
++	}
++    }
++  return !ssa_name_stack.is_empty ();
++}
++
++/* Check the usage of each call.  */
++
++bool
++check_each_call (cgraph_node *node, cgraph_edge *caller)
++{
++  hash_map<tree, int> ptr_layers;
++  auto_vec<tree> ssa_name_stack;
++  int input_layers = 0;
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "======== check each call : %s/%u ========\n",
++	       node->name (), node->order);
++    }
++  if (!set_init_node (node, caller, ptr_layers, ssa_name_stack, input_layers))
++    {
++      return false;
++    }
++  int i = 0;
++  while (!ssa_name_stack.is_empty ())
++    {
++      tree current_node = ssa_name_stack.pop ();
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\ncur node %d: \t", i++);
++	  print_generic_expr (dump_file, current_node);
++	  fprintf (dump_file, ",\t\tptr layers: %d: \n",
++		   *ptr_layers.get (current_node));
++	}
++      if (get_ptr_layers (TREE_TYPE (current_node))
++	  > *ptr_layers.get (current_node))
++	{
++	  return false;
++	}
++      if (!check_node_use (node, current_node, ptr_layers, ssa_name_stack,
++			   input_layers))
++	{
++	  return false;
++	}
++    }
++
++  if (!check_node_def (ptr_layers))
++    {
++      return false;
++    }
++  return true;
++}
++
++/* Filter out function: void func (void*, int n),
++   and the function has no static variable, no structure-related variable,
++   and no global variable is used.  */
++
++bool
++filter_func (cgraph_node *node)
++{
++  tree parm = DECL_ARGUMENTS (node->decl);
++  if (!(parm && VOID_POINTER_P (TREE_TYPE (parm))
++	&& VOID_TYPE_P (TREE_TYPE (TREE_TYPE (node->decl)))))
++    {
++      return false;
++    }
++
++  for (parm = DECL_CHAIN (parm); parm; parm = DECL_CHAIN (parm))
++    {
++      if (TREE_CODE (TREE_TYPE (parm)) != INTEGER_TYPE)
++	{
++	  return false;
++	}
++    }
++
++  if (DECL_STRUCT_FUNCTION (node->decl)->static_chain_decl)
++    {
++      return false;
++    }
++
++  tree var = NULL_TREE;
++  unsigned int i = 0;
++  bool res = true;
++  FOR_EACH_LOCAL_DECL (cfun, i, var)
++    {
++      if (TREE_CODE (var) == VAR_DECL && handled_type (TREE_TYPE (var)))
++	{
++	  res = false;
++	}
++    }
++  if (!res)
++    {
++      return false;
++    }
++
++  for (unsigned j = 1; j < num_ssa_names; ++j)
++    {
++      tree name = ssa_name (j);
++      if (!name || has_zero_uses (name) || virtual_operand_p (name))
++	{
++	  continue;
++	}
++      tree var = SSA_NAME_VAR (name);
++      if (var && TREE_CODE (var) == VAR_DECL && is_global_var (var))
++	{
++	  return false;
++	}
++    }
++  return true;
++}
++
++/* Check whether the function with the void* parameter and uses the input node
++   safely.
++   In these functions only component_ref can be used to dereference the last
++   layer of the input structure pointer.  The hack operation pointer offset
++   after type cast cannot be used.
++*/
++
++bool
++is_safe_func_with_void_ptr_parm (cgraph_node *node)
++{
++  if (!filter_func (node))
++    {
++      return false;
++    }
++
++  /* Distinguish Recursive Callers
++     normal_callers:    main ()
++	    { spec_qsort.constprop (_649, _651); }
++     definition:    spec_qsort.constprop (void * a, size_t n)
++     recursive_callers: { spec_qsort.constprop (a_1, _139); }  */
++  auto_vec<cgraph_edge *> callers = node->collect_callers ();
++  auto_vec<cgraph_edge *> normal_callers;
++  if (callers.length () == 0)
++    return true;
++  for (unsigned i = 0; i < callers.length (); i++)
++    {
++      if (callers[i]->caller != node)
++	{
++	  normal_callers.safe_push (callers[i]);
++	}
++    }
++  if (normal_callers.length () == 0)
++    {
++      return false;
++    }
++
++  for (unsigned i = 0; i < normal_callers.length (); i++)
++    {
++      if (normal_callers[i]->call_stmt == NULL)
++	normal_callers[i]->caller->get_untransformed_body (); // continue;
++      if (!check_each_call (node, normal_callers[i]))
++	{
++	  return false;
++	}
++    }
++  return true;
++}
++
++/* Return the escape type which corresponds to if
++   this is an volatile type, an array type or a pointer
++   to a pointer type.  */
++
++escape_type
++escape_type_volatile_array_or_ptrptr (tree type)
++{
++  if (isvolatile_type (type))
++    return escape_volatile;
++  if (isarraytype (type))
++    return escape_array;
++  if (isptrptr (type) && (current_layout_opt_level < STRUCT_REORDER_FIELDS))
++    return escape_ptr_ptr;
++  return does_not_escape;
++}
++
++/* Record field type.  */
++
++void
++ipa_struct_reorg::record_field_type (tree field, srtype *base_srtype)
++{
++  tree field_type = TREE_TYPE (field);
++  /* The uid of the type in the structure is different
++     from that outside the structure.  */
++  srtype *field_srtype = record_type (inner_type (field_type));
++  srfield *field_srfield = base_srtype->find_field (int_byte_position (field));
++  /* We might have an variable sized type which we don't set the handle.  */
++  if (field_srfield)
++    {
++      field_srfield->type = field_srtype;
++      field_srtype->add_field_site (field_srfield);
++    }
++  if (field_srtype == base_srtype && current_layout_opt_level == STRUCT_SPLIT)
++    {
++      base_srtype->mark_escape (escape_rescusive_type, NULL);
++    }
++  /* Types of non-pointer field are difficult to track the correctness
++     of the rewrite when it is used by the escaped type.  */
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++      && TREE_CODE (field_type) == RECORD_TYPE)
++    {
++      field_srtype->mark_escape (escape_instance_field, NULL);
++    }
++}
++
++/* Record structure all field types.  */
++
++void
++ipa_struct_reorg::record_struct_field_types (tree base_type,
++					     srtype *base_srtype)
++{
++  for (tree field = TYPE_FIELDS (base_type); field; field = DECL_CHAIN (field))
++    {
++      if (TREE_CODE (field) == FIELD_DECL)
++	{
++	  tree field_type = TREE_TYPE (field);
++	  process_union (field_type);
++	  if (TREE_CODE (inner_type (field_type)) == UNION_TYPE
++	      || TREE_CODE (inner_type (field_type)) == QUAL_UNION_TYPE)
++	    {
++	      base_srtype->mark_escape (escape_union, NULL);
++	    }
++	  if (isvolatile_type (field_type))
++	    {
++	      base_srtype->mark_escape (escape_volatile, NULL);
++	    }
++	  escape_type e = escape_type_volatile_array_or_ptrptr (field_type);
++	  if (e != does_not_escape)
++	    {
++	      base_srtype->mark_escape (e, NULL);
++	    }
++	  /* Types of non-pointer field are difficult to track the correctness
++	     of the rewrite when it is used by the escaped type.  */
++	  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++	      && TREE_CODE (field_type) == RECORD_TYPE)
++	    {
++	      base_srtype->mark_escape (escape_instance_field, NULL);
++	    }
++	  if (handled_type (field_type))
++	    {
++	      record_field_type (field, base_srtype);
++	    }
++	}
++    }
++}
++
++/* Record TYPE if not already recorded.  */
++
++srtype *
++ipa_struct_reorg::record_type (tree type)
++{
++  unsigned typeuid;
++
++  /* Get the main variant as we are going
++     to record that type only.  */
++  type = TYPE_MAIN_VARIANT (type);
++  typeuid = TYPE_UID (type);
++
++  srtype *type1;
++
++  type1 = find_type (type);
++  if (type1)
++    return type1;
++
++  /* If already done recording just return NULL.  */
++  if (done_recording)
++    return NULL;
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    fprintf (dump_file, "Recording new type: %u.\n", typeuid);
++
++  type1 = new srtype (type);
++  types.safe_push (type1);
++
++  /* If the type has a user alignment set,
++     that means the user most likely already set up the type.  */
++  if (TYPE_USER_ALIGN (type))
++    type1->mark_escape (escape_user_alignment, NULL);
++
++  record_struct_field_types (type, type1);
++
++  return type1;
++}
++
++/* Mark TYPE as escaping with ESCAPES as the reason.  */
++
++void
++ipa_struct_reorg::mark_type_as_escape (tree type, escape_type escapes,
++				       gimple *stmt)
++{
++  if (handled_type (type))
++    {
++      srtype *stype = record_type (inner_type (type));
++
++      if (!stype)
++	return;
++
++      stype->mark_escape (escapes, stmt);
++    }
++}
++
++/* Maybe process the union of type TYPE, such that marking all of the fields'
++   types as being escaping.  */
++
++void
++ipa_struct_reorg::process_union (tree type)
++{
++  static hash_set unions_recorded;
++
++  type = inner_type (type);
++  if (TREE_CODE (type) != UNION_TYPE && TREE_CODE (type) != QUAL_UNION_TYPE)
++    return;
++
++  type = TYPE_MAIN_VARIANT (type);
++
++  /* We already processed this type.  */
++  if (unions_recorded.add (type))
++    return;
++
++  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
++    {
++      if (TREE_CODE (field) == FIELD_DECL)
++	{
++	  mark_type_as_escape (TREE_TYPE (field), escape_union);
++	  process_union (TREE_TYPE (field));
++	}
++    }
++}
++
++/*  Used by record_var function as a callback to walk_tree.
++    Mark the type as escaping if it has expressions which
++    cannot be converted for global initializations. */
++
++static tree
++record_init_types (tree *tp, int *walk_subtrees, void *data)
++{
++  ipa_struct_reorg *c = (ipa_struct_reorg *) data;
++  switch (TREE_CODE (*tp))
++    {
++    CASE_CONVERT:
++    case COMPONENT_REF:
++    case VIEW_CONVERT_EXPR:
++      case ARRAY_REF: {
++	tree typeouter = TREE_TYPE (*tp);
++	tree typeinner = TREE_TYPE (TREE_OPERAND (*tp, 0));
++	c->mark_type_as_escape (typeouter, escape_via_global_init);
++	c->mark_type_as_escape (typeinner, escape_via_global_init);
++	break;
++      }
++    case INTEGER_CST:
++      if (!integer_zerop (*tp))
++	c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init);
++      break;
++    case VAR_DECL:
++    case PARM_DECL:
++    case FIELD_DECL:
++      c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init);
++      *walk_subtrees = false;
++      break;
++    default:
++      *walk_subtrees = true;
++      break;
++    }
++  return NULL_TREE;
++}
++
++/* Record var DECL; optionally specify the escape reason and the argument
++   number in a function. */
++
++srdecl *
++ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg)
++{
++  srtype *type;
++  srdecl *sd = NULL;
++
++  process_union (TREE_TYPE (decl));
++
++  /* Only the structure type RECORD_TYPE is recorded.
++     Therefore, the void* type is filtered out.  */
++  if (handled_type (TREE_TYPE (decl)))
++    {
++      type = record_type (inner_type (TREE_TYPE (decl)));
++      escape_type e;
++
++      if (done_recording && !type)
++	return NULL;
++
++      gcc_assert (type);
++      if (TREE_CODE (decl) == VAR_DECL && is_global_var (decl))
++	sd = globals.record_decl (type, decl, arg);
++      else
++	{
++	  gcc_assert (current_function);
++	  sd = current_function->record_decl (type, decl, arg);
++	}
++
++      /* If the variable has the "used" attribute, then treat the type as
++       * escaping. */
++      if (escapes != does_not_escape)
++	e = escapes;
++      else if (TREE_CODE (decl) != SSA_NAME && DECL_PRESERVE_P (decl))
++	e = escape_marked_as_used;
++      else if (TREE_THIS_VOLATILE (decl))
++	e = escape_volatile;
++      else if (TREE_CODE (decl) != SSA_NAME && DECL_USER_ALIGN (decl))
++	e = escape_user_alignment;
++      else if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl)
++	       && TREE_PUBLIC (decl))
++	e = escape_via_global_var;
++      /* We don't have an initializer. */
++      else if (TREE_CODE (decl) != SSA_NAME
++	       && DECL_INITIAL (decl) == error_mark_node)
++	e = escape_via_global_var;
++      else
++	e = escape_type_volatile_array_or_ptrptr (TREE_TYPE (decl));
++
++      /* Separate instance is hard to trace in complete struct
++     relayout optimization.  */
++      if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT
++	  && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE)
++	{
++	  e = escape_separate_instance;
++	}
++
++      if (e != does_not_escape
++	  && (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT
++	      || replace_type_map.get (type->type) == NULL))
++	type->mark_escape (e, NULL);
++    }
++
++  /* Record the initial usage of variables as types escapes.  */
++  if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl) && DECL_INITIAL (decl))
++    {
++      walk_tree_without_duplicates (&DECL_INITIAL (decl), record_init_types,
++				    this);
++      if (!integer_zerop (DECL_INITIAL (decl))
++	  && DECL_INITIAL (decl) != error_mark_node)
++	mark_type_as_escape (TREE_TYPE (decl), escape_via_global_init);
++    }
++  return sd;
++}
++
++/* Find void* ssa_names which are used inside MEM[] or if we have &a.c,
++   mark the type as escaping. */
++
++void
++ipa_struct_reorg::find_var (tree expr, gimple *stmt)
++{
++  /* If we have a VCE, mark the outer type as escaping and the inner one.
++     Also mark the innermost operand.  */
++  if (TREE_CODE (expr) == VIEW_CONVERT_EXPR)
++    {
++      mark_type_as_escape (TREE_TYPE (expr), escape_vce, stmt);
++      mark_type_as_escape (TREE_TYPE (TREE_OPERAND (expr, 0)), escape_vce,
++			   stmt);
++    }
++
++  /* If we have &b.c then we need to mark the type of b
++     as escaping as tracking a will be hard.  */
++  if (TREE_CODE (expr) == ADDR_EXPR || TREE_CODE (expr) == VIEW_CONVERT_EXPR)
++    {
++      tree r = TREE_OPERAND (expr, 0);
++      tree orig_type = TREE_TYPE (expr);
++      if (handled_component_p (r) || TREE_CODE (r) == MEM_REF)
++	{
++	  while (handled_component_p (r) || TREE_CODE (r) == MEM_REF)
++	    {
++	      if (TREE_CODE (r) == VIEW_CONVERT_EXPR)
++		{
++		  mark_type_as_escape (TREE_TYPE (r), escape_vce, stmt);
++		  mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 0)),
++				       escape_vce, stmt);
++		}
++	      if (TREE_CODE (r) == MEM_REF)
++		{
++		  mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 1)),
++				       escape_addr, stmt);
++		  tree inner_type = TREE_TYPE (TREE_OPERAND (r, 0));
++		  if (orig_type != inner_type)
++		    {
++		      mark_type_as_escape (orig_type, escape_cast_another_ptr,
++					   stmt);
++		      mark_type_as_escape (inner_type, escape_cast_another_ptr,
++					   stmt);
++		    }
++		}
++	      r = TREE_OPERAND (r, 0);
++	    }
++	  mark_expr_escape (r, escape_addr, stmt);
++	}
++    }
++
++  tree base;
++  bool indirect;
++  srtype *type;
++  srfield *field;
++  bool realpart, imagpart, address;
++  bool escape_from_base = false;
++  /* When the should_create flag is true, the declaration can be recorded.  */
++  get_type_field (expr, base, indirect, type, field, realpart, imagpart,
++		  address, escape_from_base, true, true);
++}
++
++void
++ipa_struct_reorg::find_vars (gimple *stmt)
++{
++  gasm *astmt;
++  switch (gimple_code (stmt))
++    {
++    case GIMPLE_ASSIGN:
++      if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS
++	  || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR
++	  || gimple_assign_rhs_code (stmt) == NOP_EXPR)
++	{
++	  tree lhs = gimple_assign_lhs (stmt);
++	  tree rhs = gimple_assign_rhs1 (stmt);
++	  find_var (gimple_assign_lhs (stmt), stmt);
++	  /* _2 = MEM[(struct arc_t * *)_1];
++	     records the right value _1 declaration.  */
++	  find_var (gimple_assign_rhs1 (stmt), stmt);
++
++	  /* Pointer types from non-zero pointer need to be escaped in pointer
++	     compression and complete relayout.
++	     e.g _1->t = (struct *) 0x400000.  */
++	  if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT
++	      && TREE_CODE (lhs) == COMPONENT_REF
++	      && TREE_CODE (TREE_TYPE (lhs)) == POINTER_TYPE
++	      && TREE_CODE (rhs) == INTEGER_CST && !integer_zerop (rhs))
++	    {
++	      mark_type_as_escape (inner_type (TREE_TYPE (lhs)),
++				   escape_cast_int, stmt);
++	    }
++
++	  /* Add a safe func mechanism.  */
++	  bool l_find = true;
++	  bool r_find = true;
++	  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	    {
++	      l_find = !(current_function->is_safe_func
++			 && TREE_CODE (lhs) == SSA_NAME
++			 && is_from_void_ptr_parm (lhs));
++	      r_find = !(current_function->is_safe_func
++			 && TREE_CODE (rhs) == SSA_NAME
++			 && is_from_void_ptr_parm (rhs));
++	    }
++
++	  if ((TREE_CODE (lhs) == SSA_NAME) && VOID_POINTER_P (TREE_TYPE (lhs))
++	      && handled_type (TREE_TYPE (rhs)) && l_find)
++	    {
++	      srtype *t = find_type (inner_type (TREE_TYPE (rhs)));
++	      srdecl *d = find_decl (lhs);
++	      if (!d && t)
++		{
++		  current_function->record_decl (t, lhs, -1,
++						 isptrptr (TREE_TYPE (rhs))
++						   ? TREE_TYPE (rhs)
++						   : NULL);
++		  tree var = SSA_NAME_VAR (lhs);
++		  if (var && VOID_POINTER_P (TREE_TYPE (var)))
++		    current_function->record_decl (t, var, -1,
++						   isptrptr (TREE_TYPE (rhs))
++						     ? TREE_TYPE (rhs)
++						     : NULL);
++		}
++	    }
++	  /* find void ssa_name such as:
++	     void * _1; struct arc * _2;
++	     _2 = _1 + _3; _1 = calloc (100, 40).  */
++	  if (TREE_CODE (rhs) == SSA_NAME && VOID_POINTER_P (TREE_TYPE (rhs))
++	      && handled_type (TREE_TYPE (lhs)) && r_find)
++	    {
++	      srtype *t = find_type (inner_type (TREE_TYPE (lhs)));
++	      srdecl *d = find_decl (rhs);
++	      if (!d && t)
++		{
++		  current_function->record_decl (t, rhs, -1,
++						 isptrptr (TREE_TYPE (lhs))
++						   ? TREE_TYPE (lhs)
++						   : NULL);
++		  tree var = SSA_NAME_VAR (rhs);
++		  if (var && VOID_POINTER_P (TREE_TYPE (var)))
++		    current_function->record_decl (t, var, -1,
++						   isptrptr (TREE_TYPE (lhs))
++						     ? TREE_TYPE (lhs)
++						     : NULL);
++		}
++	    }
++	}
++      else if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	       && (gimple_assign_rhs_code (stmt) == LE_EXPR
++		   || gimple_assign_rhs_code (stmt) == LT_EXPR
++		   || gimple_assign_rhs_code (stmt) == GE_EXPR
++		   || gimple_assign_rhs_code (stmt) == GT_EXPR))
++	{
++	  find_var (gimple_assign_lhs (stmt), stmt);
++	  find_var (gimple_assign_rhs1 (stmt), stmt);
++	  find_var (gimple_assign_rhs2 (stmt), stmt);
++	}
++      /* find void ssa_name from stmt such as: _2 = _1 - old_arcs_1.  */
++      else if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	       && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR
++	       && types_compatible_p (
++		 TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs1 (stmt))),
++		 TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs2 (stmt)))))
++	{
++	  find_var (gimple_assign_rhs1 (stmt), stmt);
++	  find_var (gimple_assign_rhs2 (stmt), stmt);
++	}
++      else
++	{
++	  /* Because we won't handle these stmts in rewrite phase,
++	     just mark these types as escaped.  */
++	  switch (gimple_num_ops (stmt))
++	    {
++	    case 4:
++	      mark_type_as_escape (TREE_TYPE (gimple_assign_rhs3 (stmt)),
++				   escape_unhandled_rewrite, stmt);
++	      // FALLTHRU
++	    case 3:
++	      mark_type_as_escape (TREE_TYPE (gimple_assign_rhs2 (stmt)),
++				   escape_unhandled_rewrite, stmt);
++	      // FALLTHRU
++	    case 2:
++	      mark_type_as_escape (TREE_TYPE (gimple_assign_rhs1 (stmt)),
++				   escape_unhandled_rewrite, stmt);
++	      // FALLTHRU
++	    case 1:
++	      mark_type_as_escape (TREE_TYPE (gimple_assign_lhs (stmt)),
++				   escape_unhandled_rewrite, stmt);
++	      // FALLTHRU
++	    case 0:
++	      break;
++	    default:
++	      gcc_unreachable ();
++	    }
++	}
++      break;
++
++    case GIMPLE_CALL:
++      if (gimple_call_lhs (stmt))
++	find_var (gimple_call_lhs (stmt), stmt);
++
++      if (gimple_call_chain (stmt))
++	find_var (gimple_call_chain (stmt), stmt);
++
++      for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
++	find_var (gimple_call_arg (stmt, i), stmt);
++      break;
++
++    case GIMPLE_ASM:
++      astmt = as_a (stmt);
++      for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++)
++	find_var (TREE_VALUE (gimple_asm_input_op (astmt, i)), stmt);
++      for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++)
++	find_var (TREE_VALUE (gimple_asm_output_op (astmt, i)), stmt);
++      mark_types_asm (astmt);
++      break;
++
++      case GIMPLE_RETURN: {
++	tree expr = gimple_return_retval (as_a (stmt));
++	if (expr)
++	  find_var (expr, stmt);
++	/* return &a; should mark the type of a as escaping through a return. */
++	if (expr && TREE_CODE (expr) == ADDR_EXPR)
++	  {
++	    expr = TREE_OPERAND (expr, 0);
++	    srdecl *d = find_decl (expr);
++	    if (d)
++	      d->type->mark_escape (escape_return, stmt);
++	  }
++      }
++      break;
++
++    default:
++      break;
++    }
++}
++
++static HOST_WIDE_INT
++get_offset (tree op, HOST_WIDE_INT offset)
++{
++  switch (TREE_CODE (op))
++    {
++      case COMPONENT_REF: {
++	return int_byte_position (TREE_OPERAND (op, 1));
++      }
++      case MEM_REF: {
++	return tree_to_uhwi (TREE_OPERAND (op, 1));
++      }
++    default:
++      return offset;
++    }
++  return offset;
++}
++
++/* Record field access.  */
++static void
++record_field_access (tree type, HOST_WIDE_INT offset, unsigned access,
++		     void *data)
++{
++  srtype *this_srtype = ((ipa_struct_reorg *) data)->find_type (type);
++  if (this_srtype == NULL)
++    return;
++  srfield *this_srfield = this_srtype->find_field (offset);
++  if (this_srfield == NULL)
++    return;
++
++  this_srfield->field_access |= access;
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "record field access %d:", access);
++      print_generic_expr (dump_file, type);
++      fprintf (dump_file, "  field:");
++      print_generic_expr (dump_file, this_srfield->fielddecl);
++      fprintf (dump_file, "\n");
++    }
++  return;
++}
++
++/* Update field_access in srfield.  */
++
++static void
++update_field_access (tree node, tree op, unsigned access, void *data)
++{
++  HOST_WIDE_INT offset = 0;
++  offset = get_offset (op, offset);
++  tree node_type = inner_type (TREE_TYPE (node));
++  record_field_access (node_type, offset, access, data);
++  tree base = node;
++  get_base (base, node);
++  tree base_type = inner_type (TREE_TYPE (base));
++  if (!types_compatible_p (base_type, node_type))
++    {
++      record_field_access (base_type, get_offset (node, offset), access, data);
++    }
++  return;
++}
++
++/* A callback for walk_stmt_load_store_ops to visit store.  */
++
++static bool
++find_field_p_store (gimple *stmt ATTRIBUTE_UNUSED, tree node, tree op,
++		    void *data)
++{
++  update_field_access (node, op, WRITE_FIELD, data);
++
++  return false;
++}
++
++/* A callback for walk_stmt_load_store_ops to visit load.  */
++
++static bool
++find_field_p_load (gimple *stmt ATTRIBUTE_UNUSED, tree node, tree op,
++		   void *data)
++{
++  update_field_access (node, op, READ_FIELD, data);
++
++  return false;
++}
++
++/* Determine whether the stmt should be deleted.  */
++
++bool
++ipa_struct_reorg::remove_dead_field_stmt (tree lhs)
++{
++  tree base = NULL_TREE;
++  bool indirect = false;
++  srtype *t = NULL;
++  srfield *f = NULL;
++  bool realpart = false;
++  bool imagpart = false;
++  bool address = false;
++  bool escape_from_base = false;
++  if (!get_type_field (lhs, base, indirect, t, f, realpart, imagpart, address,
++		       escape_from_base))
++    return false;
++  if (t == NULL)
++    return false;
++  if (t->newtype[0] == t->type)
++    return false;
++  if (f == NULL)
++    return false;
++  if (f->newfield[0] == NULL)
++    return true;
++  return false;
++}
++
++/* Maybe record access of statement for further analysis. */
++
++void
++ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt)
++{
++  switch (gimple_code (stmt))
++    {
++    case GIMPLE_ASSIGN:
++      maybe_record_assign (node, as_a (stmt));
++      break;
++    case GIMPLE_CALL:
++      maybe_record_call (node, as_a (stmt));
++      break;
++    case GIMPLE_DEBUG:
++      break;
++    case GIMPLE_GOTO:
++    case GIMPLE_SWITCH:
++      break;
++    default:
++      break;
++    }
++  if (current_layout_opt_level & DEAD_FIELD_ELIMINATION)
++    {
++      /* Look for loads and stores.  */
++      walk_stmt_load_store_ops (stmt, this, find_field_p_load,
++				find_field_p_store);
++    }
++}
++
++/* Calculate the multiplier.  */
++
++static bool
++calculate_mult_num (tree arg, tree *num, tree struct_size)
++{
++  gcc_assert (TREE_CODE (arg) == INTEGER_CST);
++  bool sign = false;
++  HOST_WIDE_INT size = TREE_INT_CST_LOW (arg);
++  if (size < 0)
++    {
++      size = -size;
++      sign = true;
++    }
++  tree arg2 = build_int_cst (TREE_TYPE (arg), size);
++  if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size)))
++    {
++      tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size);
++      if (sign)
++	{
++	  number = build_int_cst (TREE_TYPE (number), -tree_to_shwi (number));
++	}
++      *num = number;
++      return true;
++    }
++  return false;
++}
++
++/* Trace and calculate the multiplier of PLUS_EXPR.  */
++
++static bool
++trace_calculate_plus (gimple *size_def_stmt, tree *num, tree struct_size)
++{
++  gcc_assert (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR);
++
++  tree num1 = NULL_TREE;
++  tree num2 = NULL_TREE;
++  tree arg0 = gimple_assign_rhs1 (size_def_stmt);
++  tree arg1 = gimple_assign_rhs2 (size_def_stmt);
++  if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE)
++    {
++      return false;
++    }
++  if (!is_result_of_mult (arg1, &num2, struct_size) || num2 == NULL_TREE)
++    {
++      return false;
++    }
++  *num = size_binop (PLUS_EXPR, num1, num2);
++  return true;
++}
++
++/* Trace and calculate the multiplier of MULT_EXPR.  */
++
++static bool
++trace_calculate_mult (gimple *size_def_stmt, tree *num, tree struct_size)
++{
++  gcc_assert (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR);
++
++  tree arg0 = gimple_assign_rhs1 (size_def_stmt);
++  tree arg1 = gimple_assign_rhs2 (size_def_stmt);
++  tree num1 = NULL_TREE;
++
++  if (is_result_of_mult (arg0, &num1, struct_size) && num1 != NULL_TREE)
++    {
++      *num = size_binop (MULT_EXPR, arg1, num1);
++      return true;
++    }
++  if (is_result_of_mult (arg1, &num1, struct_size) && num1 != NULL_TREE)
++    {
++      *num = size_binop (MULT_EXPR, arg0, num1);
++      return true;
++    }
++  *num = NULL_TREE;
++  return false;
++}
++
++/* Trace and calculate the multiplier of NEGATE_EXPR.  */
++
++static bool
++trace_calculate_negate (gimple *size_def_stmt, tree *num, tree struct_size)
++{
++  gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NEGATE_EXPR);
++
++  /* support NEGATE_EXPR trace: _3 = -_2; _2 = _1 * 72.  */
++  tree num1 = NULL_TREE;
++  tree arg0 = gimple_assign_rhs1 (size_def_stmt);
++  if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE)
++    {
++      return false;
++    }
++  tree num0 = build_int_cst (TREE_TYPE (num1), -1);
++  *num = size_binop (MULT_EXPR, num0, num1);
++  return true;
++}
++
++/* Trace and calculate the multiplier of POINTER_DIFF_EXPR.  */
++
++static bool
++trace_calculate_diff (gimple *size_def_stmt, tree *num)
++{
++  gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NOP_EXPR);
++
++  /* support POINTER_DIFF_EXPR trace:
++  _3 = (long unsigned int) _2; _2 = _1 - old_arcs_1.  */
++  tree arg = gimple_assign_rhs1 (size_def_stmt);
++  size_def_stmt = SSA_NAME_DEF_STMT (arg);
++  if (size_def_stmt && is_gimple_assign (size_def_stmt)
++      && gimple_assign_rhs_code (size_def_stmt) == POINTER_DIFF_EXPR)
++    {
++      *num = NULL_TREE;
++      return true;
++    }
++  *num = NULL_TREE;
++  return false;
++}
++
++/* This function checks whether ARG is a result of multiplication
++   of some number by STRUCT_SIZE.  If yes, the function returns true
++   and this number is filled into NUM.  */
++
++static bool
++is_result_of_mult (tree arg, tree *num, tree struct_size)
++{
++  if (!struct_size || TREE_CODE (struct_size) != INTEGER_CST
++      || integer_zerop (struct_size))
++    return false;
++
++  /* If we have a integer, just check if it is a multiply of STRUCT_SIZE.  */
++  if (TREE_CODE (arg) == INTEGER_CST)
++    {
++      return calculate_mult_num (arg, num, struct_size);
++    }
++
++  gimple *size_def_stmt = SSA_NAME_DEF_STMT (arg);
++
++  /* If the allocation statement was of the form
++     D.2229_10 =  (D.2228_9);
++     then size_def_stmt can be D.2228_9 = num.3_8 * 8;  */
++
++  while (size_def_stmt && is_gimple_assign (size_def_stmt))
++    {
++      tree lhs = gimple_assign_lhs (size_def_stmt);
++
++      /* We expect temporary here.  */
++      if (!is_gimple_reg (lhs))
++	return false;
++
++      // FIXME: this should handle SHIFT also.
++      tree_code rhs_code = gimple_assign_rhs_code (size_def_stmt);
++      if (rhs_code == PLUS_EXPR)
++	{
++	  return trace_calculate_plus (size_def_stmt, num, struct_size);
++	}
++      else if (rhs_code == MULT_EXPR)
++	{
++	  return trace_calculate_mult (size_def_stmt, num, struct_size);
++	}
++      else if (rhs_code == SSA_NAME)
++	{
++	  arg = gimple_assign_rhs1 (size_def_stmt);
++	  size_def_stmt = SSA_NAME_DEF_STMT (arg);
++	}
++      else if (rhs_code == NEGATE_EXPR
++	       && current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	{
++	  return trace_calculate_negate (size_def_stmt, num, struct_size);
++	}
++      else if (rhs_code == NOP_EXPR
++	       && current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	{
++	  return trace_calculate_diff (size_def_stmt, num);
++	}
++      else
++	{
++	  *num = NULL_TREE;
++	  return false;
++	}
++    }
++
++  *num = NULL_TREE;
++  return false;
++}
++
++/* Return TRUE if STMT is an allocation statement that is handled. */
++
++bool
++ipa_struct_reorg::handled_allocation_stmt (gimple *stmt)
++{
++  if ((current_layout_opt_level & STRUCT_REORDER_FIELDS)
++      && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)
++	  || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC)
++	  || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)))
++    {
++      return true;
++    }
++  if ((current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT
++       || current_layout_opt_level & POINTER_COMPRESSION_SAFE)
++      && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))
++    return true;
++  if ((current_layout_opt_level == STRUCT_SPLIT)
++      && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)
++	  || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC)
++	  || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)
++	  || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC)
++	  || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA)
++	  || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)))
++    return true;
++  return false;
++}
++
++/* Returns the allocated size / T size for STMT.  That is the number of
++   elements in the array allocated.   */
++
++tree
++ipa_struct_reorg::allocate_size (srtype *type, srdecl *decl, gimple *stmt)
++{
++  if (!stmt || gimple_code (stmt) != GIMPLE_CALL
++      || !handled_allocation_stmt (stmt))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\nNot a allocate statment:\n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "\n");
++	}
++      return NULL;
++    }
++
++  if (type->has_escaped ())
++    return NULL;
++
++  tree struct_size = TYPE_SIZE_UNIT (type->type);
++
++  /* Specify the correct size to relax multi-layer pointer.  */
++  if (TREE_CODE (decl->decl) == SSA_NAME && isptrptr (decl->orig_type))
++    {
++      struct_size = TYPE_SIZE_UNIT (decl->orig_type);
++    }
++
++  tree size = gimple_call_arg (stmt, 0);
++
++  if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)
++      || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC))
++    size = gimple_call_arg (stmt, 1);
++  else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))
++    {
++      tree arg1;
++      arg1 = gimple_call_arg (stmt, 1);
++      /* Check that second argument is a constant equal to the size of
++       * structure.  */
++      if (operand_equal_p (arg1, struct_size, 0))
++	return size;
++      /* ??? Check that first argument is a constant
++      equal to the size of structure.  */
++      /* If the allocated number is equal to the value of struct_size,
++     the value of arg1 is changed to the allocated number.  */
++      if (operand_equal_p (size, struct_size, 0))
++	return arg1;
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\ncalloc the correct size:\n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "\n");
++	}
++      return NULL;
++    }
++
++  tree num;
++  if (!is_result_of_mult (size, &num, struct_size))
++    return NULL;
++
++  return num;
++}
++
++void
++ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other,
++						   gimple *stmt)
++{
++  gcc_assert (TREE_CODE (side) == SSA_NAME || TREE_CODE (side) == ADDR_EXPR);
++  srtype *type = NULL;
++  if (handled_type (TREE_TYPE (other)))
++    type = record_type (inner_type (TREE_TYPE (other)));
++  if (TREE_CODE (side) == ADDR_EXPR)
++    side = TREE_OPERAND (side, 0);
++  srdecl *d = find_decl (side);
++  if (!type)
++    {
++      if (!d)
++	return;
++      if (TREE_CODE (side) == SSA_NAME && VOID_POINTER_P (TREE_TYPE (side)))
++	return;
++      if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT
++	  || replace_type_map.get (d->type->type) == NULL)
++	d->type->mark_escape (escape_cast_another_ptr, stmt);
++      return;
++    }
++
++  if (!d)
++    {
++      /* MEM[(struct arc *)_1].head = _2; _2 = calloc (100, 104).  */
++      if (VOID_POINTER_P (TREE_TYPE (side)) && TREE_CODE (side) == SSA_NAME)
++	{
++	  /* The type is other, the declaration is side.  */
++	  current_function->record_decl (type, side, -1,
++					 isptrptr (TREE_TYPE (other))
++					   ? TREE_TYPE (other)
++					   : NULL);
++	}
++      else
++	{
++	  /* *_1 = &MEM[(void *)&x + 8B].  */
++	  if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT
++	      || replace_type_map.get (type->type) == NULL)
++	    type->mark_escape (escape_cast_another_ptr, stmt);
++	}
++    }
++  else if (type != d->type)
++    {
++      if (!is_replace_type (d->type->type, type->type))
++	{
++	  type->mark_escape (escape_cast_another_ptr, stmt);
++	  d->type->mark_escape (escape_cast_another_ptr, stmt);
++	}
++    }
++  /* x_1 = y.x_nodes; void *x;
++     Mark the structure pointer type assigned
++     to the void* variable as escape.  Unless the void* is only used to compare
++     with variables of the same type.  */
++  else if (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++	   && TREE_CODE (side) == SSA_NAME && VOID_POINTER_P (TREE_TYPE (side))
++	   && SSA_NAME_VAR (side)
++	   && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (side))))
++    {
++      if (current_layout_opt_level < POINTER_COMPRESSION_SAFE
++	  || !safe_void_cmp_p (side, type))
++	{
++	  mark_type_as_escape (TREE_TYPE (other), escape_cast_void, stmt);
++	}
++    }
++
++  check_ptr_layers (side, other, stmt);
++}
++
++/* Record accesses in an assignment statement STMT.  */
++
++void
++ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt)
++{
++  /*  */
++
++  if (gimple_clobber_p (stmt))
++    {
++      record_stmt_expr (gimple_assign_lhs (stmt), node, stmt);
++      return;
++    }
++
++  if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
++    {
++      tree lhs = gimple_assign_lhs (stmt);
++      tree rhs1 = gimple_assign_rhs1 (stmt);
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree num;
++      if (!handled_type (TREE_TYPE (lhs)))
++	return;
++      /* Check if rhs2 is a multiplication of the size of the type. */
++      /* The size adjustment and judgment of multi-layer pointers
++     are added.  */
++      if (is_result_of_mult (rhs2, &num,
++			     isptrptr (TREE_TYPE (lhs))
++			       ? TYPE_SIZE_UNIT (TREE_TYPE (lhs))
++			       : TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs)))))
++	{
++	  record_stmt_expr (lhs, node, stmt);
++	  record_stmt_expr (rhs1, node, stmt);
++	}
++      else
++	{
++	  mark_expr_escape (lhs, escape_non_multiply_size, stmt);
++	  mark_expr_escape (rhs1, escape_non_multiply_size, stmt);
++	}
++      return;
++    }
++  /* Copies, References, Taking addresses. */
++  if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
++    {
++      tree lhs = gimple_assign_lhs (stmt);
++      tree rhs = gimple_assign_rhs1 (stmt);
++      /* If we have a = &b.c then we need to mark the type of b
++     as escaping as tracking a will be hard.  */
++      if (TREE_CODE (rhs) == ADDR_EXPR)
++	{
++	  tree r = TREE_OPERAND (rhs, 0);
++	  if (handled_component_p (r))
++	    {
++	      while (handled_component_p (r))
++		r = TREE_OPERAND (r, 0);
++	      mark_expr_escape (r, escape_addr, stmt);
++	      return;
++	    }
++	}
++      if ((TREE_CODE (rhs) == SSA_NAME || TREE_CODE (rhs) == ADDR_EXPR))
++	maybe_mark_or_record_other_side (rhs, lhs, stmt);
++      if (TREE_CODE (lhs) == SSA_NAME)
++	maybe_mark_or_record_other_side (lhs, rhs, stmt);
++    }
++}
++
++bool
++check_mem_ref_offset (tree expr, tree *num)
++{
++  bool ret = false;
++
++  if (TREE_CODE (expr) != MEM_REF)
++    {
++      return false;
++    }
++
++  /* Try to find the structure size.  */
++  tree field_off = TREE_OPERAND (expr, 1);
++  tree tmp = TREE_OPERAND (expr, 0);
++  if (TREE_CODE (tmp) == ADDR_EXPR)
++    {
++      tmp = TREE_OPERAND (tmp, 0);
++    }
++  /* Specify the correct size for the multi-layer pointer.  */
++  tree size = isptrptr (TREE_TYPE (tmp))
++		? TYPE_SIZE_UNIT (TREE_TYPE (tmp))
++		: TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp)));
++  ret = is_result_of_mult (field_off, num, size);
++  return ret;
++}
++
++tree
++get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, bool &realpart,
++			 bool &imagpart, tree &accesstype, tree *num)
++{
++  offset = 0;
++  realpart = false;
++  imagpart = false;
++  accesstype = NULL_TREE;
++  if (TREE_CODE (e) == REALPART_EXPR)
++    {
++      e = TREE_OPERAND (e, 0);
++      realpart = true;
++    }
++  if (TREE_CODE (e) == IMAGPART_EXPR)
++    {
++      e = TREE_OPERAND (e, 0);
++      imagpart = true;
++    }
++  tree expr = e;
++  while (true)
++    {
++      switch (TREE_CODE (expr))
++	{
++	  case COMPONENT_REF: {
++	    /* x.a = _1; If expr is the lvalue of stmt,
++	       then field type is FIELD_DECL - POINTER_TYPE - RECORD_TYPE.  */
++	    tree field = TREE_OPERAND (expr, 1);
++	    // if (DECL_FIELD_BIT_OFFSET (field) == NULL ||
++	    //     DECL_FIELD_OFFSET (field) == NULL)
++	    //   return NULL;
++	    tree field_off = byte_position (field);
++	    if (TREE_CODE (field_off) != INTEGER_CST)
++	      return NULL;
++	    offset += tree_to_shwi (field_off);
++	    /* x.a = _1; If expr is the lvalue of stmt,
++	       then expr type is VAR_DECL - RECORD_TYPE (fetch x) */
++	    expr = TREE_OPERAND (expr, 0);
++	    accesstype = NULL;
++	    break;
++	  }
++	  case MEM_REF: {
++	    /* _2 = MEM[(struct s * *)_1];
++	       If expr is the right value of stmt,then field_off type is
++	       INTEGER_CST - POINTER_TYPE - POINTER_TYPE - RECORD_TYPE.  */
++	    tree field_off = TREE_OPERAND (expr, 1);
++	    gcc_assert (TREE_CODE (field_off) == INTEGER_CST);
++	    /* So we can mark the types as escaping if different. */
++	    accesstype = TREE_TYPE (field_off);
++	    if (!check_mem_ref_offset (expr, num))
++	      {
++		offset += tree_to_uhwi (field_off);
++	      }
++	    return TREE_OPERAND (expr, 0);
++	  }
++	default:
++	  return expr;
++	}
++    }
++}
++
++/* Return true if EXPR was accessing the whole type T.  */
++
++bool
++ipa_struct_reorg::wholeaccess (tree expr, tree base, tree accesstype, srtype *t)
++{
++  if (expr == base)
++    return true;
++
++  if (TREE_CODE (expr) == ADDR_EXPR && TREE_OPERAND (expr, 0) == base)
++    return true;
++
++  if (!accesstype)
++    return false;
++
++  if (!types_compatible_p (TREE_TYPE (expr), TREE_TYPE (accesstype)))
++    return false;
++
++  if (!handled_type (TREE_TYPE (expr)))
++    return false;
++
++  srtype *other_type = find_type (inner_type (TREE_TYPE (expr)));
++
++  if (t == other_type)
++    return true;
++
++  return false;
++}
++
++bool
++ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect,
++				  srtype *&type, srfield *&field,
++				  bool &realpart, bool &imagpart, bool &address,
++				  bool &escape_from_base, bool should_create,
++				  bool can_escape)
++{
++  tree num = NULL_TREE;
++  HOST_WIDE_INT offset;
++  tree accesstype;
++  address = false;
++  bool mark_as_bit_field = false;
++
++  if (TREE_CODE (expr) == BIT_FIELD_REF)
++    {
++      expr = TREE_OPERAND (expr, 0);
++      mark_as_bit_field = true;
++    }
++
++  /* ref is classified into two types: COMPONENT_REF or MEM_REF.  */
++  base = get_ref_base_and_offset (expr, offset, realpart, imagpart, accesstype,
++				  &num);
++
++  /* Variable access, unknown type. */
++  if (base == NULL)
++    return false;
++
++  if (TREE_CODE (base) == ADDR_EXPR)
++    {
++      address = true;
++      base = TREE_OPERAND (base, 0);
++    }
++
++  if (offset != 0 && accesstype)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "Non zero offset (%d) with MEM.\n", (int) offset);
++	  print_generic_expr (dump_file, expr);
++	  fprintf (dump_file, "\n");
++	  print_generic_expr (dump_file, base);
++	  fprintf (dump_file, "\n");
++	}
++    }
++
++  srdecl *d = find_decl (base);
++  srtype *t;
++
++  if (integer_zerop (base))
++    {
++      gcc_assert (!d);
++      if (!accesstype)
++	return false;
++      t = find_type (inner_type (inner_type (accesstype)));
++      if (!t && should_create && handled_type (accesstype))
++	t = record_type (inner_type (accesstype));
++      if (!t)
++	return false;
++    }
++  /* If no such decl is found
++     or orig_type is not added to this decl, then add it.  */
++  else if (!d && accesstype)
++    {
++      if (!should_create)
++	return false;
++      if (!handled_type (accesstype))
++	return false;
++      t = find_type (inner_type (inner_type (accesstype)));
++      if (!t)
++	t = record_type (inner_type (accesstype));
++      if (!t || t->has_escaped ())
++	return false;
++      /* If base is not void* mark the type as escaping.
++     release INTEGER_TYPE cast to struct pointer.
++     (If t has escaped above, then directly returns
++     and doesn't mark escape follow.). */
++      /* _1 = MEM[(struct arc_t * *)a_1].
++     then base a_1: ssa_name  - pointer_type - integer_type.  */
++      if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	{
++	  bool is_int_ptr
++	    = POINTER_TYPE_P (TREE_TYPE (base))
++	      && (TREE_CODE (inner_type (TREE_TYPE (base))) == INTEGER_TYPE);
++	  if (!(VOID_POINTER_P (TREE_TYPE (base))
++		|| (current_function->is_safe_func && is_int_ptr)))
++	    {
++	      gcc_assert (can_escape);
++	      t->mark_escape (escape_cast_another_ptr, NULL);
++	      return false;
++	    }
++	  if (TREE_CODE (base) == SSA_NAME
++	      && !(current_function->is_safe_func && is_int_ptr))
++	    {
++	      /* Add a safe func mechanism.  */
++	      if (!(current_function->is_safe_func
++		    && is_from_void_ptr_parm (base)))
++		{
++		  /* Add auxiliary information of the multi-layer pointer
++		     type.  */
++		  current_function->record_decl (
++		    t, base, -1, isptrptr (accesstype) ? accesstype : NULL);
++		}
++	    }
++	}
++      else
++	{
++	  if (!(VOID_POINTER_P (TREE_TYPE (base))))
++	    {
++	      gcc_assert (can_escape);
++	      t->mark_escape (escape_cast_another_ptr, NULL);
++	      return false;
++	    }
++	  if (TREE_CODE (base) == SSA_NAME)
++	    {
++	      /* Add auxiliary information of the multi-layer pointer
++	     type.  */
++	      current_function->record_decl (t, base, -1,
++					     isptrptr (accesstype) ? accesstype
++								   : NULL);
++	    }
++	}
++    }
++  else if (!d)
++    return false;
++  else
++    t = d->type;
++
++  if (t->has_escaped ())
++    {
++      escape_from_base = true;
++      return false;
++    }
++
++  if (mark_as_bit_field)
++    {
++      gcc_assert (can_escape);
++      t->mark_escape (escape_bitfields, NULL);
++      return false;
++    }
++
++  /* Escape the operation of fetching field with pointer offset such as:
++   *(&(t->right)) = malloc (0); -> MEM[(struct node * *)_1 + 8B] = malloc (0);
++   */
++  if (current_layout_opt_level > STRUCT_SPLIT && (TREE_CODE (expr) == MEM_REF)
++      && (offset != 0))
++    {
++      gcc_assert (can_escape);
++      t->mark_escape (escape_non_multiply_size, NULL);
++      return false;
++    }
++
++  if (wholeaccess (expr, base, accesstype, t))
++    {
++      field = NULL;
++      type = t;
++      indirect = accesstype != NULL;
++      return true;
++    }
++
++  srfield *f = t->find_field (offset);
++  if (!f)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\nunkown field\n");
++	  print_generic_expr (dump_file, expr);
++	  fprintf (dump_file, "\n");
++	  print_generic_expr (dump_file, base);
++	}
++      gcc_assert (can_escape);
++      t->mark_escape (escape_unkown_field, NULL);
++      return false;
++    }
++  if (!types_compatible_p (f->fieldtype, TREE_TYPE (expr)))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\nfieldtype = ");
++	  print_generic_expr (dump_file, f->fieldtype);
++	  fprintf (dump_file, "\naccess type = ");
++	  print_generic_expr (dump_file, TREE_TYPE (expr));
++	  fprintf (dump_file, "\noriginal expr = ");
++	  print_generic_expr (dump_file, expr);
++	}
++      gcc_assert (can_escape);
++      t->mark_escape (escape_unkown_field, NULL);
++      return false;
++    }
++  field = f;
++  type = t;
++  indirect = accesstype != NULL;
++  return true;
++}
++
++/* Mark the type used in EXPR as escaping. */
++
++void
++ipa_struct_reorg::mark_expr_escape (tree expr, escape_type escapes,
++				    gimple *stmt)
++{
++  tree base;
++  bool indirect;
++  srtype *type;
++  srfield *field;
++  bool realpart, imagpart, address;
++  bool escape_from_base = false;
++  if (!get_type_field (expr, base, indirect, type, field, realpart, imagpart,
++		       address, escape_from_base))
++    return;
++
++  type->mark_escape (escapes, stmt);
++}
++
++/* Record accesses in a call statement STMT.  */
++
++void
++ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt)
++{
++  tree argtype;
++  tree fndecl;
++  escape_type escapes = does_not_escape;
++  bool free_or_realloc = gimple_call_builtin_p (stmt, BUILT_IN_FREE)
++			 || gimple_call_builtin_p (stmt, BUILT_IN_REALLOC);
++
++  /* We check allocation sites in a different location. */
++  if (handled_allocation_stmt (stmt))
++    return;
++
++  /* A few cases here:
++     1) assigned from the lhs
++     2) Used in argument
++     If a function being called is global (or indirect)
++      then we reject the types as being escaping. */
++
++  if (tree chain = gimple_call_chain (stmt))
++    record_stmt_expr (chain, node, stmt);
++
++  /* Assigned from LHS.  */
++  if (tree lhs = gimple_call_lhs (stmt))
++    {
++      /* FIXME: handle return types.. */
++      mark_type_as_escape (TREE_TYPE (lhs), escape_return);
++    }
++
++  /* If we have an internal call, just record the stmt. */
++  if (gimple_call_internal_p (stmt))
++    {
++      for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
++	record_stmt_expr (gimple_call_arg (stmt, i), node, stmt);
++      return;
++    }
++
++  fndecl = gimple_call_fndecl (stmt);
++
++  /* If we have an indirect call, just mark the types as escape. */
++  if (!fndecl)
++    escapes = escape_pointer_function;
++  /* Non local functions cause escape except for calls to free
++     and realloc.
++     FIXME: should support function annotations too.  */
++  else if (!free_or_realloc && !cgraph_node::local_info_node (fndecl)->local)
++    escapes = escape_external_function;
++  else if (!free_or_realloc
++	   && !cgraph_node::local_info_node (fndecl)->can_change_signature)
++    escapes = escape_cannot_change_signature;
++  /* FIXME: we should be able to handle functions in other partitions.  */
++  else if (symtab_node::get (fndecl)->in_other_partition)
++    escapes = escape_external_function;
++
++  if (escapes != does_not_escape)
++    {
++      for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
++	{
++	  mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)), escapes);
++	  srdecl *d = current_function->find_decl (gimple_call_arg (stmt, i));
++	  if (d)
++	    d->type->mark_escape (escapes, stmt);
++	}
++      return;
++    }
++
++  /* get func param it's tree_list.  */
++  argtype = TYPE_ARG_TYPES (gimple_call_fntype (stmt));
++  for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
++    {
++      tree arg = gimple_call_arg (stmt, i);
++      if (argtype)
++	{
++	  tree argtypet = TREE_VALUE (argtype);
++	  /* callee_func (_1, _2);
++	     Check the callee func, instead of current func.  */
++	  if (!(free_or_realloc
++		|| (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++		    && safe_functions.contains (node->get_edge (stmt)->callee)))
++	      && VOID_POINTER_P (argtypet))
++	    {
++	      mark_type_as_escape (TREE_TYPE (arg), escape_cast_void, stmt);
++	    }
++	  else
++	    record_stmt_expr (arg, node, stmt);
++	}
++      else
++	mark_type_as_escape (TREE_TYPE (arg), escape_var_arg_function);
++
++      argtype = argtype ? TREE_CHAIN (argtype) : NULL_TREE;
++    }
++}
++
++void
++ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt)
++{
++  tree base;
++  bool indirect;
++  srtype *type;
++  srfield *field;
++  bool realpart, imagpart, address;
++  bool escape_from_base = false;
++  if (!get_type_field (expr, base, indirect, type, field, realpart, imagpart,
++		       address, escape_from_base))
++    return;
++
++  if (current_layout_opt_level > NONE)
++    {
++      if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg))
++	{
++	  type->mark_escape (escape_non_optimize, stmt);
++	}
++    }
++
++  /* Record it. */
++  type->add_access (new sraccess (stmt, node, type, field));
++}
++
++/* Find function corresponding to NODE.  */
++
++srfunction *
++ipa_struct_reorg::find_function (cgraph_node *node)
++{
++  for (unsigned i = 0; i < functions.length (); i++)
++    if (functions[i]->node == node)
++      return functions[i];
++  return NULL;
++}
++
++void
++ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl,
++				       vec &worklist, gimple *stmt)
++{
++  srtype *type = decl->type;
++  if (integer_zerop (newdecl))
++    return;
++
++  if (TREE_CODE (newdecl) == ADDR_EXPR)
++    {
++      srdecl *d = find_decl (TREE_OPERAND (newdecl, 0));
++      if (!d)
++	{
++	  type->mark_escape (escape_cast_another_ptr, stmt);
++	  return;
++	}
++      if (d->type == type
++	  && cmp_ptr_layers (TREE_TYPE (newdecl), TREE_TYPE (decl->decl)))
++	return;
++
++      srtype *type1 = d->type;
++      type->mark_escape (escape_cast_another_ptr, stmt);
++      type1->mark_escape (escape_cast_another_ptr, stmt);
++      return;
++    }
++
++  srdecl *d = find_decl (newdecl);
++  if (!d)
++    {
++      if (TREE_CODE (newdecl) == INTEGER_CST)
++	{
++	  type->mark_escape (escape_int_const, stmt);
++	  return;
++	}
++      /* If we have a non void* or a decl (which is hard to track),
++     then mark the type as escaping.  */
++      if (replace_type_map.get (type->type) == NULL
++	  && (!VOID_POINTER_P (TREE_TYPE (newdecl)) || DECL_P (newdecl)))
++	{
++	  if (dump_file && (dump_flags & TDF_DETAILS))
++	    {
++	      fprintf (dump_file, "\nunkown decl: ");
++	      print_generic_expr (dump_file, newdecl);
++	      fprintf (dump_file, " in type:\n");
++	      print_generic_expr (dump_file, TREE_TYPE (newdecl));
++	      fprintf (dump_file, "\n");
++	    }
++	  type->mark_escape (escape_cast_another_ptr, stmt);
++	  return;
++	}
++      /* At this point there should only be unknown void* ssa names. */
++      gcc_assert (TREE_CODE (newdecl) == SSA_NAME);
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\nrecording unkown decl: ");
++	  print_generic_expr (dump_file, newdecl);
++	  fprintf (dump_file, " as type:\n");
++	  type->simple_dump (dump_file);
++	  fprintf (dump_file, "\n");
++	}
++      d = current_function->record_decl (type, newdecl, -1);
++      worklist.safe_push (d);
++      return;
++    }
++
++  /* Only add to the worklist if the decl is a SSA_NAME.  */
++  if (TREE_CODE (newdecl) == SSA_NAME)
++    worklist.safe_push (d);
++  tree a_decl = d->orig_type ? d->orig_type : TREE_TYPE (newdecl);
++  tree b_decl = decl->orig_type ? decl->orig_type : TREE_TYPE (decl->decl);
++  if (d->type == type && cmp_ptr_layers (a_decl, b_decl))
++    return;
++
++  srtype *type1 = d->type;
++  type->mark_escape (escape_cast_another_ptr, stmt);
++  type1->mark_escape (escape_cast_another_ptr, stmt);
++}
++
++void
++ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type, bool ptrptr)
++{
++  if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT
++      && handled_allocation_stmt (stmt))
++    {
++      tree arg0 = gimple_call_arg (stmt, 0);
++      basic_block bb = gimple_bb (stmt);
++      cgraph_node *node = current_function->node;
++      if (!ptrptr && current_layout_opt_level >= SEMI_RELAYOUT
++	  && gimple_call_builtin_p (stmt, BUILT_IN_MALLOC))
++	{
++	  /* Malloc is commonly used for allocations of a single struct
++	     and semi-relayout will waste a mess of memory, so we skip it.  */
++	  type->has_alloc_array = -4;
++	  return;
++	}
++      if (integer_onep (arg0))
++	{
++	  /* Actually NOT an array, but may ruin other array.  */
++	  type->has_alloc_array = -1;
++	}
++      else if (bb->loop_father != NULL && loop_outer (bb->loop_father) != NULL)
++	{
++	  /* For semi-relayout, do not escape realloc.  */
++	  if (current_layout_opt_level & SEMI_RELAYOUT
++	      && gimple_call_builtin_p (stmt, BUILT_IN_REALLOC))
++	    return;
++	  /* The allocation is in a loop.  */
++	  type->has_alloc_array = -2;
++	}
++      else if (node->callers != NULL)
++	{
++	  type->has_alloc_array = -3;
++	}
++      else
++	{
++	  type->has_alloc_array = type->has_alloc_array < 0
++				    ? type->has_alloc_array
++				    : type->has_alloc_array + 1;
++	}
++      if (current_layout_opt_level & POINTER_COMPRESSION_SAFE
++	  && TREE_CODE (arg0) == INTEGER_CST)
++	{
++	  /* Only known size during compilation can be optimized
++	     at this level.  */
++	  unsigned HOST_WIDE_INT max_alloc_size = 0;
++	  switch (compressed_size)
++	    {
++	    case 8:
++	      max_alloc_size = 0xff;
++	      break; // max of uint8
++	    case 16:
++	      max_alloc_size = 0xffff;
++	      break; // max of uint16
++	    case 32:
++	      max_alloc_size = 0xffffffff;
++	      break; // max of uint32
++	    default:
++	      gcc_unreachable ();
++	      break;
++	    }
++	  if (tree_to_uhwi (arg0) < max_alloc_size)
++	    type->has_legal_alloc_num = true;
++	}
++    }
++}
++
++/* Check the definition of gimple assign.  */
++
++void
++ipa_struct_reorg::check_definition_assign (srdecl *decl,
++					   vec &worklist)
++{
++  tree ssa_name = decl->decl;
++  srtype *type = decl->type;
++  gimple *stmt = SSA_NAME_DEF_STMT (ssa_name);
++  gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN);
++  /* a) if the SSA_NAME is sourced from a pointer plus, record the pointer and
++    check to make sure the addition was a multiple of the size.
++    check the pointer type too.  */
++  tree rhs = gimple_assign_rhs1 (stmt);
++  if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
++    {
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree num = NULL_TREE;
++      /* Specify the correct size for the multi-layer pointer.  */
++      if (!is_result_of_mult (rhs2, &num,
++			      isptrptr (decl->orig_type)
++				? TYPE_SIZE_UNIT (decl->orig_type)
++				: TYPE_SIZE_UNIT (type->type)))
++	{
++	  type->mark_escape (escape_non_multiply_size, stmt);
++	}
++
++      if (TREE_CODE (rhs) == SSA_NAME)
++	{
++	  check_type_and_push (rhs, decl, worklist, stmt);
++	}
++      return;
++    }
++
++  if (gimple_assign_rhs_code (stmt) == MAX_EXPR
++      || gimple_assign_rhs_code (stmt) == MIN_EXPR
++      || gimple_assign_rhs_code (stmt) == BIT_IOR_EXPR
++      || gimple_assign_rhs_code (stmt) == BIT_XOR_EXPR
++      || gimple_assign_rhs_code (stmt) == BIT_AND_EXPR)
++    {
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      if (TREE_CODE (rhs) == SSA_NAME)
++	{
++	  check_type_and_push (rhs, decl, worklist, stmt);
++	}
++      if (TREE_CODE (rhs2) == SSA_NAME)
++	{
++	  check_type_and_push (rhs2, decl, worklist, stmt);
++	}
++      return;
++    }
++
++  /* Casts between pointers and integer are escaping.  */
++  if (gimple_assign_cast_p (stmt))
++    {
++      if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT
++	  || replace_type_map.get (type->type) == NULL)
++	type->mark_escape (escape_cast_int, stmt);
++      return;
++    }
++
++  if (semi_relayout_map.get (type->type) != NULL)
++    {
++      if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT)
++	type->mark_escape (escape_unhandled_rewrite, stmt);
++      return;
++    }
++
++  /* d) if the name is from a cast/assignment, make sure it is used as
++    that type or void*
++    i) If void* then push the ssa_name into worklist.  */
++  gcc_assert (gimple_assign_single_p (stmt));
++  check_other_side (decl, rhs, stmt, worklist);
++  check_ptr_layers (decl->decl, rhs, stmt);
++}
++
++/* Check the definition of gimple call.  */
++
++void
++ipa_struct_reorg::check_definition_call (srdecl *decl, vec &worklist)
++{
++  tree ssa_name = decl->decl;
++  srtype *type = decl->type;
++  gimple *stmt = SSA_NAME_DEF_STMT (ssa_name);
++  gcc_assert (gimple_code (stmt) == GIMPLE_CALL);
++
++  /* For realloc, check the type of the argument.  */
++  if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC))
++    {
++      check_type_and_push (gimple_call_arg (stmt, 0), decl, worklist, stmt);
++    }
++
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++    {
++      if (!handled_allocation_stmt (stmt))
++	{
++	  type->mark_escape (escape_return, stmt);
++	}
++      if (!allocate_size (type, decl, stmt))
++	{
++	  type->mark_escape (escape_non_multiply_size, stmt);
++	}
++    }
++  else
++    {
++      if (!handled_allocation_stmt (stmt) || !allocate_size (type, decl, stmt))
++	{
++	  type->mark_escape (escape_return, stmt);
++	}
++    }
++
++  bool ptrptr = isptrptr (decl->orig_type);
++  check_alloc_num (stmt, type, ptrptr);
++  return;
++}
++
++/*
++  2) Check SSA_NAMEs for non type usages (source or use) (worklist of srdecl)
++     a) if the SSA_NAME is sourced from a pointer plus, record the pointer and
++    check to make sure the addition was a multiple of the size.
++    check the pointer type too.
++     b) If the name is sourced from an allocation check the allocation
++    i) Add SSA_NAME (void*) to the worklist if allocated from realloc
++     c) if the name is from a param, make sure the param type was of the
++  original type d) if the name is from a cast/assignment, make sure it is used
++  as that type or void* i) If void* then push the ssa_name into worklist
++*/
++void
++ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist)
++{
++  tree ssa_name = decl->decl;
++  srtype *type = decl->type;
++
++  /* c) if the name is from a param, make sure the param type was
++     of the original type */
++  if (SSA_NAME_IS_DEFAULT_DEF (ssa_name))
++    {
++      tree var = SSA_NAME_VAR (ssa_name);
++      if (var && TREE_CODE (var) == PARM_DECL
++	  && VOID_POINTER_P (TREE_TYPE (ssa_name)))
++	{
++	  type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name));
++	}
++      return;
++    }
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++      && SSA_NAME_VAR (ssa_name)
++      && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name))))
++    {
++      if (current_layout_opt_level < POINTER_COMPRESSION_SAFE
++	  || !safe_void_cmp_p (ssa_name, type))
++	{
++	  type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name));
++	}
++    }
++  gimple *stmt = SSA_NAME_DEF_STMT (ssa_name);
++
++  /*
++     b) If the name is sourced from an allocation check the allocation
++    i) Add SSA_NAME (void*) to the worklist if allocated from realloc
++  */
++  if (gimple_code (stmt) == GIMPLE_CALL)
++    {
++      check_definition_call (decl, worklist);
++    }
++  /* If the SSA_NAME is sourced from an inline-asm, just mark the type as
++   * escaping.  */
++  if (gimple_code (stmt) == GIMPLE_ASM)
++    {
++      type->mark_escape (escape_inline_asm, stmt);
++      return;
++    }
++
++  /* If the SSA_NAME is sourced from a PHI check add each name to the worklist
++     and check to make sure they are used correctly.  */
++  if (gimple_code (stmt) == GIMPLE_PHI)
++    {
++      for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++)
++	{
++	  check_type_and_push (gimple_phi_arg_def (stmt, i), decl, worklist,
++			       stmt);
++	}
++      return;
++    }
++  if (gimple_code (stmt) == GIMPLE_ASSIGN)
++    {
++      check_definition_assign (decl, worklist);
++    }
++}
++
++/* Mark the types used by the inline-asm as escaping.  It is unknown what happens
++   inside an inline-asm. */
++
++void
++ipa_struct_reorg::mark_types_asm (gasm *astmt)
++{
++  for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++)
++    {
++      tree v = TREE_VALUE (gimple_asm_input_op (astmt, i));
++      /* If we have &b, just strip the & here. */
++      if (TREE_CODE (v) == ADDR_EXPR)
++	v = TREE_OPERAND (v, 0);
++      mark_expr_escape (v, escape_inline_asm, astmt);
++    }
++  for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++)
++    {
++      tree v = TREE_VALUE (gimple_asm_output_op (astmt, i));
++      /* If we have &b, just strip the & here. */
++      if (TREE_CODE (v) == ADDR_EXPR)
++	v = TREE_OPERAND (v, 0);
++      mark_expr_escape (v, escape_inline_asm, astmt);
++    }
++}
++
++void
++ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt,
++				    vec &worklist)
++{
++  srtype *type = decl->type;
++
++  if (TREE_CODE (other) == SSA_NAME || DECL_P (other)
++      || TREE_CODE (other) == INTEGER_CST)
++    {
++      check_type_and_push (other, decl, worklist, stmt);
++      return;
++    }
++
++  tree t = TREE_TYPE (other);
++  if (!handled_type (t))
++    {
++      type->mark_escape (escape_cast_another_ptr, stmt);
++      return;
++    }
++
++  srtype *t1 = find_type (inner_type (t));
++  if (t1 == type)
++    {
++      /* In Complete Struct Relayout opti, if lhs type is the same
++     as rhs type, we could return without any harm.  */
++      if (current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT)
++	{
++	  return;
++	}
++
++      tree base;
++      bool indirect;
++      srtype *type1;
++      srfield *field;
++      bool realpart, imagpart, address;
++      bool escape_from_base = false;
++      if (!get_type_field (other, base, indirect, type1, field, realpart,
++			   imagpart, address, escape_from_base))
++	{
++	  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	    {
++	      /* release INTEGER_TYPE cast to struct pointer.  */
++	      bool cast_from_int_ptr
++		= current_function->is_safe_func && base
++		  && find_decl (base) == NULL
++		  && POINTER_TYPE_P (TREE_TYPE (base))
++		  && (TREE_CODE (inner_type (TREE_TYPE (base)))
++		      == INTEGER_TYPE);
++
++	      /* Add a safe func mechanism.  */
++	      bool from_void_ptr_parm = current_function->is_safe_func
++					&& TREE_CODE (base) == SSA_NAME
++					&& is_from_void_ptr_parm (base);
++
++	      /* release type is used by a type which escapes.  */
++	      if (escape_from_base || cast_from_int_ptr || from_void_ptr_parm)
++		{
++		  return;
++		}
++	    }
++	  type->mark_escape (escape_cast_another_ptr, stmt);
++	}
++
++      return;
++    }
++  if (!is_replace_type (inner_type (t), type->type))
++    {
++      if (t1)
++	t1->mark_escape (escape_cast_another_ptr, stmt);
++
++      type->mark_escape (escape_cast_another_ptr, stmt);
++    }
++}
++
++/* Get the expr base.  */
++
++void
++get_base (tree &base, tree expr)
++{
++  if (TREE_CODE (expr) == MEM_REF)
++    {
++      base = TREE_OPERAND (expr, 0);
++    }
++  else if (TREE_CODE (expr) == COMPONENT_REF)
++    {
++      base = TREE_OPERAND (expr, 0);
++      base = (TREE_CODE (base) == MEM_REF) ? TREE_OPERAND (base, 0) : base;
++    }
++  else if (TREE_CODE (expr) == ADDR_EXPR)
++    {
++      base = TREE_OPERAND (expr, 0);
++    }
++}
++
++/* Check whether the number of pointer layers of exprs is equal,
++   marking unequals as escape.  */
++
++void
++ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple *stmt)
++{
++  if (current_layout_opt_level < STRUCT_REORDER_FIELDS
++      || current_function->is_safe_func
++      || !(POINTER_TYPE_P (TREE_TYPE (a_expr)))
++      || !(POINTER_TYPE_P (TREE_TYPE (b_expr)))
++      || !handled_type (TREE_TYPE (a_expr))
++      || !handled_type (TREE_TYPE (b_expr)))
++    {
++      return;
++    }
++
++  tree a_base = a_expr;
++  tree b_base = b_expr;
++  get_base (a_base, a_expr);
++  get_base (b_base, b_expr);
++
++  srdecl *a = find_decl (a_base);
++  srdecl *b = find_decl (b_base);
++  if (a && b == NULL && TREE_CODE (b_expr) != INTEGER_CST)
++    {
++      a->type->mark_escape (escape_cast_another_ptr, stmt);
++      return;
++    }
++  else if (b && a == NULL && TREE_CODE (a_expr) != INTEGER_CST)
++    {
++      b->type->mark_escape (escape_cast_another_ptr, stmt);
++      return;
++    }
++  else if (a == NULL && b == NULL)
++    {
++      return;
++    }
++
++  if (cmp_ptr_layers (TREE_TYPE (a_expr), TREE_TYPE (b_expr)))
++    {
++      return;
++    }
++
++  if (a)
++    {
++      a->type->mark_escape (escape_cast_another_ptr, stmt);
++    }
++  if (b)
++    {
++      b->type->mark_escape (escape_cast_another_ptr, stmt);
++    }
++}
++
++void
++ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt,
++			     vec &worklist)
++{
++  srtype *type = decl->type;
++
++  if (gimple_code (stmt) == GIMPLE_RETURN)
++    {
++      type->mark_escape (escape_return, stmt);
++      return;
++    }
++  /* If the SSA_NAME PHI check and add the src to the worklist and
++     check to make sure they are used correctly.  */
++  if (gimple_code (stmt) == GIMPLE_PHI)
++    {
++      check_type_and_push (gimple_phi_result (stmt), decl, worklist, stmt);
++      return;
++    }
++
++  if (gimple_code (stmt) == GIMPLE_ASM)
++    {
++      mark_types_asm (as_a (stmt));
++      return;
++    }
++
++  if (gimple_code (stmt) == GIMPLE_COND)
++    {
++      tree rhs1 = gimple_cond_lhs (stmt);
++      tree rhs2 = gimple_cond_rhs (stmt);
++      tree orhs = rhs1;
++      enum tree_code code = gimple_cond_code (stmt);
++      if ((current_layout_opt_level == STRUCT_SPLIT
++	   && (code != EQ_EXPR && code != NE_EXPR))
++	  || (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT
++	      && (code != EQ_EXPR && code != NE_EXPR && code != LT_EXPR
++		  && code != LE_EXPR && code != GT_EXPR && code != GE_EXPR)))
++	{
++	  mark_expr_escape (rhs1, escape_non_eq, stmt);
++	  mark_expr_escape (rhs2, escape_non_eq, stmt);
++	}
++      if (rhs1 == decl->decl)
++	orhs = rhs2;
++      if (integer_zerop (orhs))
++	return;
++      if (TREE_CODE (orhs) != SSA_NAME)
++	mark_expr_escape (rhs1, escape_non_eq, stmt);
++      check_type_and_push (orhs, decl, worklist, stmt);
++      return;
++    }
++
++  /* Casts between pointers and integer are escaping.  */
++  if (gimple_assign_cast_p (stmt))
++    {
++      if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT
++	  || replace_type_map.get (type->type) == NULL)
++	type->mark_escape (escape_cast_int, stmt);
++      return;
++    }
++
++  /* We might have a_1 = ptr_2 == ptr_3; */
++  if (is_gimple_assign (stmt)
++      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
++    {
++      tree rhs1 = gimple_assign_rhs1 (stmt);
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree orhs = rhs1;
++      enum tree_code code = gimple_assign_rhs_code (stmt);
++      if ((current_layout_opt_level == STRUCT_SPLIT
++	   && (code != EQ_EXPR && code != NE_EXPR))
++	  || (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT
++	      && (code != EQ_EXPR && code != NE_EXPR && code != LT_EXPR
++		  && code != LE_EXPR && code != GT_EXPR && code != GE_EXPR)))
++	{
++	  mark_expr_escape (rhs1, escape_non_eq, stmt);
++	  mark_expr_escape (rhs2, escape_non_eq, stmt);
++	}
++      if (rhs1 == decl->decl)
++	orhs = rhs2;
++      if (integer_zerop (orhs))
++	return;
++      if (TREE_CODE (orhs) != SSA_NAME)
++	mark_expr_escape (rhs1, escape_non_eq, stmt);
++      check_type_and_push (orhs, decl, worklist, stmt);
++      return;
++    }
++
++  if (gimple_assign_single_p (stmt))
++    {
++      tree lhs = gimple_assign_lhs (stmt);
++      tree rhs = gimple_assign_rhs1 (stmt);
++      /* Check if we have a_1 = b_2; that a_1 is in the correct type. */
++      if (decl->decl == rhs)
++	{
++	  check_other_side (decl, lhs, stmt, worklist);
++	  return;
++	}
++      check_ptr_layers (lhs, rhs, stmt);
++    }
++
++  if (is_gimple_assign (stmt)
++      && gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
++    {
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree lhs = gimple_assign_lhs (stmt);
++      tree num;
++      check_other_side (decl, lhs, stmt, worklist);
++      check_ptr_layers (lhs, decl->decl, stmt);
++      /* Specify the correct size for the multi-layer pointer.  */
++      if (!is_result_of_mult (rhs2, &num,
++			      isptrptr (decl->orig_type)
++				? TYPE_SIZE_UNIT (decl->orig_type)
++				: TYPE_SIZE_UNIT (type->type)))
++	type->mark_escape (escape_non_multiply_size, stmt);
++    }
++
++  if (is_gimple_assign (stmt)
++      && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR)
++    {
++      tree rhs1 = gimple_assign_rhs1 (stmt);
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree other = rhs1 == decl->decl ? rhs2 : rhs1;
++
++      check_other_side (decl, other, stmt, worklist);
++      check_ptr_layers (decl->decl, other, stmt);
++      return;
++    }
++}
++
++/*
++   2) Check SSA_NAMEs for non type usages (source or use) (worklist of srdecl)
++    d) if the name is used in a cast/assignment, make sure it is used as
++   that type or void* i) If void* then push the ssa_name into worklist e) if
++   used in conditional check the other side i) If the conditional is non NE/EQ
++   then mark the type as non rejecting f) Check if the use in a Pointer PLUS
++   EXPR is used by multiplication of its size
++  */
++void
++ipa_struct_reorg::check_uses (srdecl *decl, vec &worklist)
++{
++  tree ssa_name = decl->decl;
++  imm_use_iterator imm_iter;
++  use_operand_p use_p;
++
++  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name)
++    {
++      gimple *stmt = USE_STMT (use_p);
++
++      if (is_gimple_debug (stmt))
++	continue;
++
++      check_use (decl, stmt, worklist);
++    }
++}
++
++/* Record function corresponding to NODE. */
++
++srfunction *
++ipa_struct_reorg::record_function (cgraph_node *node)
++{
++  function *fn;
++  tree parm, var;
++  unsigned int i;
++  srfunction *sfn;
++  escape_type escapes = does_not_escape;
++
++  sfn = new srfunction (node);
++  functions.safe_push (sfn);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    fprintf (dump_file, "\nRecording accesses and types from function: %s/%u\n",
++	     node->name (), node->order);
++
++  /* Nodes without a body are not interesting.  Especially do not
++     visit clones at this point for now - we get duplicate decls
++     there for inline clones at least.  */
++  if (!node->has_gimple_body_p () || node->inlined_to)
++    return sfn;
++
++  node->get_body ();
++  fn = DECL_STRUCT_FUNCTION (node->decl);
++
++  if (!fn)
++    return sfn;
++
++  current_function = sfn;
++
++  if (DECL_PRESERVE_P (node->decl))
++    escapes = escape_marked_as_used;
++  else if (!node->local)
++    {
++      if (current_layout_opt_level < STRUCT_REORDER_FIELDS)
++	{
++	  escapes = escape_visible_function;
++	}
++      else if (node->externally_visible)
++	{
++	  escapes = escape_visible_function;
++	}
++    }
++  else if (!node->can_change_signature)
++    escapes = escape_cannot_change_signature;
++  else if (!tree_versionable_function_p (node->decl))
++    escapes = escape_noclonable_function;
++
++  if (current_layout_opt_level > NONE)
++    {
++      if (!opt_for_fn (node->decl, flag_ipa_struct_reorg))
++	{
++	  escapes = escape_non_optimize;
++	}
++    }
++
++  basic_block bb;
++  gimple_stmt_iterator si;
++
++  /* Add a safe func mechanism.  */
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++    {
++      current_function->is_safe_func = safe_functions.contains (node);
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\nfunction %s/%u: is_safe_func = %d\n",
++		   node->name (), node->order, current_function->is_safe_func);
++	}
++    }
++
++  /* Record the static chain decl.  */
++  if (fn->static_chain_decl)
++    {
++      srdecl *sd = record_var (fn->static_chain_decl, escapes, -2);
++      if (sd)
++	{
++	  /* Specify that this type is used by the static
++	     chain so it cannot be split. */
++	  sd->type->chain_type = true;
++	  sfn->add_arg (sd);
++	  sd->type->add_function (sfn);
++	}
++    }
++
++  /* Record the arguments. */
++  for (parm = DECL_ARGUMENTS (node->decl), i = 0; parm;
++       parm = DECL_CHAIN (parm), i++)
++    {
++      srdecl *sd = record_var (parm, escapes, i);
++      if (sd)
++	{
++	  sfn->add_arg (sd);
++	  sd->type->add_function (sfn);
++	}
++    }
++
++  /* Mark the return type as escaping */
++  {
++    tree return_type = TREE_TYPE (TREE_TYPE (node->decl));
++    mark_type_as_escape (return_type, escape_return, NULL);
++  }
++
++  /* If the cfg does not exist for the function, don't process the function.  */
++  if (!fn->cfg)
++    {
++      current_function = NULL;
++      return sfn;
++    }
++
++  /* The following order is done for recording stage:
++     0) Record all variables/SSA_NAMES that are of struct type
++     1) Record MEM_REF/COMPONENT_REFs
++    a) Record SSA_NAMEs (void*) and record that as the accessed type.
++  */
++
++  push_cfun (fn);
++
++  FOR_EACH_LOCAL_DECL (cfun, i, var)
++    {
++      if (TREE_CODE (var) != VAR_DECL)
++	continue;
++
++      record_var (var);
++    }
++
++  for (i = 1; i < num_ssa_names; ++i)
++    {
++      tree name = ssa_name (i);
++      if (!name || has_zero_uses (name) || virtual_operand_p (name))
++	continue;
++
++      record_var (name);
++    }
++
++  /* Find the variables which are used via MEM_REF and are void* types. */
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
++	{
++	  gimple *stmt = gsi_stmt (si);
++	  find_vars (stmt);
++	}
++    }
++
++  auto_vec worklist;
++  for (unsigned i = 0; i < current_function->decls.length (); i++)
++    {
++      srdecl *decl = current_function->decls[i];
++      if (TREE_CODE (decl->decl) == SSA_NAME)
++	{
++	  decl->visited = false;
++	  worklist.safe_push (decl);
++	}
++    }
++
++  /*
++     2) Check SSA_NAMEs for non type usages (source or use) (worklist of srdecl)
++    a) if the SSA_NAME is sourced from a pointer plus, record the pointer
++     and check to make sure the addition was a multiple of the size. check the
++     pointer type too. b) If the name is sourced from an allocation check the
++     allocation i) Add SSA_NAME (void*) to the worklist if allocated from
++     realloc c) if the name is from a param, make sure the param type was of the
++     original type d) if the name is used in a cast/assignment, make sure it is
++     used as that type or void* i) If void* then push the ssa_name into worklist
++    e) if used in conditional check the other side
++      i) If the conditional is non NE/EQ then mark the type as non rejecting
++    f) Check if the use in a Pointer PLUS EXPR is used by multiplication of
++     its size
++  */
++
++  while (!worklist.is_empty ())
++    {
++      srdecl *decl = worklist.pop ();
++      if (decl->visited)
++	continue;
++      decl->visited = true;
++      check_definition (decl, worklist);
++      check_uses (decl, worklist);
++    }
++
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
++	{
++	  gimple *stmt = gsi_stmt (si);
++	  maybe_record_stmt (node, stmt);
++	}
++    }
++
++  pop_cfun ();
++  current_function = NULL;
++  return sfn;
++}
++
++/* For a function that contains the void* parameter and passes the structure
++   pointer, check whether the function uses the input node safely.
++   For these functions, the void* parameter and related ssa_name are not
++   recorded in record_function (), and the input structure type is not escaped.
++*/
++
++void
++ipa_struct_reorg::record_safe_func_with_void_ptr_parm ()
++{
++  cgraph_node *node = NULL;
++  FOR_EACH_FUNCTION (node)
++    {
++      if (!node->real_symbol_p ())
++	{
++	  continue;
++	}
++      if (node->definition)
++	{
++	  if (!node->has_gimple_body_p () || node->inlined_to)
++	    {
++	      continue;
++	    }
++	  node->get_body ();
++	  function *fn = DECL_STRUCT_FUNCTION (node->decl);
++	  if (!fn)
++	    {
++	      continue;
++	    }
++	  push_cfun (fn);
++	  if (is_safe_func_with_void_ptr_parm (node))
++	    {
++	      safe_functions.add (node);
++	      if (dump_file && (dump_flags & TDF_DETAILS))
++		{
++		  fprintf (dump_file, "\nfunction %s/%u is safe function.\n",
++			   node->name (), node->order);
++		}
++	    }
++	  pop_cfun ();
++	}
++    }
++}
++
++/* Record all accesses for all types including global variables. */
++
++void
++ipa_struct_reorg::record_accesses (void)
++{
++  varpool_node *var;
++  cgraph_node *cnode;
++
++  /* Record global (non-auto) variables first. */
++  FOR_EACH_VARIABLE (var)
++    {
++      if (!var->real_symbol_p ())
++	continue;
++
++      /* Record all variables including the accesses inside a variable. */
++      escape_type escapes = does_not_escape;
++      if (var->externally_visible || !var->definition)
++	escapes = escape_via_global_var;
++      if (var->in_other_partition)
++	escapes = escape_via_global_var;
++      if (!var->externally_visible && var->definition)
++	var->get_constructor ();
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "Recording global variable: ");
++	  print_generic_expr (dump_file, var->decl);
++	  fprintf (dump_file, "\n");
++	}
++      record_var (var->decl, escapes);
++    }
++
++  /* Add a safe func mechanism.  */
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++    {
++      record_safe_func_with_void_ptr_parm ();
++    }
++
++  FOR_EACH_FUNCTION (cnode)
++    {
++      if (!cnode->real_symbol_p ())
++	continue;
++      /* Record accesses inside a function. */
++      if (cnode->definition)
++	record_function (cnode);
++      else if (!cnode->has_gimple_body_p () || cnode->inlined_to)
++	continue;
++      else
++	{
++	  tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl));
++	  mark_type_as_escape (return_type, escape_return, NULL);
++	}
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\n");
++      fprintf (dump_file, "==============================================\n\n");
++      fprintf (dump_file, "======== all types (before pruning): ========\n\n");
++      dump_types (dump_file);
++      fprintf (dump_file, "======= all functions (before pruning): =======\n");
++      dump_functions (dump_file);
++    }
++  /* If record_var () is called later, new types will not be recorded.  */
++  done_recording = true;
++}
++
++/* A helper function to detect cycles (recursive types).
++   Return TRUE if TYPE was a recursive type.  */
++
++bool
++ipa_struct_reorg::walk_field_for_cycles (srtype *type)
++{
++  unsigned i;
++  srfield *field;
++
++  type->visited = true;
++  if (type->escaped_rescusive ())
++    return true;
++
++  if (type->has_escaped ())
++    return false;
++
++  FOR_EACH_VEC_ELT (type->fields, i, field)
++    {
++      if (!field->type)
++	;
++      /* If there are two members of the same structure pointer type? */
++      else if (field->type->visited || walk_field_for_cycles (field->type))
++	{
++	  type->mark_escape (escape_rescusive_type, NULL);
++	  return true;
++	}
++    }
++
++  return false;
++}
++
++/* Clear visited on all types.  */
++
++void
++ipa_struct_reorg::clear_visited (void)
++{
++  for (unsigned i = 0; i < types.length (); i++)
++    types[i]->visited = false;
++}
++
++/* Detect recursive types and mark them as escaping.  */
++
++void
++ipa_struct_reorg::detect_cycles (void)
++{
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      if (types[i]->has_escaped ())
++	continue;
++
++      clear_visited ();
++      walk_field_for_cycles (types[i]);
++    }
++}
++
++/* Propagate escaping to dependent types.  */
++
++void
++ipa_struct_reorg::propagate_escape (void)
++{
++  unsigned i;
++  srtype *type;
++  bool changed = false;
++
++  do
++    {
++      changed = false;
++      FOR_EACH_VEC_ELT (types, i, type)
++	{
++	  for (tree field = TYPE_FIELDS (type->type); field;
++	       field = DECL_CHAIN (field))
++	    {
++	      if (TREE_CODE (field) == FIELD_DECL
++		  && handled_type (TREE_TYPE (field)))
++		{
++		  tree t = inner_type (TREE_TYPE (field));
++		  srtype *type1 = find_type (t);
++		  if (!type1)
++		    continue;
++		  if (type1->has_escaped () && !type->has_escaped ())
++		    {
++		      type->mark_escape (escape_dependent_type_escapes, NULL);
++		      changed = true;
++		    }
++		  if (type->has_escaped () && !type1->has_escaped ())
++		    {
++		      type1->mark_escape (escape_dependent_type_escapes, NULL);
++		      changed = true;
++		    }
++		}
++	    }
++	}
++  } while (changed);
++}
++
++/* If the original type (with members) has escaped, corresponding to the
++   struct pointer type (empty member) in the structure fields
++   should also marked as escape.  */
++
++void
++ipa_struct_reorg::propagate_escape_via_original (void)
++{
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      for (unsigned j = 0; j < types.length (); j++)
++	{
++	  const char *type1 = get_type_name (types[i]->type);
++	  const char *type2 = get_type_name (types[j]->type);
++	  if (type1 == NULL || type2 == NULL)
++	    {
++	      continue;
++	    }
++	  if (type1 == type2 && types[j]->has_escaped ())
++	    {
++	      if (!types[i]->has_escaped ())
++		{
++		  types[i]->mark_escape (escape_via_orig_escape, NULL);
++		}
++	      break;
++	    }
++	}
++    }
++}
++
++/* Mark types that have no fields and no corresponding original
++   structure type as escaping.  */
++
++void
++ipa_struct_reorg::propagate_escape_via_empty_with_no_original (void)
++{
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      if (types[i]->fields.length () == 0)
++	{
++	  for (unsigned j = 0; j < types.length (); j++)
++	    {
++	      if (i != j && types[j]->fields.length ())
++		{
++		  const char *type1 = get_type_name (types[i]->type);
++		  const char *type2 = get_type_name (types[j]->type);
++		  if (type1 != NULL && type2 != NULL && type1 == type2)
++		    {
++		      break;
++		    }
++		}
++	      if (j == types.length () - 1)
++		{
++		  types[i]->mark_escape (escape_via_empty_no_orig, NULL);
++		}
++	    }
++	}
++    }
++}
++
++/* Prune the escaped types and their decls from what was recorded.  */
++
++void
++ipa_struct_reorg::prune_escaped_types (void)
++{
++  if (current_layout_opt_level == STRUCT_SPLIT)
++    {
++      /* Detect recursive types and mark them as escaping.  */
++      detect_cycles ();
++      /* If contains or is contained by the escape type,
++     mark them as escaping.  */
++      propagate_escape ();
++    }
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++    {
++      propagate_escape_via_original ();
++      propagate_escape_via_empty_with_no_original ();
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "==============================================\n\n");
++      fprintf (dump_file, "all types (after prop but before pruning): \n\n");
++      dump_types (dump_file);
++      fprintf (dump_file, "all functions (after prop but before pruning): \n");
++      dump_functions (dump_file);
++    }
++
++  if (dump_file)
++    dump_types_escaped (dump_file);
++
++  /* Prune the function arguments which escape
++     and functions which have no types as arguments. */
++  for (unsigned i = 0; i < functions.length ();)
++    {
++      srfunction *function = functions[i];
++
++      /* Prune function arguments of types that escape. */
++      for (unsigned j = 0; j < function->args.length ();)
++	{
++	  if (function->args[j]->type->has_escaped ())
++	    function->args.ordered_remove (j);
++	  else
++	    j++;
++	}
++
++      /* Prune global variables that the function uses of types that escape. */
++      for (unsigned j = 0; j < function->globals.length ();)
++	{
++	  if (function->globals[j]->type->has_escaped ())
++	    function->globals.ordered_remove (j);
++	  else
++	    j++;
++	}
++
++      /* Prune variables that the function uses of types that escape. */
++      for (unsigned j = 0; j < function->decls.length ();)
++	{
++	  srdecl *decl = function->decls[j];
++	  if (decl->type->has_escaped ())
++	    {
++	      function->decls.ordered_remove (j);
++	      delete decl;
++	    }
++	  else
++	    j++;
++	}
++
++      /* Prune functions which don't refer to any variables any more.  */
++      if (function->args.is_empty () && function->decls.is_empty ()
++	  && function->globals.is_empty ()
++	  && current_layout_opt_level < STRUCT_REORDER_FIELDS)
++	{
++	  delete function;
++	  functions.ordered_remove (i);
++	}
++      else
++	i++;
++    }
++
++  /* Prune globals of types that escape, all references to those decls
++     will have been removed in the first loop.  */
++  for (unsigned j = 0; j < globals.decls.length ();)
++    {
++      srdecl *decl = globals.decls[j];
++      if (decl->type->has_escaped ())
++	{
++	  globals.decls.ordered_remove (j);
++	  delete decl;
++	}
++      else
++	j++;
++    }
++
++  /* Prune types that escape, all references to those types
++     will have been removed in the above loops.  */
++  /* The escape type is not deleted in current_layout_opt_level after
++     STRUCT_REORDER_FIELDS, then the type that contains the
++     escaped type fields can find complete information.  */
++  if (current_layout_opt_level < STRUCT_REORDER_FIELDS)
++    {
++      for (unsigned i = 0; i < types.length ();)
++	{
++	  srtype *type = types[i];
++	  if (type->has_escaped ())
++	    {
++	      /* All references to this type should have been removed now.  */
++	      delete type;
++	      types.ordered_remove (i);
++	    }
++	  else
++	    {
++	      i++;
++	    }
++	}
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "==============================================\n\n");
++      fprintf (dump_file, "========= all types (after pruning): =========\n\n");
++      dump_types (dump_file);
++      fprintf (dump_file, "======== all functions (after pruning): ========\n");
++      dump_functions (dump_file);
++    }
++}
++
++/* Analyze all of the types. */
++
++void
++ipa_struct_reorg::analyze_types (void)
++{
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      if (!types[i]->has_escaped ())
++	types[i]->analyze ();
++    }
++}
++
++/* Create all new types we want to create. */
++
++bool
++ipa_struct_reorg::create_new_types (void)
++{
++  int newtypes = 0;
++  clear_visited ();
++  for (unsigned i = 0; i < types.length (); i++)
++    newtypes += types[i]->create_new_type ();
++
++  /* Some new types may not have been created at create_new_type (), so
++     recreate new type for all struct fields.  */
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++    {
++      for (unsigned i = 0; i < types.length (); i++)
++	{
++	  auto_vec *fields = fields_to_finish.get (types[i]->type);
++	  if (fields)
++	    {
++	      for (unsigned j = 0; j < fields->length (); j++)
++		{
++		  tree field = (*fields)[j];
++		  if (types[i]->pc_candidate)
++		    {
++		      TREE_TYPE (field) = make_unsigned_type (compressed_size);
++		      SET_DECL_ALIGN (field, compressed_size);
++		    }
++		  else
++		    {
++		      TREE_TYPE (field)
++			= reconstruct_complex_type (TREE_TYPE (field),
++						    types[i]->newtype[0]);
++		    }
++		}
++	    }
++	}
++      for (unsigned i = 0; i < types.length (); i++)
++	{
++	  layout_type (types[i]->newtype[0]);
++	}
++    }
++
++  if (current_layout_opt_level == STRUCT_SPLIT)
++    {
++      if (dump_file)
++	{
++	  if (newtypes)
++	    fprintf (dump_file,
++		     "\nNumber of structures to transform in"
++		     " struct split is %d\n",
++		     newtypes);
++	  else
++	    fprintf (dump_file, "\nNo structures to transform in"
++				" struct split.\n");
++	}
++    }
++  else
++    {
++      if (dump_file)
++	{
++	  if (newtypes)
++	    fprintf (dump_file,
++		     "\nNumber of structures to transform"
++		     " is %d\n",
++		     newtypes);
++	  else
++	    fprintf (dump_file, "\nNo structures to transform.\n");
++	}
++    }
++
++  return newtypes != 0;
++}
++
++/* Create all the new decls except for the new arguments
++   which create_new_functions would have created. */
++
++void
++ipa_struct_reorg::create_new_decls (void)
++{
++  globals.create_new_decls ();
++  for (unsigned i = 0; i < functions.length (); i++)
++    functions[i]->create_new_decls ();
++}
++
++/* Create the new arguments for the function corresponding to NODE. */
++
++void
++ipa_struct_reorg::create_new_args (cgraph_node *new_node)
++{
++  tree decl = new_node->decl;
++  auto_vec params;
++  push_function_arg_decls (¶ms, decl);
++  vec *adjs = NULL;
++  vec_safe_reserve (adjs, params.length ());
++  for (unsigned i = 0; i < params.length (); i++)
++    {
++      struct ipa_adjusted_param adj;
++      tree parm = params[i];
++      memset (&adj, 0, sizeof (adj));
++      adj.base_index = i;
++      adj.prev_clone_index = i;
++      srtype *t = find_type (inner_type (TREE_TYPE (parm)));
++      if (!t || t->has_escaped () || !t->has_new_type ())
++	{
++	  adj.op = IPA_PARAM_OP_COPY;
++	  vec_safe_push (adjs, adj);
++	  continue;
++	}
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "Creating a new argument for: ");
++	  print_generic_expr (dump_file, params[i]);
++	  fprintf (dump_file, " in function: ");
++	  print_generic_expr (dump_file, decl);
++	  fprintf (dump_file, "\n");
++	}
++      adj.op = IPA_PARAM_OP_NEW;
++      adj.param_prefix_index = IPA_PARAM_PREFIX_REORG;
++      for (unsigned j = 0; j < max_split && t->newtype[j]; j++)
++	{
++	  adj.type = reconstruct_complex_type (TREE_TYPE (parm), t->newtype[j]);
++	  vec_safe_push (adjs, adj);
++	}
++    }
++  ipa_param_body_adjustments *adjustments
++    = new ipa_param_body_adjustments (adjs, decl);
++  adjustments->modify_formal_parameters ();
++  auto_vec new_params;
++  push_function_arg_decls (&new_params, decl);
++  unsigned veclen = vec_safe_length (adjs);
++  for (unsigned i = 0; i < veclen; i++)
++    {
++      if ((*adjs)[i].op != IPA_PARAM_OP_NEW)
++	continue;
++      tree decl = params[(*adjs)[i].base_index];
++      srdecl *d = find_decl (decl);
++      if (!d)
++	continue;
++      unsigned j = 0;
++      while (j < max_split && d->newdecl[j])
++	j++;
++      d->newdecl[j] = new_params[i];
++    }
++
++  function *fn = DECL_STRUCT_FUNCTION (decl);
++
++  if (!fn->static_chain_decl)
++    return;
++  srdecl *chain = find_decl (fn->static_chain_decl);
++  if (!chain)
++    return;
++
++  srtype *type = chain->type;
++  tree orig_var = chain->decl;
++  const char *tname = NULL;
++  if (DECL_NAME (orig_var))
++    tname = IDENTIFIER_POINTER (DECL_NAME (orig_var));
++  gcc_assert (!type->newtype[1]);
++  tree new_name = NULL;
++  char *name = NULL;
++  if (tname)
++    {
++      name = concat (tname, ".reorg.0", NULL);
++      new_name = get_identifier (name);
++      free (name);
++    }
++  tree newtype1
++    = reconstruct_complex_type (TREE_TYPE (orig_var), type->newtype[0]);
++  chain->newdecl[0] = build_decl (DECL_SOURCE_LOCATION (orig_var), PARM_DECL,
++				  new_name, newtype1);
++  copy_var_attributes (chain->newdecl[0], orig_var);
++  fn->static_chain_decl = chain->newdecl[0];
++}
++
++/* Find the referred DECL in the current function or globals.
++   If this is a global decl, record that as being used
++   in the current function.  */
++
++srdecl *
++ipa_struct_reorg::find_decl (tree decl)
++{
++  srdecl *d;
++  d = globals.find_decl (decl);
++  if (d)
++    {
++      /* Record the global usage in the current function.  */
++      if (!done_recording && current_function)
++	{
++	  bool add = true;
++	  /* No reason to add it to the current function if it is
++	     already recorded as such. */
++	  for (unsigned i = 0; i < current_function->globals.length (); i++)
++	    {
++	      if (current_function->globals[i] == d)
++		{
++		  add = false;
++		  break;
++		}
++	    }
++	  if (add)
++	    current_function->globals.safe_push (d);
++	}
++      return d;
++    }
++  if (current_function)
++    return current_function->find_decl (decl);
++  return NULL;
++}
++
++/* Create new function clones for the cases where the arguments
++   need to be changed.  */
++
++void
++ipa_struct_reorg::create_new_functions (void)
++{
++  for (unsigned i = 0; i < functions.length (); i++)
++    {
++      srfunction *f = functions[i];
++      bool anyargchanges = false;
++      cgraph_node *new_node;
++      cgraph_node *node = f->node;
++      int newargs = 0;
++      if (f->old)
++	continue;
++
++      if (f->args.length () == 0)
++	continue;
++
++      for (unsigned j = 0; j < f->args.length (); j++)
++	{
++	  srdecl *d = f->args[j];
++	  srtype *t = d->type;
++	  if (t->has_new_type ())
++	    {
++	      newargs += t->newtype[1] != NULL;
++	      anyargchanges = true;
++	    }
++	}
++      if (!anyargchanges)
++	continue;
++
++      if (dump_file)
++	{
++	  fprintf (dump_file, "Creating a clone of function: ");
++	  f->simple_dump (dump_file);
++	  fprintf (dump_file, "\n");
++	}
++      statistics_counter_event (NULL, "Create new function", 1);
++      new_node = node->create_version_clone_with_body (vNULL, NULL, NULL, NULL,
++						       NULL, "struct_reorg");
++      new_node->can_change_signature = node->can_change_signature;
++      new_node->make_local ();
++      f->newnode = new_node;
++      srfunction *n = record_function (new_node);
++      current_function = n;
++      n->old = f;
++      f->newf = n;
++      /* Create New arguments. */
++      create_new_args (new_node);
++      current_function = NULL;
++    }
++}
++
++bool
++ipa_struct_reorg::rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhs[max_split],
++				   tree newrhs[max_split])
++{
++  bool l = rewrite_expr (lhs, newlhs);
++  bool r = rewrite_expr (rhs, newrhs);
++
++  /* Handle NULL pointer specially. */
++  if (l && !r && integer_zerop (rhs))
++    {
++      r = true;
++      for (unsigned i = 0; i < max_split && newlhs[i]; i++)
++	newrhs[i] = fold_convert (TREE_TYPE (newlhs[i]), rhs);
++    }
++
++  return l || r;
++}
++
++bool
++ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split],
++				bool ignore_missing_decl)
++{
++  tree base;
++  bool indirect;
++  srtype *t;
++  srfield *f;
++  bool realpart, imagpart;
++  bool address;
++  bool escape_from_base = false;
++
++  tree newbase[max_split];
++  memset (newexpr, 0, sizeof (tree[max_split]));
++
++  if (TREE_CODE (expr) == CONSTRUCTOR)
++    {
++      srtype *t = find_type (TREE_TYPE (expr));
++      if (!t)
++	return false;
++      gcc_assert (CONSTRUCTOR_NELTS (expr) == 0);
++      if (!t->has_new_type ())
++	return false;
++      for (unsigned i = 0; i < max_split && t->newtype[i]; i++)
++	newexpr[i] = build_constructor (t->newtype[i], NULL);
++      return true;
++    }
++
++  if (!get_type_field (expr, base, indirect, t, f, realpart, imagpart, address,
++		       escape_from_base))
++    return false;
++
++  /* If the type is not changed, then just return false. */
++  if (!t->has_new_type ())
++    return false;
++
++  /*  NULL pointer handling is "special".  */
++  if (integer_zerop (base))
++    {
++      gcc_assert (indirect && !address);
++      for (unsigned i = 0; i < max_split && t->newtype[i]; i++)
++	{
++	  tree newtype1
++	    = reconstruct_complex_type (TREE_TYPE (base), t->newtype[i]);
++	  newbase[i] = fold_convert (newtype1, base);
++	}
++    }
++  else
++    {
++      srdecl *d = find_decl (base);
++
++      if (!d && dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "Can't find decl:\n");
++	  print_generic_expr (dump_file, base);
++	  fprintf (dump_file, "\ntype:\n");
++	  t->dump (dump_file);
++	}
++      if (!d && ignore_missing_decl)
++	return true;
++      gcc_assert (d);
++      memcpy (newbase, d->newdecl, sizeof (d->newdecl));
++    }
++
++  if (f == NULL)
++    {
++      memcpy (newexpr, newbase, sizeof (newbase));
++      for (unsigned i = 0; i < max_split && newexpr[i]; i++)
++	{
++	  if (address)
++	    newexpr[i] = build_fold_addr_expr (newexpr[i]);
++	  if (indirect)
++	    newexpr[i] = build_simple_mem_ref (newexpr[i]);
++	  if (imagpart)
++	    newexpr[i]
++	      = build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])),
++			newexpr[i]);
++	  if (realpart)
++	    newexpr[i]
++	      = build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])),
++			newexpr[i]);
++	}
++      return true;
++    }
++
++  tree newdecl = newbase[f->clusternum];
++  for (unsigned i = 0; i < max_split && f->newfield[i]; i++)
++    {
++      tree newbase1 = newdecl;
++      if (address)
++	newbase1 = build_fold_addr_expr (newbase1);
++      if (indirect)
++	{
++	  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	    {
++	      /* Supports the MEM_REF offset.
++	     _1 = MEM[(struct arc *)ap_1 + 72B].flow;
++	     Old rewrite: _1 = ap.slo.0_8->flow;
++	     New rewrite: _1
++	      = MEM[(struct arc.slo.0 *)ap.slo.0_8 + 64B].flow;
++	      */
++	      HOST_WIDE_INT offset_tmp = 0;
++	      HOST_WIDE_INT mem_offset = 0;
++	      bool realpart_tmp = false;
++	      bool imagpart_tmp = false;
++	      tree accesstype_tmp = NULL_TREE;
++	      tree num = NULL_TREE;
++	      get_ref_base_and_offset (expr, offset_tmp, realpart_tmp,
++				       imagpart_tmp, accesstype_tmp, &num);
++
++	      tree ptype = TREE_TYPE (newbase1);
++	      /* Specify the correct size for the multi-layer pointer.  */
++	      tree size = isptrptr (ptype)
++			    ? TYPE_SIZE_UNIT (ptype)
++			    : TYPE_SIZE_UNIT (inner_type (ptype));
++	      mem_offset = (num != NULL)
++			     ? TREE_INT_CST_LOW (num) * tree_to_shwi (size)
++			     : 0;
++	      newbase1 = build2 (MEM_REF, TREE_TYPE (ptype), newbase1,
++				 build_int_cst (ptype, mem_offset));
++	    }
++	  else
++	    {
++	      newbase1 = build_simple_mem_ref (newbase1);
++	    }
++	}
++      newexpr[i] = build3 (COMPONENT_REF, TREE_TYPE (f->newfield[i]), newbase1,
++			   f->newfield[i], NULL_TREE);
++      if (imagpart)
++	newexpr[i] = build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])),
++			     newexpr[i]);
++      if (realpart)
++	newexpr[i] = build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])),
++			     newexpr[i]);
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "cluster: %d. decl = ", (int) f->clusternum);
++	  print_generic_expr (dump_file, newbase1);
++	  fprintf (dump_file, "\nnewexpr = ");
++	  print_generic_expr (dump_file, newexpr[i]);
++	  fprintf (dump_file, "\n");
++	}
++    }
++  return true;
++}
++
++/* Emit a series of gimples to compress the pointer XHS to the 1-based index
++   relative to the global memory pool header TYPE->pc_gptr.  The basic block
++   where GSI is located must have at least one stmt.  Returns the SSA name
++   holding the compressed index.  */
++
++tree
++ipa_struct_reorg::compress_ptr_to_offset (tree xhs, srtype *type,
++					  gimple_stmt_iterator *gsi)
++{
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nCompress candidate pointer:\n");
++      print_generic_expr (dump_file, xhs);
++      fprintf (dump_file, "\nto offset:\n");
++    }
++
++  /* Emit gimple _X1 = ptr - gptr.  */
++  tree pointer_addr = fold_convert (long_unsigned_type_node, xhs);
++  tree gptr_addr = fold_convert (long_unsigned_type_node, type->pc_gptr);
++  tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node,
++				pointer_addr, gptr_addr);
++
++  /* Emit gimple _X2 = _X1 / sizeof (struct).  */
++  tree step2 = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_unsigned_type_node,
++				step1, TYPE_SIZE_UNIT (type->newtype[0]));
++
++  /* Emit _X3 = (compressed_size) _X2.  */
++  tree pc_type = make_unsigned_type (compressed_size);
++  tree step3 = gimplify_build1 (gsi, NOP_EXPR, pc_type, step2);
++
++  /* Emit gimple _X4 = _X3 + 1.  Index 0 is reserved to represent a NULL
++     pointer (see create_bb_for_compress_nullptr), so stored indices are
++     biased by one; decompress_offset_to_ptr subtracts it back.  */
++  tree step4
++    = gimplify_build2 (gsi, PLUS_EXPR, pc_type, step3, build_one_cst (pc_type));
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      print_generic_expr (dump_file, step4);
++      fprintf (dump_file, "\n");
++    }
++  return step4;
++}
++
++/* Emit a series of gimples to decompress the 1-based index XHS into the
++   original pointer into TYPE's memory pool.  Inverse of
++   compress_ptr_to_offset.  The basic block where GSI is located must have
++   at least one stmt.  Returns the SSA name holding the pointer.  */
++
++tree
++ipa_struct_reorg::decompress_offset_to_ptr (tree xhs, srtype *type,
++					    gimple_stmt_iterator *gsi)
++{
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nDecompress candidate offset:\n");
++      print_generic_expr (dump_file, xhs);
++      fprintf (dump_file, "\nto pointer:\n");
++    }
++
++  /* Emit _X1 = xhs - 1.  Remove the +1 bias added at compression time;
++     index 0 is the compressed NULL and is handled by the caller.  */
++  tree offset = fold_convert (long_unsigned_type_node, xhs);
++  tree step1
++    = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, offset,
++		       build_one_cst (long_unsigned_type_node));
++
++  /* Emit _X2 = _X1 * sizeof (struct).  */
++  tree step2 = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, step1,
++				TYPE_SIZE_UNIT (type->newtype[0]));
++
++  /* Emit _X3 = phead + _X2.  */
++  tree gptr_addr = fold_convert (long_unsigned_type_node, type->pc_gptr);
++  tree step3 = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node,
++				gptr_addr, step2);
++
++  /* Emit _X4 = (struct *) _X3.  */
++  tree step4
++    = gimplify_build1 (gsi, NOP_EXPR, TREE_TYPE (type->pc_gptr), step3);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      print_generic_expr (dump_file, step4);
++      fprintf (dump_file, "\n");
++    }
++  return step4;
++}
++
++/* Return the compression candidate srtype of SSA_NAME or COMPONENT_REF XHS,
++   or NULL if XHS is not one of those codes or its type is not a pointer
++   compression candidate.  */
++
++srtype *
++ipa_struct_reorg::get_compression_candidate_type (tree xhs)
++{
++  if (xhs == NULL_TREE)
++    return NULL;
++
++  if (TREE_CODE (xhs) == SSA_NAME || TREE_CODE (xhs) == COMPONENT_REF)
++    {
++      srtype *access_type = find_type (inner_type (TREE_TYPE (xhs)));
++      if (access_type != NULL && access_type->pc_candidate)
++	return access_type;
++    }
++  return NULL;
++}
++
++/* True if TYPE is a pointer to a RECORD_TYPE whose srtype has been marked
++   as a pointer compression candidate.  */
++
++bool
++ipa_struct_reorg::pc_candidate_st_type_p (tree type)
++{
++  if (type == NULL_TREE)
++    return false;
++
++  if (TREE_CODE (type) == POINTER_TYPE)
++    {
++      if (TREE_CODE (TREE_TYPE (type)) == RECORD_TYPE)
++	{
++	  srtype *access_type = find_type (TREE_TYPE (type));
++	  if (access_type != NULL && access_type->pc_candidate)
++	    return true;
++	}
++    }
++  return false;
++}
++
++/* True if XHS is a candidate for pointer compression: a COMPONENT_REF whose
++   base type is known and has not escaped, and whose field type is a
++   compression candidate pointer type.  */
++
++bool
++ipa_struct_reorg::pc_candidate_tree_p (tree xhs)
++{
++  if (xhs == NULL_TREE)
++    return false;
++
++  if (TREE_CODE (xhs) == COMPONENT_REF)
++    {
++      srtype *base_type = find_type (TREE_TYPE (TREE_OPERAND (xhs, 0)));
++      if (base_type == NULL || base_type->has_escaped ())
++	return false;
++
++      return pc_candidate_st_type_p (TREE_TYPE (xhs));
++    }
++  return false;
++}
++
++/* Return the semi-relayout candidate srtype of SSA_NAME or COMPONENT_REF
++   XHS, or NULL if there is none.  Counterpart of
++   get_compression_candidate_type for the semi-relayout optimization.  */
++
++srtype *
++ipa_struct_reorg::get_semi_relayout_candidate_type (tree xhs)
++{
++  if (xhs == NULL)
++    return NULL;
++  if (TREE_CODE (xhs) == SSA_NAME || TREE_CODE (xhs) == COMPONENT_REF)
++    {
++      srtype *access_type = find_type (inner_type (TREE_TYPE (xhs)));
++      if (access_type != NULL && access_type->semi_relayout)
++	return access_type;
++    }
++  return NULL;
++}
++
++/* True if XHS refers to a type selected for semi-relayout.  XHS may be an
++   SSA name (checked through its pointer type), a POINTER_TYPE itself, or a
++   COMPONENT_REF whose base MEM_REF has a matching candidate type.  */
++
++bool
++ipa_struct_reorg::is_semi_relayout_candidate (tree xhs)
++{
++  if (xhs == NULL)
++    return false;
++
++  /* For an SSA name, examine its type below.  */
++  if (TREE_CODE (xhs) == SSA_NAME)
++    xhs = TREE_TYPE (xhs);
++
++  if (TREE_CODE (xhs) == POINTER_TYPE)
++    {
++      srtype *var_type = find_type (TREE_TYPE (xhs));
++      if (!var_type || var_type->has_escaped ())
++	return false;
++      if (var_type->semi_relayout)
++	return true;
++    }
++
++  if (TREE_CODE (xhs) == COMPONENT_REF)
++    {
++      tree mem = TREE_OPERAND (xhs, 0);
++      if (TREE_CODE (mem) == MEM_REF)
++	{
++	  tree type = TREE_TYPE (mem);
++	  srtype *old_type = get_relayout_candidate_type (type);
++	  if (!old_type)
++	    return false;
++	  if (types_compatible_p (type, old_type->type)
++	      && old_type->semi_relayout)
++	    return true;
++	}
++    }
++  return false;
++}
++
++/* True if XHS is a COMPONENT_REF whose base has escaped but whose field uses
++   a compression candidate type.  Such accesses cannot be compressed, but the
++   rewriter must still insert a type conversion for them.  */
++
++bool
++ipa_struct_reorg::pc_type_conversion_candidate_p (tree xhs)
++{
++  if (xhs == NULL_TREE)
++    return false;
++
++  if (TREE_CODE (xhs) == COMPONENT_REF)
++    {
++      srtype *base_type = find_type (TREE_TYPE (TREE_OPERAND (xhs, 0)));
++      if (base_type != NULL && base_type->has_escaped ())
++	return pc_candidate_st_type_p (TREE_TYPE (xhs));
++    }
++  return false;
++}
++
++/* Create a new basic block after LAST_BB that assigns zero -- the compressed
++   representation of a NULL pointer -- to PHI (an output parameter).  Returns
++   the new block; the caller wires up its edges.  */
++
++basic_block
++ipa_struct_reorg::create_bb_for_compress_nullptr (basic_block last_bb,
++						  tree &phi)
++{
++  basic_block new_bb = create_empty_bb (last_bb);
++  if (last_bb->loop_father != NULL)
++    {
++      add_bb_to_loop (new_bb, last_bb->loop_father);
++      loops_state_set (LOOPS_NEED_FIXUP);
++    }
++
++  /* Emit phi = 0.  */
++  gimple_stmt_iterator gsi = gsi_last_bb (new_bb);
++  phi = make_ssa_name (make_unsigned_type (compressed_size));
++  tree rhs = build_int_cst (make_unsigned_type (compressed_size), 0);
++  gimple *new_stmt = gimple_build_assign (phi, rhs);
++  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nCreate bb %d for compress nullptr:\n",
++	       new_bb->index);
++      gimple_dump_bb (dump_file, new_bb, 0, dump_flags);
++    }
++  return new_bb;
++}
++
++/* Create a new basic block after LAST_BB that compresses the pointer NEW_RHS
++   to the index relative to the allocated memory pool header of TYPE.  The
++   resulting index is returned through PHI; the caller wires up edges.  */
++
++basic_block
++ipa_struct_reorg::create_bb_for_compress_candidate (basic_block last_bb,
++						    tree new_rhs, srtype *type,
++						    tree &phi)
++{
++  basic_block new_bb = create_empty_bb (last_bb);
++  if (last_bb->loop_father != NULL)
++    {
++      add_bb_to_loop (new_bb, last_bb->loop_father);
++      loops_state_set (LOOPS_NEED_FIXUP);
++    }
++
++  gimple_stmt_iterator gsi = gsi_last_bb (new_bb);
++  /* compress_ptr_to_offset () needs at least one stmt in target bb.  */
++  gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT);
++  phi = compress_ptr_to_offset (new_rhs, type, &gsi);
++  /* Remove the NOP created above.  */
++  gsi_remove (&gsi, true);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nCreate bb %d for compress candidate:\n",
++	       new_bb->index);
++      gimple_dump_bb (dump_file, new_bb, 0, dump_flags);
++    }
++  return new_bb;
++}
++
++/* Compression can be simplified by these following cases:
++     1.  if rhs is NULL, uses zero to represent it.
++     2.  if new_rhs has been converted into INTEGER_TYPE in the previous stmt,
++     just use it here.  For example:
++	_1 = t->s
++     -> tt->s = _1.  */
++
++bool
++ipa_struct_reorg::pc_direct_rewrite_chance_p (tree rhs, tree &new_rhs)
++{
++  if (integer_zerop (rhs))
++    {
++      /* NULL compresses to the reserved index 0; NEW_RHS is an output.  */
++      new_rhs = build_int_cst (make_unsigned_type (compressed_size), 0);
++      return true;
++    }
++  else if (new_rhs && TREE_CODE (TREE_TYPE (new_rhs)) == INTEGER_TYPE)
++    {
++      /* Already an integer, i.e. already in compressed form.  */
++      return true;
++    }
++  return false;
++}
++
++/* The following cases prove RHS cannot be a null pointer, so the null check
++   at compression time can be skipped:
++     1. rhs defined from POINTER_PLUS_EXPR.
++     2. rhs dereferenced as a COMPONENT_REF base in STMT's basic block.  */
++
++bool
++ipa_struct_reorg::pc_simplify_chance_for_compress_p (gassign *stmt, tree rhs)
++{
++  imm_use_iterator imm_iter;
++  use_operand_p use_p;
++  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
++
++  if (def_stmt && is_gimple_assign (def_stmt)
++      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR)
++    return true;
++
++  /* Scan immediate uses of RHS within the same basic block as STMT.  */
++  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, rhs)
++    {
++      gimple *use_stmt = USE_STMT (use_p);
++      if (use_stmt->bb != stmt->bb || !is_gimple_assign (use_stmt))
++	continue;
++
++      tree use_rhs = gimple_assign_rhs1 (use_stmt);
++      if (TREE_CODE (use_rhs) == COMPONENT_REF
++	  && TREE_OPERAND (TREE_OPERAND (use_rhs, 0), 0) == rhs)
++	return true;
++    }
++  return false;
++}
++
++/* Perform compression of NEW_RHS directly, without emitting a null pointer
++   check; the caller has proved RHS cannot be NULL.  Always returns true.  */
++
++bool
++ipa_struct_reorg::compress_candidate_without_check (gimple_stmt_iterator *gsi,
++						    tree rhs, tree &new_rhs)
++{
++  srtype *type = get_compression_candidate_type (rhs);
++  gcc_assert (type != NULL);
++  new_rhs = compress_ptr_to_offset (new_rhs, type, gsi);
++  return true;
++}
++
++/* Perform pointer compression with check.  The conversion will be as shown in
++   the following example:
++     Orig bb:
++     bb <1>:
++     _1->t = _2
++
++     will be transformed to:
++     bb <1>:
++     _3 = _2
++     if (_2 == NULL)
++       goto bb <2>
++     else
++       goto bb <3>
++
++     bb <2>:
++     _3 = 0
++     goto bb <4>
++
++     bb <3>:
++     ...
++     _4 = compress (_2)
++     goto bb <4>
++
++     bb <4>:
++     _5 = PHI (_3, _4)
++     _1->t = _5
++   The gsi will move to the beginning of split dst bb <4>, _1->t = _5 will be
++   emitted by rewrite_assign ().  */
++
++bool
++ipa_struct_reorg::compress_candidate_with_check (gimple_stmt_iterator *gsi,
++						 tree rhs, tree &new_rhs)
++{
++  /* Copy NEW_RHS into a fresh SSA name to use as the condition operand.  */
++  tree cond_lhs = make_ssa_name (TREE_TYPE (new_rhs));
++  gimple *assign_stmt = gimple_build_assign (cond_lhs, new_rhs);
++  gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
++
++  /* Insert cond stmt.  */
++  tree rhs_pointer_type = build_pointer_type (TREE_TYPE (new_rhs));
++  gcond *cond
++    = gimple_build_cond (EQ_EXPR, cond_lhs, build_int_cst (rhs_pointer_type, 0),
++			 NULL_TREE, NULL_TREE);
++  gimple_set_location (cond, UNKNOWN_LOCATION);
++  gsi_insert_before (gsi, cond, GSI_SAME_STMT);
++
++  /* Split after the condition; the remainder of the block becomes bb <4>.  */
++  edge e = split_block (cond->bb, cond);
++  basic_block split_src_bb = e->src;
++  basic_block split_dst_bb = e->dest;
++
++  /* Create bb for nullptr.  */
++  tree phi1 = NULL_TREE;
++  basic_block true_bb = create_bb_for_compress_nullptr (split_src_bb, phi1);
++
++  /* Create bb for compression.  */
++  srtype *type = get_compression_candidate_type (rhs);
++  gcc_assert (type != NULL);
++  tree phi2 = NULL_TREE;
++  basic_block false_bb
++    = create_bb_for_compress_candidate (true_bb, new_rhs, type, phi2);
++
++  /* Rebuild and reset cfg.  */
++  remove_edge_raw (e);
++
++  edge etrue = make_edge (split_src_bb, true_bb, EDGE_TRUE_VALUE);
++  etrue->probability = profile_probability::unlikely ();
++  true_bb->count = etrue->count ();
++
++  edge efalse = make_edge (split_src_bb, false_bb, EDGE_FALSE_VALUE);
++  efalse->probability = profile_probability::likely ();
++  false_bb->count = efalse->count ();
++
++  edge e1 = make_single_succ_edge (true_bb, split_dst_bb, EDGE_FALLTHRU);
++  edge e2 = make_single_succ_edge (false_bb, split_dst_bb, EDGE_FALLTHRU);
++
++  /* Merge the two arms with a PHI of the compressed values.  */
++  tree phi = make_ssa_name (make_unsigned_type (compressed_size));
++  gphi *phi_node = create_phi_node (phi, split_dst_bb);
++  add_phi_arg (phi_node, phi1, e1, UNKNOWN_LOCATION);
++  add_phi_arg (phi_node, phi2, e2, UNKNOWN_LOCATION);
++
++  if (dom_info_available_p (CDI_DOMINATORS))
++    {
++      set_immediate_dominator (CDI_DOMINATORS, split_dst_bb, split_src_bb);
++      set_immediate_dominator (CDI_DOMINATORS, true_bb, split_src_bb);
++      set_immediate_dominator (CDI_DOMINATORS, false_bb, split_src_bb);
++    }
++  *gsi = gsi_start_bb (split_dst_bb);
++  new_rhs = phi;
++  return true;
++}
++
++/* If there is a direct rewrite chance or simplification opportunity, perform
++   the simplified compression rewrite.  Otherwise, create a cond expression and
++   two basic blocks to implement pointer compression.  The unchecked variant
++   is only taken when POINTER_COMPRESSION_UNSAFE is enabled.  */
++
++bool
++ipa_struct_reorg::compress_candidate (gassign *stmt, gimple_stmt_iterator *gsi,
++				      tree rhs, tree &new_rhs)
++{
++  if (pc_direct_rewrite_chance_p (rhs, new_rhs))
++    return true;
++  else if (current_layout_opt_level & POINTER_COMPRESSION_UNSAFE
++	   && pc_simplify_chance_for_compress_p (stmt, rhs))
++    return compress_candidate_without_check (gsi, rhs, new_rhs);
++
++  return compress_candidate_with_check (gsi, rhs, new_rhs);
++}
++
++/* Create a new basic block after LAST_BB that materializes a NULL pointer of
++   NEW_RHS's pointer type into PHI_NODE (an output parameter).  Taken when
++   the compressed index is 0.  Returns the new block.  */
++
++basic_block
++ipa_struct_reorg::create_bb_for_decompress_nullptr (basic_block last_bb,
++						    tree new_rhs,
++						    tree &phi_node)
++{
++  basic_block new_bb = create_empty_bb (last_bb);
++  if (last_bb->loop_father != NULL)
++    {
++      add_bb_to_loop (new_bb, last_bb->loop_father);
++      loops_state_set (LOOPS_NEED_FIXUP);
++    }
++  gimple_stmt_iterator gsi = gsi_last_bb (new_bb);
++  tree rhs_pointer_type = build_pointer_type (TREE_TYPE (new_rhs));
++  phi_node = make_ssa_name (rhs_pointer_type);
++  gimple *new_stmt
++    = gimple_build_assign (phi_node, build_int_cst (rhs_pointer_type, 0));
++  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nCreate bb %d for decompress nullptr:\n",
++	       new_bb->index);
++      gimple_dump_bb (dump_file, new_bb, 0, dump_flags);
++    }
++  return new_bb;
++}
++
++/* Create a new basic block after LAST_BB that decompresses index LHS into the
++   original pointer into TYPE's pool, returned through PHI_NODE.  The caller
++   wires up the block's edges.  */
++
++basic_block
++ipa_struct_reorg::create_bb_for_decompress_candidate (basic_block last_bb,
++						      tree lhs, srtype *type,
++						      tree &phi_node)
++{
++  basic_block new_bb = create_empty_bb (last_bb);
++  if (last_bb->loop_father != NULL)
++    {
++      add_bb_to_loop (new_bb, last_bb->loop_father);
++      loops_state_set (LOOPS_NEED_FIXUP);
++    }
++  gimple_stmt_iterator gsi = gsi_last_bb (new_bb);
++  /* decompress_ptr_to_offset () needs at least one stmt in target bb.  */
++  gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT);
++  phi_node = decompress_offset_to_ptr (lhs, type, &gsi);
++  /* Remove the NOP created above.  */
++  gsi_remove (&gsi, true);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nCreate bb %d for decompress candidate:\n",
++	       new_bb->index);
++      gimple_dump_bb (dump_file, new_bb, 0, dump_flags);
++    }
++  return new_bb;
++}
++
++/* Try to decompress the candidate without emitting a null check, by peeking
++   at the statement following GSI:
++     - if LHS is copied straight into another candidate field, keep the value
++       compressed and skip decompression entirely;
++     - if LHS is immediately dereferenced, it cannot be NULL, so decompress
++       unconditionally.
++   Returns true if either simplification was applied.  */
++
++bool
++ipa_struct_reorg::decompress_candidate_without_check (gimple_stmt_iterator *gsi,
++						      tree lhs, tree rhs,
++						      tree &new_lhs,
++						      tree &new_rhs)
++{
++  /* NOTE(review): imm_iter and use_p appear to be unused in this
++     function.  */
++  imm_use_iterator imm_iter;
++  use_operand_p use_p;
++  bool processed = false;
++
++  if (!gsi_one_before_end_p (*gsi))
++    {
++      /* Peek at the next stmt; GSI is restored before returning.  */
++      gsi_next (gsi);
++      gimple *next_stmt = gsi_stmt (*gsi);
++      if (gimple_assign_rhs_class (next_stmt) == GIMPLE_SINGLE_RHS)
++	{
++	  tree next_rhs = gimple_assign_rhs1 (next_stmt);
++	  /* If current lhs is used as rhs in the next stmt:
++	     -> _1 = t->s
++	    tt->s = _1.  */
++	  if (lhs == next_rhs)
++	    {
++	      /* Check whether:
++	       1. the lhs is only used in the next stmt.
++	       2. the next lhs is candidate type.  */
++	      if (has_single_use (lhs)
++		  && pc_candidate_tree_p (gimple_assign_lhs (next_stmt)))
++		{
++		  processed = true;
++		  /* Copy directly without conversion after update type.  */
++		  TREE_TYPE (new_lhs) = make_unsigned_type (compressed_size);
++		}
++	    }
++	  /* -> _1 = t->s
++	    _2 = _1->s
++	     In this case, _1 might not be nullptr, so decompress it without
++	     check.  */
++	  else if (TREE_CODE (next_rhs) == COMPONENT_REF)
++	    {
++	      tree use_base = TREE_OPERAND (TREE_OPERAND (next_rhs, 0), 0);
++	      if (use_base == lhs)
++		{
++		  srtype *type = get_compression_candidate_type (rhs);
++		  gcc_assert (type != NULL);
++		  /* Step back so the decompression is emitted before the
++		     original stmt.  */
++		  gsi_prev (gsi);
++		  tree new_ref = NULL_TREE;
++		  if (TREE_CODE (new_rhs) == MEM_REF)
++		    new_ref = new_rhs;
++		  else
++		    {
++		      /* Rebuild the COMPONENT_REF on top of a fresh MEM_REF
++			 of its base.  */
++		      tree base = TREE_OPERAND (TREE_OPERAND (new_rhs, 0), 0);
++		      tree new_mem_ref = build_simple_mem_ref (base);
++		      new_ref = build3 (COMPONENT_REF, TREE_TYPE (new_rhs),
++					new_mem_ref, TREE_OPERAND (new_rhs, 1),
++					NULL_TREE);
++		    }
++		  new_rhs = decompress_offset_to_ptr (new_ref, type, gsi);
++		  processed = true;
++		  gsi_next (gsi);
++		}
++	    }
++	}
++      gsi_prev (gsi);
++      return processed;
++    }
++  return false;
++}
++
++/* Perform pointer decompression with check.  The conversion will be as shown
++   in the following example:
++     Orig bb:
++     bb <1>:
++     _1 = _2->t
++
++     will be transformed to:
++     bb <1>:
++     _3 = _2->t
++     if (_3 == 0)
++       goto bb <2>
++     else
++       goto bb <3>
++
++     bb <2>:
++     _4 = NULL
++     goto bb <4>
++
++     bb <3>:
++     ...
++     _5 = decompress (_3)
++     goto bb <4>
++
++     bb <4>:
++     _6 = PHI (_4, _5)
++     _1 = _6
++   The gsi will move to the beginning of split dst bb <4>, _1 = _6 will be
++   emitted by rewrite_assign ().  */
++
++bool
++ipa_struct_reorg::decompress_candidate_with_check (gimple_stmt_iterator *gsi,
++						   tree rhs, tree &new_rhs)
++{
++  /* Insert cond stmt.  */
++  tree cond_lhs = make_ssa_name (TREE_TYPE (new_rhs));
++  gassign *cond_assign = gimple_build_assign (cond_lhs, new_rhs);
++  gsi_insert_before (gsi, cond_assign, GSI_SAME_STMT);
++
++  /* Compare against 0, the compressed representation of NULL.  */
++  tree pc_type = make_unsigned_type (compressed_size);
++  gcond *cond
++    = gimple_build_cond (EQ_EXPR, cond_lhs, build_int_cst (pc_type, 0),
++			 NULL_TREE, NULL_TREE);
++  gimple_set_location (cond, UNKNOWN_LOCATION);
++  gsi_insert_before (gsi, cond, GSI_SAME_STMT);
++
++  /* Split bb.  */
++  edge e = split_block (cond->bb, cond);
++  basic_block split_src_bb = e->src;
++  basic_block split_dst_bb = e->dest;
++
++  /* Create bb for decompress nullptr.  */
++  tree phi1 = NULL_TREE;
++  basic_block true_bb
++    = create_bb_for_decompress_nullptr (split_src_bb, new_rhs, phi1);
++
++  /* Create bb for decompression candidate.  */
++  tree phi2 = NULL_TREE;
++  srtype *type = get_compression_candidate_type (rhs);
++  gcc_assert (type != NULL);
++  basic_block false_bb
++    = create_bb_for_decompress_candidate (true_bb, cond_lhs, type, phi2);
++
++  /* Refresh and reset cfg.  */
++  remove_edge_raw (e);
++
++  edge etrue = make_edge (split_src_bb, true_bb, EDGE_TRUE_VALUE);
++  etrue->probability = profile_probability::unlikely ();
++  true_bb->count = etrue->count ();
++
++  edge efalse = make_edge (split_src_bb, false_bb, EDGE_FALSE_VALUE);
++  efalse->probability = profile_probability::likely ();
++  false_bb->count = efalse->count ();
++
++  edge e1 = make_single_succ_edge (true_bb, split_dst_bb, EDGE_FALLTHRU);
++  edge e2 = make_single_succ_edge (false_bb, split_dst_bb, EDGE_FALLTHRU);
++
++  /* Merge the two arms with a PHI of the decompressed pointers.  */
++  tree phi = make_ssa_name (build_pointer_type (TREE_TYPE (cond_lhs)));
++  gphi *phi_node = create_phi_node (phi, split_dst_bb);
++  add_phi_arg (phi_node, phi1, e1, UNKNOWN_LOCATION);
++  add_phi_arg (phi_node, phi2, e2, UNKNOWN_LOCATION);
++
++  if (dom_info_available_p (CDI_DOMINATORS))
++    {
++      set_immediate_dominator (CDI_DOMINATORS, split_dst_bb, split_src_bb);
++      set_immediate_dominator (CDI_DOMINATORS, true_bb, split_src_bb);
++      set_immediate_dominator (CDI_DOMINATORS, false_bb, split_src_bb);
++    }
++  *gsi = gsi_start_bb (split_dst_bb);
++  new_rhs = phi;
++  return true;
++}
++
++/* If there is a simplification opportunity, perform the simplified
++   decompression rewrite.  Otherwise, create a cond expression and two basic
++   blocks to implement pointer decompression.  The unchecked variant is only
++   tried when POINTER_COMPRESSION_UNSAFE is enabled.  */
++
++bool
++ipa_struct_reorg::decompress_candidate (gimple_stmt_iterator *gsi, tree lhs,
++					tree rhs, tree &new_lhs, tree &new_rhs)
++{
++  if (current_layout_opt_level & POINTER_COMPRESSION_UNSAFE
++      && decompress_candidate_without_check (gsi, lhs, rhs, new_lhs, new_rhs))
++    return true;
++
++  return decompress_candidate_with_check (gsi, rhs, new_rhs);
++}
++
++/* Try to perform pointer compression (candidate on the LHS) or decompression
++   (candidate on the RHS) for assignment STMT.  When neither side is a
++   candidate but one side needs a type conversion, emit the conversion
++   instead.  NEW_LHS/NEW_RHS are updated in place for the rewriter.  */
++
++void
++ipa_struct_reorg::try_rewrite_with_pointer_compression (
++  gassign *stmt, gimple_stmt_iterator *gsi, tree lhs, tree rhs, tree &new_lhs,
++  tree &new_rhs)
++{
++  bool l = pc_candidate_tree_p (lhs);
++  bool r = pc_candidate_tree_p (rhs);
++  if (!l && !r)
++    {
++      tree tmp_rhs = new_rhs == NULL_TREE ? rhs : new_rhs;
++      if (pc_type_conversion_candidate_p (lhs))
++	{
++	  /* Transfer MEM[(struct *)_1].files = _4;
++	     to MEM[(struct *)_1].files = (struct *)_4; */
++	  new_rhs = fold_convert (TREE_TYPE (lhs), tmp_rhs);
++	}
++      else if (pc_type_conversion_candidate_p (rhs))
++	{
++	  /* Transfer _4 = MEM[(struct *)_1].nodes;
++	     to _4  = (new_struct *) MEM[(struct *)_1].nodes; */
++	  new_rhs = fold_convert (TREE_TYPE (new_lhs), tmp_rhs);
++	}
++    }
++  else if (l && r)
++    /* Candidate field refs on both sides of one assignment are not
++       expected here.  */
++    gcc_unreachable ();
++  else if (l)
++    {
++      if (!compress_candidate (stmt, gsi, rhs, new_rhs))
++	gcc_unreachable ();
++    }
++  else if (r)
++    {
++      if (!decompress_candidate (gsi, lhs, rhs, new_lhs, new_rhs))
++	gcc_unreachable ();
++    }
++}
++
++/* Rewrite the pointer difference PTR1 - PTR2 for semi-relayout TYPE: the
++   naive byte difference is wrong once elements are regrouped into buckets,
++   so recompute the element distance from the bucket-group difference of the
++   high address bits and the in-bucket offsets.  Returns the SSA name holding
++   the result.
++   NOTE(review): the shift/sub temporaries are built with a pointer type
++   rather than an integer type -- confirm this is intentional for
++   gimplify_build2.  */
++
++tree
++ipa_struct_reorg::rewrite_pointer_diff (gimple_stmt_iterator *gsi, tree ptr1,
++					tree ptr2, srtype *type)
++{
++  tree shifts = build_int_cst (long_integer_type_node, semi_relayout_align);
++  tree pointer_type = build_pointer_type (unsigned_char_type_node);
++  /* addr_high_1 = (intptr_t)ptr1 >> shifts  */
++  tree ptr1_cvt = fold_convert (pointer_type, ptr1);
++  tree addr_high_1
++    = gimplify_build2 (gsi, RSHIFT_EXPR, pointer_type, ptr1_cvt, shifts);
++  /* addr_high_2 = (intptr_t)ptr2 >> shifts  */
++  tree ptr2_cvt = fold_convert (pointer_type, ptr2);
++  tree addr_high_2
++    = gimplify_build2 (gsi, RSHIFT_EXPR, pointer_type, ptr2_cvt, shifts);
++  /* off1 = (intptr_t)ptr1 - (addr_high_1 << shifts)  */
++  tree bucket_start_1
++    = gimplify_build2 (gsi, LSHIFT_EXPR, pointer_type, addr_high_1, shifts);
++  tree off1 = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node,
++			       ptr1_cvt, bucket_start_1);
++  /* off2 = (intptr_t)ptr2 - (addr_high_2 << shifts)  */
++  tree bucket_start_2
++    = gimplify_build2 (gsi, LSHIFT_EXPR, pointer_type, addr_high_2, shifts);
++  tree off2 = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node,
++			       ptr2_cvt, bucket_start_2);
++  /* group_diff = (addr_high_1 - addr_high_2) / bucket_parts  */
++  tree bucket_sub = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node,
++				     addr_high_1, addr_high_2);
++  tree bucket_parts
++    = build_int_cst (long_integer_type_node, type->bucket_parts);
++  tree group_diff
++    = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_integer_type_node, bucket_sub,
++		       bucket_parts);
++  /* off_addr_diff = off1 - off2  */
++  tree off_addr_diff
++    = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, off1, off2);
++  /* res = group_diff * bucket_capacity + off_diff / 8  */
++  tree capacity
++    = build_int_cst (long_integer_type_node, relayout_part_size / 8);
++  tree unit_size = build_int_cst (long_integer_type_node, 8);
++  tree bucket_index_diff
++    = gimplify_build2 (gsi, MULT_EXPR, long_integer_type_node, group_diff,
++		       capacity);
++  tree off_index = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_integer_type_node,
++				    off_addr_diff, unit_size);
++  tree res = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node,
++			      bucket_index_diff, off_index);
++  return res;
++}
++
++/* Create a new basic block after LAST_BB covering the group_diff == 0 case:
++   the adjusted address NEW_GRANULE is already correct, so just copy it into
++   PHI.  Returns the new block; the caller wires up its edges.  */
++
++basic_block
++create_bb_for_group_diff_eq_0 (basic_block last_bb, tree phi, tree new_granule)
++{
++  basic_block new_bb = create_empty_bb (last_bb);
++  if (last_bb->loop_father != NULL)
++    {
++      add_bb_to_loop (new_bb, last_bb->loop_father);
++      loops_state_set (LOOPS_NEED_FIXUP);
++    }
++  /* Emit res = new_granule;  */
++  gimple_stmt_iterator gsi = gsi_last_bb (new_bb);
++  gimple *new_stmt = gimple_build_assign (phi, new_granule);
++  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
++  return new_bb;
++}
++
++/* Fill NEW_BB with the group_diff != 0 case of semi-relayout pointer
++   arithmetic: recompute the address from the current group start, the group
++   distance GROUP_DIFF and the byte offset OFF_TIMES_8, with an extra
++   correction block for negative GROUP_DIFF.  The final value is returned
++   through PHI; returns the join block holding its PHI node.  */
++
++basic_block
++create_bb_for_group_diff_ne_0 (basic_block new_bb, tree &phi, tree ptr,
++			       tree group_diff, tree off_times_8, srtype *type)
++{
++  tree shifts = build_int_cst (long_unsigned_type_node, semi_relayout_align);
++  gimple_stmt_iterator gsi = gsi_last_bb (new_bb);
++  /* Placeholder stmt so gimplify_build2 has an insertion point.  */
++  gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT);
++  /* curr_group_start = (ptr >> shifts) << shifts;  */
++  tree ptr_r_1
++    = gimplify_build2 (&gsi, RSHIFT_EXPR, long_integer_type_node, ptr, shifts);
++  tree curr_group_start
++    = gimplify_build2 (&gsi, LSHIFT_EXPR, long_integer_type_node, ptr_r_1,
++		       shifts);
++  /* curr_off_from_group = ptr - curr_group_start;  */
++  tree curr_off_from_group
++    = gimplify_build2 (&gsi, MINUS_EXPR, long_integer_type_node, ptr,
++		       curr_group_start);
++  /* res = curr_group_start + ((group_diff * parts) << shifts)
++       + ((curr_off_from_group + off_times_8) % relayout_part_size);  */
++  tree step1
++    = gimplify_build2 (&gsi, MULT_EXPR, long_integer_type_node, group_diff,
++		       build_int_cst (long_integer_type_node,
++				      type->bucket_parts));
++  tree step2 = gimplify_build2 (&gsi, LSHIFT_EXPR, long_integer_type_node,
++				step1, shifts);
++  tree step3 = gimplify_build2 (&gsi, PLUS_EXPR, long_integer_type_node,
++				curr_off_from_group, off_times_8);
++  tree step4
++    = gimplify_build2 (&gsi, TRUNC_MOD_EXPR, long_integer_type_node, step3,
++		       build_int_cst (long_integer_type_node,
++				      relayout_part_size));
++  tree step5
++    = gimplify_build2 (&gsi, PLUS_EXPR, long_integer_type_node, step2, step4);
++  tree res_phi1 = gimplify_build2 (&gsi, PLUS_EXPR, long_integer_type_node,
++				   curr_group_start, step5);
++  /* if (group_diff < 0)  */
++  gcond *cond = gimple_build_cond (LT_EXPR, group_diff,
++				   build_int_cst (long_integer_type_node, 0),
++				   NULL_TREE, NULL_TREE);
++  gsi_insert_before (&gsi, cond, GSI_SAME_STMT);
++  /* remove nop  */
++  gsi_remove (&gsi, true);
++  /* res += relayout_part_size  */
++  basic_block true_bb = create_empty_bb (new_bb);
++  if (new_bb->loop_father != NULL)
++    {
++      add_bb_to_loop (true_bb, new_bb->loop_father);
++      loops_state_set (LOOPS_NEED_FIXUP);
++    }
++  gimple_stmt_iterator true_gsi = gsi_last_bb (true_bb);
++  tree res_phi2 = make_ssa_name (long_integer_type_node);
++  gimple *new_stmt = gimple_build_assign (res_phi2, PLUS_EXPR, res_phi1,
++					  build_int_cst (long_integer_type_node,
++							 relayout_part_size));
++  gsi_insert_after (&true_gsi, new_stmt, GSI_NEW_STMT);
++  /* create phi bb  */
++  basic_block res_bb = create_empty_bb (true_bb);
++  if (new_bb->loop_father != NULL)
++    {
++      add_bb_to_loop (res_bb, new_bb->loop_father);
++      loops_state_set (LOOPS_NEED_FIXUP);
++    }
++  /* rebuild cfg  */
++  edge etrue = make_edge (new_bb, true_bb, EDGE_TRUE_VALUE);
++  etrue->probability = profile_probability::unlikely ();
++  true_bb->count = etrue->count ();
++
++  edge efalse = make_edge (new_bb, res_bb, EDGE_FALSE_VALUE);
++  efalse->probability = profile_probability::likely ();
++  res_bb->count = efalse->count ();
++
++  edge efall = make_single_succ_edge (true_bb, res_bb, EDGE_FALLTHRU);
++
++  phi = make_ssa_name (long_integer_type_node);
++  gphi *phi_node = create_phi_node (phi, res_bb);
++  add_phi_arg (phi_node, res_phi2, efall, UNKNOWN_LOCATION);
++  add_phi_arg (phi_node, res_phi1, efalse, UNKNOWN_LOCATION);
++
++  if (dom_info_available_p (CDI_DOMINATORS))
++    {
++      set_immediate_dominator (CDI_DOMINATORS, true_bb, new_bb);
++      set_immediate_dominator (CDI_DOMINATORS, res_bb, new_bb);
++    }
++  return res_bb;
++}
++
++/* Rewrite PTR + OFFSET for semi-relayout TYPE.  In the semi-relayout pool,
++   consecutive elements that cross a relayout group boundary are not adjacent
++   in memory, so the plain pointer addition is replaced with: compute the
++   group distance of the naive new address, take it directly when the group
++   does not change, and otherwise fix the address up in
++   create_bb_for_group_diff_ne_0.  GSI ends up at the start of the join
++   block; returns the SSA name holding the rewritten pointer.  */
++
++tree
++ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt,
++						gimple_stmt_iterator *gsi,
++						tree ptr, tree offset,
++						srtype *type)
++{
++  gcc_assert (type->semi_relayout);
++  tree off = fold_convert (long_integer_type_node, offset);
++  tree num_8 = build_int_cst (integer_type_node, 8);
++  tree shifts = build_int_cst (integer_type_node, semi_relayout_align);
++  /* off_times_8 = off * 8;  */
++  tree off_times_8
++    = gimplify_build2 (gsi, MULT_EXPR, long_integer_type_node, off, num_8);
++  /* new_granule = ptr + off * 8;  */
++  tree ptr_int = fold_convert (long_integer_type_node, ptr);
++  tree new_granule = gimplify_build2 (gsi, PLUS_EXPR, long_integer_type_node,
++				      ptr_int, off_times_8);
++  /* group_diff = (new_granule >> shifts) - (ptr >> shifts);  */
++  tree group_diff_rhs_1
++    = gimplify_build2 (gsi, RSHIFT_EXPR, long_integer_type_node, new_granule,
++		       shifts);
++  tree group_diff_rhs_2
++    = gimplify_build2 (gsi, RSHIFT_EXPR, long_integer_type_node, ptr, shifts);
++  tree group_diff = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node,
++				     group_diff_rhs_1, group_diff_rhs_2);
++  /* if (group_diff == 0)  */
++  gcond *cond = gimple_build_cond (EQ_EXPR, group_diff,
++				   build_int_cst (long_integer_type_node, 0),
++				   NULL_TREE, NULL_TREE);
++  gimple_set_location (cond, UNKNOWN_LOCATION);
++  gsi_insert_before (gsi, cond, GSI_SAME_STMT);
++
++  /* Split after the condition; the rest of the block becomes the join bb.  */
++  edge e = split_block (cond->bb, cond);
++  basic_block split_src_bb = e->src;
++  basic_block split_dst_bb = e->dest;
++  remove_edge_raw (e);
++  /* if (group_diff == 0)
++       res = new_granule;  */
++  tree res_phi_1 = make_ssa_name (long_integer_type_node);
++  basic_block true_bb
++    = create_bb_for_group_diff_eq_0 (split_src_bb, res_phi_1, new_granule);
++  /* else  */
++  tree res_phi_2 = NULL_TREE;
++  basic_block false_bb = create_empty_bb (split_src_bb);
++  if (split_src_bb->loop_father != NULL)
++    {
++      add_bb_to_loop (false_bb, split_src_bb->loop_father);
++      loops_state_set (LOOPS_NEED_FIXUP);
++    }
++
++  edge etrue = make_edge (split_src_bb, true_bb, EDGE_TRUE_VALUE);
++  etrue->probability = profile_probability::very_likely ();
++  true_bb->count = etrue->count ();
++
++  edge efalse = make_edge (split_src_bb, false_bb, EDGE_FALSE_VALUE);
++  efalse->probability = profile_probability::unlikely ();
++  false_bb->count = efalse->count ();
++  basic_block res_bb
++    = create_bb_for_group_diff_ne_0 (false_bb, res_phi_2, ptr_int, group_diff,
++				     off_times_8, type);
++  /* rebuild cfg  */
++  edge e_true_fall
++    = make_single_succ_edge (true_bb, split_dst_bb, EDGE_FALLTHRU);
++  edge e_false_fall
++    = make_single_succ_edge (res_bb, split_dst_bb, EDGE_FALLTHRU);
++  tree res_int = make_ssa_name (long_integer_type_node);
++  gphi *phi_node = create_phi_node (res_int, split_dst_bb);
++  add_phi_arg (phi_node, res_phi_1, e_true_fall, UNKNOWN_LOCATION);
++  add_phi_arg (phi_node, res_phi_2, e_false_fall, UNKNOWN_LOCATION);
++  if (dom_info_available_p (CDI_DOMINATORS))
++    {
++      set_immediate_dominator (CDI_DOMINATORS, split_dst_bb, split_src_bb);
++      set_immediate_dominator (CDI_DOMINATORS, true_bb, split_src_bb);
++      set_immediate_dominator (CDI_DOMINATORS, false_bb, split_src_bb);
++    }
++  *gsi = gsi_start_bb (split_dst_bb);
++  /* Cast the integer result back to a pointer.  */
++  tree pointer_type = build_pointer_type (unsigned_char_type_node);
++  tree res = gimplify_build1 (gsi, NOP_EXPR, pointer_type, res_int);
++  return res;
++}
++
++/* Emit NUM = EXPR / ORIG_SIZE (truncating division) before GSI and return
++   the SSA name NUM.  */
++
++tree
++ipa_struct_reorg::build_div_expr (gimple_stmt_iterator *gsi, tree expr,
++				  tree orig_size)
++{
++  tree div_expr
++    = build2 (TRUNC_DIV_EXPR, long_unsigned_type_node, expr, orig_size);
++  tree num = make_ssa_name (long_unsigned_type_node);
++  gimple *g = gimple_build_assign (num, div_expr);
++  gsi_insert_before (gsi, g, GSI_SAME_STMT);
++  return num;
++}
++
++/* Return the srtype recorded for RECORD_TYPE TYPE, or NULL if TYPE is not a
++   record or has no recorded srtype.  */
++
++srtype *
++ipa_struct_reorg::get_relayout_candidate_type (tree type)
++{
++  if (type == NULL)
++    return NULL;
++  if (TREE_CODE (type) != RECORD_TYPE)
++    return NULL;
++  return find_type (inner_type (type));
++}
++
++/* Return the byte offset of FIELD's replacement field within TYPE's
++   reorganized layout, as recorded in TYPE->new_field_offsets.  */
++
++long unsigned int
++ipa_struct_reorg::get_true_field_offset (srfield *field, srtype *type)
++{
++  unsigned HOST_WIDE_INT new_offset;
++  new_offset = *(type->new_field_offsets.get (field->newfield[0]));
++  return new_offset;
++}
++
++tree
++ipa_struct_reorg::get_true_pointer_base (gimple_stmt_iterator *gsi,
++					 tree mem_ref, srtype *type)
++{
++  tree ptr = TREE_OPERAND (mem_ref, 0);
++  tree off_bytes = TREE_OPERAND (mem_ref, 1);
++  unsigned num = tree_to_shwi (off_bytes);
++  if (num == 0)
++    return ptr;
++  tree orig_size = TYPE_SIZE_UNIT (TREE_TYPE (mem_ref));
++  tree off
++    = build_int_cst (long_integer_type_node, num / tree_to_uhwi (orig_size));
++  gimple *stmt = gsi_stmt (*gsi);
++  tree new_pointer_base
++    = rewrite_pointer_plus_integer (stmt, gsi, ptr, off, type);
++  return new_pointer_base;
++}
++
++tree
++ipa_struct_reorg::rewrite_address (tree pointer_base, srfield *field,
++				   srtype *type, gimple_stmt_iterator *gsi)
++{
++  unsigned HOST_WIDE_INT field_offset = get_true_field_offset (field, type);
++
++  tree pointer_ssa = fold_convert (long_unsigned_type_node, pointer_base);
++  tree step1
++    = gimplify_build1 (gsi, NOP_EXPR, long_unsigned_type_node, pointer_ssa);
++  tree new_offset_ssa = build_int_cst (long_unsigned_type_node, field_offset);
++  tree step2 = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, step1,
++				new_offset_ssa);
++  tree field_ssa
++    = fold_convert (build_pointer_type (TREE_TYPE (field->newfield[0])), step2);
++  tree step3
++    = gimplify_build1 (gsi, NOP_EXPR, TREE_TYPE (field_ssa), field_ssa);
++
++  tree new_mem_ref
++    = fold_build2 (MEM_REF, TREE_TYPE (field->newfield[0]), step3,
++		   build_int_cst (TREE_TYPE (field_ssa), 0));
++  return new_mem_ref;
++}
++
++bool
++ipa_struct_reorg::check_sr_copy (gimple *stmt)
++{
++  tree lhs = gimple_assign_lhs (stmt);
++  tree rhs = gimple_assign_rhs1 (stmt);
++
++  if (TREE_CODE (lhs) != MEM_REF || TREE_CODE (rhs) != MEM_REF)
++    return false;
++  srtype *t1 = get_relayout_candidate_type (TREE_TYPE (lhs));
++  srtype *t2 = get_relayout_candidate_type (TREE_TYPE (rhs));
++  if (!t1 || !t2 || !t1->semi_relayout || !t2->semi_relayout || t1 != t2)
++    return false;
++  tree pointer1 = TREE_OPERAND (lhs, 0);
++  tree pointer2 = TREE_OPERAND (rhs, 0);
++  if (TREE_CODE (TREE_TYPE (pointer1)) != POINTER_TYPE
++      || TREE_CODE (TREE_TYPE (pointer2)) != POINTER_TYPE)
++    return false;
++
++  tree type1 = TREE_TYPE (TREE_TYPE (pointer1));
++  tree type2 = TREE_TYPE (TREE_TYPE (pointer2));
++
++  srtype *t3 = get_relayout_candidate_type (type1);
++  srtype *t4 = get_relayout_candidate_type (type2);
++
++  if (t3 != t4 || t3 != t1)
++    return false;
++
++  return true;
++}
++
++void
++ipa_struct_reorg::relayout_field_copy (gimple_stmt_iterator *gsi, gimple *stmt,
++				       tree lhs, tree rhs, tree &newlhs,
++				       tree &newrhs)
++{
++  srtype *type = get_relayout_candidate_type (TREE_TYPE (lhs));
++  tree lhs_base_pointer = get_true_pointer_base (gsi, newlhs, type);
++  tree rhs_base_pointer = get_true_pointer_base (gsi, newrhs, type);
++  tree new_l_mem_ref = NULL_TREE;
++  tree new_r_mem_ref = NULL_TREE;
++  srfield *field = NULL;
++  unsigned i = 0;
++  FOR_EACH_VEC_ELT (type->fields, i, field)
++    {
++      if (!field->newfield[0])
++	continue;
++      new_l_mem_ref = rewrite_address (lhs_base_pointer, field, type, gsi);
++      new_r_mem_ref = rewrite_address (rhs_base_pointer, field, type, gsi);
++      gimple *new_stmt = gimple_build_assign (new_l_mem_ref, new_r_mem_ref);
++      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
++    }
++  newlhs = new_l_mem_ref;
++  newrhs = new_r_mem_ref;
++}
++
++void
++ipa_struct_reorg::do_semi_relayout (gimple_stmt_iterator *gsi, gimple *stmt,
++				    tree &newlhs, tree &newrhs)
++{
++  tree lhs = gimple_assign_lhs (stmt);
++  tree rhs = gimple_assign_rhs1 (stmt);
++
++  bool l = TREE_CODE (lhs) == COMPONENT_REF ? is_semi_relayout_candidate (lhs)
++					    : false;
++  bool r = TREE_CODE (rhs) == COMPONENT_REF ? is_semi_relayout_candidate (rhs)
++					    : false;
++
++  gcc_assert (!(l && r));
++
++  if (!l && !r)
++    {
++      if (check_sr_copy (stmt))
++	relayout_field_copy (gsi, stmt, lhs, rhs, newlhs, newrhs);
++    }
++  else if (l)
++    {
++      srtype *type
++	= get_relayout_candidate_type (TREE_TYPE (TREE_OPERAND (lhs, 0)));
++      srfield *new_field
++	= type->find_field (int_byte_position (TREE_OPERAND (lhs, 1)));
++      tree pointer_base
++	= get_true_pointer_base (gsi, TREE_OPERAND (newlhs, 0), type);
++      newlhs = rewrite_address (pointer_base, new_field, type, gsi);
++    }
++  else if (r)
++    {
++      srtype *type
++	= get_relayout_candidate_type (TREE_TYPE (TREE_OPERAND (rhs, 0)));
++      srfield *new_field
++	= type->find_field (int_byte_position (TREE_OPERAND (rhs, 1)));
++      tree pointer_base
++	= get_true_pointer_base (gsi, TREE_OPERAND (newrhs, 0), type);
++      newrhs = rewrite_address (pointer_base, new_field, type, gsi);
++    }
++}
++
++bool
++ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
++{
++  bool remove = false;
++
++  if (current_layout_opt_level & DEAD_FIELD_ELIMINATION
++      && remove_dead_field_stmt (gimple_assign_lhs (stmt)))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\n rewriting statement (remove): \n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	}
++      /* Replace the dead field in stmt by creating a dummy ssa.  */
++      tree dummy_ssa = make_ssa_name (TREE_TYPE (gimple_assign_lhs (stmt)));
++      gimple_assign_set_lhs (stmt, dummy_ssa);
++      update_stmt (stmt);
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "To: \n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	}
++    }
++
++  if (gimple_clobber_p (stmt))
++    {
++      tree lhs = gimple_assign_lhs (stmt);
++      tree newlhs[max_split];
++      if (!rewrite_expr (lhs, newlhs))
++	return false;
++      for (unsigned i = 0; i < max_split && newlhs[i]; i++)
++	{
++	  tree clobber = build_constructor (TREE_TYPE (newlhs[i]), NULL);
++	  TREE_THIS_VOLATILE (clobber) = true;
++	  gimple *newstmt = gimple_build_assign (newlhs[i], clobber);
++	  gsi_insert_before (gsi, newstmt, GSI_SAME_STMT);
++	  remove = true;
++	}
++      return remove;
++    }
++
++  if ((current_layout_opt_level < STRUCT_REORDER_FIELDS
++       && (gimple_assign_rhs_code (stmt) == EQ_EXPR
++	   || gimple_assign_rhs_code (stmt) == NE_EXPR))
++      || (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++	  && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
++	      == tcc_comparison)))
++    {
++      tree rhs1 = gimple_assign_rhs1 (stmt);
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree newrhs1[max_split];
++      tree newrhs2[max_split];
++      tree_code rhs_code = gimple_assign_rhs_code (stmt);
++      tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR;
++      if (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++	  && rhs_code != EQ_EXPR && rhs_code != NE_EXPR)
++	{
++	  code = rhs_code;
++	}
++
++      if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2))
++	return false;
++      tree newexpr = NULL_TREE;
++      for (unsigned i = 0; i < max_split && newrhs1[i]; i++)
++	{
++	  tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node,
++				       newrhs1[i], newrhs2[i]);
++	  if (!newexpr)
++	    newexpr = expr;
++	  else
++	    newexpr
++	      = gimplify_build2 (gsi, code, boolean_type_node, newexpr, expr);
++	}
++
++      if (newexpr)
++	{
++	  newexpr
++	    = fold_convert (TREE_TYPE (gimple_assign_lhs (stmt)), newexpr);
++	  gimple_assign_set_rhs_from_tree (gsi, newexpr);
++	  update_stmt (stmt);
++	}
++      return false;
++    }
++
++  if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
++    {
++      tree lhs = gimple_assign_lhs (stmt);
++      tree rhs1 = gimple_assign_rhs1 (stmt);
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree newlhs[max_split];
++      tree newrhs[max_split];
++
++      if (!rewrite_lhs_rhs (lhs, rhs1, newlhs, newrhs))
++	return false;
++      tree size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs)));
++      tree num;
++      /* Check if rhs2 is a multiplication of the size of the type. */
++      if (!is_result_of_mult (rhs2, &num, size)
++	  && !(current_layout_opt_level & SEMI_RELAYOUT))
++	internal_error (
++	  "the rhs of pointer was not a multiplicate and it slipped through.");
++
++      /* Add the judgment of num, support for POINTER_DIFF_EXPR.
++     _6 = _4 + _5;
++     _5 = (long unsigned int) _3;
++     _3 = _1 - old_2.  */
++      if (current_layout_opt_level < STRUCT_REORDER_FIELDS
++	  || (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++	      && (num != NULL)))
++	{
++	  num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num);
++	}
++      for (unsigned i = 0; i < max_split && newlhs[i]; i++)
++	{
++	  gimple *new_stmt;
++
++	  if (num != NULL)
++	    {
++	      tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i])));
++	      newsize
++		= gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize);
++	      if (current_layout_opt_level >= SEMI_RELAYOUT)
++		{
++		  if (is_semi_relayout_candidate (lhs))
++		    {
++		      srtype *type = get_semi_relayout_candidate_type (lhs);
++		      newrhs[i]
++			= rewrite_pointer_plus_integer (stmt, gsi, newrhs[i],
++							num, type);
++		      newsize = build_int_cst (long_unsigned_type_node, 0);
++		    }
++		}
++	      new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR,
++					      newrhs[i], newsize);
++	    }
++	  else
++	    {
++	      /* rhs2 is not a const integer  */
++	      if (current_layout_opt_level >= SEMI_RELAYOUT)
++		{
++		  if (is_semi_relayout_candidate (lhs))
++		    {
++		      num = build_div_expr (
++			gsi, rhs2, build_int_cst (long_unsigned_type_node, 1));
++		      srtype *type = get_semi_relayout_candidate_type (lhs);
++		      newrhs[i]
++			= rewrite_pointer_plus_integer (stmt, gsi, newrhs[i],
++							num, type);
++		      rhs2 = build_int_cst (long_unsigned_type_node, 0);
++		    }
++		}
++	      new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR,
++					      newrhs[i], rhs2);
++	    }
++	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
++	  remove = true;
++	}
++      return remove;
++    }
++
++  /* Support POINTER_DIFF_EXPR rewriting.  */
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++      && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR)
++    {
++      tree rhs1 = gimple_assign_rhs1 (stmt);
++      tree rhs2 = gimple_assign_rhs2 (stmt);
++      tree newrhs1[max_split];
++      tree newrhs2[max_split];
++
++      bool r1 = rewrite_expr (rhs1, newrhs1);
++      bool r2 = rewrite_expr (rhs2, newrhs2);
++
++      if (r1 != r2)
++	{
++	  /* Handle NULL pointer specially.  */
++	  if (r1 && !r2 && integer_zerop (rhs2))
++	    {
++	      r2 = true;
++	      for (unsigned i = 0; i < max_split && newrhs1[i]; i++)
++		{
++		  newrhs2[i] = fold_convert (TREE_TYPE (newrhs1[i]), rhs2);
++		}
++	    }
++	  else if (r2 && !r1 && integer_zerop (rhs1))
++	    {
++	      r1 = true;
++	      for (unsigned i = 0; i < max_split && newrhs2[i]; i++)
++		{
++		  newrhs1[i] = fold_convert (TREE_TYPE (newrhs2[i]), rhs1);
++		}
++	    }
++	  else
++	    {
++	      return false;
++	    }
++	}
++      else if (!r1 && !r2)
++	return false;
++
++      /* The two operands always have pointer/reference type.  */
++      if (current_layout_opt_level >= SEMI_RELAYOUT
++	  && (is_semi_relayout_candidate (rhs1)
++	      || is_semi_relayout_candidate (rhs2)))
++	{
++	  for (unsigned i = 0; i < max_split && newrhs1[i] && newrhs2[i]; i++)
++	    {
++	      srtype *type = get_semi_relayout_candidate_type (rhs1);
++	      if (!type)
++		type = get_semi_relayout_candidate_type (rhs2);
++	      gcc_assert (type != NULL);
++	      tree res
++		= rewrite_pointer_diff (gsi, newrhs1[i], newrhs2[i], type);
++	      gimple *g = gimple_build_assign (gimple_assign_lhs (stmt), res);
++	      gsi_insert_before (gsi, g, GSI_SAME_STMT);
++	    }
++	  remove = true;
++	}
++      else
++	{
++	  for (unsigned i = 0; i < max_split && newrhs1[i] && newrhs2[i]; i++)
++	    {
++	      gimple_assign_set_rhs1 (stmt, newrhs1[i]);
++	      gimple_assign_set_rhs2 (stmt, newrhs2[i]);
++	      update_stmt (stmt);
++	    }
++	}
++      return remove;
++    }
++
++  if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
++    {
++      tree lhs = gimple_assign_lhs (stmt);
++      tree rhs = gimple_assign_rhs1 (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\nrewriting stamtenet:\n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	}
++      tree newlhs[max_split];
++      tree newrhs[max_split];
++      if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs))
++	{
++	  if (dump_file && (dump_flags & TDF_DETAILS))
++	    fprintf (dump_file, "Did nothing to statement.\n");
++	  return false;
++	}
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "replaced with:\n");
++      for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++)
++	{
++	  if (current_layout_opt_level & SEMI_RELAYOUT)
++	    do_semi_relayout (gsi, stmt, newlhs[i], newrhs[i]);
++	  if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE)
++	    try_rewrite_with_pointer_compression (stmt, gsi, lhs, rhs,
++						  newlhs[i], newrhs[i]);
++	  gimple *newstmt = gimple_build_assign (newlhs[i] ? newlhs[i] : lhs,
++						 newrhs[i] ? newrhs[i] : rhs);
++	  if (dump_file && (dump_flags & TDF_DETAILS))
++	    {
++	      print_gimple_stmt (dump_file, newstmt, 0);
++	      fprintf (dump_file, "\n");
++	    }
++	  gsi_insert_before (gsi, newstmt, GSI_SAME_STMT);
++	  remove = true;
++	}
++      return remove;
++    }
++
++  return remove;
++}
++
++tree
++ipa_struct_reorg::get_real_allocated_ptr (tree ptr, gimple_stmt_iterator *gsi)
++{
++  tree ptr_to_int = fold_convert (long_unsigned_type_node, ptr);
++  tree align = build_int_cst (long_unsigned_type_node, relayout_part_size);
++  tree real_addr = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node,
++				    ptr_to_int, align);
++  tree res
++    = gimplify_build1 (gsi, NOP_EXPR,
++		       build_pointer_type (long_unsigned_type_node), real_addr);
++  return res;
++}
++
++tree
++ipa_struct_reorg::set_ptr_for_use (tree ptr, gimple_stmt_iterator *gsi)
++{
++  tree ptr_to_int = fold_convert (long_unsigned_type_node, ptr);
++  tree align = build_int_cst (long_unsigned_type_node, relayout_part_size);
++  tree ptr_int = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node,
++				  ptr_to_int, align);
++  tree res
++    = gimplify_build1 (gsi, NOP_EXPR,
++		       build_pointer_type (long_unsigned_type_node), ptr_int);
++  return res;
++}
++
++void
++ipa_struct_reorg::record_allocated_size (tree ptr, gimple_stmt_iterator *gsi,
++					 tree size)
++{
++  tree to_type = build_pointer_type (long_unsigned_type_node);
++  tree type_cast = fold_convert (to_type, ptr);
++  tree lhs
++    = fold_build2 (MEM_REF, long_unsigned_type_node, ptr,
++		   build_int_cst (build_pointer_type (long_unsigned_type_node),
++				  0));
++  gimple *stmt = gimple_build_assign (lhs, size);
++  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
++}
++
++tree
++ipa_struct_reorg::read_allocated_size (tree ptr, gimple_stmt_iterator *gsi)
++{
++  tree to_type = build_pointer_type (long_unsigned_type_node);
++  tree off = build_int_cst (to_type, 0);
++  tree size = gimplify_build2 (gsi, MEM_REF, long_unsigned_type_node, ptr, off);
++  return size;
++}
++
++gimple *
++ipa_struct_reorg::create_aligned_alloc (gimple_stmt_iterator *gsi, srtype *type,
++					tree num, tree &size)
++{
++  tree fn = builtin_decl_implicit (BUILT_IN_ALIGNED_ALLOC);
++
++  tree align = build_int_cst (long_unsigned_type_node, relayout_part_size);
++  unsigned bucket_size = type->bucket_size;
++
++  tree nbuckets
++    = gimplify_build2 (gsi, CEIL_DIV_EXPR, long_unsigned_type_node, num,
++		       build_int_cst (long_unsigned_type_node,
++				      relayout_part_size / 8));
++  tree use_size
++    = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, nbuckets,
++		       build_int_cst (long_unsigned_type_node, bucket_size));
++  size = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, use_size,
++			  align);
++  gimple *g = gimple_build_call (fn, 2, align, size);
++  gsi_insert_before (gsi, g, GSI_SAME_STMT);
++  return g;
++}
++
++void
++ipa_struct_reorg::create_memset_zero (tree ptr, gimple_stmt_iterator *gsi,
++				      tree size)
++{
++  tree fn = builtin_decl_implicit (BUILT_IN_MEMSET);
++  tree val = build_int_cst (long_unsigned_type_node, 0);
++  gimple *g = gimple_build_call (fn, 3, ptr, val, size);
++  gsi_insert_before (gsi, g, GSI_SAME_STMT);
++}
++
++void
++ipa_struct_reorg::create_memcpy (tree src, tree dst, tree size,
++				 gimple_stmt_iterator *gsi)
++{
++  tree fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
++  gimple *g = gimple_build_call (fn, 3, dst, src, size);
++  gsi_insert_before (gsi, g, GSI_SAME_STMT);
++}
++
++void
++ipa_struct_reorg::create_free (tree ptr, gimple_stmt_iterator *gsi)
++{
++  tree fn = builtin_decl_implicit (BUILT_IN_FREE);
++  gimple *g = gimple_build_call (fn, 1, ptr);
++  gsi_insert_before (gsi, g, GSI_SAME_STMT);
++}
++
++void
++ipa_struct_reorg::copy_to_lhs (tree lhs, tree new_lhs,
++			       gimple_stmt_iterator *gsi)
++{
++  gimple *g = gimple_build_assign (lhs, new_lhs);
++  gsi_insert_before (gsi, g, GSI_SAME_STMT);
++}
++
++/* Rewrite function call statement STMT.  Return TRUE if the statement
++   is to be removed. */
++
++bool
++ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi)
++{
++  /* Handled allocation calls are handled seperately from normal
++     function calls. */
++  if (handled_allocation_stmt (stmt))
++    {
++      tree lhs = gimple_call_lhs (stmt);
++      tree newrhs1[max_split];
++      srdecl *decl = find_decl (lhs);
++      if (!decl || !decl->type)
++	return false;
++      srtype *type = decl->type;
++      tree num = allocate_size (type, decl, stmt);
++      gcc_assert (num);
++      memset (newrhs1, 0, sizeof (newrhs1));
++
++      /* The realloc call needs to have its first argument rewritten. */
++      if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC))
++	{
++	  tree rhs1 = gimple_call_arg (stmt, 0);
++	  if (integer_zerop (rhs1))
++	    {
++	      for (unsigned i = 0; i < max_split; i++)
++		newrhs1[i] = rhs1;
++	    }
++	  else if (!rewrite_expr (rhs1, newrhs1))
++	    internal_error ("rewrite failed for realloc");
++	}
++
++      /* Go through each new lhs.  */
++      for (unsigned i = 0; i < max_split && decl->newdecl[i]; i++)
++	{
++	  /* Specify the correct size for the multi-layer pointer.  */
++	  tree newsize = isptrptr (decl->orig_type)
++			   ? TYPE_SIZE_UNIT (decl->orig_type)
++			   : TYPE_SIZE_UNIT (type->newtype[i]);
++	  gimple *g;
++	  bool rewrite = false;
++	  if (current_layout_opt_level >= SEMI_RELAYOUT && type->semi_relayout)
++	    {
++	      if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC))
++		;
++	      else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))
++		{
++		  tree rhs2 = gimple_call_arg (stmt, 1);
++		  if (tree_to_uhwi (rhs2)
++		      == tree_to_uhwi (TYPE_SIZE_UNIT (type->type)))
++		    {
++		      rewrite = true;
++		      tree size = NULL_TREE;
++		      g = create_aligned_alloc (gsi, type, num, size);
++		      tree real_ptr = make_ssa_name (
++			build_pointer_type (unsigned_char_type_node));
++		      gimple_set_lhs (g, real_ptr);
++		      create_memset_zero (real_ptr, gsi, size);
++		      record_allocated_size (real_ptr, gsi, size);
++		      tree lhs_use = set_ptr_for_use (real_ptr, gsi);
++		      copy_to_lhs (decl->newdecl[i], lhs_use, gsi);
++		    }
++		}
++	      else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC))
++		{
++		  rewrite = true;
++		  tree size = NULL_TREE;
++		  g = create_aligned_alloc (gsi, type, num, size);
++		  tree real_ptr = make_ssa_name (
++		    build_pointer_type (unsigned_char_type_node));
++		  gimple_set_lhs (g, real_ptr);
++		  create_memset_zero (real_ptr, gsi, size);
++		  tree src = get_real_allocated_ptr (newrhs1[i], gsi);
++		  tree old_size = read_allocated_size (src, gsi);
++		  create_memcpy (src, real_ptr, old_size, gsi);
++		  record_allocated_size (real_ptr, gsi, size);
++		  tree lhs_use = set_ptr_for_use (real_ptr, gsi);
++		  create_free (src, gsi);
++		  copy_to_lhs (decl->newdecl[i], lhs_use, gsi);
++		}
++	      else
++		{
++		  gcc_assert (false);
++		  internal_error ("supported type for semi-relayout.");
++		}
++	    }
++	  if (!rewrite
++	      && (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++		  || current_layout_opt_level == STRUCT_SPLIT))
++	    {
++	      /* Every allocation except for calloc needs the size
++	     multiplied out. */
++	      if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))
++		newsize
++		  = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize);
++	      if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC)
++		  || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA))
++		g = gimple_build_call (gimple_call_fndecl (stmt), 1, newsize);
++	      else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))
++		g = gimple_build_call (gimple_call_fndecl (stmt), 2, num,
++				       newsize);
++	      else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC))
++		g = gimple_build_call (gimple_call_fndecl (stmt), 2, newrhs1[i],
++				       newsize);
++	      else
++		gcc_assert (false);
++	      gimple_call_set_lhs (g, decl->newdecl[i]);
++	      gsi_insert_before (gsi, g, GSI_SAME_STMT);
++	    }
++	  if (type->pc_candidate)
++	    {
++	      /* Init global header for pointer compression.  */
++	      gassign *gptr
++		= gimple_build_assign (type->pc_gptr, decl->newdecl[i]);
++	      gsi_insert_before (gsi, gptr, GSI_SAME_STMT);
++	    }
++	}
++      return true;
++    }
++
++  /* The function call free needs to be handled special. */
++  if (gimple_call_builtin_p (stmt, BUILT_IN_FREE))
++    {
++      tree expr = gimple_call_arg (stmt, 0);
++      tree newexpr[max_split];
++      if (!rewrite_expr (expr, newexpr))
++	return false;
++
++      srtype *t = find_type (TREE_TYPE (TREE_TYPE (expr)));
++      if (newexpr[1] == NULL)
++	{
++	  if (t && t->semi_relayout)
++	    newexpr[0] = get_real_allocated_ptr (newexpr[0], gsi);
++	  gimple_call_set_arg (stmt, 0, newexpr[0]);
++	  update_stmt (stmt);
++	  return false;
++	}
++
++      for (unsigned i = 0; i < max_split && newexpr[i]; i++)
++	{
++	  gimple *g
++	    = gimple_build_call (gimple_call_fndecl (stmt), 1, newexpr[i]);
++	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
++	}
++      return true;
++    }
++
++  /* Otherwise, look up the function to see if we have cloned it
++     and rewrite the arguments. */
++  tree fndecl = gimple_call_fndecl (stmt);
++
++  /* Indirect calls are already marked as escaping so ignore.  */
++  if (!fndecl)
++    return false;
++
++  cgraph_node *node = cgraph_node::get (fndecl);
++  gcc_assert (node);
++  srfunction *f = find_function (node);
++
++  /* Add a safe func mechanism.  */
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS && f && f->is_safe_func)
++    {
++      tree expr = gimple_call_arg (stmt, 0);
++      tree newexpr[max_split];
++      if (!rewrite_expr (expr, newexpr))
++	{
++	  return false;
++	}
++
++      if (newexpr[1] == NULL)
++	{
++	  gimple_call_set_arg (stmt, 0, newexpr[0]);
++	  update_stmt (stmt);
++	  return false;
++	}
++      return false;
++    }
++
++  /* Did not find the function or had not cloned it return saying don't
++     change the function call. */
++  if (!f || !f->newf)
++    return false;
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "Changing arguments for function call :\n");
++      print_gimple_expr (dump_file, stmt, 0);
++      fprintf (dump_file, "\n");
++    }
++
++  /* Move over to the new function. */
++  f = f->newf;
++
++  tree chain = gimple_call_chain (stmt);
++  unsigned nargs = gimple_call_num_args (stmt);
++  auto_vec vargs (nargs);
++
++  if (chain)
++    {
++      tree newchains[max_split];
++      if (rewrite_expr (chain, newchains))
++	{
++	  /* Chain decl's type cannot be split and but it can change. */
++	  gcc_assert (newchains[1] == NULL);
++	  chain = newchains[0];
++	}
++    }
++
++  for (unsigned i = 0; i < nargs; i++)
++    vargs.quick_push (gimple_call_arg (stmt, i));
++
++  int extraargs = 0;
++
++  for (unsigned i = 0; i < f->args.length (); i++)
++    {
++      srdecl *d = f->args[i];
++      if (d->argumentnum == -2)
++	continue;
++      gcc_assert (d->argumentnum != -1);
++      tree arg = vargs[d->argumentnum + extraargs];
++      tree newargs[max_split];
++      if (!rewrite_expr (arg, newargs))
++	continue;
++
++      /* If this ARG has a replacement handle the replacement.  */
++      for (unsigned j = 0; j < max_split && d->newdecl[j]; j++)
++	{
++	  gcc_assert (newargs[j]);
++	  /* If this is the first replacement of the arugment,
++	     then just replace it.  */
++	  if (j == 0)
++	    vargs[d->argumentnum + extraargs] = newargs[j];
++	  else
++	    {
++	      /* More than one replacement, we need to insert into the array. */
++	      extraargs++;
++	      vargs.safe_insert (d->argumentnum + extraargs, newargs[j]);
++	    }
++	}
++    }
++
++  gcall *new_stmt;
++
++  new_stmt = gimple_build_call_vec (f->node->decl, vargs);
++
++  if (gimple_call_lhs (stmt))
++    gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
++
++  gimple_set_vuse (new_stmt, gimple_vuse (stmt));
++  gimple_set_vdef (new_stmt, gimple_vdef (stmt));
++
++  if (gimple_has_location (stmt))
++    gimple_set_location (new_stmt, gimple_location (stmt));
++  gimple_call_copy_flags (new_stmt, stmt);
++  gimple_call_set_chain (new_stmt, chain);
++
++  gimple_set_modified (new_stmt, true);
++
++  if (gimple_vdef (new_stmt) && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
++    SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
++
++  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
++
++  /* We need to defer cleaning EH info on the new statement to
++     fixup-cfg.  We may not have dominator information at this point
++     and thus would end up with unreachable blocks and have no way
++     to communicate that we need to run CFG cleanup then.  */
++  int lp_nr = lookup_stmt_eh_lp (stmt);
++  if (lp_nr != 0)
++    {
++      remove_stmt_from_eh_lp (stmt);
++      add_stmt_to_eh_lp (new_stmt, lp_nr);
++    }
++
++  return true;
++}
++
++/* Rewrite the conditional statement STMT.  Return TRUE if the
++   old statement is to be removed. */
++
++bool
++ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi)
++{
++  tree_code rhs_code = gimple_cond_code (stmt);
++
++  /* Handle only equals or not equals conditionals. */
++  if ((current_layout_opt_level < STRUCT_REORDER_FIELDS
++       && (rhs_code != EQ_EXPR && rhs_code != NE_EXPR))
++      || (current_layout_opt_level >= STRUCT_REORDER_FIELDS
++	  && TREE_CODE_CLASS (rhs_code) != tcc_comparison))
++    return false;
++  tree lhs = gimple_cond_lhs (stmt);
++  tree rhs = gimple_cond_rhs (stmt);
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nCOND: Rewriting\n");
++      print_gimple_stmt (dump_file, stmt, 0);
++      print_generic_expr (dump_file, lhs);
++      fprintf (dump_file, "\n");
++      print_generic_expr (dump_file, rhs);
++      fprintf (dump_file, "\n");
++    }
++
++  tree newlhs[max_split] = {};
++  tree newrhs[max_split] = {};
++  if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "Did nothing to statement.\n");
++	}
++      return false;
++    }
++
++  /*  Old rewrite: if (x_1 != 0B)
++	-> _1 = x.slo.0_1 != 0B; if (_1 != 1)
++	   The logic is incorrect.
++      New rewrite: if (x_1 != 0B)
++	-> if (x.slo.0_1 != 0B)ï¼›*/
++  for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++)
++    {
++      if (newlhs[i])
++	{
++	  gimple_cond_set_lhs (stmt, newlhs[i]);
++	}
++      if (newrhs[i])
++	{
++	  gimple_cond_set_rhs (stmt, newrhs[i]);
++	}
++      update_stmt (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "replaced with:\n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "\n");
++	}
++    }
++  return false;
++}
++
++/* Rewrite debug statments if possible.  Return TRUE if the statement
++   should be removed. */
++
++bool
++ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *)
++{
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++    {
++      /* Delete debug gimple now.  */
++      return true;
++    }
++  bool remove = false;
++  if (gimple_debug_bind_p (stmt))
++    {
++      tree var = gimple_debug_bind_get_var (stmt);
++      tree newvar[max_split];
++      if (rewrite_expr (var, newvar, true))
++	remove = true;
++      if (gimple_debug_bind_has_value_p (stmt))
++	{
++	  var = gimple_debug_bind_get_value (stmt);
++	  if (TREE_CODE (var) == POINTER_PLUS_EXPR)
++	    var = TREE_OPERAND (var, 0);
++	  if (rewrite_expr (var, newvar, true))
++	    remove = true;
++	}
++    }
++  else if (gimple_debug_source_bind_p (stmt))
++    {
++      tree var = gimple_debug_source_bind_get_var (stmt);
++      tree newvar[max_split];
++      if (rewrite_expr (var, newvar, true))
++	remove = true;
++      var = gimple_debug_source_bind_get_value (stmt);
++      if (TREE_CODE (var) == POINTER_PLUS_EXPR)
++	var = TREE_OPERAND (var, 0);
++      if (rewrite_expr (var, newvar, true))
++	remove = true;
++    }
++
++  return remove;
++}
++
++/* Rewrite PHI nodes, return true if the PHI was replaced. */
++
++bool
++ipa_struct_reorg::rewrite_phi (gphi *phi)
++{
++  tree newlhs[max_split];
++  gphi *newphi[max_split];
++  tree result = gimple_phi_result (phi);
++  gphi_iterator gsi;
++
++  memset (newphi, 0, sizeof (newphi));
++
++  if (!rewrite_expr (result, newlhs))
++    return false;
++
++  if (newlhs[0] == NULL)
++    return false;
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nrewriting PHI:\n");
++      print_gimple_stmt (dump_file, phi, 0);
++    }
++
++  for (unsigned i = 0; i < max_split && newlhs[i]; i++)
++    newphi[i] = create_phi_node (newlhs[i], gimple_bb (phi));
++
++  for (unsigned i = 0; i < gimple_phi_num_args (phi); i++)
++    {
++      tree newrhs[max_split];
++      phi_arg_d rhs = *gimple_phi_arg (phi, i);
++
++      /* Handling the NULL phi Node.  */
++      bool r = rewrite_expr (rhs.def, newrhs);
++      if (!r && integer_zerop (rhs.def))
++	{
++	  for (unsigned i = 0; i < max_split && newlhs[i]; i++)
++	    {
++	      newrhs[i] = fold_convert (TREE_TYPE (newlhs[i]), rhs.def);
++	    }
++	}
++
++      for (unsigned j = 0; j < max_split && newlhs[j]; j++)
++	{
++	  SET_PHI_ARG_DEF (newphi[j], i, newrhs[j]);
++	  gimple_phi_arg_set_location (newphi[j], i, rhs.locus);
++	  update_stmt (newphi[j]);
++	}
++    }
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "into:\n");
++      for (unsigned i = 0; i < max_split && newlhs[i]; i++)
++	{
++	  print_gimple_stmt (dump_file, newphi[i], 0);
++	  fprintf (dump_file, "\n");
++	}
++    }
++
++  gsi = gsi_for_phi (phi);
++  remove_phi_node (&gsi, false);
++
++  return true;
++}
++
++/* Rewrite gimple statement STMT, return true if the STATEMENT
++   is to be removed. */
++
++bool
++ipa_struct_reorg::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi)
++{
++  switch (gimple_code (stmt))
++    {
++    case GIMPLE_ASSIGN:
++      return rewrite_assign (as_a (stmt), gsi);
++    case GIMPLE_CALL:
++      return rewrite_call (as_a (stmt), gsi);
++    case GIMPLE_COND:
++      return rewrite_cond (as_a (stmt), gsi);
++      break;
++    case GIMPLE_GOTO:
++    case GIMPLE_SWITCH:
++      break;
++    case GIMPLE_DEBUG:
++    case GIMPLE_ASM:
++      break;
++    default:
++      break;
++    }
++  return false;
++}
++
++/* Does the function F use any decl which has changed.  */
++
++bool
++ipa_struct_reorg::has_rewritten_type (srfunction *f)
++{
++  for (unsigned i = 0; i < f->decls.length (); i++)
++    {
++      srdecl *d = f->decls[i];
++      if (d->newdecl[0] != d->decl)
++	return true;
++    }
++
++  for (unsigned i = 0; i < f->globals.length (); i++)
++    {
++      srdecl *d = f->globals[i];
++      if (d->newdecl[0] != d->decl)
++	return true;
++    }
++  return false;
++}
++
++/* Rewrite the functions if needed, return
++   the TODOs requested.  */
++
++unsigned
++ipa_struct_reorg::rewrite_functions (void)
++{
++  unsigned retval = 0;
++
++  /* Create new types, if we did not create any new types,
++     then don't rewrite any accesses. */
++  if (!create_new_types ())
++    {
++      if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	{
++	  for (unsigned i = 0; i < functions.length (); i++)
++	    {
++	      srfunction *f = functions[i];
++	      cgraph_node *node = f->node;
++	      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
++	      if (dump_file && (dump_flags & TDF_DETAILS))
++		{
++		  fprintf (dump_file, "\nNo rewrite:\n");
++		  dump_function_to_file (current_function_decl, dump_file,
++					 dump_flags | TDF_VOPS);
++		}
++	      pop_cfun ();
++	    }
++	}
++      return 0;
++    }
++
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS && dump_file)
++    {
++      fprintf (dump_file, "=========== all created newtypes: ===========\n\n");
++      dump_newtypes (dump_file);
++    }
++
++  if (functions.length ())
++    {
++      retval = TODO_remove_functions;
++      create_new_functions ();
++      if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++	{
++	  prune_escaped_types ();
++	}
++    }
++
++  if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
++    {
++      for (unsigned i = 0; i < functions.length (); i++)
++	{
++	  srfunction *f = functions[i];
++	  cgraph_node *node = f->node;
++	  push_cfun (DECL_STRUCT_FUNCTION (node->decl));
++	  if (dump_file && (dump_flags & TDF_DETAILS))
++	    {
++	      fprintf (dump_file, "==== Before create decls: %dth_%s ====\n\n",
++		       i, f->node->name ());
++	      dump_function_to_file (current_function_decl, dump_file,
++				     dump_flags | TDF_VOPS);
++	    }
++	  pop_cfun ();
++	}
++    }
++
++  create_new_decls ();
++
++  for (unsigned i = 0; i < functions.length (); i++)
++    {
++      srfunction *f = functions[i];
++      if (f->newnode)
++	continue;
++
++      /* Function uses no rewritten types, so don't cause a rewrite. */
++      if (!has_rewritten_type (f))
++	continue;
++
++      cgraph_node *node = f->node;
++      basic_block bb;
++
++      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
++      current_function = f;
++
++      if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE)
++	{
++	  calculate_dominance_info (CDI_DOMINATORS);
++	  loop_optimizer_init (0);
++	}
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "\nBefore rewrite: %dth_%s\n", i,
++		   f->node->name ());
++	  dump_function_to_file (current_function_decl, dump_file,
++				 dump_flags | TDF_VOPS);
++	  fprintf (dump_file, "\n======== Start to rewrite: %dth_%s ========\n",
++		   i, f->node->name ());
++	}
++      FOR_EACH_BB_FN (bb, cfun)
++	{
++	  for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);)
++	    {
++	      if (rewrite_phi (si.phi ()))
++		si = gsi_start_phis (bb);
++	      else
++		gsi_next (&si);
++	    }
++
++	  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);)
++	    {
++	      gimple *stmt = gsi_stmt (si);
++	      if (rewrite_stmt (stmt, &si))
++		gsi_remove (&si, true);
++	      else
++		gsi_next (&si);
++	    }
++	}
++
++      /* Debug statements need to happen after all other statements
++     have changed. */
++      FOR_EACH_BB_FN (bb, cfun)
++	{
++	  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);)
++	    {
++	      gimple *stmt = gsi_stmt (si);
++	      if (gimple_code (stmt) == GIMPLE_DEBUG
++		  && rewrite_debug (stmt, &si))
++		gsi_remove (&si, true);
++	      else
++		gsi_next (&si);
++	    }
++	}
++
++      /* Release the old SSA_NAMES for old arguments.  */
++      if (f->old)
++	{
++	  for (unsigned i = 0; i < f->args.length (); i++)
++	    {
++	      srdecl *d = f->args[i];
++	      if (d->newdecl[0] != d->decl)
++		{
++		  tree ssa_name = ssa_default_def (cfun, d->decl);
++		  if (dump_file && (dump_flags & TDF_DETAILS))
++		    {
++		      fprintf (dump_file, "Found ");
++		      print_generic_expr (dump_file, ssa_name);
++		      fprintf (dump_file, " to be released.\n");
++		    }
++		  release_ssa_name (ssa_name);
++		}
++	    }
++	}
++
++      update_ssa (TODO_update_ssa_only_virtuals);
++
++      if (flag_tree_pta)
++	compute_may_aliases ();
++
++      for (unsigned j = 0; j < f->decls.length (); j++)
++	{
++	  if (TREE_CODE (f->decls[j]->decl) != SSA_NAME)
++	    continue;
++	  if (f->decls[j]->has_new_decl ())
++	    {
++	      release_ssa_name (f->decls[j]->decl);
++	    }
++	}
++
++      remove_unused_locals ();
++
++      cgraph_edge::rebuild_edges ();
++
++      free_dominance_info (CDI_DOMINATORS);
++
++      if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE)
++	loop_optimizer_finalize ();
++
++      if (dump_file)
++	{
++	  fprintf (dump_file, "\nAfter rewrite: %dth_%s\n", i,
++		   f->node->name ());
++	  dump_function_to_file (current_function_decl, dump_file,
++				 dump_flags | TDF_VOPS);
++	}
++
++      pop_cfun ();
++      current_function = NULL;
++    }
++
++  return retval | TODO_verify_all;
++}
++
++unsigned int
++ipa_struct_reorg::execute_struct_relayout (void)
++{
++  unsigned retval = 0;
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      tree type = types[i]->type;
++      if (TYPE_FIELDS (type) == NULL)
++	{
++	  continue;
++	}
++      if (types[i]->has_alloc_array != 1)
++	{
++	  continue;
++	}
++      if (types[i]->chain_type)
++	{
++	  continue;
++	}
++      if (get_type_name (types[i]->type) == NULL)
++	{
++	  continue;
++	}
++      retval |= ipa_struct_relayout (type, this).execute ();
++    }
++
++  if (dump_file)
++    {
++      if (transformed)
++	{
++	  fprintf (dump_file,
++		   "\nNumber of structures to transform in "
++		   "Complete Structure Relayout is %d\n",
++		   transformed);
++	}
++      else
++	{
++	  fprintf (dump_file, "\nNo structures to transform in "
++			      "Complete Structure Relayout.\n");
++	}
++    }
++
++  return retval;
++}
++
++/* True if the var with void type is only used to compare with the same
++   target type.  */
++
++bool
++ipa_struct_reorg::safe_void_cmp_p (tree var, srtype *type)
++{
++  imm_use_iterator imm_iter;
++  use_operand_p use_p;
++  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
++    {
++      gimple *use_stmt = USE_STMT (use_p);
++      if (is_gimple_debug (use_stmt))
++	continue;
++
++      if (gimple_code (use_stmt) == GIMPLE_COND)
++	{
++	  tree lhs = gimple_cond_lhs (use_stmt);
++	  tree rhs = gimple_cond_rhs (use_stmt);
++	  tree xhs = lhs == var ? rhs : lhs;
++	  if (types_compatible_p (inner_type (TREE_TYPE (xhs)), type->type))
++	    continue;
++	}
++      return false;
++    }
++  return true;
++}
++
++/* Mark the structure that should perform pointer compression.  */
++
++void
++ipa_struct_reorg::check_and_prune_struct_for_pointer_compression (void)
++{
++  unsigned pc_transform_num = 0;
++
++  if (dump_file)
++    fprintf (dump_file, "\nMark the structure that should perform pointer"
++			" compression:\n");
++
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      srtype *type = types[i];
++      if (dump_file)
++	print_generic_expr (dump_file, type->type);
++
++      if (type->has_escaped ())
++	{
++	  if (dump_file)
++	    fprintf (dump_file, " has escaped by %s, skip compression.\n",
++		     type->escape_reason ());
++	  continue;
++	}
++      if (TYPE_FIELDS (type->type) == NULL)
++	{
++	  if (dump_file)
++	    fprintf (dump_file, " has zero field, skip compression.\n");
++	  continue;
++	}
++      if (type->chain_type)
++	{
++	  if (dump_file)
++	    fprintf (dump_file, " is chain_type, skip compression.\n");
++	  continue;
++	}
++      if (type->has_alloc_array != 1)
++	{
++	  if (dump_file)
++	    fprintf (dump_file, " has alloc number: %d, skip compression.\n",
++		     type->has_alloc_array);
++	  continue;
++	}
++      if (get_type_name (type->type) == NULL)
++	{
++	  if (dump_file)
++	    fprintf (dump_file, " has empty struct name,"
++				" skip compression.\n");
++	  continue;
++	}
++      if (!type->has_legal_alloc_num)
++	{
++	  if (current_layout_opt_level & POINTER_COMPRESSION_UNSAFE)
++	    {
++	      if (dump_file)
++		fprintf (dump_file, " has unknown alloc size, but"
++				    " in unsafe mode, so");
++	    }
++	  else
++	    {
++	      if (dump_file)
++		fprintf (dump_file, " has illegal struct array size,"
++				    " skip compression.\n");
++	      continue;
++	    }
++	}
++      pc_transform_num++;
++      type->pc_candidate = true;
++      if (dump_file)
++	fprintf (dump_file, " attempts to do pointer compression.\n");
++    }
++
++  if (dump_file)
++    {
++      if (pc_transform_num)
++	fprintf (dump_file,
++		 "\nNumber of structures to transform in "
++		 "pointer compression is %d\n",
++		 pc_transform_num);
++      else
++	fprintf (dump_file, "\nNo structures to transform in "
++			    "pointer compression.\n");
++    }
++}
++
++void
++ipa_struct_reorg::check_and_prune_struct_for_semi_relayout (void)
++{
++  unsigned relayout_transform = 0;
++  for (unsigned i = 0; i < types.length (); i++)
++    {
++      srtype *type = types[i];
++      if (dump_file)
++	{
++	  print_generic_expr (dump_file, type->type);
++	}
++      if (type->has_escaped ())
++	{
++	  if (dump_file)
++	    {
++	      fprintf (dump_file, " has escaped by %s, skip relayout.\n",
++		       type->escape_reason ());
++	    }
++	  continue;
++	}
++      if (TYPE_FIELDS (type->type) == NULL)
++	{
++	  if (dump_file)
++	    {
++	      fprintf (dump_file, " has zero field, skip relayout.\n");
++	    }
++	  continue;
++	}
++      if (type->chain_type)
++	{
++	  if (dump_file)
++	    {
++	      fprintf (dump_file, " is chain_type, skip relayout.\n");
++	    }
++	  continue;
++	}
++      if (type->has_alloc_array == 0 || type->has_alloc_array == 1
++	  || type->has_alloc_array == -1 || type->has_alloc_array == -3
++	  || type->has_alloc_array == -4)
++	{
++	  if (dump_file)
++	    {
++	      fprintf (dump_file, " has alloc number: %d, skip relayout.\n",
++		       type->has_alloc_array);
++	    }
++	  continue;
++	}
++      if (get_type_name (type->type) == NULL)
++	{
++	  if (dump_file)
++	    {
++	      fprintf (dump_file, " has empty struct name,"
++				  " skip relayout.\n");
++	    }
++	  continue;
++	}
++      relayout_transform++;
++      type->semi_relayout = true;
++      if (dump_file)
++	{
++	  fprintf (dump_file, " attempts to do semi-relayout.\n");
++	}
++    }
++
++  if (dump_file)
++    {
++      if (relayout_transform)
++	{
++	  fprintf (dump_file,
++		   "\nNumber of structures to transform in "
++		   "semi-relayout is %d\n",
++		   relayout_transform);
++	}
++      else
++	{
++	  fprintf (dump_file, "\nNo structures to transform in "
++			      "semi-relayout.\n");
++	}
++    }
++}
++
++/* Init pointer size from parameter param_pointer_compression_size.  */
++
++static void
++init_pointer_size_for_pointer_compression (void)
++{
++  switch (param_pointer_compression_size)
++    {
++    case 8:
++    // FALLTHRU
++    case 16:
++    // FALLTHRU
++    case 32:
++      compressed_size = param_pointer_compression_size;
++      break;
++    default:
++      error ("Invalid pointer compression size, using the following param: "
++	     "\"--param pointer-compression-size=[8,16,32]\"");
++    }
++}
++
++unsigned int
++ipa_struct_reorg::execute (unsigned int opt)
++{
++  unsigned int ret = 0;
++
++  if (opt != COMPLETE_STRUCT_RELAYOUT)
++    {
++      current_layout_opt_level = opt;
++      /* If there is a top-level inline-asm,
++     the pass immediately returns.  */
++      if (symtab->first_asm_symbol ())
++	{
++	  return 0;
++	}
++      record_accesses ();
++      prune_escaped_types ();
++      if (opt == STRUCT_SPLIT)
++	{
++	  analyze_types ();
++	}
++      if (opt >= POINTER_COMPRESSION_SAFE)
++	check_and_prune_struct_for_pointer_compression ();
++      if (opt >= SEMI_RELAYOUT)
++	check_and_prune_struct_for_semi_relayout ();
++      ret = rewrite_functions ();
++    }
++  else // do COMPLETE_STRUCT_RELAYOUT
++    {
++      if (dump_file)
++	{
++	  fprintf (dump_file, "\n\nTry Complete Struct Relayout:\n");
++	}
++      current_layout_opt_level = COMPLETE_STRUCT_RELAYOUT;
++      if (symtab->first_asm_symbol ())
++	{
++	  return 0;
++	}
++      record_accesses ();
++      prune_escaped_types ();
++
++      ret = execute_struct_relayout ();
++    }
++
++  return ret;
++}
++
++const pass_data pass_data_ipa_struct_reorg = {
++  SIMPLE_IPA_PASS,     /* type */
++  "struct_reorg",      /* name */
++  OPTGROUP_NONE,       /* optinfo_flags */
++  TV_IPA_STRUCT_REORG, /* tv_id */
++  0,		       /* properties_required */
++  0,		       /* properties_provided */
++  0,		       /* properties_destroyed */
++  0,		       /* todo_flags_start */
++  0,		       /* todo_flags_finish */
++};
++
++class pass_ipa_struct_reorg : public simple_ipa_opt_pass
++{
++public:
++  pass_ipa_struct_reorg (gcc::context *ctxt)
++    : simple_ipa_opt_pass (pass_data_ipa_struct_reorg, ctxt)
++  {}
++
++  /* opt_pass methods: */
++  virtual bool gate (function *);
++  virtual unsigned int execute (function *)
++  {
++    unsigned int ret = 0;
++    unsigned int ret_reorg = 0;
++    unsigned int level = 0;
++    switch (struct_layout_optimize_level)
++      {
++      case 6:
++	level |= SEMI_RELAYOUT;
++      // FALLTHRU
++      case 5:
++	level |= POINTER_COMPRESSION_UNSAFE;
++      // FALLTHRU
++      case 4:
++	level |= POINTER_COMPRESSION_SAFE;
++      // FALLTHRU
++      case 3:
++	level |= DEAD_FIELD_ELIMINATION;
++      // FALLTHRU
++      case 2:
++	level |= STRUCT_REORDER_FIELDS;
++      // FALLTHRU
++      case 1:
++	level |= COMPLETE_STRUCT_RELAYOUT;
++	level |= STRUCT_SPLIT;
++	break;
++      case 0:
++	break;
++      default:
++	gcc_unreachable ();
++      }
++
++    if (level & POINTER_COMPRESSION_SAFE)
++      init_pointer_size_for_pointer_compression ();
++
++    if (level & SEMI_RELAYOUT)
++      {
++	semi_relayout_align = semi_relayout_level;
++	relayout_part_size = 1 << semi_relayout_level;
++      }
++
++    /* Preserved for backward compatibility, reorder fields needs run before
++       struct split and complete struct relayout.  */
++    if (flag_ipa_reorder_fields && level < STRUCT_REORDER_FIELDS)
++      ret = ipa_struct_reorg ().execute (STRUCT_REORDER_FIELDS);
++
++    if (level >= STRUCT_REORDER_FIELDS)
++      ret = ipa_struct_reorg ().execute (level);
++
++    if (level >= COMPLETE_STRUCT_RELAYOUT)
++      {
++	/* Preserved for backward compatibility.  */
++	ret_reorg = ipa_struct_reorg ().execute (STRUCT_SPLIT);
++	if (!ret_reorg)
++	  ret_reorg = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT);
++      }
++
++    return ret | ret_reorg;
++  }
++
++}; // class pass_ipa_struct_reorg
++
++bool
++pass_ipa_struct_reorg::gate (function *)
++{
++  return (optimize >= 3
++	  && flag_ipa_struct_reorg
++	  /* Don't bother doing anything if the program has errors.  */
++	  && !seen_error ()
++	  && flag_lto_partition == LTO_PARTITION_ONE
++	  /* Only enable struct optimizations in C since other
++	     languages' grammar forbid.  */
++	  && lang_c_p ()
++	  /* Only enable struct optimizations in lto or whole_program.  */
++	  && (in_lto_p || flag_whole_program));
++}
++
++} // namespace
++
++simple_ipa_opt_pass *
++make_pass_ipa_struct_reorg (gcc::context *ctxt)
++{
++  return new pass_ipa_struct_reorg (ctxt);
++}
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h
+new file mode 100644
+index 00000000000..da82d1346b8
+--- /dev/null
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h
+@@ -0,0 +1,265 @@
++/* Struct-reorg optimizations.
++   Copyright (C) 2016-2017 Free Software Foundation, Inc.
++   Contributed by Andrew Pinski  
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++.  */
++
++#ifndef IPA_STRUCT_REORG_H
++#define IPA_STRUCT_REORG_H
++
++namespace struct_reorg {
++
++const int max_split = 2;
++
++unsigned semi_relayout_align = semi_relayout_level;
++unsigned relayout_part_size = 1 << semi_relayout_level;
++
++template  struct auto_vec_del : auto_vec
++{
++  ~auto_vec_del ();
++};
++
++template  auto_vec_del::~auto_vec_del (void)
++{
++  unsigned i;
++  T *t;
++  FOR_EACH_VEC_ELT (*this, i, t)
++    {
++      delete t;
++    }
++}
++
++enum escape_type
++{
++  does_not_escape,
++#define DEF_ESCAPE(ENUM, TEXT) ENUM,
++#include "escapes.def"
++  escape_max_escape
++};
++
++const char *escape_type_string[escape_max_escape - 1] = {
++#define DEF_ESCAPE(ENUM, TEXT) TEXT,
++#include "escapes.def"
++};
++
++struct srfield;
++struct srtype;
++struct sraccess;
++struct srdecl;
++struct srfunction;
++
++struct srfunction
++{
++  cgraph_node *node;
++  auto_vec args;
++  auto_vec globals;
++  auto_vec_del decls;
++  srdecl *record_decl (srtype *, tree, int arg, tree orig_type = NULL);
++
++  srfunction *old;
++  cgraph_node *newnode;
++  srfunction *newf;
++
++  bool is_safe_func;
++
++  // Constructors
++  srfunction (cgraph_node *n);
++
++  // Methods
++  void add_arg (srdecl *arg);
++  void dump (FILE *file);
++  void simple_dump (FILE *file);
++
++  bool check_args (void);
++  void create_new_decls (void);
++  srdecl *find_decl (tree);
++};
++
++struct srglobal : private srfunction
++{
++  srglobal () : srfunction (NULL) {}
++
++  using srfunction::create_new_decls;
++  using srfunction::decls;
++  using srfunction::dump;
++  using srfunction::find_decl;
++  using srfunction::record_decl;
++};
++
++struct srtype
++{
++  tree type;
++  auto_vec_del fields;
++
++  // array of fields that use this type.
++  auto_vec field_sites;
++
++  // array of functions which use directly the type
++  auto_vec functions;
++
++  auto_vec_del accesses;
++  bool chain_type;
++
++private:
++  escape_type escapes;
++
++public:
++  tree newtype[max_split];
++  tree pc_gptr;
++  bool visited;
++  bool pc_candidate;
++  bool has_legal_alloc_num;
++  int has_alloc_array;
++  bool semi_relayout;
++  hash_map new_field_offsets;
++  unsigned bucket_parts;
++  unsigned bucket_size;
++
++  // Constructors
++  srtype (tree type);
++
++  // Methods
++  void dump (FILE *file);
++  void simple_dump (FILE *file);
++  void add_function (srfunction *);
++  void add_access (sraccess *a) { accesses.safe_push (a); }
++  void add_field_site (srfield *);
++
++  srfield *find_field (unsigned HOST_WIDE_INT offset);
++
++  bool create_new_type (void);
++  void analyze (void);
++  bool has_dead_field (void);
++  void mark_escape (escape_type, gimple *stmt);
++  void create_global_ptr_for_pc ();
++  unsigned calculate_bucket_size ();
++  bool has_escaped (void) { return escapes != does_not_escape; }
++  const char *escape_reason (void)
++  {
++    if (!has_escaped ())
++      return NULL;
++    return escape_type_string[escapes - 1];
++  }
++  bool escaped_rescusive (void) { return escapes == escape_rescusive_type; }
++  bool has_new_type (void) { return newtype[0] && newtype[0] != type; }
++};
++
++/* Bitflags used for determining if a field
++     is never accessed, read or written.  */
++const unsigned EMPTY_FIELD = 0x0u;
++const unsigned READ_FIELD = 0x01u;
++const unsigned WRITE_FIELD = 0x02u;
++
++struct srfield
++{
++  unsigned HOST_WIDE_INT offset;
++  tree fieldtype;
++  tree fielddecl;
++  srtype *base;
++  srtype *type;
++
++  unsigned clusternum;
++
++  tree newfield[max_split];
++  unsigned field_access; /* FIELD_DECL -> bitflag (use for dfe).  */
++  // Constructors
++  srfield (tree field, srtype *base);
++
++  // Methods
++  void dump (FILE *file);
++  void simple_dump (FILE *file);
++
++  void create_new_fields (tree newtype[max_split], tree newfields[max_split],
++			  tree newlast[max_split]);
++  void reorder_fields (tree newfields[max_split], tree newlast[max_split],
++		       tree &field);
++  void create_new_optimized_fields (tree newtype[max_split],
++				    tree newfields[max_split],
++				    tree newlast[max_split]);
++};
++
++struct sraccess
++{
++  gimple *stmt;
++  cgraph_node *node;
++
++  srtype *type;
++  // NULL field means the whole type is accessed
++  srfield *field;
++
++  // constructors
++  sraccess (gimple *s, cgraph_node *n, srtype *t, srfield *f = NULL)
++    : stmt (s), node (n), type (t), field (f)
++  {}
++
++  // Methods
++  void dump (FILE *file);
++};
++
++struct srdecl
++{
++  srtype *type;
++  tree decl;
++  tree func;
++  /* -1 : not an argument
++     -2 : static chain */
++  int argumentnum;
++
++  bool visited;
++
++  tree newdecl[max_split];
++
++  /* Auxiliary record complete original type information of the void* type.  */
++  tree orig_type;
++
++  // Constructors
++  srdecl (srtype *type, tree decl, int argumentnum = -1, tree orgtype = NULL);
++
++  // Methods
++  void dump (FILE *file);
++  bool has_new_decl (void) { return newdecl[0] && newdecl[0] != decl; }
++};
++
++} // namespace struct_reorg
++
++namespace struct_relayout {
++
++const int min_relayout_split = 8;
++const int max_relayout_split = 16;
++
++struct csrtype
++{
++  tree type;
++  unsigned HOST_WIDE_INT old_size;
++  unsigned HOST_WIDE_INT new_size;
++  unsigned field_count;
++  tree struct_size;
++
++  // Constructors
++  csrtype ()
++    : type (NULL), old_size (0), new_size (0), field_count (0),
++      struct_size (NULL)
++  {}
++
++  // Methods
++  unsigned calculate_field_num (tree field_offset);
++  void init_type_info (void);
++};
++
++} // namespace struct_relayout
++
++#endif
+diff --git a/gcc/opts.cc b/gcc/opts.cc
+index a97630d1c9a..261da080062 100644
+--- a/gcc/opts.cc
++++ b/gcc/opts.cc
+@@ -2946,6 +2946,22 @@ common_handle_option (struct gcc_options *opts,
+       enable_fdo_optimizations (opts, opts_set, value);
+       SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
+       break;
++    case OPT_fipa_struct_reorg_:
++      /* No break here - do -fipa-struct-reorg processing.  */
++      /* FALLTHRU.  */
++    case OPT_fipa_struct_reorg:
++      opts->x_flag_ipa_struct_reorg = value;
++      if (value && !opts->x_struct_layout_optimize_level)
++  {
++    /* Using the -fipa-struct-reorg option is equivalent to using
++       -fipa-struct-reorg=1.  */
++    opts->x_struct_layout_optimize_level = 1;
++  }
++      break;
++
++    case OPT_fipa_reorder_fields:
++      SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_struct_reorg, value);
++      break;
+ 
+     case OPT_fprofile_generate_:
+       opts->x_profile_data_prefix = xstrdup (arg);
+diff --git a/gcc/params.opt b/gcc/params.opt
+index e0ff9e21054..78c60bc64d3 100644
+--- a/gcc/params.opt
++++ b/gcc/params.opt
+@@ -865,6 +865,9 @@ Enum(parloops_schedule_type) String(runtime) Value(PARLOOPS_SCHEDULE_RUNTIME)
+ Common Joined UInteger Var(param_partial_inlining_entry_probability) Init(70) Optimization IntegerRange(0, 100) Param
+ Maximum probability of the entry BB of split region (in percent relative to entry BB of the function) to make partial inlining happen.
+ 
++-param=struct-reorg-cold-struct-ratio=
++Common Joined UInteger Var(param_struct_reorg_cold_struct_ratio) Init(10) IntegerRange(0, 100) Param Optimization
++The threshold ratio between current and hottest structure counts.
+ -param=predictable-branch-outcome=
+ Common Joined UInteger Var(param_predictable_branch_outcome) Init(2) IntegerRange(0, 50) Param Optimization
+ Maximal estimated outcome of branch considered predictable.
+@@ -1201,4 +1204,12 @@ Enum(vrp_mode) String(vrp) Value(VRP_MODE_VRP)
+ EnumValue
+ Enum(vrp_mode) String(ranger) Value(VRP_MODE_RANGER)
+ 
++-param=compressed-pointer-size=
++Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization
++Target size of compressed pointer, which should be 8, 16 or 32.
++
++-param=semi-relayout-level=
++Common Joined UInteger Var(semi_relayout_level) Init(13) IntegerRange(11, 15) Param Optimization
++Set capacity of each bucket for semi-relayout to (1 << semi-relayout-level) / 8.
++
+ ; This comment is to ensure we retain the blank line above.
+diff --git a/gcc/passes.def b/gcc/passes.def
+index 8dbb7983e3e..66989aa6967 100644
+--- a/gcc/passes.def
++++ b/gcc/passes.def
+@@ -178,6 +178,8 @@ along with GCC; see the file COPYING3.  If not see
+      compiled unit.  */
+   INSERT_PASSES_AFTER (all_late_ipa_passes)
+   NEXT_PASS (pass_ipa_pta);
++  /* FIXME: this should be a normal IP pass.  */
++  NEXT_PASS (pass_ipa_struct_reorg);
+   NEXT_PASS (pass_omp_simd_clone);
+   TERMINATE_PASS_LIST (all_late_ipa_passes)
+ 
+diff --git a/gcc/pointer-query.cc b/gcc/pointer-query.cc
+index 9a12bb27c34..43373994496 100644
+--- a/gcc/pointer-query.cc
++++ b/gcc/pointer-query.cc
+@@ -2015,6 +2015,8 @@ handle_ssa_name (tree ptr, bool addr, int ostype,
+     }
+ 
+   gimple *stmt = SSA_NAME_DEF_STMT (ptr);
++  if (!stmt)
++    return false;
+   if (is_gimple_call (stmt))
+     {
+       /* If STMT is a call to an allocation function get the size
+diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
+index c54d3084cc4..3fe64047c8b 100644
+--- a/gcc/symbol-summary.h
++++ b/gcc/symbol-summary.h
+@@ -103,6 +103,12 @@ protected:
+   /* Allocates new data that are stored within map.  */
+   T* allocate_new ()
+   {
++    /* In structure optimizations, we call new to ensure that
++       the allocated memory is initialized to 0.  */
++    if (flag_ipa_struct_reorg)
++      return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
++		       : new T ();
++
+     /* Call gcc_internal_because we do not want to call finalizer for
+        a type T.  We call dtor explicitly.  */
+     return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
+@@ -115,7 +121,12 @@ protected:
+     if (is_ggc ())
+       ggc_delete (item);
+     else
+-      m_allocator.remove (item);
++      {
++	if (flag_ipa_struct_reorg)
++	  delete item;
++	else
++	  m_allocator.remove (item);
++      }
+   }
+ 
+   /* Unregister all call-graph hooks.  */
+diff --git a/gcc/testsuite/g++.dg/struct/no-body-function.cpp b/gcc/testsuite/g++.dg/struct/no-body-function.cpp
+new file mode 100644
+index 00000000000..4e56e73fcae
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/struct/no-body-function.cpp
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-std=gnu++17 -Wno-builtin-declaration-mismatch -O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -S" } */
++
++struct S {
++    int x;
++    double y;
++};
++S f();
++
++const auto [x0, y0] = f();
++const auto [x1, y1] = f();
++
++static union {
++int a;
++double b;
++};
++
++const auto [x2, y2] = f();
+diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp b/gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp
+new file mode 100644
+index 00000000000..6ab71abe140
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp
+@@ -0,0 +1,13 @@
++/* { dg-do compile } */
++/* { dg-options "-O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -fdump-ipa-struct_reorg-details -S" } */
++
++struct Foo { int foo; int a; };
++Foo& ignoreSetMutex = *(new Foo);
++
++struct Goo { int goo; int a; };
++
++int main ()
++{
++  Goo* a;
++  return a->goo = 90;
++}
+diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp b/gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp
+new file mode 100644
+index 00000000000..72b7db8a9ce
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp
+@@ -0,0 +1,17 @@
++/* { dg-do run } */
++/* { dg-options "-O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -fdump-ipa-struct_reorg-details" } */
++
++#include 
++
++struct testg {
++  int b;
++  float c;
++};
++
++testg *testgvar;
++int main ()
++{
++  testgvar = (testg*) calloc(10, sizeof(testg));
++  int b = testgvar->b;
++  return b;
++}
+diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp b/gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp
+new file mode 100644
+index 00000000000..771164a96e7
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp
+@@ -0,0 +1,24 @@
++/* { dg-do run } */
++/* { dg-options "-O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -fdump-ipa-struct_reorg-details" } */
++
++#include 
++
++struct testg {
++  int b;
++  float c;
++  double d;
++  double e;
++  double f;
++  double h;
++  double i;
++  double j;
++  int k;
++};
++
++testg *testgvar;
++int main ()
++{
++  testgvar = (testg*) calloc(10, sizeof(testg));
++  int b = testgvar->b;
++  return b;
++}
+diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg.exp b/gcc/testsuite/g++.dg/struct/struct-reorg.exp
+new file mode 100644
+index 00000000000..4307f69e2cd
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/struct/struct-reorg.exp
+@@ -0,0 +1,26 @@
++#   Copyright (C) 2021-2021 Free Software Foundation, Inc.
++
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3 of the License, or
++# (at your option) any later version.
++#
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++# GNU General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# .
++
++load_lib g++-dg.exp
++
++# Initialize `dg'.
++dg-init
++
++g++-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.cpp]] \
++	"" ""
++
++# All done.
++dg-finish
+diff --git a/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c b/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c
+new file mode 100644
+index 00000000000..811030bf167
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c
+@@ -0,0 +1,60 @@
++// { dg-do run }
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node* node_p;
++
++struct node {
++  unsigned long a;
++  unsigned long b;
++  node_p c;
++  node_p d;
++  long e;
++  long f;
++  long g;
++  long h;
++  long i;
++  long j;
++  long k;
++  long l;
++  int m;
++  int n;
++};
++
++const int MAX = 10000;
++node_p n;
++
++int
++main ()
++{
++  n = (node_p) calloc (MAX, sizeof (node_t));
++
++  for (int i = 0; i < MAX; i++)
++    {
++      n[i].a = 100;
++    }
++  for (int i = 0; i < MAX; i++)
++    {
++      if (n[i].a != 100)
++	{
++	  abort ();
++	}
++    }
++
++  for (int i = 0; i < MAX; i++)
++    {
++      n[i].l = n[i].a;
++    }
++  for (int i = 0; i < MAX; i++)
++    {
++      if (n[i].l != 100)
++	{
++	  abort ();
++	}
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform in Complete Structure Relayout is 1" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c
+new file mode 100644
+index 00000000000..63bb695ae14
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c
+@@ -0,0 +1,46 @@
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node* node_p;
++
++struct node {
++  unsigned long a;
++  unsigned long b;
++  node_p c;
++  node_p d;
++  long e;
++  long f;
++  long g;
++  long h;
++  long i;
++  long j;
++  long k;
++  long l;
++  int m;
++  int n;
++};
++
++const int MAX = 1;
++node_p n;
++
++int
++main ()
++{
++  n = (node_p) calloc (MAX, sizeof (node_t));
++
++  for (int i = 0; i < MAX; i++)
++    {
++      n[i].a = 100;
++    }
++  for (int i = 0; i < MAX; i++)
++    {
++      if (n[i].a != 100)
++	{
++	  abort ();
++	}
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c
+new file mode 100644
+index 00000000000..0f75d5d121c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c
+@@ -0,0 +1,59 @@
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node* node_p;
++
++struct node {
++  unsigned long a;
++  unsigned long b;
++  node_p c;
++  node_p d;
++  long e;
++  long f;
++  long g;
++  long h;
++  long i;
++  long j;
++  long k;
++  long l;
++  int m;
++  int n;
++};
++
++const int MAX = 10;
++node_p n;
++node_p m;
++
++int main()
++{
++  int i;
++  for (i = 0; i < MAX / 5; i++)
++    {
++      n = (node_p) calloc(MAX, sizeof(node_t));
++      if (i == 0)
++	{
++	  m = n;
++	}
++    }
++
++  for (int i = 0; i < MAX; i++)
++    {
++      n[i].a = 100;
++    }
++  for (int i = 0; i < MAX; i++)
++    {
++      m[i].a = 50;
++    }
++
++  for (int i = 0; i < MAX; i++)
++    {
++      if (n[i].a != 100)
++	{
++	  abort ();
++	}
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c
+new file mode 100644
+index 00000000000..3dcb674c6e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c
+@@ -0,0 +1,77 @@
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node* node_p;
++
++struct node {
++  unsigned long a;
++  unsigned long b;
++  node_p c;
++  node_p d;
++  long e;
++  long f;
++  long g;
++  long h;
++  long i;
++  long j;
++  long k;
++  long l;
++  int m;
++  int n;
++};
++
++const int MAX = 10;
++node_p n;
++node_p m;
++
++void test (int, int) __attribute__((noinline));
++
++void
++test (int num, int flag)
++{
++  if (num <= 0)
++    {
++      return;
++    }
++  n = (node_p) calloc (num, sizeof (node_t));
++  if (flag)
++    {
++      m = n;
++    }
++  return;
++}
++
++int
++main ()
++{
++  test (MAX, 1);
++  test (MAX, 0);
++
++  for (int i = 0; i < MAX; i++)
++    {
++      n[i].a = 100;
++    }
++  for (int i = 0; i < MAX; i++)
++    {
++      m[i].a = 50;
++    }
++
++  for (int i = 0; i < MAX; i++)
++    {
++      if (n[i].a != 100)
++	{
++	  abort ();
++	}
++    }
++  for (int i = 0; i < MAX; i++)
++    {
++      if (m[i].a != 50)
++	{
++	  abort ();
++	}
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/csr_cast_int.c b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c
+new file mode 100644
+index 00000000000..6907158c9b0
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c
+@@ -0,0 +1,52 @@
++// { dg-do run }
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node* node_p;
++
++struct node {
++  unsigned long a;
++  unsigned long b;
++  node_p c;
++  node_p d;
++  long e;
++  long f;
++  long g;
++  long h;
++  long i;
++  long j;
++  long k;
++  long l;
++  int m;
++  int n;
++};
++
++const int MAX = 100;
++node_p n;
++unsigned long y;
++
++int
++main ()
++{
++  n = (node_p) calloc (MAX, sizeof (node_t));
++
++  for (int i = 0; i < MAX; i++)
++    {
++      n[i].b = 50;
++    }
++
++  node_p x = &n[5];
++  y = (unsigned long) x;
++  y += 8;
++
++  if (*((unsigned long*) y) != 50)
++    {
++      abort ();
++    }
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes a cast from/to intergral type\"" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c
+new file mode 100644
+index 00000000000..9e5e05838e6
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c
+@@ -0,0 +1,48 @@
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node* node_p;
++
++struct node {
++  unsigned long a;
++  unsigned long b;
++  node_p c;
++  node_p d;
++  long e;
++  long f;
++  long g;
++  long h;
++  long i;
++  long j;
++  long k;
++  long l;
++  int m;
++  int n;
++};
++
++const int MAX = 10000;
++node_p n;
++node_t t;
++
++int
++main ()
++{
++  n = (node_p) calloc (MAX, sizeof (node_t));
++  t.a = 100;
++
++  for (int i = 0; i < MAX; i++)
++    {
++      n[i].a = t.a;
++    }
++  for (int i = 0; i < MAX; i++)
++    {
++      if (n[i].a != 100)
++	{
++	  abort ();
++	}
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a separate instance\"" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c b/gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c
+new file mode 100644
+index 00000000000..c5e4968d914
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c
+@@ -0,0 +1,53 @@
++// Structures without names should not be optimized
++/* { dg-do compile } */
++#include 
++#include 
++
++typedef struct
++{
++  int a;
++  float b;
++  double s1;
++  double s2;
++  double s3;
++  double s4;
++  double s5;
++  double s6;
++  double s7;
++  double s8;
++} str_t1;
++
++#define N 1000
++
++int num;
++
++int
++main ()
++{
++  int i, r;
++
++  r = rand ();
++  num = r > N ? N : r;
++  str_t1 *p1 = calloc (num, sizeof (str_t1));
++
++  if (p1 == NULL)
++    return 0;
++
++  for (i = 0; i < num; i++)
++    p1[i].a = 1;
++
++  for (i = 0; i < num; i++)
++    p1[i].b = 2;
++
++  for (i = 0; i < num; i++)
++    if (p1[i].a != 1)
++      abort ();
++
++  for (i = 0; i < num; i++)
++    if (fabsf (p1[i].b - 2) > 0.0001)
++      abort ();
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
+new file mode 100644
+index 00000000000..afa181e07e1
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
+@@ -0,0 +1,86 @@
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{    
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++  network_t* net_add;
++};
++
++
++const int MAX = 100;
++
++/* let it escape_array, "Type is used in an array [not handled yet]".  */
++network_t* net[2];
++arc_p stop_arcs = NULL;
++
++int
++main ()
++{
++  net[0] = (network_t*) calloc (1, sizeof(network_t));
++  net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++
++  net[0]->arcs->id = 100;
++
++  for (unsigned i = 0; i < 3; i++)
++    {        
++      net[0]->arcs->id = net[0]->arcs->id + 2;
++      stop_arcs->cost = net[0]->arcs->id / 2;
++      stop_arcs->net_add = net[0];
++      printf("stop_arcs->cost = %ld\n", stop_arcs->cost);
++      net[0]->arcs++;
++      stop_arcs++;
++    }
++
++  if( net[1] != 0 && stop_arcs != 0)
++    {
++      return -1;
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
+new file mode 100644
+index 00000000000..c87db2aba57
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
+@@ -0,0 +1,60 @@
++// verify newarc[cmp-1].flow
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++arc_p ap = NULL;
++
++int
++main ()
++{
++  ap = (arc_p) calloc(MAX, sizeof(arc_t));
++  printf("%d\n", ap[0].id);
++  for (int i = 1; i < MAX; i++)
++    {
++      ap[i-1].id = 500;
++    }
++  printf("%d\n", ap[0].id);
++  return 0; 
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
+new file mode 100644
+index 00000000000..d217f7bd80e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c
+@@ -0,0 +1,77 @@
++/* { dg-do compile} */
++
++#define NULL ((void*)0)
++typedef unsigned long size_t;
++typedef long intptr_t;
++typedef unsigned long uintptr_t;
++typedef long scalar_t__;
++typedef int bool;
++#define false 0
++#define true 1
++
++typedef struct TYPE_5__ TYPE_2__;
++typedef struct TYPE_4__ TYPE_1__;
++
++struct TYPE_4__
++{
++  int Pin;
++  int Pull;
++  int Mode;
++  int Speed;
++};
++
++struct TYPE_5__
++{
++  int MEMRMP;
++};
++typedef TYPE_1__ GPIO_InitTypeDef;
++
++int BT_RST_PIN;
++int BT_RST_PORT;
++int CONN_POS10_PIN;
++int CONN_POS10_PORT;
++int GPIO_HIGH (int, int);
++int GPIO_MODE_INPUT;
++int GPIO_MODE_OUTPUT_PP;
++int GPIO_NOPULL;
++int GPIO_PULLUP;
++int GPIO_SPEED_FREQ_LOW;
++int HAL_GPIO_Init (int, TYPE_1__ *);
++scalar_t__ IS_GPIO_RESET (int, int);
++TYPE_2__ *SYSCFG;
++int __HAL_RCC_GPIOB_CLK_ENABLE ();
++int __HAL_RCC_GPIOC_CLK_ENABLE ();
++
++__attribute__((used)) static void 
++LBF_DFU_If_Needed (void)
++{
++  GPIO_InitTypeDef GPIO_InitStruct;
++  __HAL_RCC_GPIOC_CLK_ENABLE ();
++  GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP;
++  GPIO_InitStruct.Pull = GPIO_NOPULL;
++  GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW;
++  GPIO_InitStruct.Pin = BT_RST_PIN;
++  HAL_GPIO_Init (BT_RST_PORT, &GPIO_InitStruct);
++
++  GPIO_HIGH (BT_RST_PORT, BT_RST_PIN);
++  __HAL_RCC_GPIOB_CLK_ENABLE ();
++  GPIO_InitStruct.Mode = GPIO_MODE_INPUT;
++  GPIO_InitStruct.Pull = GPIO_PULLUP;
++  GPIO_InitStruct.Pin = CONN_POS10_PIN;
++  HAL_GPIO_Init (CONN_POS10_PORT, &GPIO_InitStruct);
++
++  if (IS_GPIO_RESET (CONN_POS10_PORT, CONN_POS10_PIN))
++    {
++      SYSCFG->MEMRMP = 0x00000001;
++      asm (
++	"LDR     R0, =0x000000\n\t"
++	"LDR     SP, [R0, #0]\n\t"
++	  );
++      asm (
++	"LDR     R0, [R0, #0]\n\t"
++	"BX       R0\n\t"
++	  );
++    }
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
+new file mode 100644
+index 00000000000..e56bf467bf9
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c
+@@ -0,0 +1,77 @@
++/* { dg-do compile} */
++
++#define NULL ((void*)0)
++typedef unsigned long size_t;
++typedef long intptr_t;
++typedef unsigned long uintptr_t;
++typedef long scalar_t__;
++typedef int bool;
++#define false 0
++#define true 1
++
++typedef struct TYPE_2__ TYPE_1__;
++
++struct net_device
++{
++  struct claw_privbk* ml_priv;
++};
++struct clawctl
++{
++  int linkid;
++};
++struct claw_privbk
++{
++  int system_validate_comp;
++  TYPE_1__* p_env;
++  int ctl_bk;
++};
++typedef int __u8;
++struct TYPE_2__
++{
++  scalar_t__ packing;
++  int api_type;
++};
++
++int CLAW_DBF_TEXT (int, int, char*);
++int CONNECTION_REQUEST;
++int HOST_APPL_NAME;
++scalar_t__ PACKING_ASK;
++scalar_t__ PACK_SEND;
++int WS_APPL_NAME_IP_NAME;
++int WS_APPL_NAME_PACKED;
++int claw_send_control (struct net_device*, int, int, int, int, int, int);
++int setup;
++
++__attribute__((used)) static int
++claw_snd_conn_req (struct net_device *dev, __u8 link)
++{
++  int rc;
++  struct claw_privbk *privptr = dev->ml_priv;
++  struct clawctl *p_ctl;
++  CLAW_DBF_TEXT (2, setup, "snd_conn");
++  rc = 1;
++  p_ctl = (struct clawctl *)&privptr->ctl_bk;
++  p_ctl->linkid = link;
++  if (privptr->system_validate_comp == 0x00)
++    {
++      return rc;
++    }
++  if (privptr->p_env->packing == PACKING_ASK)
++    {
++      rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0,
++			WS_APPL_NAME_PACKED, WS_APPL_NAME_PACKED);
++    }
++  if (privptr->p_env->packing == PACK_SEND)
++    {
++      rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0,
++			WS_APPL_NAME_IP_NAME, WS_APPL_NAME_IP_NAME);
++    }
++  if (privptr->p_env->packing == 0)
++    {
++      rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0,
++			HOST_APPL_NAME, privptr->p_env->api_type);
++    }
++  return rc;
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
+new file mode 100644
+index 00000000000..c86c4bb3cd0
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
+@@ -0,0 +1,56 @@
++/* { dg-do compile} */
++
++#define NULL ((void*)0)
++typedef unsigned long size_t;
++typedef long intptr_t;
++typedef unsigned long uintptr_t;
++typedef long scalar_t__;
++typedef int bool;
++#define false 0
++#define true 1
++
++typedef struct TYPE_4__ TYPE_2__;
++typedef struct TYPE_3__ TYPE_1__;
++
++typedef int uint8_t;
++typedef int uint16_t;
++
++struct TYPE_4__
++{
++  size_t cpu_id;
++};
++
++struct TYPE_3__
++{
++  int cpuc_dtrace_flags;
++};
++
++TYPE_2__ *CPU;
++volatile int CPU_DTRACE_FAULT;
++TYPE_1__ *cpu_core;
++scalar_t__ dtrace_load8 (uintptr_t);
++
++__attribute__((used)) static int
++dtrace_bcmp (const void *s1, const void *s2, size_t len)
++{
++  volatile uint16_t *flags;
++  flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
++  if (s1 == s2)
++    return (0);
++  if (s1 == NULL || s2 == NULL)
++    return (1);
++  if (s1 != s2 && len != 0)
++    {
++      const uint8_t *ps1 = s1;
++      const uint8_t *ps2 = s2;
++      do
++	{
++	  if (dtrace_load8 ((uintptr_t)ps1++) != *ps2++)
++	    return (1);
++	}
++      while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
++    }
++  return (0);
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
+new file mode 100644
+index 00000000000..8484d29d256
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
+@@ -0,0 +1,162 @@
++/* { dg-do compile} */
++
++#define NULL ((void*)0)
++typedef unsigned long size_t;
++typedef long intptr_t;
++typedef unsigned long uintptr_t;
++typedef long scalar_t__;
++typedef int bool;
++#define false 0
++#define true 1
++
++struct mrb_context
++{
++  size_t stack;
++  size_t stbase;
++  size_t stend;
++  size_t eidx;
++  int *ci;
++  int *cibase;
++  int status;
++};
++
++struct RObject
++{
++  int dummy;
++};
++
++struct RHash
++{
++  int dummy;
++};
++
++struct RFiber
++{
++  struct mrb_context *cxt;
++};
++
++struct RClass
++{
++  int dummy;
++};
++
++struct RBasic
++{
++  int tt;
++};
++
++struct RArray
++{
++  int dummy;
++};
++
++typedef int mrb_state;
++typedef int mrb_gc;
++typedef int mrb_callinfo;
++size_t ARY_LEN (struct RArray *);
++size_t MRB_ENV_STACK_LEN (struct RBasic *);
++int MRB_FIBER_TERMINATED;
++
++#define MRB_TT_ARRAY 140
++#define MRB_TT_CLASS 139
++#define MRB_TT_DATA 138
++#define MRB_TT_ENV 137
++#define MRB_TT_EXCEPTION 136
++#define MRB_TT_FIBER 135
++#define MRB_TT_HASH 134
++#define MRB_TT_ICLASS 133
++#define MRB_TT_MODULE 132
++#define MRB_TT_OBJECT 131
++#define MRB_TT_PROC 130
++#define MRB_TT_RANGE 129
++#define MRB_TT_SCLASS 128
++
++size_t ci_nregs (int *);
++int gc_mark_children (int *, int *, struct RBasic *);
++size_t mrb_gc_mark_hash_size (int *, struct RHash *);
++size_t mrb_gc_mark_iv_size (int *, struct RObject *);
++size_t mrb_gc_mark_mt_size (int *, struct RClass *);
++
++__attribute__((used)) static size_t
++gc_gray_mark (mrb_state *mrb, mrb_gc *gc, struct RBasic *obj)
++{
++  size_t children = 0;
++  gc_mark_children (mrb, gc, obj);
++  switch (obj->tt)
++    {
++      case MRB_TT_ICLASS:
++	children++;
++	break;
++
++      case MRB_TT_CLASS:
++      case MRB_TT_SCLASS:
++      case MRB_TT_MODULE:
++	{
++	  struct RClass *c = (struct RClass *)obj;
++	  children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj);
++	  children += mrb_gc_mark_mt_size (mrb, c);
++	  children ++;
++	}
++	break;
++
++      case MRB_TT_OBJECT:
++      case MRB_TT_DATA:
++      case MRB_TT_EXCEPTION:
++	children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj);
++	break;
++
++      case MRB_TT_ENV:
++	children += MRB_ENV_STACK_LEN (obj);
++	break;
++
++      case MRB_TT_FIBER:
++	{
++	  struct mrb_context *c = ((struct RFiber *)obj)->cxt;
++	  size_t i;
++	  mrb_callinfo *ci;
++	  if (!c || c->status == MRB_FIBER_TERMINATED)
++	    break;
++
++	  i = c->stack - c->stbase;
++	  if (c->ci)
++	    {
++	      i += ci_nregs (c->ci);
++	    }
++	  if (c->stbase + i > c->stend)
++	    i = c->stend - c->stbase;
++
++	  children += i;
++	  children += c->eidx;
++	  if (c->cibase)
++	    {
++	      for (i = 0, ci = c->cibase; ci <= c->ci; i++, ci++)
++		;
++	    }
++	  children += i;
++	}
++	break;
++
++      case MRB_TT_ARRAY:
++	{
++	  struct RArray *a = (struct RArray *)obj;
++	  children += ARY_LEN (a);
++	}
++	break;
++
++      case MRB_TT_HASH:
++	children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj);
++	children += mrb_gc_mark_hash_size (mrb, (struct RHash *)obj);
++	break;
++
++      case MRB_TT_PROC:
++      case MRB_TT_RANGE:
++	children += 2;
++	break;
++      default:
++	break;
++    }
++
++  return children;
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
+new file mode 100644
+index 00000000000..300b2dac4db
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
+@@ -0,0 +1,126 @@
++/* { dg-do compile} */
++
++#define NULL ((void*)0)
++typedef unsigned long size_t;
++typedef long intptr_t;
++typedef unsigned long uintptr_t;
++typedef long scalar_t__;
++typedef int bool;
++#define false 0
++#define true 1
++
++typedef struct TYPE_6__ TYPE_3__;
++typedef struct TYPE_5__ TYPE_2__;
++typedef struct TYPE_4__ TYPE_1__;
++
++struct io_accel2_cmd
++{
++  int dummy;
++};
++
++struct hpsa_tmf_struct
++{
++  int it_nexus;
++};
++
++struct hpsa_scsi_dev_t
++{
++  int nphysical_disks;
++  int ioaccel_handle;
++  struct hpsa_scsi_dev_t **phys_disk;
++};
++
++struct ctlr_info
++{
++  TYPE_3__ *pdev;
++  struct io_accel2_cmd *ioaccel2_cmd_pool;
++};
++struct TYPE_4__
++{
++  int LunAddrBytes;
++};
++
++struct TYPE_5__
++{
++  TYPE_1__ LUN;
++};
++
++struct CommandList
++{
++  size_t cmdindex;
++  int cmd_type;
++  struct hpsa_scsi_dev_t *phys_disk;
++  TYPE_2__ Header;
++};
++
++struct TYPE_6__
++{
++  int dev;
++};
++
++int BUG ();
++#define CMD_IOACCEL1 132
++#define CMD_IOACCEL2 131
++#define CMD_IOCTL_PEND 130
++#define CMD_SCSI 129
++#define IOACCEL2_TMF 128
++int dev_err (int *, char *, int);
++scalar_t__ hpsa_is_cmd_idle (struct CommandList *);
++int le32_to_cpu (int);
++int test_memcmp (unsigned char *, int *, int);
++
++__attribute__((used)) static bool
++hpsa_cmd_dev_match (struct ctlr_info *h, struct CommandList *c,
++		    struct hpsa_scsi_dev_t *dev, unsigned char *scsi3addr)
++{
++  int i;
++  bool match = false;
++  struct io_accel2_cmd * c2 = &h->ioaccel2_cmd_pool[c->cmdindex];
++  struct hpsa_tmf_struct *ac = (struct hpsa_tmf_struct *)c2;
++
++  if (hpsa_is_cmd_idle (c))
++    return false;
++
++  switch (c->cmd_type)
++    {
++      case CMD_SCSI:
++      case CMD_IOCTL_PEND:
++	match = !test_memcmp (scsi3addr, &c->Header.LUN.LunAddrBytes,
++			      sizeof (c->Header.LUN.LunAddrBytes));
++	break;
++
++      case CMD_IOACCEL1:
++      case CMD_IOACCEL2:
++	if (c->phys_disk == dev)
++	  {
++	    match = true;
++	  }
++	else
++	  {
++	    for (i = 0; i < dev->nphysical_disks && !match; i++)
++	      {
++		match = dev->phys_disk[i] == c->phys_disk;
++	      }
++	  }
++	break;
++
++      case IOACCEL2_TMF:
++	for (i = 0; i < dev->nphysical_disks && !match; i++)
++	  {
++	    match = dev->phys_disk[i]->ioaccel_handle == 
++		    le32_to_cpu (ac->it_nexus);
++	  }
++	break;
++
++      case 0:
++	match = false;
++	break;
++      default:
++	dev_err (&h->pdev->dev, "unexpected cmd_type: %d\n", c->cmd_type);
++	BUG ();
++    }
++
++  return match;
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
+new file mode 100644
+index 00000000000..9397b98eaef
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c
+@@ -0,0 +1,82 @@
++/* { dg-do compile} */
++
++#define NULL ((void*)0)
++typedef unsigned long size_t;
++typedef long intptr_t;
++typedef unsigned long uintptr_t;
++typedef long scalar_t__;
++typedef int bool;
++#define false 0
++#define true 1
++
++typedef struct TYPE_4__ TYPE_2__;
++typedef struct TYPE_3__ TYPE_1__;
++typedef int u32;
++
++struct mv_udc
++{
++  TYPE_2__ *op_regs;
++  TYPE_1__ *ep_dqh;
++  struct mv_ep *eps;
++};
++
++struct mv_ep
++{
++  TYPE_1__ *dqh;
++  struct mv_udc *udc;
++};
++
++struct TYPE_4__
++{
++  int *epctrlx;
++};
++
++struct TYPE_3__
++{
++  int max_packet_length;
++  int next_dtd_ptr;
++};
++
++int EP0_MAX_PKT_SIZE;
++int EPCTRL_RX_ENABLE;
++int EPCTRL_RX_EP_TYPE_SHIFT;
++int EPCTRL_TX_ENABLE;
++int EPCTRL_TX_EP_TYPE_SHIFT;
++int EP_QUEUE_HEAD_IOS;
++int EP_QUEUE_HEAD_MAX_PKT_LEN_POS;
++int EP_QUEUE_HEAD_NEXT_TERMINATE;
++int USB_ENDPOINT_XFER_CONTROL;
++int readl (int *);
++int writel (int, int *);
++
++__attribute__((used)) static void
++ep0_reset (struct mv_udc *udc)
++{
++  struct mv_ep *ep;
++  u32 epctrlx;
++  int i = 0;
++  for (i = 0; i < 2; i++)
++    {
++      ep = &udc->eps[i];
++      ep->udc = udc;
++      ep->dqh = &udc->ep_dqh[i];
++      ep->dqh->max_packet_length =
++	(EP0_MAX_PKT_SIZE << EP_QUEUE_HEAD_MAX_PKT_LEN_POS)
++	| EP_QUEUE_HEAD_IOS;
++      ep->dqh->next_dtd_ptr = EP_QUEUE_HEAD_NEXT_TERMINATE;
++      epctrlx = readl (&udc->op_regs->epctrlx[0]);
++      if (i)
++	{
++	  epctrlx |= EPCTRL_TX_ENABLE
++		  | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_TX_EP_TYPE_SHIFT);
++	}
++      else
++	{
++	  epctrlx |= EPCTRL_RX_ENABLE
++		  | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_RX_EP_TYPE_SHIFT);
++	}
++      writel (epctrlx, &udc->op_regs->epctrlx[0]);
++    }
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
+new file mode 100644
+index 00000000000..0ae75e13e8f
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
+@@ -0,0 +1,58 @@
++/* { dg-do compile} */
++
++#define NULL ((void*)0)
++typedef unsigned long size_t;
++typedef long intptr_t;
++typedef unsigned long uintptr_t;
++typedef long scalar_t__;
++typedef int bool;
++#define false 0
++#define true 1
++
++struct tcpcb
++{
++  int t_state;
++};
++
++struct socket
++{
++  int dummy;
++};
++
++struct proc
++{
++  int dummy;
++};
++
++struct inpcb
++{
++  scalar_t__ inp_lport;
++};
++
++int COMMON_END (int);
++int COMMON_START ();
++int PRU_LISTEN;
++int TCPS_LISTEN;
++int in_pcbbind (struct inpcb *, int *, struct proc *);
++struct inpcb* sotoinpcb (struct socket *);
++
++__attribute__((used)) static void
++tcp_usr_listen (struct socket *so, struct proc *p)
++{
++  int error = 0;
++  struct inpcb *inp = sotoinpcb (so);
++  struct tcpcb *tp;
++
++  COMMON_START ();
++  if (inp->inp_lport == 0)
++  {
++    error = in_pcbbind (inp, NULL, p);
++  }
++  if (error == 0)
++  {
++    tp->t_state = TCPS_LISTEN;
++  }
++  COMMON_END (PRU_LISTEN);
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
+new file mode 100644
+index 00000000000..512fb37a7f4
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
+@@ -0,0 +1,61 @@
++/* { dg-do compile} */
++
++#define NULL ((void*)0)
++typedef unsigned long size_t;
++typedef long intptr_t;
++typedef unsigned long uintptr_t;
++typedef long scalar_t__;
++typedef int bool;
++#define false 0
++#define true 1
++
++typedef struct TYPE_4__ TYPE_2__;
++typedef struct TYPE_3__ TYPE_1__;
++
++struct TYPE_4__
++{
++  size_t modCount;
++  TYPE_1__ *modList;
++};
++
++struct TYPE_3__
++{
++  void *modDescr;
++  void *modName;
++};
++
++size_t MAX_MODS;
++void *String_Alloc (char *);
++int test_strlen (char *);
++int trap_FD_GetFileList (char *, char *, char *, int);
++TYPE_2__ uiInfo;
++
++__attribute__((used)) static void
++UI_LoadMods ()
++{
++  int numdirs;
++  char dirlist[2048];
++  char *dirptr;
++  char *descptr;
++  int i;
++  int dirlen;
++
++  uiInfo.modCount = 0;
++  numdirs = trap_FD_GetFileList ("$modelist", "", dirlist, sizeof (dirlist));
++  dirptr = dirlist;
++  for (i = 0; i < numdirs; i++)
++    {
++      dirlen = test_strlen (dirptr) + 1;
++      descptr = dirptr + dirlen;
++      uiInfo.modList[uiInfo.modCount].modName = String_Alloc (dirptr);
++      uiInfo.modList[uiInfo.modCount].modDescr = String_Alloc (descptr);
++      dirptr += dirlen + test_strlen (descptr) + 1;
++      uiInfo.modCount++;
++      if (uiInfo.modCount >= MAX_MODS)
++        {
++	  break;
++        }
++    }
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
+new file mode 100644
+index 00000000000..0dea5517cd8
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
+@@ -0,0 +1,58 @@
++/* Supports the MEM_REF offset.
++   _1 = MEM[(struct arc *)ap_4 + 72B].flow;
++   Old rewrite:_1 = ap.reorder.0_8->flow;
++   New rewrite:_1 = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow.  */
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main ()
++{
++  const int MAX = 100;
++  /* A similar scenario can be reproduced only by using local variables.  */
++  arc_p ap = NULL;
++  ap = (arc_p) calloc(MAX, sizeof(arc_t));
++  printf("%d\n", ap[1].flow);
++  return 0; 
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
+new file mode 100644
+index 00000000000..00bd911c1c0
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
+@@ -0,0 +1,30 @@
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct T_HASH_ENTRY
++{ 
++  unsigned int hash;
++  unsigned int klen;
++  char *key;
++} iHashEntry;
++
++typedef struct T_HASH
++{
++  unsigned int size;
++  unsigned int fill;
++  unsigned int keys;
++
++  iHashEntry **array;
++} uHash;
++
++uHash *retval;
++
++int
++main() {
++  retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size);
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
+new file mode 100644
+index 00000000000..0cfa6554e8a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
+@@ -0,0 +1,71 @@
++// support POINTER_DIFF_EXPR & NOP_EXPR to avoid 
++// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt"
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{    
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main ()
++{
++  arc_t *old_arcs;
++  node_t *node;
++  node_t *stop;
++  size_t off;
++  network_t* net;
++
++  for( ; node->number < stop->number; node++ )
++    {
++      off = node->basic_arc - old_arcs;
++      node->basic_arc = (arc_t *)(net->arcs + off);
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
+new file mode 100644
+index 00000000000..4a70692444a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
+@@ -0,0 +1,55 @@
++// support NEGATE_EXPR rewriting
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main ()
++{
++  int64_t susp = 0;
++  const int MAX = 100;
++  arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t));
++  ap -= susp;
++  printf("%d\n", ap[1].flow);
++  return 0; 
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
+new file mode 100644
+index 00000000000..b91efe10f13
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
+@@ -0,0 +1,55 @@
++// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]";
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++arc_t **ap = NULL;
++
++int
++main ()
++{
++  ap = (arc_t**) malloc(MAX * sizeof(arc_t*));
++  (*ap)[0].id = 300;
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/pc_cast_int.c b/gcc/testsuite/gcc.dg/struct/pc_cast_int.c
+new file mode 100644
+index 00000000000..6f67fc556af
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/pc_cast_int.c
+@@ -0,0 +1,91 @@
++// Escape cast int for pointer compression
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++  network_t* net_add;
++};
++
++
++const int MAX = 100;
++network_t* net;
++node_p node;
++
++int
++main ()
++{
++  net = (network_t*) calloc (1, sizeof(network_t));
++  net->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  net->sorted_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  net->nodes = (node_p) calloc (MAX, sizeof (node_t));
++  net->arcs->id = 100;
++
++  node = net->nodes;
++  node_p n1 = (node_p) 0x123456;
++
++  for (unsigned i = 0; i < MAX; i++)
++    {
++      node->pred = n1;
++      node = node + 1;
++    }
++
++  node = net->nodes;
++
++  for (unsigned i = 0; i < MAX; i++)
++    {
++      if (node->pred != n1)
++	{
++	  abort ();
++	}
++      node = node + 1;
++    }
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform in pointer compression" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c b/gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c
+new file mode 100644
+index 00000000000..d0b8d1afa48
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c
+@@ -0,0 +1,90 @@
++// Support basic pointer compression and decompression
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++  network_t* net_add;
++};
++
++
++const int MAX = 100;
++network_t* net;
++node_p node;
++
++int
++main ()
++{
++  net = (network_t*) calloc (1, sizeof(network_t));
++  net->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  net->sorted_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  net->nodes = (node_p) calloc (MAX, sizeof (node_t));
++  net->arcs->id = 100;
++
++  node = net->nodes;
++
++  for (unsigned i = 0; i < MAX; i++)
++    {
++      node->pred = node;
++      node = node + 1;
++    }
++
++  node = net->nodes;
++
++  for (unsigned i = 0; i < MAX; i++)
++    {
++      if (node->pred != node)
++	{
++	  abort ();
++	}
++      node = node + 1;
++    }
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform in pointer compression is 1" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/pc_ptr2void.c b/gcc/testsuite/gcc.dg/struct/pc_ptr2void.c
+new file mode 100644
+index 00000000000..5022c1967bc
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/pc_ptr2void.c
+@@ -0,0 +1,87 @@
++// Partially support escape_cast_void for pointer compression.
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{    
++  arc_p arcs, sorted_arcs;
++  int x;
++  node_p nodes, stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++network_t* net = NULL;
++int cnt = 0;
++
++__attribute__((noinline)) int
++primal_feasible (network_t *net)
++{
++  void* stop;
++  node_t *node;
++
++  node = net->nodes;
++  stop = (void *)net->stop_nodes;
++  for( node++; node < (node_t *)stop; node++ )
++    {
++      net->x = 1;
++      printf( "PRIMAL NETWORK SIMPLEX: ");
++    }
++  return 0;
++}
++
++int
++main ()
++{
++  net = (network_t*) calloc (1, 20);
++  net->nodes = calloc (MAX, sizeof (node_t));
++  net->stop_nodes = net->nodes + MAX - 1;
++  cnt = primal_feasible( net );
++
++  net = (network_t*) calloc (1, 20);
++  if( !(net->arcs) )
++    {
++      return -1;
++    }
++  return cnt;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform in pointer compression is 1" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c b/gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c
+new file mode 100644
+index 00000000000..98943c9b813
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c
+@@ -0,0 +1,112 @@
++// Check simplify rewrite chance for pointer compression and decompression
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++  network_t* net_add;
++};
++
++
++const int MAX = 100;
++network_t* net;
++node_p node;
++arc_p arc;
++
++int
++main ()
++{
++  net = (network_t*) calloc (1, sizeof(network_t));
++  net->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  net->sorted_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  net->nodes = (node_p) calloc (MAX, sizeof (node_t));
++  net->arcs->id = 100;
++
++  node = net->nodes;
++  arc = net->arcs;
++
++  for (unsigned i = 0; i < MAX; i++)
++    {
++      arc->head = node;
++      arc->head->child = node;
++      node->potential = i + 1;
++      arc->cost = arc->head->potential;
++      arc->tail = node->sibling;
++      if (i % 2)
++	node->pred = net->nodes + i;
++      else
++	node->pred = NULL;
++
++      if (node->pred && node->pred->child != NULL)
++	node->number = 0;
++      else
++	node->number = 1;
++
++      node = node + 1;
++      arc = arc + 1;
++    }
++
++  node = net->nodes;
++  arc = net->arcs;
++
++  for (unsigned i = 0; i < MAX; i++)
++    {
++      node_p t = i % 2 ? node : NULL;
++      int tt = i % 2 ? 0 : 1;
++      if (arc->head->pred != t || arc->cost == 0
++	  || arc->tail != node->sibling || node->number != tt)
++	{
++	  abort ();
++	}
++      arc = arc + 1;
++      node = node + 1;
++    }
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform in pointer compression is 1" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c b/gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c
+new file mode 100644
+index 00000000000..a0e191267b7
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c
+@@ -0,0 +1,53 @@
++// Structures without names should not be optimized
++/* { dg-do compile } */
++#include <stdlib.h>
++#include <math.h>
++
++typedef struct
++{
++  int a;
++  float b;
++  double s1;
++  double s2;
++  double s3;
++  double s4;
++  double s5;
++  double s6;
++  double s7;
++  double s8;
++} str_t1;
++
++#define N 1000
++
++int num;
++
++int
++main ()
++{
++  int i, r;
++
++  r = rand ();
++  num = r > N ? N : r;
++  str_t1 *p1 = calloc (num, sizeof (str_t1));
++
++  if (p1 == NULL)
++    return 0;
++
++  for (i = 0; i < num; i++)
++    p1[i].a = 1;
++
++  for (i = 0; i < num; i++)
++    p1[i].b = 2;
++
++  for (i = 0; i < num; i++)
++    if (p1[i].a != 1)
++      abort ();
++
++  for (i = 0; i < num; i++)
++    if (fabsf (p1[i].b - 2) > 0.0001)
++      abort ();
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform in pointer compression" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c
+new file mode 100644
+index 00000000000..1b6a462e271
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c
+@@ -0,0 +1,75 @@
++// escape_instance_field, "Type escapes via a field of instance".
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++typedef struct network
++{
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++  node_t node;
++} network_t;
++
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++  network_t* net_add;
++  node_t node;
++};
++
++
++const int MAX = 100;
++
++/* let it escape_array, "Type is used in an array [not handled yet]".  */
++network_t* net[2];
++
++int
++main ()
++{
++  net[0] = (network_t*) calloc (1, sizeof(network_t));
++  net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++
++  /* Contains an escape type and has structure instance field.  */
++  net[0]->arcs->node = net[0]->node;
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c
+new file mode 100644
+index 00000000000..346c7126446
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c
+@@ -0,0 +1,94 @@
++// Verify in escape_dependent_type_escapes,
++// the multi-layer dereference is rewritten correctly, and the memory access
++// is correct.
++
++// release
++// escape_dependent_type_escapes,
++// "Type uses a type which escapes or is used by a type which escapes"
++// avoid escape_cast_another_ptr, "Type escapes a cast to a different pointer"
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{    
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++  network_t* net_add;
++};
++
++
++const int MAX = 100;
++
++/* let it escape_array, "Type is used in an array [not handled yet]".  */
++network_t* net[2];
++arc_p stop_arcs = NULL;
++
++int
++main ()
++{
++  net[0] = (network_t*) calloc (1, sizeof(network_t));
++  net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++
++  net[0]->arcs->id = 100;
++
++  for (unsigned i = 0; i < 3; i++)
++    {        
++      net[0]->arcs->id = net[0]->arcs->id + 2;
++      stop_arcs->cost = net[0]->arcs->id / 2;
++      stop_arcs->net_add = net[0];
++      printf("stop_arcs->cost = %ld\n", stop_arcs->cost);
++      net[0]->arcs++;
++      stop_arcs++;
++    }
++
++  if( net[1] != 0 && stop_arcs != 0)
++    {
++      return -1;
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c
+new file mode 100644
+index 00000000000..7d7641f011d
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c
+@@ -0,0 +1,82 @@
++// bugfix: 
++// Common members do not need to reconstruct. 
++// Otherwise, eg:int* -> int** and void* -> void**.
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <stdint.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t* cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t** org_cost;
++};
++
++struct a
++{
++  int t;
++  int t1;
++};
++
++__attribute__((noinline)) int
++f(int i, int j)
++{
++  struct a *t = NULL;
++  struct a t1 = {i, j};
++  t = &t1;
++  auto int g(void) __attribute__((noinline));
++  int g(void)
++    {
++      return t->t + t->t1;
++    }
++  return g();
++}
++
++arc_t **ap = NULL;
++const int MAX = 100;
++
++int
++main()
++{
++  if (f(1, 2) != 3)
++    {
++      abort ();
++    }
++  ap = (arc_t**) malloc(MAX * sizeof(arc_t*));
++  (*ap)[0].id = 300;
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c
+new file mode 100644
+index 00000000000..63fb3f8284c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c
+@@ -0,0 +1,56 @@
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++#define MallocOrDie(x)     sre_malloc((x))
++
++struct gki_elem {
++  char            *key;
++  int              idx;
++  struct gki_elem *nxt;
++};
++
++typedef struct {
++  struct gki_elem **table;
++
++  int primelevel;
++  int nhash;
++  int nkeys;
++} GKI;
++
++void
++Die(char *format, ...)
++{
++  exit(1);
++}
++
++void *
++sre_malloc(size_t size)
++{
++  void *ptr;
++
++  if ((ptr = malloc (size)) == NULL)
++    {
++      Die("malloc of %ld bytes failed", size);
++    }
++  return ptr;
++}
++
++
++__attribute__((noinline)) int
++GKIStoreKey(GKI *hash, char *key)
++{
++  hash->table[0] = MallocOrDie(sizeof(struct gki_elem));
++}
++
++int
++main ()
++{
++  GKI *hash;
++  char *key;
++  GKIStoreKey(hash, key);
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c
+new file mode 100644
+index 00000000000..8c431e15ffd
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c
+@@ -0,0 +1,60 @@
++// verify newarc[cmp-1].flow
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++arc_p ap = NULL;
++
++int
++main ()
++{
++  ap = (arc_p) calloc(MAX, sizeof(arc_t));
++  printf("%d\n", ap[0].id);
++  for (int i = 1; i < MAX; i++)
++    {
++      ap[i-1].id = 500;
++    }
++  printf("%d\n", ap[0].id);
++  return 0; 
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c
+new file mode 100644
+index 00000000000..efc95a4cd56
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c
+@@ -0,0 +1,83 @@
++// release type is used by a type which escapes.
++// avoid escape_cast_another_ptr, "Type escapes a cast to a different pointer"
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{    
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++network_t* net = NULL;
++arc_p stop_arcs = NULL;
++int cnt = 0;
++
++int
++main ()
++{
++  net = (network_t*) calloc (1, 20);
++  net->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  if(!(net->arcs))
++    {
++      return -1;
++    }
++
++  for( int i = 0; i < MAX; i++, net->arcs = stop_arcs)
++    {
++      cnt++;
++    }
++
++  net = (network_t*) calloc (1, 20); 
++  if( !(net->arcs) )
++    {
++      return -1;
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c
+new file mode 100644
+index 00000000000..75fc10575d5
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c
+@@ -0,0 +1,72 @@
++// release escape_cast_another_ptr, "Type escapes a cast to a different pointer"
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++typedef int cmp_t(const void *, const void *);
++
++__attribute__((noinline)) void
++spec_qsort(void *a, cmp_t *cmp)
++{
++  char *pb = NULL;
++  while (cmp(pb, a))
++    {
++      pb += 1;
++    }
++}
++
++static int arc_compare( arc_t **a1, int a2 )
++{
++  if( (*a1)->id < a2 )
++    {
++      return -1;
++    }
++  return 1;
++}
++
++int
++main()
++{
++  spec_qsort(NULL, (int (*)(const void *, const void *))arc_compare);
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c
+new file mode 100644
+index 00000000000..9fb06877bcb
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c
+@@ -0,0 +1,58 @@
++/* Supports the MEM_REF offset.
++   _1 = MEM[(struct arc *)ap_4 + 72B].flow;
++   Old rewrite:_1 = ap.reorder.0_8->flow;
++   New rewrite:_1 = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow.  */
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main ()
++{
++  const int MAX = 100;
++  /* A similar scenario can be reproduced only by using local variables.  */
++  arc_p ap = NULL;
++  ap = (arc_p) calloc(MAX, sizeof(arc_t));
++  printf("%d\n", ap[1].flow);
++  return 0; 
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
+new file mode 100644
+index 00000000000..e8eb0eaa09a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
+@@ -0,0 +1,30 @@
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct T_HASH_ENTRY
++{ 
++  unsigned int hash;
++  unsigned int klen;
++  char *key;
++} iHashEntry;
++
++typedef struct T_HASH
++{
++  unsigned int size;
++  unsigned int fill;
++  unsigned int keys;
++
++  iHashEntry **array;
++} uHash;
++
++uHash *retval;
++
++int
++main() {
++  retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size);
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
+new file mode 100644
+index 00000000000..bd535afd08d
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
+@@ -0,0 +1,109 @@
++// For testing:
++/*
++Compile options: gcc -O3 -g 
++-flto -flto-partition=one -fipa-reorder-fields -fipa-struct-reorg 
++-v -save-temps -fdump-ipa-all-details test.c -o  test
++
++in COMPLETE_STRUCT_RELAYOUT pass:
++N type: struct node.reorder.0 new = "Type escapes a cast to a different pointer"
++copy$head_26 = test_arc.reorder.0_49->head;
++
++type : struct arc.reorder.0(1599) { 
++fields = {
++field (5382) {type = cost_t}
++field (5383) {type = struct node.reorder.0 *} // but node has escaped.
++field (5384) {type = struct node.reorder.0 *}
++field (5386) {type = struct arc.reorder.0 *}
++field (5387) {type = struct arc.reorder.0 *}
++field (5388) {type = flow_t}
++field (5389) {type = cost_t}
++field (5381) {type = int}
++field (5385) {type = short int}
++}
++
++// The types of the two types are inconsistent after the rewriting.
++newarc_2(D)->tail = tail_1(D);
++vs
++struct_reorder.0_61(D)->tail = tail_1(D); 
++*/
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{    
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++__attribute__((noinline)) void
++replace_weaker_arc( arc_t *newarc, node_t *tail, node_t *head)
++{
++    printf("test");
++}
++
++__attribute__((noinline)) int64_t
++switch_arcs(arc_t** deleted_arcs, arc_t* arcnew)
++{
++  int64_t count = 0;
++  arc_t *test_arc, copy;
++
++  if (!test_arc->ident)
++    {
++      copy = *test_arc;
++      count++;
++      *test_arc = arcnew[0];
++      replace_weaker_arc(arcnew, NULL, NULL);
++    }
++  return count;
++}
++
++int
++main ()
++{
++  switch_arcs(NULL, NULL);
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
+new file mode 100644
+index 00000000000..11393a197a3
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
+@@ -0,0 +1,87 @@
++// escape_cast_void, "Type escapes a cast to/from void*"
++// stop_393 = net.stop_nodes; void *stop;
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{    
++  arc_p arcs, sorted_arcs;
++  int x;
++  node_p nodes, stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++network_t* net = NULL;
++int cnt = 0;
++
++__attribute__((noinline)) int
++primal_feasible (network_t *net)
++{
++  void* stop;
++  node_t *node;
++
++  node = net->nodes;
++  stop = (void *)net->stop_nodes;
++  for( node++; node < (node_t *)stop; node++ )
++    {
++      printf( "PRIMAL NETWORK SIMPLEX: " );
++    }
++  return 0;
++}
++
++int
++main ()
++{
++  net = (network_t*) calloc (1, 20);
++  net->nodes = calloc (MAX, sizeof (node_t));
++  net->stop_nodes = calloc (MAX, sizeof (node_t));
++  cnt = primal_feasible( net ); 
++    
++  net = (network_t*) calloc (1, 20); 
++  if( !(net->arcs) )
++    {
++      return -1;
++    }
++  return cnt;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c
+new file mode 100644
+index 00000000000..d601fae64d4
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c
+@@ -0,0 +1,71 @@
++// support POINTER_DIFF_EXPR & NOP_EXPR to avoid 
++// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt"
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{    
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main ()
++{
++  arc_t *old_arcs;
++  node_t *node;
++  node_t *stop;
++  size_t off;
++  network_t* net;
++
++  for( ; node->number < stop->number; node++ )
++    {
++      off = node->basic_arc - old_arcs;
++      node->basic_arc = (arc_t *)(net->arcs + off);
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c
+new file mode 100644
+index 00000000000..4d5f25aa164
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c
+@@ -0,0 +1,55 @@
++// support NEGATE_EXPR rewriting
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main ()
++{
++  int64_t susp = 0;
++  const int MAX = 100;
++  arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t));
++  ap -= susp;
++  printf("%d\n", ap[1].flow);
++  return 0; 
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
+new file mode 100644
+index 00000000000..b3891fde928
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
+@@ -0,0 +1,34 @@
++/* { dg-do compile } */
++
++#include 
++#include 
++
++struct node
++{
++    struct node *left, *right;
++    double a, b, c, d, e, f;
++}
++*a;
++int b, c;
++void
++CreateNode (struct node **p1)
++{
++    *p1 = calloc (10, sizeof (struct node));
++}
++
++int
++main ()
++{
++    a->left = 0;
++    struct node *t = a;
++    CreateNode (&t->right);
++
++    struct node p = *a;
++    b = 1;
++    if (p.left)
++        b = 0;
++    if (b)
++        printf ("   Tree.\n");
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c
+new file mode 100644
+index 00000000000..4df79e4f0e8
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c
+@@ -0,0 +1,55 @@
++// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]";
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++arc_t **ap = NULL;
++
++int
++main ()
++{
++  ap = (arc_t**) malloc(MAX * sizeof(arc_t*));
++  (*ap)[0].id = 300;
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c
+new file mode 100644
+index 00000000000..49d2106d1dc
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c
+@@ -0,0 +1,58 @@
++// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]"
++
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++arc_p **ap;
++
++
++int
++main ()
++{
++  ap = (arc_p**) calloc(MAX, sizeof(arc_p*));
++  (**ap)[0].id = 500;
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c
+new file mode 100644
+index 00000000000..f71c7894f36
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c
+@@ -0,0 +1,57 @@
++// release escape_rescusive_type, "Recusive type"
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++arc_p ap = NULL;
++
++int
++main ()
++{
++  ap = (arc_p) calloc (MAX, sizeof (arc_t));
++  ap[0].id = 100;
++  ap[0].head = (node_p) calloc (MAX, sizeof (node_t));
++    
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c
+new file mode 100644
+index 00000000000..721cee2c6ae
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c
+@@ -0,0 +1,65 @@
++// support more gimple assign rhs code
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++__attribute__((noinline)) int
++compare(arc_p p1, arc_p p2)
++{
++  return p1 < p2;
++}
++
++int n = 0;
++int m = 0;
++
++int
++main ()
++{
++  scanf ("%d %d", &n, &m);
++  arc_p p = calloc (10, sizeof (struct arc));
++  if (compare (&p[n], &p[m]))
++    {
++      printf ("ss!");
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c
+new file mode 100644
+index 00000000000..3871d3d99f1
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c
+@@ -0,0 +1,72 @@
++// rewrite_cond bugfix;
++/*
++if (iterator_600 != 0B)
++old rewrite: _1369 = iterator.reorder.0_1249 != 0B; if (_1369 != 1)
++new rewrite: if (iterator.reorder.0_1249 != 0B)
++*/
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct list_elem
++{
++  arc_t* arc;
++  struct list_elem* next;
++}list_elem;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int i = 0;
++
++int
++main ()
++{
++  register list_elem *first_list_elem;
++  register list_elem* iterator;
++  iterator = first_list_elem->next;
++  while (iterator)
++    {
++      iterator = iterator->next;
++      i++;
++    }
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c
+new file mode 100644
+index 00000000000..5ad206433e0
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c
+@@ -0,0 +1,58 @@
++// support if (_150 >= _154)
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main()
++{
++  arc_p **ap = (arc_p**) malloc(1 * sizeof(arc_p*));
++  arc_p **arcs_pointer_sorted = (arc_p**) malloc(1 * sizeof(arc_p*));
++  arcs_pointer_sorted[0] = (arc_p*) calloc (1, sizeof(arc_p));
++
++  if (arcs_pointer_sorted >= ap)
++    {
++      return -1;
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c
+new file mode 100644
+index 00000000000..a002f98892e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c
+@@ -0,0 +1,81 @@
++/* 
++Exclude the rewriting error caused by 
++first_list_elem = (list_elem *)NULL;
++rewriting PHI:first_list_elem_700 = PHI <0B(144), 0B(146)>
++into:
++first_list_elem.reorder.0_55 = PHI <(144), (146)>
++*/
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct list_elem
++{
++  arc_t* arc;
++  struct list_elem* next;
++}list_elem;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout, firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail, head;
++  short ident;
++  arc_p nextout, nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++
++list_elem* new_list_elem;
++list_elem* first_list_elem;
++
++int
++main ()
++{
++  int i = 0;
++  list_elem *first_list_elem;
++  list_elem *new_list_elem;
++  arc_t *arcout;
++  for( ; i < MAX && arcout->ident == -1; i++);
++
++  first_list_elem = (list_elem *)NULL;
++  for( ; i < MAX; i++)
++    {
++      new_list_elem = (list_elem*) calloc(1, sizeof(list_elem));
++      new_list_elem->next = first_list_elem;
++      first_list_elem = new_list_elem;
++    }
++  if (first_list_elem != 0)
++    {
++      return -1;
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_shwi.c b/gcc/testsuite/gcc.dg/struct/rf_shwi.c
+new file mode 100644
+index 00000000000..2bb326ff200
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_shwi.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++
++struct foo {int dx; long dy; int dz; };
++struct goo {long offset; struct foo* pfoo; };
++
++void* func (long); 
++
++__attribute__((used)) static void
++test(struct goo* g)
++{
++  void* pvoid;
++  struct foo* f;
++
++  for (f = g->pfoo; f->dx; f++)
++    {
++      if (f->dy)
++	break;
++    }
++  f--;
++
++  pvoid = func(f->dz + g->offset);
++  return;
++}
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c
+new file mode 100644
+index 00000000000..f77a062bda6
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c
+@@ -0,0 +1,92 @@
++// release escape_visible_function, "Type escapes via expternally visible function call"
++// compile options: gcc -O3 -fno-inline -fwhole-program 
++// -flto-partition=one -fipa-struct-reorg arc_compare.c -fdump-ipa-all -S -v
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++__attribute__((noinline)) static int
++arc_compare( arc_t **a1, arc_t **a2 )
++{
++  if( (*a1)->flow > (*a2)->flow )
++    {
++      return 1;
++    }
++  if( (*a1)->flow < (*a2)->flow )
++    {
++      return -1;
++    }
++  if( (*a1)->id < (*a2)->id )
++    {
++      return -1;
++    }
++
++    return 1;
++}
++
++__attribute__((noinline)) void
++spec_qsort(void *array, int nitems, int size,
++	   int (*cmp)(const void*,const void*))
++{
++  for (int i = 0; i < nitems - 1; i++)
++  {
++    if (cmp (array , array))
++      {
++        printf ("CMP 1\n");
++      }
++    else
++      {
++        printf ("CMP 2\n");
++      }
++  }
++}
++
++typedef int cmp_t(const void *, const void *);
++
++int
++main ()
++{
++  void *p = calloc (100, sizeof (arc_t **));
++  spec_qsort (p, 100, 0, (int (*)(const void *, const void *))arc_compare);
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c
+new file mode 100644
+index 00000000000..cba6225a5e9
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c
+@@ -0,0 +1,54 @@
++// Add a safe func mechanism.
++// avoid escape_unkown_field, "Type escapes via an unkown field accessed"
++// avoid escape_cast_void, "Type escapes a cast to/from void*" eg: GIMPLE_NOP
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++void
++__attribute__((noinline)) spec_qsort (void *a, size_t es) 
++{
++  char *pa;
++  char *pb;
++  int cmp_result;
++
++  while ((*(arc_t **)a)->id < *((int *)a))
++    { 
++      if (cmp_result == 0)
++	{
++	  spec_qsort (a, es);
++	  pa = (char *)a - es;
++	  a += es;
++	  *(long *)pb = *(long *)pa;
++	}
++      else
++	{
++	  a -= pa - pb;
++	}
++    }  
++}
++
++int
++main()
++{
++  arc_p **arcs_pointer_sorted;
++  spec_qsort (arcs_pointer_sorted[0], sizeof (arc_p));
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c b/gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c
+new file mode 100644
+index 00000000000..87c756c79d5
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c
+@@ -0,0 +1,86 @@
++// Check simplify rewrite chance for semi-relayout
++/* { dg-do compile } */
++
++#include 
++#include 
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++  network_t* net_add;
++};
++
++
++const int MAX = 100;
++network_t* net;
++node_p node;
++arc_p arc;
++
++int
++main ()
++{
++  net = (network_t*) calloc (1, sizeof(network_t));
++  net->arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  net->sorted_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
++  net->nodes = (node_p) calloc (MAX, sizeof (node_t));
++  net->arcs->id = 100;
++
++  node = net->nodes;
++  arc = net->arcs;
++
++  for (unsigned i = 0; i < MAX; i++)
++    {
++      arc->head = node;
++      arc->head->child = node;
++      node->potential = i + 1;
++      arc->cost = arc->head->potential;
++      arc->tail = node->sibling;
++      node = node + 1;
++      arc = arc + 1;
++    }
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform in semi-relayout is 1" "struct_reorg" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c
+new file mode 100644
+index 00000000000..9d58edab80a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c
+@@ -0,0 +1,37 @@
++/* { dg-do run } */
++
++static struct S {
++  int *p1;
++  int *p2;
++} s;
++
++typedef __UINTPTR_TYPE__ uintptr_t;
++
++int
++foo ()
++{
++  int i = 1;
++  int j = 2;
++  struct S s;
++  int **p;
++  s.p1 = &i;
++  s.p2 = &j;
++  p = &s.p1;
++  uintptr_t pi = (uintptr_t) p;
++  pi = pi + sizeof (int *);
++  p = (int **)pi;
++  **p = 3;
++  return j;
++}
++
++int
++main ()
++{
++  if (foo () != 3)
++    {
++      __builtin_abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "struct S has escaped: \"Type escapes via taking the address of field\"" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/sr_convert_mem.c b/gcc/testsuite/gcc.dg/struct/sr_convert_mem.c
+new file mode 100644
+index 00000000000..a99ee0de484
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/sr_convert_mem.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++
++struct T1 {
++  long var1;
++  int  var2;
++};
++
++struct T2 {
++  long var1;
++  int  var2;
++};
++
++void test (void*);
++
++__attribute__((used)) void
++foo (struct T2 *t2)
++{
++  struct T1* t1 = (void *)(&t2[1]);
++  void*  data   = (void *)(&t1[1]);
++
++  test(data);
++  return;
++}
+diff --git a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c
+new file mode 100644
+index 00000000000..e3d219fe1e0
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c
+@@ -0,0 +1,25 @@
++// { dg-do compile }
++
++#include 
++
++struct S {
++  unsigned long a;
++  unsigned long b;
++};
++
++struct S* s;
++struct S* t = (struct S*) 1000;
++
++int
++main ()
++{
++  s = (struct S*) calloc (1000, sizeof (struct S));
++  s = s > t ? s : t;
++  if (s == 0)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/sr_pointer_and.c b/gcc/testsuite/gcc.dg/struct/sr_pointer_and.c
+new file mode 100644
+index 00000000000..9a4b10d9aef
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/sr_pointer_and.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++
++struct test {long val; struct test* next; };
++
++unsigned long P_DATA;
++
++void func (struct test*);
++
++__attribute__((used)) static void
++foo (struct test* pt)
++{
++  struct test t;
++
++  t.next = (void *)((unsigned long)pt->next & P_DATA);
++  func(&t);
++  return;
++}
+diff --git a/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c
+new file mode 100644
+index 00000000000..a0614a1bae7
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c
+@@ -0,0 +1,33 @@
++// { dg-do compile }
++
++#include 
++
++typedef struct node node_t;
++typedef struct node* node_p;
++
++struct node {
++  unsigned long a;
++  unsigned long b;
++};
++
++int max;
++int x;
++
++node_p n;
++node_p z;
++
++int
++main ()
++{
++  n = (node_p) calloc (max, sizeof (node_t));
++
++  node_p xp = &n[x];
++
++  if (xp - z == 10)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "has escaped: \"Type escapes via a unhandled rewrite stmt\"" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
+new file mode 100644
+index 00000000000..281046b48ae
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
+@@ -0,0 +1,102 @@
++#   Copyright (C) 1997-2019 Free Software Foundation, Inc.
++
++# This program is free software; you can redistribute it and/or modify
++# it under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3 of the License, or
++# (at your option) any later version.
++# 
++# This program is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++# GNU General Public License for more details.
++# 
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# .
++
++load_lib gcc-dg.exp
++load_lib target-supports.exp
++
++# Initialize `dg'.
++dg-init
++torture-init
++
++set STRUCT_REORG_TORTURE_OPTIONS [list \
++	{ -O3 } \
++	{ -Ofast } ]
++
++set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}}
++
++# -fipa-struct-reorg
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \
++ 	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/struct_reorg*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sr_*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# -fipa-reorder-fields
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \
++	"" "-fipa-reorder-fields -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# -fipa-struct-reorg=1
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \
++ 	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/struct_reorg*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sr_*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# -fipa-struct-reorg=2
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \
++ 	"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \
++	"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] \
++	"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/struct_reorg*.c]] \
++	"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sr_*.c]] \
++	"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \
++	"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \
++	"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \
++	"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# -fipa-struct-reorg=3
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \
++	"" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# -fipa-struct-reorg=4
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \
++	"" "-fipa-struct-reorg=4 -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# -fipa-struct-reorg=5
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \
++	"" "-fipa-struct-reorg=5 -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# -fipa-struct-reorg=6
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/semi_relayout*.c]] \
++	"" "-fipa-struct-reorg=6 -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# All done.
++torture-finish
++dg-finish
+diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
+new file mode 100644
+index 00000000000..a73ff8e7ec9
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c
+@@ -0,0 +1,30 @@
++// { dg-do compile }
++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" }
++
++struct a
++{
++  int t, t1;
++};
++
++static struct a *b;
++
++void *xmalloc(int);
++
++
++void f(void)
++{
++  b = xmalloc (sizeof(*b));
++}
++
++int g(void)
++{
++  return b->t;
++}
++
++int main()
++{
++  f ();
++  return g ();
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c
+new file mode 100644
+index 00000000000..d7ab7d21c62
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c
+@@ -0,0 +1,29 @@
++// { dg-do run }
++
++#include 
++
++struct a
++{
++  int t;
++  int t1;
++};
++
++__attribute__((noinline)) int f(int i, int j)
++{
++  struct a *t;
++  struct a t1 = {i, j};
++  t = &t1;
++  auto int g(void) __attribute__((noinline));
++  int g(void)
++  {
++    return t->t + t->t1;
++  }
++  return g();
++}
++
++int main()
++{
++  assert (f(1, 2) == 3);
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 2" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
+new file mode 100644
+index 00000000000..9e5b192eb02
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c
+@@ -0,0 +1,28 @@
++// { dg-do compile }
++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" }
++
++#include 
++typedef struct {
++   long laststart_offset;
++   unsigned regnum;
++} compile_stack_elt_t;
++typedef struct {
++   compile_stack_elt_t *stack;
++   unsigned size;
++} compile_stack_type;
++__attribute__((noinline)) void f (const char *p, const char *pend, int c)
++{
++  compile_stack_type compile_stack;
++  while (p != pend)
++    if (c)
++      compile_stack.stack = realloc (compile_stack.stack,
++				     (compile_stack.size << 1)
++				       * sizeof (compile_stack_elt_t));
++}
++
++int main()
++{
++  f (NULL, NULL, 1);
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c
+new file mode 100644
+index 00000000000..27b4b56e0d2
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c
+@@ -0,0 +1,59 @@
++/* { dg-do run } */
++
++extern void abort (void);
++
++struct S
++{
++  int b;
++  int *c;
++};
++static int d, e;
++
++static struct S s;
++
++static int *
++__attribute__((noinline, const))
++foo (void)
++{
++  return &s.b;
++}
++
++int *
++__attribute__((noinline))
++bar (int **f)
++{
++  s.c = &d;
++  *f = &e;
++  /* As nothing ever takes the address of any int * field in struct S,
++     the write to *f can't alias with the s.c field.  */
++  return s.c;
++}
++
++int
++__attribute__((noinline))
++baz (int *x)
++{
++  s.b = 1;
++  *x = 4;
++  /* Function foo takes address of an int field in struct S,
++     so *x can alias with the s.b field (and it does in this testcase).  */
++  return s.b;
++}
++
++int
++__attribute__((noinline))
++t (void)
++{
++  int *f = (int *) 0;
++  return 10 * (bar (&f) != &d) + baz (foo ());
++}
++
++int
++main (void)
++{
++  if (t () != 4)
++    abort ();
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
+new file mode 100644
+index 00000000000..273baa9a368
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-flto -fno-use-linker-plugin" } */
++
++struct D
++{
++  int n;
++  int c [8];
++};
++
++struct A
++{
++  int i;
++  char *p;
++};
++
++struct B
++{
++  struct A *a;
++  struct D *d;
++};
++
++int dtInsert1 (struct B *b)
++{
++  struct A a = { 0, 0 };
++  struct D *d;
++  b->a = &a;
++  d = b->d;
++  &d->c [d->n];
++  return 0;
++}
++
+diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
+new file mode 100644
+index 00000000000..455f9b501d6
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c
+@@ -0,0 +1,54 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-flto -fno-use-linker-plugin" } */
++
++typedef struct basic_block_def *basic_block;
++typedef struct gimple_seq_node_d *gimple_seq_node;
++typedef struct gimple_seq_d *gimple_seq;
++typedef struct
++{
++  gimple_seq_node ptr;
++  gimple_seq seq;
++  basic_block bb;
++} gimple_stmt_iterator;
++typedef void *gimple;
++extern void exit(int);
++struct gimple_seq_node_d
++{
++  gimple stmt;
++  struct gimple_seq_node_d *next;
++};
++struct gimple_seq_d
++{
++};
++static __inline__ gimple_stmt_iterator
++gsi_start (gimple_seq seq)
++{
++  gimple_stmt_iterator i;
++  i.seq = seq;
++  return i;
++}
++static __inline__ unsigned char
++gsi_end_p (gimple_stmt_iterator i)
++{
++  return i.ptr == ((void *)0);
++}
++static __inline__ void
++gsi_next (gimple_stmt_iterator *i)
++{
++  i->ptr = i->ptr->next;
++}
++static __inline__ gimple
++gsi_stmt (gimple_stmt_iterator i)
++{
++  return i.ptr->stmt;
++}
++void
++c_warn_unused_result (gimple_seq seq)
++{
++  gimple_stmt_iterator i;
++  for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
++    {
++      gimple g = gsi_stmt (i);
++      if (!g) exit(0);
++    }
++}
+diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
+new file mode 100644
+index 00000000000..afc0bd86ca5
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c
+@@ -0,0 +1,38 @@
++/* { dg-do run } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++struct gki_elem {
++  char            *key;
++  int              idx;
++};
++
++typedef struct {
++  struct gki_elem *table;
++
++  int primelevel;
++  int nhash;
++  int nkeys;
++} GKI;
++
++void *
++sre_malloc(size_t size)
++{
++  void *ptr = malloc (size);
++  return ptr;
++}
++
++__attribute__((noinline)) int
++GKIStoreKey(GKI *hash)
++{
++  hash->table = sre_malloc(sizeof(struct gki_elem));
++}
++
++int
++main ()
++{
++  GKI *hash = malloc (sizeof(GKI));
++  GKIStoreKey(hash);
++  return 0;
++}
+diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
+new file mode 100644
+index 00000000000..9bcfaf3681b
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c
+@@ -0,0 +1,25 @@
++/* { dg-do run } */
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++
++typedef struct {
++    unsigned char blue;
++    unsigned char green;
++} Pixel;
++
++typedef struct {
++    unsigned short colormaplength;
++    Pixel         *colormapdata;
++} TargaImage;
++
++TargaImage *img;
++
++int main() {
++    img = (TargaImage *) malloc( sizeof(TargaImage) );
++    if (img->colormaplength > 0) {
++        img->colormapdata = (Pixel *) malloc(sizeof(Pixel) * img->colormaplength);
++        memset(img->colormapdata, 0, (sizeof(Pixel) * img->colormaplength) );
++    }
++}
+diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
+new file mode 100644
+index 00000000000..052f4e3bdc1
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c
+@@ -0,0 +1,54 @@
++/* { dg-do run } */
++
++extern void abort(void);
++
++struct packed_ushort {
++    unsigned short ucs;
++} __attribute__((packed));
++
++struct source {
++    int pos, length;
++};
++
++static int flag;
++
++static void __attribute__((noinline)) fetch(struct source *p)
++{
++    p->length = 128;
++}
++    
++static struct packed_ushort __attribute__((noinline)) next(struct source *p)
++{
++    struct packed_ushort rv;
++
++    if (p->pos >= p->length) {
++	if (flag) {
++	    flag = 0;
++	    fetch(p);
++	    return next(p);
++	}
++	flag = 1;
++	rv.ucs = 0xffff;
++	return rv;
++    }
++    rv.ucs = 0;
++    return rv;
++}
++
++int main(void)
++{
++    struct source s;
++    int i;
++
++    s.pos = 0;
++    s.length = 0;
++    flag = 0;
++
++    for (i = 0; i < 16; i++) {
++	struct packed_ushort rv = next(&s);
++	if ((i == 0 && rv.ucs != 0xffff)
++	    || (i > 0 && rv.ucs != 0))
++	    abort();
++    }
++    return 0;
++}
+diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c
+new file mode 100644
+index 00000000000..9e0f84da8ca
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c
+@@ -0,0 +1,29 @@
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#define N 1000
++str_t A[N];
++
++int
++main ()
++{
++  int i;
++
++  for (i = 0; i < N; i++)
++    {
++      A[i].a = 0;
++    }
++
++  for (i = 0; i < N; i++)
++    if (A[i].a != 0) 
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg"  { xfail *-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c
+new file mode 100644
+index 00000000000..c868347e33f
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c
+@@ -0,0 +1,42 @@
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++str_t *p;
++
++int
++main ()
++{
++  int i, sum;
++
++  p = malloc (N * sizeof (str_t));
++  if (p == NULL)
++    return 0;
++  for (i = 0; i < N; i++)
++    p[i].b = i;
++
++  for (i = 0; i < N; i++)
++    p[i].a = p[i].b + 1;
++
++  for (i = 0; i < N; i++)
++    if (p[i].a != p[i].b + 1)
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c
+new file mode 100644
+index 00000000000..185ff3125ee
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c
+@@ -0,0 +1,37 @@
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++int
++main ()
++{
++  int i;
++  str_t A[N];
++
++  for (i = 0; i < N; i++)
++    {
++      A[i].a = 0;
++    }
++
++  for (i = 0; i < N; i++)
++    if (A[i].a != 0) 
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c
+new file mode 100644
+index 00000000000..6294fb2a222
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c
+@@ -0,0 +1,40 @@
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++int
++main ()
++{
++  int i, sum;
++
++  str_t * p = malloc (N * sizeof (str_t));
++  if (p == NULL)
++    return 0;
++  for (i = 0; i < N; i++)
++    p[i].b = i;
++
++  for (i = 0; i < N; i++)
++    p[i].a = p[i].b + 1;
++
++  for (i = 0; i < N; i++)
++    if (p[i].a != p[i].b + 1)
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c
+new file mode 100644
+index 00000000000..3ca4e0e7180
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c
+@@ -0,0 +1,31 @@
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  int b;
++}str_t;
++
++#define N 3
++
++str_t str;
++
++int
++main ()
++{
++  int i;
++  int res = 1<<(1<
++
++typedef struct
++{
++  int a;
++  float b;
++}str_t1;
++
++typedef struct
++{
++  int c;
++  float d;
++}str_t2;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 16000
++#define N 1000
++#else
++#define N (STACK_SIZE/16)
++#endif
++#else
++#define N 1000
++#endif
++
++str_t1 *p1;
++str_t2 *p2;
++int num;
++
++void
++foo (void)
++{
++  int i;
++
++  for (i=0; i < num; i++)
++    p2[i].c = 2;
++}
++
++int
++main ()
++{
++  int i, r;
++
++  r = rand ();
++  num = r > N ? N : r; 
++  p1 = malloc (num * sizeof (str_t1));
++  p2 = malloc (num * sizeof (str_t2));
++
++  if (p1 == NULL || p2 == NULL)
++    return 0;
++
++  for (i = 0; i < num; i++)
++    p1[i].a = 1;
++
++  foo ();
++
++  for (i = 0; i < num; i++)
++    if (p1[i].a != 1 || p2[i].c != 2)
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 2" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c
+new file mode 100644
+index 00000000000..afa145a5722
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c
+@@ -0,0 +1,43 @@
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  int b;
++}str_t1;
++
++typedef struct
++{
++  float a;
++  float b;
++}str_t2;
++
++#define N1 1000
++#define N2 100
++str_t1 A1[N1];
++str_t2 A2[N2];
++
++int
++main ()
++{
++  int i;
++
++  for (i = 0; i < N1; i++)
++    A1[i].a = 0;
++
++  for (i = 0; i < N2; i++)
++    A2[i].a = 0;
++
++  for (i = 0; i < N1; i++)
++    if (A1[i].a != 0) 
++      abort ();
++
++  for (i = 0; i < N2; i++)
++    if (A2[i].a != 0) 
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* Arrays are not handled. */
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c
+new file mode 100644
+index 00000000000..7fa6ae27521
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c
+@@ -0,0 +1,26 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct basic
++{
++  int a;
++  int b[10];
++} type_struct;
++
++type_struct *str1;
++
++int main()
++{
++  int i;
++
++  str1 = malloc (10 * sizeof (type_struct));
++
++  for (i=0; i<=9; i++)
++    str1[i].a = str1[i].b[0];
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg"  { xfail *-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c
+new file mode 100644
+index 00000000000..b3bde58365a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c
+@@ -0,0 +1,38 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++int
++main ()
++{
++  int i;
++  str_t A[N];
++  str_t *p = A;
++
++  for (i = 0; i < N; i++)
++    p[i].a = 0;
++
++  for (i = 0; i < N; i++)
++    if (p[i].a != 0)
++      abort ();
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c
+new file mode 100644
+index 00000000000..f2bb82b94aa
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c
+@@ -0,0 +1,29 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++
++typedef struct test_struct
++{
++  int a;
++  int b;
++} type_struct;
++
++typedef type_struct **struct_pointer2;
++
++struct_pointer2 str1;
++
++int main()
++{
++  int i, j;
++
++  str1 = malloc (2 * sizeof (type_struct *));
++
++  for (i = 0; i <= 1; i++)
++    str1[i] = malloc (2 * sizeof (type_struct));
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c
+new file mode 100644
+index 00000000000..0685cf8fe74
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c
+@@ -0,0 +1,44 @@
++/* { dg-do run } */
++
++#include <stdlib.h>
++
++struct S { int a; struct V *b; };
++typedef struct { int c; } T;
++typedef struct { int d; int e; } U;
++
++void * 
++fn (void *x) 
++{
++  return x;
++}
++
++int
++foo (struct S *s)
++{
++  T x;
++  
++  T y = *(T *)fn (&x);
++  return y.c;
++}
++
++int
++bar (struct S *s)
++{
++  U x;
++  
++  U y = *(U *)fn (&x);
++  return y.d + s->a;
++}
++
++int 
++main ()
++{
++  struct S s;
++
++  foo(&s) + bar (&s);
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "No structures to transform in struct split" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c
+new file mode 100644
+index 00000000000..1a0a5a9c6d3
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c
+@@ -0,0 +1,44 @@
++/* { dg-do run } */
++
++#include <stdlib.h>
++struct str
++{
++  int a;
++  float b;
++};
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++int
++foo (struct str * p_str)
++{
++  static int sum = 0;
++
++  sum = sum + p_str->a;
++  return sum;
++}
++
++int
++main ()
++{
++  int i, sum;
++  struct str * p = malloc (N * sizeof (struct str));
++  if (p == NULL)
++    return 0;
++  for (i = 0; i < N; i++)
++    sum = foo (p+i);
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg"   } } */
++
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c
+new file mode 100644
+index 00000000000..d0dce8b536f
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c
+@@ -0,0 +1,33 @@
++/* { dg-do run } */
++/* { dg-additional-options "-fno-ipa-sra" } */
++
++#include <stdlib.h>
++
++struct A {
++  int d;
++  int d1;
++};
++
++struct A a;
++
++struct A *foo () __attribute__((noinline));
++struct A *foo ()
++{
++  a.d = 5;
++  return &a;
++}
++
++int
++main ()
++{
++  a.d = 0;
++  foo ();
++
++  if (a.d != 5)
++    abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "has escaped. .Type escapes via a return" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c
+new file mode 100644
+index 00000000000..71167182d50
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c
+@@ -0,0 +1,32 @@
++/* { dg-do run } */
++/* { dg-additional-options "-fno-ipa-sra" } */
++
++#include <stdlib.h>
++
++struct A {
++  int d;
++};
++
++struct A a;
++
++struct A foo () __attribute__((noinline));
++struct A foo ()
++{
++  a.d = 5;
++  return a;
++}
++
++int
++main ()
++{
++  a.d = 0;
++  foo ();
++
++  if (a.d != 5)
++    abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "has escaped: \"Type escapes via a return" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c
+new file mode 100644
+index 00000000000..74fa11f3940
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  int b;
++}str_t;
++
++#define N 2
++
++str_t A[2] = {{1,1},{2,2}};
++
++int
++main ()
++{
++  int i;
++
++  for (i = 0; i < N; i++)
++    A[i].b = A[i].a;
++
++  for (i = 0; i < N; i++)
++    if (A[i].b != A[i].a)
++      abort ();
++  
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c
+new file mode 100644
+index 00000000000..9533538c47c
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#define N 1000
++
++typedef struct 
++{
++  str_t A[N];
++  int c;
++}str_with_substr_t;
++
++str_with_substr_t a;
++
++int
++main ()
++{
++  int i;
++  
++  for (i = 0; i < N; i++)
++    a.A[i].b = 0;
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg"  { xfail *-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c
+new file mode 100644
+index 00000000000..baf617816d6
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c
+@@ -0,0 +1,48 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 16000
++#define N 1000
++#else
++#define N (STACK_SIZE/16)
++#endif
++#else
++#define N 1000
++#endif
++
++typedef struct 
++{
++  str_t * sub_str;
++  int c;
++}str_with_substr_t;
++
++int foo;
++
++int
++main (void)
++{
++  int i;
++  str_with_substr_t A[N];
++  str_t a[N];
++
++  for (i=0; i < N; i++)
++    A[i].sub_str = &(a[i]);
++
++  for (i=0; i < N; i++)
++    A[i].sub_str->a = 5;
++
++  foo = A[56].sub_str->a;
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c
+new file mode 100644
+index 00000000000..33fce3b2350
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c
+@@ -0,0 +1,45 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++
++typedef struct 
++{
++  str_t sub_str;
++  int c;
++}str_with_substr_t;
++
++int
++main ()
++{
++  int i;
++  str_with_substr_t A[N];
++
++  for (i = 0; i < N; i++)
++    A[i].sub_str.a = 5;
++
++  for (i = 0; i < N; i++)
++    if (A[i].sub_str.a != 5)
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c
+new file mode 100644
+index 00000000000..100a93868e5
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c
+@@ -0,0 +1,32 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#define N 1000
++str_t A[N];
++
++int
++main ()
++{
++  int i;
++
++  for (i = 0; i < N; i++)
++    {
++      A[i].a = 0;
++    }
++
++  for (i = 0; i < N; i++)
++    if (A[i].a != 0) 
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c
+new file mode 100644
+index 00000000000..669d0b886ac
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c
+@@ -0,0 +1,45 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++str_t *p;
++
++int
++main ()
++{
++  int i, sum;
++
++  p = malloc (N * sizeof (str_t));
++  if (p == NULL)
++    return 0;
++  for (i = 0; i < N; i++)
++    p[i].b = i;
++
++  for (i = 0; i < N; i++)
++    p[i].b = p[i].a + 1;
++
++  for (i = 0; i < N; i++)
++    if (p[i].b != p[i].a + 1)
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c
+new file mode 100644
+index 00000000000..ce6c1544c20
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c
+@@ -0,0 +1,40 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++int
++main ()
++{
++  int i;
++  str_t A[N];
++
++  for (i = 0; i < N; i++)
++    {
++      A[i].a = 0;
++    }
++
++  for (i = 0; i < N; i++)
++    if (A[i].a != 0) 
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c
+new file mode 100644
+index 00000000000..eca2ebf32ec
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c
+@@ -0,0 +1,43 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++int
++main ()
++{
++  int i, sum;
++
++  str_t * p = malloc (N * sizeof (str_t));
++  if (p == NULL)
++    return 0;
++  for (i = 0; i < N; i++)
++    p[i].b = i;
++
++  for (i = 0; i < N; i++)
++    p[i].b = p[i].a + 1;
++
++  for (i = 0; i < N; i++)
++    if (p[i].b != p[i].a + 1)
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c
+new file mode 100644
+index 00000000000..6f8f94d7d59
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c
+@@ -0,0 +1,47 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++int
++main ()
++{
++  long i, num;
++
++  num = rand();
++  num = num > N ? N : num; 
++  str_t * p = malloc (num * sizeof (str_t));
++
++  if (p == 0)
++    return 0;
++
++  for (i = 1; i <= num; i++)
++    p[i-1].b = i;
++
++  for (i = 1; i <= num; i++)
++    p[i-1].a = p[i-1].b + 1;
++
++  for (i = 0; i < num; i++)
++    if (p[i].a != p[i].b + 1)
++      abort ();
++  
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c
+new file mode 100644
+index 00000000000..2ca729d1f5d
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c
+@@ -0,0 +1,47 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 8000
++#define N 1000
++#else
++#define N (STACK_SIZE/8)
++#endif
++#else
++#define N 1000
++#endif
++
++int
++main ()
++{
++  int i, num;
++
++  num = rand();
++  num = num > N ? N : num; 
++  str_t * p = malloc (num * sizeof (str_t));
++
++  if (p == 0)
++    return 0;
++
++  for (i = 0; i < num; i++)
++    p[i].b = i;
++
++  for (i = 0; i < num; i++)
++    p[i].a = p[i].b + 1;
++
++  for (i = 0; i < num; i++)
++    if (p[i].a != p[i].b + 1)
++      abort ();
++  
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c
+new file mode 100644
+index 00000000000..6000b2919ab
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c
+@@ -0,0 +1,42 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  float b;
++  int c;
++  float d;
++}str_t;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 1600
++#define N 100
++#else
++#define N (STACK_SIZE/16)
++#endif
++#else
++#define N 100
++#endif
++
++int 
++main ()
++{
++  int i;
++  str_t *p = malloc (N * sizeof (str_t));
++  if (p == NULL)
++    return 0;
++  for (i = 0; i < N; i++)
++    p[i].a = 5;
++
++  for (i = 0; i < N; i++)
++    if (p[i].a != 5)      
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* Two more fields structure is not splitted.  */
++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c
+new file mode 100644
+index 00000000000..f4a10340912
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c
+@@ -0,0 +1,34 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++typedef struct
++{
++  int a;
++  int b;
++}str_t;
++
++#define N 3
++
++str_t str;
++
++int
++main ()
++{
++  int i;
++  int res = 1<<(1<
++typedef struct
++{
++  int a;
++  int b;
++}str_t;
++
++#define N 3
++
++int
++main ()
++{
++  int i;
++  int res = 1<<(1<
++typedef struct
++{
++  int a;
++  int *b;
++}str_t;
++
++#define N 3
++
++str_t *p;
++
++int
++main ()
++{
++  str_t str;
++  int i;
++  int res = 1 << (1 << N);
++  p = &str;
++  str.a = 2;
++ 
++  p->b = &(p->a);
++
++  for (i=0; i < N; i++)
++    p->a = *(p->b)*(*(p->b));
++
++  if (p->a != res)
++    abort ();
++  
++  /* POSIX ignores all but the 8 low-order bits, but other
++     environments may not.  */
++  return (p->a & 255);
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "has escaped...Type escapes a cast to a different" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c
+new file mode 100644
+index 00000000000..bc8eacc779e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c
+@@ -0,0 +1,67 @@
++/* { dg-do compile } */
++/* { dg-do run } */
++
++#include <stdlib.h>
++
++typedef struct
++{
++  int a;
++  float b;
++}str_t1;
++
++typedef struct
++{
++  int c;
++  float d;
++}str_t2;
++
++#ifdef STACK_SIZE
++#if STACK_SIZE > 16000
++#define N 1000
++#else
++#define N (STACK_SIZE/16)
++#endif
++#else
++#define N 1000
++#endif
++
++str_t1 *p1;
++str_t2 *p2;
++int num;
++
++void
++foo (void)
++{
++  int i;
++
++  for (i=0; i < num; i++)
++    p2[i].c = 2;
++}
++
++int
++main ()
++{
++  int i, r;
++
++  r = rand ();
++  num = r > N ? N : r; 
++  p1 = malloc (num * sizeof (str_t1));
++  p2 = malloc (num * sizeof (str_t2));
++
++  if (p1 == NULL || p2 == NULL)
++    return 0;
++
++  for (i = 0; i < num; i++)
++    p1[i].a = 1;
++
++  foo ();
++
++  for (i = 0; i < num; i++)
++    if (p1[i].a != 1 || p2[i].c != 2)
++      abort ();
++
++  return 0;
++}
++
++/*--------------------------------------------------------------------------*/
++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 2" "struct_reorg" } } */
+diff --git a/gcc/timevar.def b/gcc/timevar.def
+index 794b8017d18..98a5a490f5c 100644
+--- a/gcc/timevar.def
++++ b/gcc/timevar.def
+@@ -80,6 +80,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP     , "ipa cp")
+ DEFTIMEVAR (TV_IPA_INLINING          , "ipa inlining heuristics")
+ DEFTIMEVAR (TV_IPA_FNSPLIT           , "ipa function splitting")
+ DEFTIMEVAR (TV_IPA_COMDATS	     , "ipa comdats")
++DEFTIMEVAR (TV_IPA_STRUCT_REORG      , "ipa struct reorg optimization")
+ DEFTIMEVAR (TV_IPA_OPT		     , "ipa various optimizations")
+ DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS    , "lto stream decompression")
+ DEFTIMEVAR (TV_IPA_LTO_COMPRESS      , "lto stream compression")
+diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
+index 55ee2fe7f9e..56898e01994 100644
+--- a/gcc/tree-pass.h
++++ b/gcc/tree-pass.h
+@@ -527,6 +527,7 @@ extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
++extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt);
+-- 
+2.22.0
+
diff --git a/HYGON-0005-basick-block-reorder.patch b/HYGON-0005-basick-block-reorder.patch
new file mode 100644
index 0000000..f3929dc
--- /dev/null
+++ b/HYGON-0005-basick-block-reorder.patch
@@ -0,0 +1,2664 @@
+From d419fe1f30c97f8852ab6b0a363b0564487417ac Mon Sep 17 00:00:00 2001
+From: He Dian 
+Date: Tue, 26 Mar 2024 11:43:39 +0800
+Subject: [PATCH] [feat][gcc] add basic block reorder feature
+
+Signed-off-by: He Dian 
+---
+ gcc/Makefile.in                               |    1 +
+ gcc/cgraph.h                                  |   11 +-
+ gcc/common.opt                                |    6 +-
+ gcc/ipa-bbr.cc                                | 1795 +++++++++++++++++
+ gcc/lto-cgraph.cc                             |    7 +-
+ gcc/opt-functions.awk                         |    2 +-
+ gcc/passes.def                                |    2 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-0.C         |   27 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-1.C         |   28 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-10.C        |   27 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-11.C        |   29 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-12.C        |   36 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-13.C        |   35 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-14.C        |   30 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-15.C        |   30 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-16.C        |   37 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-2.C         |   34 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-3.C         |   26 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-4.C         |   27 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-5.C         |   29 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-6.C         |   32 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-7.C         |   38 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-8.C         |   46 +
+ gcc/testsuite/g++.dg/tree-ssa/bbr-9.C         |   26 +
+ gcc/timevar.def                               |    2 +
+ gcc/tree-pass.h                               |    2 +
+ 27 files changed, 2361 insertions(+), 4 deletions(-)
+ create mode 100644 gcc/ipa-bbr.cc
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-0.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-1.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-10.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-11.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-12.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-13.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-14.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-15.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-16.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-2.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-3.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-4.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-5.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-6.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-7.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-8.C
+ create mode 100644 gcc/testsuite/g++.dg/tree-ssa/bbr-9.C
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 0aabc6ea3f2..d115879ed9a 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1454,6 +1454,7 @@ OBJS = \
+ 	ipa-cp.o \
+ 	ipa-sra.o \
+ 	ipa-devirt.o \
++	ipa-bbr.o \
+ 	ipa-fnsummary.o \
+ 	ipa-polymorphic-call.o \
+ 	ipa-split.o \
+diff --git a/gcc/cgraph.h b/gcc/cgraph.h
+index d96690326d1..0d0ad3b4c8c 100644
+--- a/gcc/cgraph.h
++++ b/gcc/cgraph.h
+@@ -892,7 +892,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
+       versionable (false), can_change_signature (false),
+       redefined_extern_inline (false), tm_may_enter_irr (false),
+       ipcp_clone (false), declare_variant_alt (false),
+-      calls_declare_variant_alt (false), m_uid (uid), m_summary_id (-1)
++      calls_declare_variant_alt (false), m_uid (uid), m_summary_id (-1),
++      side_effect_state (3), side_effect_aux (0), bbr_cost (0)
+   {}
+ 
+   /* Remove the node from cgraph and all inline clones inlined into it.
+@@ -1491,6 +1492,14 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
+   unsigned declare_variant_alt : 1;
+   /* True if the function calls declare_variant_alt functions.  */
+   unsigned calls_declare_variant_alt : 1;
++  /* 0: no side effect; 1: has local side effect; 2: has global side effect;
++   * 3: pending */
++  unsigned side_effect_state : 2;
++  /* used to describe the indices of the parameters which result in a local
++   * side effect, initial value: 0 */
++  uint64_t side_effect_aux;
++  /*cgraph_node bbr cost. initial value:0*/
++  uint64_t bbr_cost;
+ 
+ private:
+   /* Unique id of the node.  */
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 30f979870f6..1cc920c22b1 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1121,6 +1121,10 @@ Common Var(flag_array_widen_compare) Optimization
+ Extends types for pointers to arrays to improve array comparsion performance.
+ In some extreme situations this may result in unsafe behavior.
+ 
++fipa-bbr
++Common Var(flag_ipa_bbr) Optimization
++Execute the reordering of blocks which end with 'gcond'.
++
+ fauto-inc-dec
+ Common Var(flag_auto_inc_dec) Init(1) Optimization
+ Generate auto-inc/dec instructions.
+diff --git a/gcc/ipa-bbr.cc b/gcc/ipa-bbr.cc
+new file mode 100644
+index 00000000000..477ccdbda1d
+--- /dev/null
++++ b/gcc/ipa-bbr.cc
+@@ -0,0 +1,1795 @@
++/* This file implements the basic block reorder feature; it finds the chain
++of consecutive basic blocks ending with "gcond", and then reorders the
++chain by cost.
++==----------------------------------------------------------------------===//
++ \author: hedian
++ \email: hedian@hygon.cn
++ \version: hygonGCC1.2
++ \date 2023/09/08
++==----------------------------------------------------------------------===//
++*/
++#include 
++#include 
++#include 
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include 
++#include "input.h"
++#include "target.h"
++#include "tree.h"
++#include "gimple.h"
++#include "tree-cfg.h"
++#include "tree-pass.h"
++#include "tree-pretty-print.h"
++#include "tree-ssa-loop-manip.h"
++#include "print-tree.h"
++#include "gimple-iterator.h"
++#include "gimple-ssa.h"
++#include "gimple-fold.h"
++#include "gimple-pretty-print.h"
++#include "gimplify.h"
++#include "tree-ssa-loop.h"
++#include "tree-into-ssa.h"
++#include "ssa.h"
++#include "ssa-iterators.h"
++#include "vec.h"
++#include "dominance.h"
++#include "fold-const.h"
++#include "cgraph.h"
++#include "ipa-utils.h"
++#include "basic-block.h"
++#include "cfghooks.h"
++
++using namespace std;
++
++int new_chain_no = -1;
++// the declaration of basic block reorder struct
++struct bb_info
++{
++  basic_block bb;              // the basic block which is to be processed
++  unsigned HOST_WIDE_INT cost; // the cost of basic block
++
++  bb_info () : bb (bb), cost (0) {}
++  bb_info (basic_block bb) : bb (bb), cost (0) {}
++};
++
++#define UNKNOWN_FUNCTION_BODY_COST 100
++#define GIMPLE_CALL_INIT_COST 5
++
++// key is the code of builtin function;
++// value is a pair data, first is the boolean which means have global
++// side_effect, second means the indexes of args which result in the local side
++// effect
++unordered_map > >
++    side_effect_builtin_funs = {
++      // write
++      { BUILT_IN_PUTC, { true, { 0 } } },
++      { BUILT_IN_PUTC_UNLOCKED, { true, { 0 } } },
++      { BUILT_IN_FPUTC, { true, { 0 } } },
++      { BUILT_IN_FPUTC_UNLOCKED, { true, { 0 } } },
++      { BUILT_IN_FPUTS, { true, { 0 } } },
++      { BUILT_IN_FPUTS_UNLOCKED, { true, { 0 } } },
++      { BUILT_IN_PUTCHAR, { true, { 0 } } },
++      { BUILT_IN_PUTCHAR_UNLOCKED, { true, { 0 } } },
++      { BUILT_IN_PUTS, { true, { 0 } } },
++      { BUILT_IN_PUTS_UNLOCKED, { true, { 0 } } },
++      { BUILT_IN_FWRITE, { true, { 0 } } },
++      { BUILT_IN_FWRITE_UNLOCKED, { true, { 0 } } },
++      { BUILT_IN_FPRINTF, { true, { 0 } } },
++      { BUILT_IN_FPRINTF_UNLOCKED, { true, { 0 } } },
++      { BUILT_IN_PRINTF, { true, { 0 } } },
++      { BUILT_IN_PRINTF_UNLOCKED, { true, { 0 } } },
++      { BUILT_IN_SNPRINTF, { true, { 0 } } },
++      { BUILT_IN_SPRINTF, { true, { 0 } } },
++      { BUILT_IN_VFPRINTF, { true, { 0 } } },
++      { BUILT_IN_VPRINTF, { true, { 0 } } },
++      { BUILT_IN_VSNPRINTF, { true, { 0 } } },
++      { BUILT_IN_VSPRINTF, { true, { 0 } } },
++      { BUILT_IN_ISPRINT, { true, { 0 } } },
++      { BUILT_IN_ISWPRINT, { true, { 0 } } },
++      { BUILT_IN_SNPRINTF_CHK, { true, { 0 } } },
++      { BUILT_IN_SPRINTF_CHK, { true, { 0 } } },
++      { BUILT_IN_VSNPRINTF_CHK, { true, { 0 } } },
++      { BUILT_IN_VSPRINTF_CHK, { true, { 0 } } },
++      { BUILT_IN_FPRINTF_CHK, { true, { 0 } } },
++      { BUILT_IN_PRINTF_CHK, { true, { 0 } } },
++      { BUILT_IN_VFPRINTF_CHK, { true, { 0 } } },
++      { BUILT_IN_VPRINTF_CHK, { true, { 0 } } },
++
++      // read
++      { BUILT_IN_SCANF, { false, { 1, 63 } } },
++      { BUILT_IN_FSCANF, { false, { 2, 63 } } },
++      { BUILT_IN_SSCANF, { false, { 2, 63 } } },
++      { BUILT_IN_VFSCANF, { false, { 2, 63 } } },
++      { BUILT_IN_VSCANF, { false, { 1, 63 } } },
++      { BUILT_IN_VSSCANF, { false, { 2, 63 } } },
++
++      // exception
++      { BUILT_IN_UNWIND_RESUME, { false, { 0 } } },
++      { BUILT_IN_CXA_END_CLEANUP, { false, { 0 } } },
++      { BUILT_IN_EH_POINTER, { false, { 0 } } },
++      { BUILT_IN_EH_FILTER, { false, { 0 } } },
++      { BUILT_IN_EH_COPY_VALUES, { false, { 0 } } },
++
++      // mem str op
++      { BUILT_IN_MEMSET, { false, { 0 } } },
++      { BUILT_IN_MEMSET_CHK, { false, { 0 } } },
++      { BUILT_IN_MEMMOVE, { false, { 0 } } },
++      { BUILT_IN_MEMMOVE_CHK, { false, { 0 } } },
++      { BUILT_IN_MEMCPY, { false, { 0 } } },
++      { BUILT_IN_MEMCPY_CHK, { false, { 0 } } },
++      { BUILT_IN_STRCPY, { false, { 0 } } },
++      { BUILT_IN_STRCPY_CHK, { false, { 0 } } },
++      { BUILT_IN_STRNCPY, { false, { 0 } } },
++      { BUILT_IN_STRNCPY_CHK, { false, { 0 } } },
++      { BUILT_IN_STRCAT, { false, { 0 } } },
++      { BUILT_IN_STRCAT_CHK, { false, { 0 } } },
++      { BUILT_IN_STRNCAT, { false, { 0 } } },
++      { BUILT_IN_STRNCAT_CHK, { false, { 0 } } },
++      { BUILT_IN_INIT_TRAMPOLINE, { false, { 0 } } },
++      { BUILT_IN_TM_MALLOC, { false, { 0 } } },
++      { BUILT_IN_FREE, { false, { 0 } } },
++    };
++
++// key is the name string of the function;
++// value is a pair: the first element is a boolean meaning the call has a
++// global side effect, the second is the indexes of the args which result in
++// a local side effect
++unordered_map > >
++    side_effect_customized_funs = {
++      { "fstat", { false, { 1 } } },
++      { "oeprator delete", { false, { 0 } } },
++      { "oeprator delete []", { false, { { 0 } } } },
++      { "oeprator new", { false, { { 0 } } } },
++      { "oeprator new []", { false, { 0 } } },
++      { "malloc", { false, { 0 } } },
++      { "calloc", { false, { 0 } } },
++      { "free", { false, { 0 } } },
++      { "pthread_once", { false, { 0 } } },
++      { "regexec", { false, { 3 } } },
++      { "putback", { false, { 0 } } },
++      { "__fxstat", { false, { 2 } } },
++    };
++
++// information about basic block reorder.
++struct aggr_bbr
++{
++  function *f;
++  // the processing basic blocks array for reorder
++  auto_vec br_reorder_chain;
++  auto_vec ori_br_reorder_chain;
++  // the list for the basic blocks which has been processed
++  auto_vec processed_bbs;
++  // the number of chains processed for reorder
++  unsigned HOST_WIDE_INT num_chain;
++
++  // return true if the basic_block bb is available for reorder
++  bool one_bbr_avail_p (basic_block bb, int mode);
++  // return true if the basic_block chain_cur_bb and chain_next_bb are both
++  // available for reorder
++  bool two_bbr_avail_p (basic_block chain_cur_bb, basic_block chain_next_bb,
++                        basic_block chain_same_succ, int mode);
++  // return true if the basic_block chain_cur_bb and chain_next_bb has no
++  // use-def relationships between each other
++  bool two_bbr_use_def_independence_p (basic_block chain_cur_bb,
++                                       basic_block chain_next_bb);
++  // return true if the basic_block chain_cur_bb and chain_next_bb has no data
++  // dependent relationships  between each other
++  bool two_bbr_mem_independence_p (basic_block chain_cur_bb,
++                                   basic_block chain_next_bb);
++  // get the cost for basic block reorder
++  void get_bbr_cost ();
++  // set the positions for basic block reorder
++  bool set_bbr_position ();
++  // main transform for basic block reorder
++  bool bbr_transform_1 (basic_block chain_pred, basic_block chain_same_succ,
++                        basic_block chain_diff_succ);
++  // sink transform for basic block reorder
++  void bbr_sink_transform (basic_block chain_pred, basic_block chain_same_succ,
++                           basic_block chain_diff_succ);
++  // switch gimple transform for basic block reorder
++  basic_block switch_transform (basic_block chain_last_bb,
++                                basic_block chain_same_succ);
++  // max number of basic_blocks for reorder
++  unsigned HOST_WIDE_INT chain_max_num_bb = 20;
++  // aggr basic_block reorder
++  aggr_bbr (function *fun) : f (fun), num_chain (0) {}
++  // return true if basic_block reorder successfully
++  bool bbr_successfully_p (basic_block bb);
++
++  void
++  dump_chain (FILE *dump_file, auto_vec &v)
++  {
++    for (auto i = 0; i < v.length (); ++i)
++      {
++        fprintf (dump_file, "#%d bb cost: %lu\n", i, v[i]->cost);
++        dump_bb (dump_file, v[i]->bb, 0, dump_flags);
++      }
++  }
++};
++
++static bool
++bb_has_gphi_p (basic_block bb)
++{
++  if (!bb)
++    return false;
++  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      auto g = gsi_stmt (gsi);
++      if (g && TREE_CODE (gimple_phi_result (g)) == SSA_NAME)
++        return true;
++    }
++  return false;
++}
++
++static unsigned HOST_WIDE_INT
++get_function_gimple_count (function *fun)
++{
++  if (!fun)
++    return 0;
++  basic_block bb;
++  unsigned HOST_WIDE_INT res = 0;
++  FOR_EACH_BB_FN (bb, fun)
++  {
++    for (auto gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi);
++         gsi_next_nondebug (&gsi))
++      ++res;
++  }
++  return res;
++}
++
++static int
++bb_cost_cmp (const void *p0, const void *p1)
++{
++  auto bb_info0 = *(bb_info **)p0;
++  auto bb_info1 = *(bb_info **)p1;
++  auto cost0 = bb_info0->cost;
++  auto cost1 = bb_info1->cost;
++  if (cost0 > cost1)
++    return 1;
++  if (cost0 == cost1)
++    return 0;
++  if (cost0 < cost1)
++    return -1;
++}
++
++static unsigned HOST_WIDE_INT
++get_bb_num_nonedebug_gimple (basic_block bb)
++{
++  if (!bb)
++    return -1;
++  unsigned HOST_WIDE_INT res = 0;
++  for (auto gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi);
++       gsi_next_nondebug (&gsi))
++    {
++      ++res;
++    }
++  return res;
++}
++
++static tree
++find_mem_ref_in_tree (tree t)
++{
++  if (!t || CONSTANT_CLASS_P (t) || VAR_P (t) || DECL_P (t))
++    return NULL_TREE;
++  auto t_code = TREE_CODE (t);
++  switch (t_code)
++    {
++    case MEM_REF:
++      return t;
++    case SSA_NAME:
++      break;
++    default:
++      return find_mem_ref_in_tree (TREE_OPERAND (t, 0));
++    }
++  return NULL_TREE;
++}
++
++static bool
++gimple_has_side_effect_p (gimple *g)
++{
++  if (!g)
++    return false;
++  auto lhs = gimple_get_lhs (g);
++
++  // We conservatively treat a !SSA_NAME lhs as a store gimple.  TODO: clarify
++  // which gimples have side effects to discover more optimization chances.
++  if (lhs && TREE_CODE (lhs) != SSA_NAME)
++    return true;
++
++  if (!is_gimple_call (g))
++    return false;
++  auto g_call = dyn_cast (g);
++  auto callee_fn = gimple_call_fn (g_call);
++  tree callee_fndecl = gimple_call_fndecl (g_call);
++  if (callee_fndecl)
++    {
++      cgraph_node *n = cgraph_node::get (callee_fndecl);
++      if (n)
++        {
++          n = n->function_symbol ();
++          if (n->side_effect_state == 0)
++            return false;
++        }
++    }
++  else
++    { // gimple_call_fndecl(g) null
++      if (!callee_fn)
++        return true;
++      if (virtual_method_call_p (callee_fn))
++        {
++          auto final = false, all_targets_have_no_side_effect = true;
++          auto targets
++              = possible_polymorphic_call_targets (callee_fn, g, &final, 0);
++          for (auto i = 0; i < targets.length (); ++i)
++            {
++              auto target = targets[i];
++              auto rtarget = target->function_symbol ();
++              if (target->side_effect_state != 0)
++                all_targets_have_no_side_effect = false;
++            }
++          if (all_targets_have_no_side_effect)
++            return false;
++        }
++    }
++  return true;
++}
++
++static bool
++bb_succs_have_nonvirtual_gphi_p (basic_block bb)
++{
++  if (!bb)
++    return false;
++  for (auto i = 0; i < EDGE_COUNT (bb->succs); ++i)
++    {
++      auto succ = EDGE_SUCC (bb, i)->dest;
++      for (auto gsi = gsi_start_nonvirtual_phis (succ); !gsi_end_p (gsi);
++           gsi_next (&gsi))
++        {
++          auto g = gsi_stmt (gsi);
++          if (g)
++            return true;
++        }
++    }
++  return false;
++}
++
++static bool
++bb_has_side_effect_p (basic_block bb)
++{
++  if (!bb)
++    return false;
++  for (auto gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi);
++       gsi_next_nondebug (&gsi))
++    {
++      auto g = gsi_stmt (gsi);
++      if (gimple_has_side_effect_p (g))
++        return true;
++    }
++  return false;
++}
++
++static bool
++compare_tree_code_p (tree_code t)
++{
++  if (!t)
++    return false;
++  if (t == NE_EXPR || t == EQ_EXPR || t == GE_EXPR || t == GT_EXPR
++      || t == LE_EXPR || t == LT_EXPR)
++    return true;
++  return false;
++}
++
++static bool
++get_data_from_vtable (tree rhs1)
++{
++  if (!rhs1)
++    return false;
++  for (auto type = TREE_TYPE (rhs1); type; type = TREE_TYPE (type))
++    {
++      auto type_name = TYPE_NAME (type);
++      if (!type_name)
++        continue;
++      if (TREE_CODE (type_name) != IDENTIFIER_NODE)
++        continue;
++      auto type_name_str = IDENTIFIER_POINTER (type_name);
++      if (!strcmp (type_name_str, "__vtbl_ptr_type"))
++        return true;
++    }
++  return false;
++}
++
++bool
++aggr_bbr::one_bbr_avail_p (basic_block bb, int mode)
++{
++  if (!bb)
++    return true;
++  auto gsi = gsi_last_nondebug_bb (bb);
++  auto g = gsi_stmt (gsi);
++  if (!g)
++    return false;
++  auto g_code = gimple_code (g);
++  if (g_code != GIMPLE_COND && g_code != GIMPLE_SWITCH)
++    return false;
++  if (g_code == GIMPLE_COND && !(mode == 0 && EDGE_COUNT (bb->succs) == 2))
++    return false;
++  if (g_code == GIMPLE_SWITCH)
++    {
++      auto g_swtich = dyn_cast (g);
++      auto num_labels = gimple_switch_num_labels (g_swtich);
++      if (!(mode == 1 && num_labels == 3))
++        return false;
++    }
++  if (!single_pred_p (bb))
++    return false;
++  if (bb_has_gphi_p (bb))
++    return false;
++  if (bb_succs_have_nonvirtual_gphi_p (bb))
++    return false;
++  if (EDGE_COUNT (bb->succs) == 2
++      && EDGE_SUCC (bb, 0)->dest
++             == EDGE_SUCC (bb, 1)
++                    ->dest) // should not have the same successor basic block.
++    return false;
++  if (bb_has_side_effect_p (bb))
++    return false;
++  for (gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi);
++       gsi_next_nondebug (&gsi))
++    {
++      g = gsi_stmt (gsi);
++      g_code = gimple_code (g);
++      auto lhs = gimple_get_lhs (g);
++      if (!lhs || TREE_CODE (lhs) != SSA_NAME)
++        continue;
++      gimple *g_user;
++      imm_use_iterator imm_iter;
++      FOR_EACH_IMM_USE_STMT (g_user, imm_iter, lhs)
++      {
++        // ensure no other bb uses an operand defined in this bb
++        if (gimple_bb (g_user) != bb)
++          return false;
++        // ensure no overflow compare
++        if (is_gimple_assign (g) && gimple_assign_rhs_code (g) == PLUS_EXPR)
++          {
++            auto rhs1 = gimple_assign_rhs1 (g);
++            auto rhs2 = gimple_assign_rhs2 (g);
++            if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != INTEGER_CST
++                || !tree_fits_uhwi_p (rhs2))
++              continue;
++            auto rhs2_uhwi = tree_to_uhwi (rhs2);
++            auto user_code = gimple_code (g_user);
++            if (user_code != GIMPLE_ASSIGN && user_code != GIMPLE_COND)
++              continue;
++            auto assign_p = is_gimple_assign (g_user);
++            auto cmp_lhs = assign_p ? gimple_assign_rhs1 (g_user)
++                                    : gimple_cond_lhs (g_user);
++            auto cmp_rhs = assign_p ? gimple_assign_rhs2 (g_user)
++                                    : gimple_cond_rhs (g_user);
++            auto cmp_code = assign_p ? gimple_assign_rhs_code (g_user)
++                                     : gimple_cond_code (g_user);
++            if (cmp_lhs != lhs || !compare_tree_code_p (cmp_code)
++                || !tree_fits_uhwi_p (cmp_rhs))
++              continue;
++            auto cmp_rhs_uhwi = tree_to_uhwi (cmp_rhs);
++            if (cmp_rhs_uhwi < rhs2_uhwi)
++              return false;
++          }
++        // the pointer lhs should be dereferenced in the same bb, unless the
++        auto type_code = TREE_CODE (TREE_TYPE (lhs));
++        if (type_code == POINTER_TYPE && gimple_code (g_user) == GIMPLE_ASSIGN)
++          {
++            auto rhs1 = gimple_assign_rhs1 (g_user);
++            auto mem_ref = find_mem_ref_in_tree (rhs1);
++            if (!mem_ref)
++              continue;
++            auto op0 = TREE_OPERAND (mem_ref, 0);
++            if (op0 == lhs && !get_data_from_vtable (rhs1))
++              return false;
++          }
++      } // FOR_EACH_IMM_USE_STMT end
++    }   // for end
++  return true;
++}
++
++static tree
++find_virtual_operand_in_bb (basic_block bb)
++{
++  if (!bb)
++    return NULL_TREE;
++  // look for the last lhs virtual operand of gimple except gphi
++  for (auto gsi0 = gsi_last_nondebug_bb (bb); !gsi_end_p (gsi0);
++       gsi_prev (&gsi0))
++    {
++      auto g0 = gsi_stmt (gsi0);
++      auto vdef0 = gimple_vdef (g0);
++      auto vuse0 = gimple_vuse (g0);
++      if (!vdef0 && !vuse0)
++        continue;
++      // find vdef or vuse of a gimple
++      return vdef0 ? vdef0 : vuse0;
++    }
++
++  // look for the last virtual phi result
++  auto vdef = NULL_TREE;
++  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      auto g = gsi_stmt (gsi);
++      if (!virtual_operand_p (gimple_get_lhs (g)))
++        continue;
++      vdef = gimple_phi_result (g);
++    }
++
++  return vdef;
++}
++
++static void
++fixup_bb_virtual_phis (basic_block bb, basic_block chain_pred)
++{
++  if (!bb)
++    return;
++  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      auto g = gsi_stmt (gsi);
++      gcc_assert (virtual_operand_p (gimple_phi_result (g)));
++      auto g_phi = dyn_cast (g);
++      if (gimple_phi_num_args (g_phi) <= 0)
++        return;
++      for (auto i = 0; i < gimple_phi_num_args (g_phi); ++i)
++        {
++          auto arg_def = gimple_phi_arg_def (g_phi, i);
++          if (arg_def)
++            continue;
++          // when arg_def is null, look for the vdef
++          tree vdef = NULL_TREE;
++          auto e = gimple_phi_arg_edge (g_phi, i);
++          auto pred = e->src;
++          for (auto bb0 = pred; bb0;
++               bb0 = get_immediate_dominator (CDI_DOMINATORS, bb0))
++            {
++              vdef = find_virtual_operand_in_bb (bb0);
++              if (vdef)
++                goto set_vir_val;
++            }
++          // no desired virtual operand found; look for the oldest .MEM operand
++          basic_block bb0;
++          FOR_EACH_BB_FN (bb0, cfun)
++          {
++            for (auto gsi = gsi_start_phis (bb0); !gsi_end_p (gsi);
++                 gsi_next (&gsi))
++              {
++                auto g0 = gsi_stmt (gsi);
++                auto lhs = gimple_get_lhs (g0);
++                if (virtual_operand_p (lhs))
++                  {
++                    vdef = lhs;
++                    goto set_vir_val;
++                  }
++              }
++            for (auto gsi = gsi_start_nondebug_bb (bb0); !gsi_end_p (gsi);
++                 gsi_next (&gsi))
++              {
++                auto g0 = gsi_stmt (gsi);
++                auto vdef0 = gimple_vdef (g0);
++                auto vuse0 = gimple_vuse (g0);
++                if (!vdef0 && !vuse0)
++                  continue;
++                vdef = vdef0 ? vdef0 : vuse0;
++                goto set_vir_val;
++              }
++          }
++        set_vir_val:
++          gcc_assert (vdef);
++          // set null def phi arg
++          location_t l = gimple_phi_arg_location_from_edge (g_phi, e);
++          SET_PHI_ARG_DEF (g_phi, i, vdef);
++          gimple_phi_arg_set_location (g_phi, i, l);
++        }
++    }
++}
++
++// main real transform for basic block reorder
++bool
++aggr_bbr::bbr_transform_1 (basic_block chain_pred, basic_block chain_same_succ,
++                           basic_block chain_diff_succ)
++{
++  unsigned HOST_WIDE_INT n = br_reorder_chain.length ();
++  gcc_assert (n > 1 && "n > 1");
++  // step1 : switch_transform
++  auto last_bb_info = br_reorder_chain[n - 1];
++  auto gsi = gsi_last_nondebug_bb (last_bb_info->bb);
++  auto g = gsi_stmt (gsi);
++  auto g_code = gimple_code (g);
++  if (g_code == GIMPLE_SWITCH)
++    {
++      chain_diff_succ = switch_transform (last_bb_info->bb, chain_same_succ);
++      fixup_bb_virtual_phis (chain_same_succ, chain_pred);
++      fixup_bb_virtual_phis (chain_diff_succ, chain_pred);
++    }
++  for (auto i = 0; i < br_reorder_chain.length (); ++i)
++    {
++      auto bb = br_reorder_chain[i]->bb;
++      for (gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi);
++           gsi_next_nondebug (&gsi))
++        {
++          g = gsi_stmt (gsi);
++          gimple *g_user;
++          imm_use_iterator imm_iter;
++          tree lhs = gimple_get_lhs (g);
++          if (!(lhs && TREE_CODE (lhs) == SSA_NAME))
++            continue;
++          FOR_EACH_IMM_USE_STMT (g_user, imm_iter, lhs)
++          {
++            if (gimple_bb (g_user) == chain_same_succ)
++              return false;
++          } // for each gimple user end
++        }   // for each gimple end
++    }       // for each block end
++
++  // step2 : get_bbr_cost
++  get_bbr_cost ();
++  if (dump_file)
++    {
++      fprintf (dump_file,
++               "=== after switch transform and getting cost, chain:\n");
++      dump_chain (dump_file, br_reorder_chain);
++    }
++
++  // step3 : set_bbr_position
++  if (!set_bbr_position ())
++    {
++      if (dump_file)
++        fprintf (dump_file, "=== reordering not change chain\n");
++      return false;
++    }
++
++  // step4 : bbr_sink_transform
++  ++new_chain_no;
++  bbr_sink_transform (chain_pred, chain_same_succ, chain_diff_succ);
++  if (dump_file)
++    {
++      fprintf (dump_file, "=== after reordering, %s's #%d new chain: \n",
++               dump_file_name, new_chain_no);
++      dump_chain (dump_file, br_reorder_chain);
++      fprintf (dump_file, "\n\n");
++    }
++
++  return true;
++}
++
++static unsigned HOST_WIDE_INT
++gimple_get_bbr_cost (gimple *g)
++{
++  if (!g)
++    return 0;
++  if (!is_gimple_call (g))
++    return 1;
++  auto fun_exec_divisor = 3;
++  auto callee_fn = gimple_call_fn (g);
++  auto callee_fndecl = gimple_call_fndecl (g);
++  if (callee_fndecl)
++    {
++      auto n = cgraph_node::get (callee_fndecl);
++      if (n && n->bbr_cost != 0)
++        return n->bbr_cost / fun_exec_divisor;
++      if (n->alias)
++        n = n->get_alias_target ();
++      if (n && n->bbr_cost)
++        return n->bbr_cost / fun_exec_divisor;
++    }
++  else
++    { // gimple_call_fndecl(g) null
++      if (!callee_fn)
++        return UNKNOWN_FUNCTION_BODY_COST;
++      if (virtual_method_call_p (callee_fn))
++        {
++          bool final = false;
++          auto targets
++              = possible_polymorphic_call_targets (callee_fn, g, &final, 0);
++          unsigned HOST_WIDE_INT polymorphic_call_cost_sum = 0;
++          auto n = targets.length ();
++          if (n <= 0)
++            return UNKNOWN_FUNCTION_BODY_COST;
++          for (auto i = 0; i < n; ++i)
++            {
++              auto target = targets[i];
++              polymorphic_call_cost_sum += target->bbr_cost != 0
++                                               ? target->bbr_cost
++                                               : UNKNOWN_FUNCTION_BODY_COST;
++            }
++          auto avg_polymorphic_call_cost = polymorphic_call_cost_sum / n;
++          return avg_polymorphic_call_cost / fun_exec_divisor;
++        }
++      // no virtual method
++      return UNKNOWN_FUNCTION_BODY_COST;
++    }
++  return UNKNOWN_FUNCTION_BODY_COST;
++}
++
++static unsigned HOST_WIDE_INT
++bb_get_bbr_cost (basic_block bb)
++{
++  if (!bb)
++    return 0;
++  unsigned HOST_WIDE_INT cost = 0;
++  for (auto gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi);
++       gsi_next_nondebug (&gsi))
++    {
++      auto g = gsi_stmt (gsi);
++      cost += gimple_get_bbr_cost (g);
++    }
++  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      auto g = gsi_stmt (gsi);
++      cost += gimple_get_bbr_cost (g);
++    }
++  return cost;
++}
++
++void
++aggr_bbr::get_bbr_cost ()
++{
++  for (auto i = 0; i < br_reorder_chain.length (); ++i)
++    {
++      auto bb_info = br_reorder_chain[i];
++      auto bb = bb_info->bb;
++      bb_info->cost = bb_get_bbr_cost (bb);
++    }
++}
++
++// set the positions for basic block reorder
++bool
++aggr_bbr::set_bbr_position ()
++{
++  // reorder the basic blocks by the cost from small to large
++  auto n = br_reorder_chain.length ();
++  auto cost = br_reorder_chain[0]->cost;
++  for (auto i = 0; i < n; ++i)
++    {
++      if (cost != br_reorder_chain[i]->cost)
++        {
++          cost = 0;
++          break;
++        }
++    }
++  if (cost == 0) // have different bb cost
++    br_reorder_chain.qsort (bb_cost_cmp);
++  for (auto i = 0; i < n; ++i)
++    {
++      if (br_reorder_chain[i]->bb != ori_br_reorder_chain[i]->bb)
++        return true;
++    }
++  return false;
++}
++
++// sink transform for basic block reorder
++void
++aggr_bbr::bbr_sink_transform (basic_block chain_pred,
++                              basic_block chain_same_succ,
++                              basic_block chain_diff_succ)
++{
++  edge e;
++  edge_iterator ei;
++
++  // redirect the edge of br_reorder chain
++  for (auto i = 0; i < br_reorder_chain.length (); ++i)
++    {
++      auto bb = br_reorder_chain[i]->bb;
++      e = EDGE_SUCC (bb, 0)->dest == chain_same_succ ? EDGE_SUCC (bb, 1)
++                                                     : EDGE_SUCC (bb, 0);
++      auto new_dest = i == br_reorder_chain.length () - 1
++                          ? chain_diff_succ
++                          : br_reorder_chain[i + 1]->bb;
++      redirect_edge_and_branch (e, new_dest);
++    }
++  e = EDGE_PRED (ori_br_reorder_chain[0]->bb, 0);
++  auto new_dest = br_reorder_chain[0]->bb;
++  redirect_edge_and_branch (e, new_dest);
++
++  // fixup the function dominator tree
++  basic_block bb;
++  auto_vec all_blocks_for_fn;
++  FOR_EACH_BB_FN (bb, cfun)
++  all_blocks_for_fn.safe_push (bb);
++  iterate_fix_dominators (CDI_DOMINATORS, all_blocks_for_fn, false);
++
++  fixup_bb_virtual_phis (chain_same_succ, chain_pred);
++  fixup_bb_virtual_phis (chain_diff_succ, chain_pred);
++}
++
++// split the bb ends with 'gswitch' into two bbs;
++basic_block
++aggr_bbr::switch_transform (basic_block chain_last_bb,
++                            basic_block chain_same_succ)
++{
++  auto gsi = gsi_last_nondebug_bb (chain_last_bb);
++  auto g = gsi_stmt (gsi);
++  auto g_code = gimple_code (g);
++  auto g_switch = dyn_cast (g);
++  auto num_nondebug_gimple = get_bb_num_nonedebug_gimple (chain_last_bb);
++  gsi_prev_nondebug (&gsi);
++  g = gsi_stmt (gsi);
++  auto e = split_block (chain_last_bb, g);
++  auto splited_switch_bb = e->dest;
++  // remove_edge(e);
++  // auto splited_switch_gimple_seq=gsi_split_seq_after(gsi);
++  gimple_seq new_if_gimple_seq = nullptr;
++  unsigned case_num = gimple_switch_num_labels (g_switch);
++  auto g_switch_index = gimple_switch_index (g_switch);
++  tree t_cond = NULL_TREE;
++  gimple *g_cond = nullptr;
++  for (auto i = 1; i < case_num; ++i)
++    { // ignore the default label
++      auto case_label = gimple_switch_label (g_switch, i);
++      auto case_value = CASE_LOW (case_label);
++      auto t_cond_i = make_ssa_name (boolean_type_node);
++      auto g_cond_i = gimple_build_assign (t_cond_i, EQ_EXPR, g_switch_index,
++                                           case_value);
++      gimple_seq_add_stmt (&new_if_gimple_seq, g_cond_i);
++      if (i == 1)
++        {
++          t_cond = t_cond_i;
++        }
++      else if (i > 1)
++        {
++          g_cond = gimple_build_assign (make_ssa_name (boolean_type_node),
++                                        TRUTH_OR_EXPR, t_cond, t_cond_i);
++          gimple_seq_add_stmt (&new_if_gimple_seq, g_cond);
++          t_cond = gimple_assign_lhs (g_cond);
++        }
++    }
++  g_cond = gimple_build_cond_from_tree (t_cond, NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&new_if_gimple_seq, g_cond);
++  gsi = gsi_last_nondebug_bb (chain_last_bb);
++  gsi_insert_seq_after (&gsi, new_if_gimple_seq, GSI_LAST_NEW_STMT);
++
++  remove_edge (e);
++  make_edge (chain_last_bb, splited_switch_bb, EDGE_TRUE_VALUE);
++  make_edge (chain_last_bb, chain_same_succ, EDGE_FALSE_VALUE);
++
++  return splited_switch_bb;
++}
++
++// analysis the two basic blocks whether has no use-def relationships between
++// each other
++bool
++aggr_bbr::two_bbr_use_def_independence_p (basic_block chain_cur_bb,
++                                          basic_block chain_next_bb)
++{
++  if (!chain_cur_bb || !chain_next_bb)
++    return false;
++  if (chain_cur_bb == chain_next_bb)
++    return false;
++
++  for (auto gsi = gsi_start_nondebug_bb (chain_cur_bb); !gsi_end_p (gsi);
++       gsi_next_nondebug (&gsi))
++    {
++      auto g = gsi_stmt (gsi);
++      imm_use_iterator imm_iter;
++      gimple *g_user;
++      auto lhs = gimple_get_lhs (g);
++      if (lhs && TREE_CODE (lhs) == SSA_NAME)
++        {
++          FOR_EACH_IMM_USE_STMT (g_user, imm_iter, lhs)
++          {
++            if (gimple_bb (g_user) == chain_next_bb)
++              return false;
++          }
++        }
++      tree var;
++      ssa_op_iter op_iter;
++      FOR_EACH_SSA_TREE_OPERAND (var, g, op_iter, SSA_OP_USE | SSA_OP_DEF)
++      {
++        auto g0 = SSA_NAME_DEF_STMT (var);
++        if (g0 && gimple_bb (g0) == chain_next_bb)
++          return false;
++      }
++    }
++  return true;
++}
++
++// analysis whether the basic_block chain_cur_bb and chain_next_bb has no data
++// dependent relationships between each other
++bool
++aggr_bbr::two_bbr_mem_independence_p (basic_block chain_bb,
++                                      basic_block chain_next_bb)
++{
++  for (auto gsi = gsi_start_nondebug_bb (chain_next_bb); !gsi_end_p (gsi);
++       gsi_next_nondebug (&gsi))
++    {
++      auto g = gsi_stmt (gsi);
++      auto g_code = gimple_code (g);
++      if (g_code != GIMPLE_ASSIGN)
++        continue;
++      auto rhs1 = gimple_assign_rhs1 (g);
++      auto mem_ref = find_mem_ref_in_tree (rhs1);
++      if (!mem_ref)
++        continue;
++      auto op0 = TREE_OPERAND (mem_ref, 0);
++      if (TREE_CODE (op0) != SSA_NAME
++          || TREE_CODE (TREE_TYPE (op0)) != POINTER_TYPE)
++        continue;
++      // if this pointer has been judged in the previous chain bbs, we cannot
++      // put chain_next_bb into the chain
++      gimple *g_user;
++      imm_use_iterator imm_iter;
++      FOR_EACH_IMM_USE_STMT (g_user, imm_iter, op0)
++      {
++        if (gimple_bb (g_user) != chain_bb)
++          continue;
++        auto user_code = gimple_code (g_user);
++        if (user_code != GIMPLE_ASSIGN && user_code != GIMPLE_COND)
++          continue;
++        auto assign_p = is_gimple_assign (g_user);
++        auto cmp_lhs = assign_p ? gimple_assign_rhs1 (g_user)
++                                : gimple_cond_lhs (g_user);
++        auto cmp_rhs = assign_p ? gimple_assign_rhs2 (g_user)
++                                : gimple_cond_rhs (g_user);
++        auto cmp_code = assign_p ? gimple_assign_rhs_code (g_user)
++                                 : gimple_cond_code (g_user);
++        if (cmp_lhs == op0 && tree_fits_uhwi_p (cmp_rhs)
++            && integer_zerop (cmp_rhs))
++          return false;
++      }
++    }
++  return true;
++}
++
++static unsigned HOST_WIDE_INT
++get_max_val (unsigned int size)
++{
++  if (size == 0 || size == 1)
++    return size;
++  unsigned HOST_WIDE_INT ret = 1;
++  for (auto i = 0; i < size - 1; ++i)
++    ret |= ret << 1;
++  return ret;
++}
++
++// analysis whether the basic_block chain_cur_bb and chain_next_bb are both
++// available for reorder
++bool
++aggr_bbr::two_bbr_avail_p (basic_block chain_cur_bb, basic_block chain_next_bb,
++                           basic_block chain_same_succ, int mode)
++{
++  if (chain_cur_bb == chain_next_bb)
++    return false;
++  if (!one_bbr_avail_p (chain_cur_bb, 0))
++    return false;
++  if (!one_bbr_avail_p (chain_next_bb, mode))
++    return false;
++  if (!two_bbr_use_def_independence_p (chain_cur_bb, chain_next_bb))
++    return false;
++  if (!two_bbr_mem_independence_p (chain_cur_bb, chain_next_bb))
++    return false;
++  edge e0, e1;
++  edge_iterator ei0, ei1;
++  FOR_EACH_EDGE (e0, ei0, chain_cur_bb->succs)
++  {
++    FOR_EACH_EDGE (e1, ei1, chain_next_bb->succs)
++    {
++      auto chain_cur_bb_succ = e0->dest, chain_next_bb_succ = e1->dest;
++      if (chain_cur_bb_succ == chain_next_bb_succ
++          && chain_cur_bb_succ == chain_same_succ)
++        return true;
++    }
++  }
++  return false;
++}
++
++bool
++aggr_bbr::bbr_successfully_p (basic_block chain_cur_bb)
++{
++  if (!chain_cur_bb)
++    return false;
++  gswitch *g_switch = nullptr;
++  auto transform_successfully = false;
++  unsigned HOST_WIDE_INT n = 0;
++
++  // clear the processing branch reorder chain
++  for (auto i = 0; i < br_reorder_chain.length (); ++i)
++    delete br_reorder_chain[i];
++  br_reorder_chain.release ();
++
++  if (processed_bbs.contains (chain_cur_bb))
++    return false;
++  auto chain_pred = single_pred (chain_cur_bb);
++  auto g = gsi_stmt (gsi_last_nondebug_bb (chain_cur_bb));
++
++  // chain_cur_bb must ends with 'gcond'
++  if (!one_bbr_avail_p (chain_cur_bb, 0))
++    return false;
++
++  br_reorder_chain.safe_push (new bb_info (chain_cur_bb));
++
++  // find the chain_next_bb and chain_same_succ
++  basic_block chain_next_bb = nullptr, chain_same_succ = nullptr;
++  for (auto i = 0; i < 2; ++i)
++    {
++      auto chain_cur_bb_succ0 = EDGE_SUCC (chain_cur_bb, i)->dest;
++      auto chain_cur_bb_succ1 = EDGE_SUCC (chain_cur_bb, i == 0 ? 1 : 0)->dest;
++      for (auto j = 0; j < EDGE_COUNT (chain_cur_bb_succ0->succs); ++j)
++        {
++          auto succ = EDGE_SUCC (chain_cur_bb_succ0, j)->dest;
++          if (succ == chain_cur_bb_succ1)
++            {
++              chain_next_bb = chain_cur_bb_succ0;
++              chain_same_succ = chain_cur_bb_succ1;
++              break;
++            }
++        }
++    }
++  if (!(chain_next_bb && chain_same_succ))
++    return false;
++
++  // 1.find the the chain bb which ends with 'gcond' as much as possible
++find_longest_chain_ends_with_gcond:
++  while (br_reorder_chain.length () < chain_max_num_bb)
++    {
++      if (!one_bbr_avail_p (chain_next_bb, 0))
++        goto analyze_gswitch_bb;
++      for (auto i = 0; i < br_reorder_chain.length (); ++i)
++        {
++          auto bb
++              = br_reorder_chain[i]->bb; // bb0 is the processing basic block
++          if (!two_bbr_avail_p (bb, chain_next_bb, chain_same_succ, 0))
++            goto analyze_gswitch_bb;
++        }
++      br_reorder_chain.safe_push (new bb_info (chain_next_bb));
++      chain_next_bb = EDGE_SUCC (chain_next_bb, 0)->dest == chain_same_succ
++                          ? EDGE_SUCC (chain_next_bb, 1)->dest
++                          : EDGE_SUCC (chain_next_bb, 0)->dest;
++      for (auto j = 0; j < br_reorder_chain.length (); j++)
++        {
++          if (chain_next_bb == br_reorder_chain[j]->bb)
++            goto analyze_gswitch_bb;
++        }
++    }
++
++  // 2.the analysis for the basic block endswith 'gswitch'
++analyze_gswitch_bb:
++  g = gsi_stmt (gsi_last_nondebug_bb (chain_next_bb));
++  if (!g || gimple_code (g) != GIMPLE_SWITCH
++      || !one_bbr_avail_p (chain_next_bb, 1))
++    goto transform;
++  for (auto i = 0; i != br_reorder_chain.length (); ++i)
++    {
++      auto bb = br_reorder_chain[i]->bb;
++      if (!two_bbr_avail_p (bb, chain_next_bb, chain_same_succ, 1))
++        goto transform;
++    }
++  g_switch
++      = dyn_cast (gsi_stmt (gsi_last_nondebug_bb (chain_next_bb)));
++  if (gimple_switch_default_bb (f, g_switch)
++      != chain_same_succ) // this switch default case bb must be chain_same_bb
++    goto transform;
++  // bb of cases except default must have a single succ==chain_same_bb
++  for (unsigned i = 1; i < gimple_switch_num_labels (g_switch); ++i)
++    {
++      auto case_bb = gimple_switch_label_bb (f, g_switch, i);
++      if (!(single_succ_p (case_bb)
++            && single_succ (case_bb) == chain_same_succ))
++        {
++          goto transform;
++        }
++    }
++  // we find the chain last bb ends with 'gswtich'
++  br_reorder_chain.safe_push (new bb_info (chain_next_bb));
++
++  // 3.transform
++transform:
++  n = br_reorder_chain.length ();
++  for (auto i = 0; i < n; ++i)
++    processed_bbs.safe_push (br_reorder_chain[i]->bb);
++  if (n < 2)
++    return false;
++  ori_br_reorder_chain = br_reorder_chain.copy ();
++  ++num_chain;
++  if (dump_file)
++    {
++      fprintf (
++          dump_file,
++          "\n======= find #%d ori branch-reorder-chain of %d basic blocks:\n",
++          num_chain - 1, n);
++      dump_chain (dump_file, ori_br_reorder_chain);
++    }
++  return bbr_transform_1 (chain_pred, chain_same_succ, chain_next_bb);
++}
++
++static int
++bbr_transform ()
++{
++  auto abbr = aggr_bbr (cfun);
++  basic_block bb;
++  auto cfg_change = false;
++  auto ret = 0;
++  // for debug
++  if (dump_file)
++    {
++      fprintf (dump_file, "=== ori function body:\n");
++      dump_function_to_file (current_function_decl, dump_file, dump_flags);
++    }
++
++  do
++    {
++      cfg_change = false;
++      FOR_EACH_BB_FN (bb, cfun)
++      {
++        if (abbr.bbr_successfully_p (bb))
++          {
++            cfg_change = true;
++            ret = 1;
++            break;
++          }
++      }
++    }
++  while (cfg_change);
++  return ret;
++}
++
++static bool
++cn_bbr_data_has_been_set_p (cgraph_node *n)
++{
++  if (!n)
++    return false;
++  if (n->side_effect_state != 3)
++    return true;
++  return false;
++}
++
++static void
++set_cn_side_effect_state (cgraph_node *n, int state)
++{
++  auto s = n->side_effect_state;
++  if (s == 0 || state == 3)
++    return;
++  n->side_effect_state = s == 3 ? state : s;
++}
++
++static void
++add_cn_bbr_cost (cgraph_node *n, unsigned HOST_WIDE_INT cost)
++{
++  if (!n)
++    return;
++  n->bbr_cost += cost ? cost : UNKNOWN_FUNCTION_BODY_COST;
++}
++
++static bool
++tree_local_p (tree t)
++{
++  gcc_assert (t);
++  auto lds = cfun->local_decls;
++  if (lds && lds->contains (t))
++    return true;
++  return false;
++}
++
++static void
++set_cn_side_effect_aux (cgraph_node *n, int i)
++{
++  if (!n || i < 0)
++    return;
++  auto tmp = n->side_effect_aux;
++  auto bitwidth = sizeof (tmp) * 8;
++  if (i >= bitwidth)
++    return;
++  tmp = 1;
++  n->side_effect_aux |= (tmp << i);
++}
++
++static int
++find_cfun_arg_index (tree t)
++{
++  if (!t)
++    return -1;
++  auto args = DECL_ARGUMENTS (current_function_decl);
++  int i = 0;
++  for (tree arg = args; arg; arg = TREE_CHAIN (arg), ++i)
++    {
++      if (t == arg)
++        return i;
++    }
++  return -1;
++}
++
++static tree
++find_decl_in_tree (tree t, gimple *g = nullptr)
++{
++  if (!t)
++    return NULL_TREE;
++  auto t_code = TREE_CODE (t);
++  switch (t_code)
++    {
++    case INTEGER_CST:
++    case RESULT_DECL:
++    case STRING_CST:
++    case CONST_DECL:
++      return NULL_TREE;
++
++    case VAR_DECL:
++    case PARM_DECL:
++      return t;
++    case SSA_NAME:
++      {
++        auto def_stmt = SSA_NAME_DEF_STMT (t);
++        auto type_code = TREE_CODE (TREE_TYPE (t));
++        if (def_stmt && type_code == POINTER_TYPE
++            && is_gimple_assign (def_stmt) && def_stmt != g)
++          return find_decl_in_tree (gimple_assign_rhs1 (def_stmt), g);
++        auto var = SSA_NAME_VAR (t);
++        if (var)
++          return find_decl_in_tree (var, g);
++        break;
++      }
++    default:
++      return find_decl_in_tree (TREE_OPERAND (t, 0), g);
++    } // switch end
++  return NULL_TREE;
++}
++
++static void
++analyze_gassign_side_effect (gimple *g, cgraph_node *n)
++{
++  if (!g)
++    return;
++  auto lhs = gimple_get_lhs (g);
++  auto decl = find_decl_in_tree (lhs, g);
++  auto in_mem_ref = find_mem_ref_in_tree (lhs) ? true : false;
++  if (!decl)
++    return;
++  auto t_code = TREE_CODE (decl);
++  switch (t_code)
++    {
++    case VAR_DECL:
++      {
++        if (!tree_local_p (decl))
++          set_cn_side_effect_state (n, 2);
++        break;
++      }
++    case PARM_DECL:
++      {
++        auto type_code = TREE_CODE (TREE_TYPE (decl));
++        if (in_mem_ref
++            && (type_code == POINTER_TYPE || type_code == REFERENCE_TYPE))
++          {
++            set_cn_side_effect_state (n, 1);
++            auto idx = find_cfun_arg_index (decl);
++            if (idx >= 0)
++              set_cn_side_effect_aux (n, idx);
++          }
++        break;
++      }
++    }
++}
++
++auto_vec
++get_cn_side_effect_aux_arr (cgraph_node *n, gimple *g = nullptr)
++{
++  auto_vec v;
++  if (!n)
++    return v;
++  auto tmp = n->side_effect_aux;
++  tmp = 1;
++  auto bitwidth = sizeof (tmp) * 8;
++  auto num_args = g ? gimple_call_num_args (g) : bitwidth - 1;
++  auto max_num_args = bitwidth - 1 > num_args ? num_args : (bitwidth - 1);
++
++  for (auto i = 0; i < max_num_args; ++i)
++    {
++      if ((n->side_effect_aux & (tmp << i)))
++        {
++          v.safe_push (i);
++        }
++    }
++  if (v.length () == 0)
++    return v;
++  if (n->side_effect_aux & (tmp << bitwidth - 1))
++    {
++      for (auto i = v[v.length () - 1] + 1; i < max_num_args; i++)
++        v.safe_push (i);
++    }
++  return v;
++}
++
++static void
++analyze_bbr_cost_for_cgraph_edge (cgraph_node *caller, cgraph_node *callee)
++{
++  if (!callee)
++    {
++      add_cn_bbr_cost (caller, UNKNOWN_FUNCTION_BODY_COST);
++      return;
++    }
++  callee = callee->function_symbol ();
++  if (!callee->bbr_cost)
++    {
++      add_cn_bbr_cost (caller, UNKNOWN_FUNCTION_BODY_COST);
++      return;
++    }
++  add_cn_bbr_cost (caller, callee->bbr_cost);
++}
++
++static void
++analyze_side_effect_for_cgraph_edge (cgraph_node *caller, cgraph_node *callee,
++                                     gimple *g)
++{
++  if (!caller || caller->side_effect_state == 2
++      || caller->side_effect_state == 0 || caller == callee)
++    return;
++  if (!callee)
++    {
++      set_cn_side_effect_state (caller, 2);
++      return;
++    }
++  // have non-null callee
++  callee = callee->function_symbol ();
++  auto state = callee->side_effect_state;
++  switch (state)
++    {
++    case 1:
++      {
++        auto_vec side_effect_arg_indexs
++            = get_cn_side_effect_aux_arr (callee, g);
++        for (auto i = 0; i < side_effect_arg_indexs.length (); ++i)
++          {
++            auto idx = side_effect_arg_indexs[i];
++            auto arg = gimple_call_arg (g, idx);
++            auto decl = find_decl_in_tree (arg);
++            if (!decl)
++              continue;
++            auto t_code = TREE_CODE (decl);
++            switch (t_code)
++              {
++              case VAR_DECL:
++                {
++                  if (!tree_local_p (decl))
++                    set_cn_side_effect_state (caller, 2);
++                  break;
++                }
++              case PARM_DECL:
++                {
++                  auto type_code = TREE_CODE (TREE_TYPE (decl));
++                  if (type_code == POINTER_TYPE || type_code == REFERENCE_TYPE)
++                    {
++                      set_cn_side_effect_state (caller, 1);
++                      auto idx = find_cfun_arg_index (decl);
++                      if (idx >= 0)
++                        set_cn_side_effect_aux (caller, idx);
++                    }
++                  break;
++                }
++              } // switch code end
++          }     // for end
++        break;
++      } // case 1 end
++    case 2:
++      set_cn_side_effect_state (caller, 2);
++      break;
++    } // swtich end;
++}
++
++static void
++analyze_local_side_effect_parm_indexes (cgraph_node *n, vector &v)
++{
++  if (!n)
++    return;
++  auto tmp = n->side_effect_aux;
++  tmp = 1;
++  for (auto i = 0; i < v.size (); ++i)
++    n->side_effect_aux |= (tmp << v[i]);
++}
++
++static void
++analyze_builtin_fun_side_effect (cgraph_node *n)
++{
++  if (!n || !n->decl)
++    return;
++  auto dfc = DECL_FUNCTION_CODE (n->decl);
++  if (dfc == BUILT_IN_NONE)
++    return;
++  if (dump_file)
++    fprintf (dump_file, "this cgraph_node is a builtin function\n");
++  if (side_effect_builtin_funs.find (dfc) != side_effect_builtin_funs.end ())
++    {
++      auto side_effect_info = side_effect_builtin_funs[dfc];
++      if (side_effect_info.first)
++        set_cn_side_effect_state (n, 2);
++      else
++        {
++          set_cn_side_effect_state (n, 1);
++          analyze_local_side_effect_parm_indexes (n, side_effect_info.second);
++        }
++    }
++}
++
++static void
++analyze_customized_fun_side_effect (cgraph_node *n)
++{
++  if (!n || !n->decl)
++    return;
++  auto node_name = n->name ();
++  if (side_effect_customized_funs.find (node_name)
++      != side_effect_customized_funs.end ())
++    {
++      auto side_effect_info = side_effect_customized_funs[node_name];
++      if (side_effect_info.first)
++        set_cn_side_effect_state (n, 2);
++      else
++        {
++          set_cn_side_effect_state (n, 1);
++          analyze_local_side_effect_parm_indexes (n, side_effect_info.second);
++        }
++    }
++}
++
++static void
++set_cn_bbr_data_with_fun_body (cgraph_node *n)
++{
++  // walk the gimple of function body
++  basic_block bb;
++  FOR_EACH_BB_FN (bb, cfun)
++  {
++    for (auto gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi);
++         gsi_next_nondebug (&gsi))
++      {
++        auto g = gsi_stmt (gsi);
++        auto g_code = gimple_code (g);
++        if (g_code == GIMPLE_LABEL)
++          continue;
++        n->bbr_cost += 1;
++        auto lhs = gimple_get_lhs (g);
++        switch (g_code)
++          {
++          case GIMPLE_ASSIGN:
++            {
++              auto in_mem_ref = false;
++              analyze_gassign_side_effect (g, n);
++              break;
++            } // GIMPLE_ASSIGN end
++          case GIMPLE_CALL:
++            {
++              add_cn_bbr_cost (n, GIMPLE_CALL_INIT_COST);
++              auto g_call = dyn_cast (g);
++              auto callee_fn = gimple_call_fn (g_call);
++              auto callee_fndecl = gimple_call_fndecl (g_call);
++              if (callee_fndecl)
++                {
++                  auto n0 = cgraph_node::get (callee_fndecl);
++                  analyze_side_effect_for_cgraph_edge (n, n0, g);
++                  analyze_bbr_cost_for_cgraph_edge (n, n0);
++                }
++              else
++                { // gimple_call_fndecl(g) is null
++                  if (!callee_fn)
++                    {
++                      set_cn_side_effect_state (n, 2);
++                      add_cn_bbr_cost (n, UNKNOWN_FUNCTION_BODY_COST);
++                      continue;
++                    }
++                  // gimple_call_fn(g) is non-null
++                  if (virtual_method_call_p (callee_fn))
++                    {
++                      bool final = false;
++                      auto targets = possible_polymorphic_call_targets (
++                          callee_fn, g, &final, 0);
++                      auto len = targets.length ();
++                      // polymorphic_call_targets num is 0
++                      if (len == 0)
++                        {
++                          set_cn_side_effect_state (n, 2);
++                          add_cn_bbr_cost (n, UNKNOWN_FUNCTION_BODY_COST);
++                          continue;
++                        }
++                      // polymorphic_call_targets num > 0
++                      unsigned HOST_WIDE_INT polymorphic_call_cost_sum = 0;
++                      for (auto i = 0; i < len; ++i)
++                        {
++                          auto target = targets[i];
++                          auto rtarget = target->function_symbol ();
++                          analyze_side_effect_for_cgraph_edge (n, rtarget, g);
++                          polymorphic_call_cost_sum
++                              += rtarget->bbr_cost != 0
++                                     ? rtarget->bbr_cost
++                                     : UNKNOWN_FUNCTION_BODY_COST;
++                        }
++                      unsigned HOST_WIDE_INT avg_polymorphic_call_cost
++                          = polymorphic_call_cost_sum / len;
++                      add_cn_bbr_cost (n, avg_polymorphic_call_cost);
++                      continue;
++                    }
++                  // no virtual method
++                  set_cn_side_effect_state (n, 2);
++                  add_cn_bbr_cost (n, UNKNOWN_FUNCTION_BODY_COST);
++                  continue;
++                }
++            } // GIMPLE_CALL
++          }   // switch
++      }       // for each gimple
++
++    for (auto gsi = gsi_start_nonvirtual_phis (bb); !gsi_end_p (gsi);
++         gsi_next (&gsi))
++      n->bbr_cost += 1;
++  } // for bb end
++}
++
++static void
++dump_cn_bbr_data (cgraph_node *n)
++{
++  if (!n)
++    return;
++  fprintf (dump_file, "side effect state: ");
++  switch (n->side_effect_state)
++    {
++    case 0:
++      fprintf (dump_file, "NO_SIDE_EFFECT\n");
++      break;
++    case 1:
++      {
++        fprintf (dump_file, "LOCAL_SIDE_EFFECT\n");
++        fprintf (dump_file,
++                 "indexes of parameters which resulted in local side effect:");
++        auto_vec local_side_effect_parm_indexes
++            = get_cn_side_effect_aux_arr (n);
++        for (auto i = 0; i < local_side_effect_parm_indexes.length (); ++i)
++          fprintf (dump_file, " %d", local_side_effect_parm_indexes[i]);
++        fprintf (dump_file, "\n");
++        break;
++      }
++    case 2:
++      fprintf (dump_file, "GLOBAL_SIDE_EFFECT\n");
++      break;
++    case 3:
++      fprintf (dump_file, "PENDING_SIDE_EFFECT\n");
++      break;
++    }
++  fprintf (dump_file, "bbr_cost: %lu\n", n->bbr_cost);
++}
++
++static void
++set_cn_bbr_data (cgraph_node *n)
++{
++  if (!n || !n->decl || n->alias || n->thunk)
++    return;
++  if (dump_file)
++    fprintf (dump_file,
++             "\n\n\n====================== start to set bbr data of "
++             "cgraph_node(name==%s, cgraph_uid==%d)\n",
++             n->name (), n->get_uid ());
++  if (cn_bbr_data_has_been_set_p (n))
++    {
++      if (dump_file)
++        fprintf (dump_file, "cgraph_node bbr data has been set\n");
++      return;
++    }
++  analyze_builtin_fun_side_effect (n);
++  analyze_customized_fun_side_effect (n);
++  if (!n->definition)
++    { // no definition
++      if (dump_file)
++        fprintf (dump_file, "this cgraph_node has no definiton\n");
++      set_cn_side_effect_state (n, 0);
++      add_cn_bbr_cost (n, UNKNOWN_FUNCTION_BODY_COST);
++    }
++  else
++    { // have definition
++      n->get_untransformed_body ();
++      auto fun = DECL_STRUCT_FUNCTION (n->decl);
++      gcc_assert (fun);
++      push_cfun (fun);
++      if (dump_file)
++        {
++          dump_function_header (dump_file, n->decl, dump_flags);
++          dump_function_to_file (n->decl, dump_file, dump_flags);
++        }
++      set_cn_bbr_data_with_fun_body (n);
++      pop_cfun ();
++    }
++  if (dump_file)
++    {
++      fprintf (dump_file, "=== set cgraph node \"%s(%s)\" bbr info:\n",
++               n->name (), n->asm_name ());
++      dump_cn_bbr_data (n);
++    }
++}
++
++static void
++fixup_cn_bbr_data_with_fun_body (cgraph_node *n)
++{
++  basic_block bb;
++  FOR_EACH_BB_FN (bb, cfun)
++  {
++    for (auto gsi = gsi_start_nondebug_bb (bb); !gsi_end_p (gsi);
++         gsi_next_nondebug (&gsi))
++      {
++        auto g = gsi_stmt (gsi);
++        if (!is_gimple_call (g))
++          continue;
++        auto callee_fn = gimple_call_fn (g);
++        auto callee_fndecl = gimple_call_fndecl (g);
++        if (callee_fndecl)
++          {
++            auto n0 = cgraph_node::get (callee_fndecl);
++            analyze_side_effect_for_cgraph_edge (n, n0, g);
++          }
++        else
++          { // gimple_call_fndecl(g) is null
++            if (!callee_fn)
++              {
++                set_cn_side_effect_state (n, 2);
++                continue;
++              }
++            // gimple_call_fn(g) is non-null
++            if (virtual_method_call_p (callee_fn))
++              {
++                bool final = false;
++                auto targets = possible_polymorphic_call_targets (callee_fn, g,
++                                                                  &final, 0);
++                auto len = targets.length ();
++                for (auto i = 0; i < len; ++i)
++                  analyze_side_effect_for_cgraph_edge (n, targets[i], g);
++                continue;
++              }
++            // no virtual method
++            set_cn_side_effect_state (n, 2);
++          }
++      }
++  }
++}
++
++// used to fixup the cgraph_node side_effect info because some cgraph_node
++// side_effect info cannot be decided in the first time while its callee is
++// decided after it
++static void
++fixup_cn_bbr_data (cgraph_node *n)
++{
++  if (!n)
++    return;
++  if (!n || !n->decl || n->alias || n->thunk)
++    return;
++  if (dump_file)
++    fprintf (dump_file,
++             "\n\n\n====================== start to fixup bbr data of "
++             "cgraph_node(name==%s, cgraph_uid==%d)\n",
++             n->name (), n->get_uid ());
++  if (cn_bbr_data_has_been_set_p (n))
++    {
++      if (dump_file)
++        fprintf (dump_file, "cgraph_node bbr data has been set\n");
++      return;
++    }
++  gcc_assert (n->definition);
++  auto fndecl = n->decl;
++  n->get_untransformed_body ();
++  // definitely have non-null struct function and cfg info
++  auto fun = DECL_STRUCT_FUNCTION (fndecl);
++  push_cfun (fun);
++  if (dump_file)
++    {
++      dump_function_header (dump_file, fndecl, dump_flags);
++      dump_function_to_file (fndecl, dump_file, dump_flags);
++    }
++  fixup_cn_bbr_data_with_fun_body (n);
++  pop_cfun ();
++  if (!cn_bbr_data_has_been_set_p (n))
++    set_cn_side_effect_state (n, 0);
++  if (dump_file)
++    {
++      fprintf (dump_file, "=== fixup cgraph node \"%s(%s)\" bbr info:\n",
++               n->name (), n->asm_name ());
++      dump_cn_bbr_data (n);
++    }
++}
++
++static unsigned int
++ipa_bbr ()
++{
++  cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
++  auto order_pos = ipa_reverse_postorder (order);
++
++  for (auto i = order_pos - 1; i >= 0; i--)
++    set_cn_bbr_data (order[i]);
++  for (auto i = order_pos - 1; i >= 0; i--)
++    fixup_cn_bbr_data (order[i]);
++  free (order);
++  return 0;
++}
++
++namespace
++{
++
++const pass_data pass_data_ipa_bbr = {
++  IPA_PASS,              /* type */
++  "bbr",                 /* name */
++  OPTGROUP_OTHER,        /* optinfo_flags */
++  TV_IPA_BBR,            /* tv_id */
++  (PROP_ssa | PROP_cfg), /* properties_required */
++  0,                     /* properties_provided */
++  0,                     /* properties_destroyed */
++  0,                     /* todo_flags_start */
++  0,                     /* todo_flags_finish */
++};
++
++class pass_ipa_bbr : public ipa_opt_pass_d
++{
++public:
++  pass_ipa_bbr (gcc::context *ctxt)
++      : ipa_opt_pass_d (pass_data_ipa_bbr, ctxt, NULL, /* generate_summary */
++                        NULL,                          /* write_summary */
++                        NULL,                          /* read_summary */
++                        NULL, /* write_optimization_summary */
++                        NULL, /* read_optimization_summary */
++                        NULL, /* stmt_fixup */
++                        0,    /* function_transform_todo_flags_start */
++                        NULL, /* function_transform */
++                        NULL) /* variable_transform */
++  {
++  }
++
++  virtual bool
++  gate (function *)
++  {
++    return flag_ipa_bbr && flag_tree_ter && optimize && in_lto_p;
++  }
++
++  virtual unsigned int
++  execute (function *)
++  {
++    return ipa_bbr ();
++  }
++}; // class pass_ipa_bbr
++
++const pass_data pass_data_bbr = {
++  GIMPLE_PASS,    /* type */
++  "bbr",          /* name */
++  OPTGROUP_OTHER, /* optinfo_flags */
++  TV_TREE_BBR,    /* tv_id */
++  /* PROP_no_crit_edges is ensured by running split_edges_for_insertion in
++     pass_data_bbr::execute ().  */
++  (PROP_cfg | PROP_ssa), /* properties_required */
++  0,                     /* properties_provided */
++  0,                     /* properties_destroyed */
++  0,                     /* todo_flags_start */
++  TODO_update_ssa,       /* todo_flags_finish */
++};
++
++class pass_bbr : public gimple_opt_pass
++{
++public:
++  pass_bbr (gcc::context *ctxt) : gimple_opt_pass (pass_data_bbr, ctxt) {}
++
++  /* opt_pass methods: */
++  opt_pass *
++  clone ()
++  {
++    return new pass_bbr (m_ctxt);
++  }
++
++  virtual bool
++  gate (function *)
++  {
++    return flag_ipa_bbr && flag_tree_ter && optimize && in_lto_p;
++  }
++
++  virtual unsigned int
++  execute (function *)
++  {
++    unsigned int todo = 0;
++    todo = bbr_transform ();
++    todo |= execute_fixup_cfg ();
++    return todo;
++  }
++};
++
++} // anon namespace
++
++ipa_opt_pass_d *
++make_pass_ipa_bbr (gcc::context *ctxt)
++{
++  return new pass_ipa_bbr (ctxt);
++}
++
++gimple_opt_pass *
++make_pass_bbr (gcc::context *ctxt)
++{
++  return new pass_bbr (ctxt);
++}
+diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc
+index 237743ef0ba..49eb2a768f9 100644
+--- a/gcc/lto-cgraph.cc
++++ b/gcc/lto-cgraph.cc
+@@ -503,6 +503,8 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
+     section = "";
+ 
+   streamer_write_hwi_stream (ob->main_stream, node->tp_first_run);
++  streamer_write_uhwi_stream (ob->main_stream, node->side_effect_aux);
++  streamer_write_uhwi_stream (ob->main_stream, node->bbr_cost);
+ 
+   bp = bitpack_create (ob->main_stream);
+   bp_pack_value (&bp, node->local, 1);
+@@ -557,6 +559,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
+ 		   info next time we process the file.  */
+ 		flag_incremental_link ? LDPR_UNKNOWN : node->resolution);
+   bp_pack_value (&bp, node->split_part, 1);
++  bp_pack_value (&bp, node->side_effect_state, 2);
+   streamer_write_bitpack (&bp);
+   streamer_write_data_stream (ob->main_stream, section, strlen (section) + 1);
+ 
+@@ -1211,6 +1214,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data,
+   node->resolution = bp_unpack_enum (bp, ld_plugin_symbol_resolution,
+ 				     LDPR_NUM_KNOWN);
+   node->split_part = bp_unpack_value (bp, 1);
++  node->side_effect_state = bp_unpack_value (bp, 2);
+   verify_node_partition (node);
+ }
+ 
+@@ -1302,7 +1306,8 @@ input_node (struct lto_file_decl_data *file_data,
+ 		    "node with uid %d", node->get_uid ());
+ 
+   node->tp_first_run = streamer_read_uhwi (ib);
+-
++  node->side_effect_aux=streamer_read_uhwi(ib);
++  node->bbr_cost=streamer_read_uhwi(ib);
+   bp = streamer_read_bitpack (ib);
+ 
+   input_overwrite_node (file_data, node, tag, &bp, &has_thunk_info);
+diff --git a/gcc/opt-functions.awk b/gcc/opt-functions.awk
+index 0288fb68adc..a832eed0bf7 100644
+--- a/gcc/opt-functions.awk
++++ b/gcc/opt-functions.awk
+@@ -179,7 +179,7 @@ function switch_bit_fields (flags)
+ 	  flag_init("ToLower", flags) \
+ 	  byte_size_flag
+ 
+-	if (var_name(flags) != "flag_array_widen_compare" && flag_set_p("Report", flags))
++  if (flag_set_p("Report", flags))
+ 	    print "#error Report option property is dropped"
+ 
+ 	sub(", $", "", result)
+diff --git a/gcc/passes.def b/gcc/passes.def
+index 8dbb7983e3e..8597be472d8 100644
+--- a/gcc/passes.def
++++ b/gcc/passes.def
+@@ -156,6 +156,7 @@ along with GCC; see the file COPYING3.  If not see
+   NEXT_PASS (pass_ipa_profile);
+   NEXT_PASS (pass_ipa_icf);
+   NEXT_PASS (pass_ipa_devirt);
++  NEXT_PASS (pass_ipa_bbr);
+   NEXT_PASS (pass_ipa_cp);
+   NEXT_PASS (pass_ipa_sra);
+   NEXT_PASS (pass_ipa_cdtor_merge);
+@@ -260,6 +261,7 @@ along with GCC; see the file COPYING3.  If not see
+       NEXT_PASS (pass_lim);
+       NEXT_PASS (pass_walloca, false);
+       NEXT_PASS (pass_pre);
++      NEXT_PASS (pass_bbr);
+       NEXT_PASS (pass_sink_code, false /* unsplit edges */);
+       NEXT_PASS (pass_sancov);
+       NEXT_PASS (pass_asan);
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-0.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-0.C
+new file mode 100644
+index 00000000000..c3a2c572877
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-0.C
+@@ -0,0 +1,27 @@
++// we should be able to reorder f0() and rand()
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++__attribute__ ((noinline)) bool
++f0 ()
++{
++  int a = rand ();
++  a += 1;
++  if (a > 123456)
++    return true;
++  return false;
++}
++
++int
++main ()
++{
++  if (f0 () && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-1.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-1.C
+new file mode 100644
+index 00000000000..6db0a935c28
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-1.C
+@@ -0,0 +1,28 @@
++// f0() has a local side effect; cannot reorder
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++__attribute__ ((noinline)) bool
++f0 (int *ap)
++{
++  *ap = rand ();
++  *ap += 1;
++  if ((*ap) > 123456)
++    return true;
++  return false;
++}
++
++int
++main ()
++{
++  int a = 0;
++  if (f0 (&a) && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-10.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-10.C
+new file mode 100644
+index 00000000000..bf7b3896d5b
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-10.C
+@@ -0,0 +1,27 @@
++// f0() has a global side effect: it modifies the global variable through its
++// reference
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++#include <stdio.h>
++#include <stdlib.h>
++
++int a = 1;
++
++__attribute__ ((noinline)) bool
++f0 (int &a)
++{
++  a = rand ();
++  return a > 0 ? true : false;
++}
++
++int
++main ()
++{
++  if (f0 (a) && rand ())
++    {
++      printf ("%d\n", a);
++    }
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-11.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-11.C
+new file mode 100644
+index 00000000000..e7bfb1af096
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-11.C
+@@ -0,0 +1,29 @@
++// f0() has a local side effect, and the side-effecting parameter index is
++// not 0; cannot reorder
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++__attribute__ ((noinline)) bool
++f0 (int a, int *ap)
++{
++  *ap = rand ();
++  *ap += a;
++  if ((*ap) > 123456)
++    return true;
++  return false;
++}
++
++int
++main ()
++{
++  int a = 0;
++  if (f0 (rand (), &a) && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-12.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-12.C
+new file mode 100644
+index 00000000000..17d5bc23b55
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-12.C
+@@ -0,0 +1,36 @@
++// f0() has no side effect, but f1() has a local side effect: it modifies a
++// local variable of f0 through a pointer parameter whose index is not 0; can
++// reorder
++//
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++__attribute__ ((noinline)) void
++f1 (int a, int *ap)
++{
++  *ap = rand ();
++  *ap += a;
++}
++__attribute__ ((noinline)) bool
++f0 ()
++{
++  int a;
++  f1 (rand (), &a);
++  if (a > 123)
++    return true;
++  return false;
++}
++
++int
++main ()
++{
++  int a = 0;
++  if (f0 () && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-13.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-13.C
+new file mode 100644
+index 00000000000..7473e9a7e6d
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-13.C
+@@ -0,0 +1,35 @@
++// f0() and f1() pass the pointer parameter along, and the parameter index
++// differs between them; cannot reorder
++
++//
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++__attribute__ ((noinline)) void
++f1 (int a, int *ap)
++{
++  *ap = rand ();
++  *ap += a;
++}
++__attribute__ ((noinline)) bool
++f0 (int *ap)
++{
++  f1 (rand (), ap);
++  if (*ap > 123)
++    return true;
++  return false;
++}
++
++int
++main ()
++{
++  int a = 0;
++  if (f0 (&a) && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-14.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-14.C
+new file mode 100644
+index 00000000000..d35a8a2ca0e
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-14.C
+@@ -0,0 +1,30 @@
++// f0() has a local side effect through a call to a function with an unknown
++// body, i.e. scanf
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++__attribute__ ((noinline)) bool
++f0 (int *ap)
++{
++  *ap = rand ();
++  *ap += 1;
++  scanf ("%d\n", ap);
++  if ((*ap) > 123456)
++    return true;
++  return false;
++}
++
++int
++main ()
++{
++  int a = 0;
++  if (f0 (&a) && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-15.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-15.C
+new file mode 100644
+index 00000000000..843ef876a8c
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-15.C
+@@ -0,0 +1,30 @@
++// f0() has local side effects: it modifies more than one variable;
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++int b = 0;
++
++__attribute__ ((noinline)) bool
++f0 (int *ap, int *bp)
++{
++  *ap = rand ();
++  *bp = rand ();
++  if ((*ap) > 123456 && (*bp) < 123)
++    return true;
++  return false;
++}
++
++int
++main ()
++{
++  int a = 0;
++  if (f0 (&a, &b) && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-16.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-16.C
+new file mode 100644
+index 00000000000..fab09db3cd6
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-16.C
+@@ -0,0 +1,37 @@
++// f0() and f1() pass the pointer parameter along, and the parameter index
++// differs between them; cannot reorder
++
++//
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++__attribute__ ((noinline)) void
++f1 (int *ap, int a, int *bp)
++{
++  *ap = rand ();
++  *bp = rand ();
++  *ap += *bp + a;
++}
++__attribute__ ((noinline)) bool
++f0 (int *ap)
++{
++  int b = 0;
++  f1 (ap, rand (), &b);
++  if (*ap > 123)
++    return true;
++  return false;
++}
++
++int
++main ()
++{
++  int a = 0;
++  if (f0 (&a) && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-2.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-2.C
+new file mode 100644
+index 00000000000..1404d543c15
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-2.C
+@@ -0,0 +1,34 @@
++// f0() has no side effect, but f1() has a local side effect: it modifies a
++// local variable of f0 through a pointer parameter; can reorder
++//
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++__attribute__ ((noinline)) void
++f1 (int *ap)
++{
++  *ap = rand ();
++}
++__attribute__ ((noinline)) bool
++f0 ()
++{
++  int a;
++  f1 (&a);
++  if (a > 123)
++    return true;
++  return false;
++}
++
++int
++main ()
++{
++  int a = 0;
++  if (f0 () && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-3.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-3.C
+new file mode 100644
+index 00000000000..e4b93c964f0
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-3.C
+@@ -0,0 +1,26 @@
++// f0() has a global side effect: it modifies the global variable; cannot reorder
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++int a = 0;
++
++__attribute__ ((noinline)) bool
++f0 ()
++{
++  a = rand ();
++  return a > 123 ? true : false;
++}
++
++int
++main ()
++{
++  int a = 0;
++  if (f0 () && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-4.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-4.C
+new file mode 100644
+index 00000000000..64c976a547e
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-4.C
+@@ -0,0 +1,27 @@
++// f0() has a side effect: it modifies the global variable by dereferencing
++// its pointer;
++//
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++int a = 0;
++
++__attribute__ ((noinline)) bool
++f0 (int *ap)
++{
++  *ap = rand ();
++  return (*ap) > 123 ? true : false;
++}
++
++int
++main ()
++{
++  if (f0 (&a) && rand ())
++    printf ("%d\n", rand ());
++  return 0;
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-5.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-5.C
+new file mode 100644
+index 00000000000..2d8b383991d
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-5.C
+@@ -0,0 +1,29 @@
++// a gimple in the chain bb dereferences a pointer that is checked in the
++// previous chain bb; this bb cannot be added to the chain, cannot reorder;
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <new>
++
++__attribute__ ((noinline)) int *
++f0 ()
++{
++  if (rand () > 123)
++    return new int ();
++  return 0;
++}
++
++int
++main ()
++{
++  int *p = f0 ();
++  if (rand () && p && *p > 123)
++    {
++      printf ("%p %d\n", p, *p);
++    }
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-6.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-6.C
+new file mode 100644
+index 00000000000..fb792e6dd63
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-6.C
+@@ -0,0 +1,32 @@
++// later gimples dereference the POINTER_TYPE gimple lhs in the same bb, and
++// not to fetch data from the vtable
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++#include <stdio.h>
++#include <stdlib.h>
++
++struct ac
++{
++  int *ip;
++};
++
++__attribute__ ((noinline)) void
++f0 (ac *acp)
++{
++  int *p = acp->ip;
++  if (rand () && *(acp->ip))
++    {
++      printf ("%d\n", *(acp->ip));
++    }
++}
++
++int
++main ()
++{
++  ac *acp = new ac ();
++  acp->ip = rand () ? 0 : (new int ());
++  f0 (acp);
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-7.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-7.C
+new file mode 100644
+index 00000000000..c7d45c86880
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-7.C
+@@ -0,0 +1,38 @@
++// later gimples dereference the POINTER_TYPE gimple lhs in the same bb, but
++// only to fetch data from the vtable
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++#include <stdio.h>
++#include <stdlib.h>
++
++struct ac
++{
++  int *ap;
++  __attribute__ ((noinline)) virtual bool
++  f ()
++  {
++    return rand () ? (rand () ? true : false) : false;
++  }
++};
++
++struct bc : public ac
++{
++  __attribute__ ((noinline)) bool
++  f ()
++  {
++    return rand () ? true : false;
++  }
++};
++
++int
++main ()
++{
++  ac *acp = rand () ? (new ac ()) : (new bc ());
++  if (rand () && acp->f () && rand ())
++    {
++      printf ("%p\n", acp);
++    }
++}
++
++/* { dg-final { scan-ltrans-tree-dump "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-8.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-8.C
+new file mode 100644
+index 00000000000..941355369dc
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-8.C
+@@ -0,0 +1,46 @@
++// use a POINTER_TYPE SSA_NAME to take a function POINTER_TYPE parameter, and
++// then write a value by dereferencing this SSA_NAME; f0() and f1() have local
++// side effects, cannot reorder
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++#include <stdio.h>
++#include <stdlib.h>
++
++struct bc
++{
++  int c;
++  int b;
++};
++
++struct ac
++{
++  int a;
++  bc bcm;
++};
++
++__attribute__ ((noinline)) void
++f1 (int *ap)
++{
++  *ap = rand ();
++}
++
++__attribute__ ((noinline)) bool
++f0 (ac *acp)
++{
++  int *p = &(acp->bcm.b);
++  f1 (p);
++  return *p > 1 ? true : false;
++}
++
++int
++main ()
++{
++  ac *acp = new ac ();
++  if (rand () && f0 (acp) && rand ())
++    {
++      printf ("%d\n", acp->bcm.b);
++    }
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/testsuite/g++.dg/tree-ssa/bbr-9.C b/gcc/testsuite/g++.dg/tree-ssa/bbr-9.C
+new file mode 100644
+index 00000000000..36a17824c24
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/tree-ssa/bbr-9.C
+@@ -0,0 +1,26 @@
++// f0() has a local side effect: it modifies the local variable through its
++// reference
++
++/* { dg-options "-O3 -flto -fipa-bbr -fdump-ipa-bbr-details -fdump-tree-bbr-details" } */
++/* { dg-do link } */
++#include <stdio.h>
++#include <stdlib.h>
++
++__attribute__ ((noinline)) bool
++f0 (int &a)
++{
++  a = rand ();
++  return a > 0 ? true : false;
++}
++
++int
++main ()
++{
++  int a = 1;
++  if (f0 (a) && rand ())
++    {
++      printf ("%d\n", a);
++    }
++}
++
++/* { dg-final { scan-ltrans-tree-dump-not "find #" "bbr"} } */
+diff --git a/gcc/timevar.def b/gcc/timevar.def
+index 794b8017d18..59f03ac947d 100644
+--- a/gcc/timevar.def
++++ b/gcc/timevar.def
+@@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_INLINING          , "ipa inlining heuristics")
+ DEFTIMEVAR (TV_IPA_FNSPLIT           , "ipa function splitting")
+ DEFTIMEVAR (TV_IPA_COMDATS	     , "ipa comdats")
+ DEFTIMEVAR (TV_IPA_STRUCT_REORG      , "ipa struct reorg optimization")
++DEFTIMEVAR (TV_IPA_BBR	     	     , "ipa bbr")
+ DEFTIMEVAR (TV_IPA_OPT		     , "ipa various optimizations")
+ DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS    , "lto stream decompression")
+ DEFTIMEVAR (TV_IPA_LTO_COMPRESS      , "lto stream compression")
+@@ -177,6 +178,7 @@ DEFTIMEVAR (TV_TREE_SPLIT_EDGES      , "tree split crit edges")
+ DEFTIMEVAR (TV_TREE_REASSOC          , "tree reassociation")
+ DEFTIMEVAR (TV_TREE_PRE		     , "tree PRE")
+ DEFTIMEVAR (TV_TREE_FRE		     , "tree FRE")
++DEFTIMEVAR (TV_TREE_BBR		     , "tree bbr")
+ DEFTIMEVAR (TV_TREE_SINK             , "tree code sinking")
+ DEFTIMEVAR (TV_TREE_PHIOPT	     , "tree linearize phis")
+ DEFTIMEVAR (TV_TREE_BACKPROP	     , "tree backward propagate")
+diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
+index 55ee2fe7f9e..2eda6e31965 100644
+--- a/gcc/tree-pass.h
++++ b/gcc/tree-pass.h
+@@ -458,6 +458,7 @@ extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_rename_ssa_copies (gcc::context *ctxt);
++extern gimple_opt_pass *make_pass_bbr (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_sink_code (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_fre (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_check_data_deps (gcc::context *ctxt);
+@@ -521,6 +522,7 @@ extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt);
++extern ipa_opt_pass_d *make_pass_ipa_bbr (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_sra (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt);
+-- 
+2.22.0
+
diff --git a/HYGON-0006-coop-vectorize.patch b/HYGON-0006-coop-vectorize.patch
new file mode 100644
index 0000000..f384214
--- /dev/null
+++ b/HYGON-0006-coop-vectorize.patch
@@ -0,0 +1,1427 @@
+From 9d456d79c9de561c4b47a20ec9938ddcdf4efefd Mon Sep 17 00:00:00 2001
+From: baozhaoling 
+Date: Fri, 29 Mar 2024 10:31:37 +0800
+Subject: [PATCH] Add loop-slp-coop & slp-accumulation-combine for some
+ unvectorizable cases. Add testsuite cases for loop-slp-coop-vectorize. Add
+ shuffle fusion pass for vec-perm-expr combine.
+
+Add option -floop-slp-coop: while enabled, try both SLP vectorization and loop
+vectorization when performing the vectorize pass.
+Add option -fslp-optimize: while disabled, disable the vect_optimize_slp and
+vect_gather_slp_loads functions.
+Add option -fslp-accumulation-combine: the vect pass performs a rotate while
+vectorizing; when the option is enabled, the addition gimples will try to
+combine, reusing registers to reduce register pressure.
+Add option -fwidening-mul-ahead: move the widening-mul pass ahead, for optimization priority.
+Add option -fshuffle-fusion: combine the permute and permute-like instructions.
+Add testsuite cases: gcc/testsuite/g++.dg/vect/simd-coop.cc
+Add testsuite cases: gcc/testsuite/g++.target/i386/simd-coop.C
+Add testsuite cases: gcc/testsuite/g++.target/i386/shuffle-fusion.C
+---
+ gcc/Makefile.in                               |   1 +
+ gcc/common.opt                                |  20 +
+ gcc/config/i386/i386-expand.cc                |  62 ++-
+ gcc/expr.cc                                   |  25 +-
+ gcc/gimple.h                                  |  21 +
+ gcc/optabs.cc                                 |   4 +-
+ gcc/passes.def                                |   8 +-
+ gcc/shufflefusion.cc                          | 434 ++++++++++++++++++
+ gcc/testsuite/g++.dg/vect/simd-coop.cc        |  33 ++
+ .../g++.target/i386/shuffle-fusion.C          |  28 ++
+ gcc/testsuite/g++.target/i386/simd-coop.C     |  33 ++
+ gcc/testsuite/gcc.dg/tree-ssa/pr69270.c       |  12 +-
+ gcc/testsuite/gcc.dg/tree-ssa/pr70232.c       |   4 +-
+ gcc/testsuite/gcc.dg/tree-ssa/pr71437.c       |   4 +-
+ gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c       |  10 +-
+ gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c       |  10 +-
+ gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c       |  10 +-
+ .../gcc.dg/tree-ssa/ssa-dom-thread-7.c        |   4 +-
+ gcc/timevar.def                               |   1 +
+ gcc/tree-pass.h                               |   1 +
+ gcc/tree-ssa-loop.cc                          |   2 +
+ gcc/tree-ssa-math-opts.cc                     |  14 +-
+ gcc/tree-ssa-ter.cc                           |   2 +-
+ gcc/tree-vect-data-refs.cc                    |   8 +-
+ gcc/tree-vect-loop.cc                         |  15 +-
+ gcc/tree-vect-slp.cc                          |  18 +-
+ gcc/tree-vect-stmts.cc                        | 102 ++++
+ gcc/tree-vectorizer.h                         |   3 +
+ 28 files changed, 831 insertions(+), 58 deletions(-)
+ create mode 100644 gcc/shufflefusion.cc
+ create mode 100644 gcc/testsuite/g++.dg/vect/simd-coop.cc
+ create mode 100644 gcc/testsuite/g++.target/i386/shuffle-fusion.C
+ create mode 100644 gcc/testsuite/g++.target/i386/simd-coop.C
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 0aabc6ea3f2..c0696fef13f 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1581,6 +1581,7 @@ OBJS = \
+ 	selftest-run-tests.o \
+ 	sese.o \
+ 	shrink-wrap.o \
++	shufflefusion.o \
+ 	simplify-rtx.o \
+ 	sparseset.o \
+ 	spellcheck.o \
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 30f979870f6..645d971a14c 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3616,4 +3616,24 @@ fipa-ra
+ Common Var(flag_ipa_ra) Optimization
+ Use caller save register across calls if possible.
+ 
++floop-slp-coop
++Target Var(flag_loop_slp_coop) Init(0)
++Perform loop-slp-cooperate vectorization.
++
++fslp-optimize
++Target Var(flag_vectorize_slp_optimize) Init(1)
++Enable slp optimize in vect pass.
++
++fslp-accumulation-combine
++Target Var(flag_accumulation_combine) Init(0)
++Combine the mul-add accumulation stmt for reg pressure.
++
++fwidening-mul-ahead
++Target Var(flag_widening_mul_ahead) Init(0)
++Move widening-mul pass ahead of the store-merging pass.
++
++fshuffle-fusion
++Target Var(flag_shuffle_fusion) Init(0)
++Combine the permute and permute-like gimple.
++
+ ; This comment is to ensure we retain the blank line above.
+diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
+index 77dda5dd44e..9985c1a635a 100644
+--- a/gcc/config/i386/i386-expand.cc
++++ b/gcc/config/i386/i386-expand.cc
+@@ -19011,13 +19011,17 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
+ 
+   if (vmode == V8SImode)
+     for (i = 0; i < 8; ++i)
+-      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
++      rperm[i] = d->perm[i] != 255 ?
++		 GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7) :
++		 GEN_INT(255);
+   else if (vmode == V16SImode)
+     for (i = 0; i < 16; ++i)
+-      rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
++      rperm[i] = d->perm[i] != 255 ?
++		 GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15) :
++		 GEN_INT(255);
+   else
+     {
+-      eltsz = GET_MODE_UNIT_SIZE (d->vmode);
++      eltsz = GET_MODE_SIZE (d->vmode) / nelt;
+       if (!d->one_operand_p)
+ 	mask = 2 * nelt - 1;
+       else if (vmode == V64QImode)
+@@ -19027,11 +19031,15 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
+       else
+ 	mask = nelt - 1;
+ 
++      /* The index 255 is reserved so that the shuffle instruction inserts a 0
++	 into op0.  */
+       for (i = 0; i < nelt; ++i)
+ 	{
+ 	  unsigned j, e = d->perm[i] & mask;
+ 	  for (j = 0; j < eltsz; ++j)
+-	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
++	    if (d->perm[i] != 255)
++	      rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
++	    else rperm[i * eltsz + j] = GEN_INT (255);
+ 	}
+     }
+ 
+@@ -19229,6 +19237,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
+       for (i = 0; i < nelt; i++)
+ 	{
+ 	  nd.perm[i] = d->perm[i] & mask;
++	  if (d->perm[i] == 255) nd.perm[i] = 255;
+ 	  if (nd.perm[i] != i)
+ 	    identity_perm = false;
+ 	  if (nd.perm[i])
+@@ -19311,8 +19320,12 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
+ 	 every other permutation operand.  */
+       for (i = 0; i < nelt; i += 2)
+ 	{
+-	  nd.perm[i] = d->perm[i] & mask;
+-	  nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
++	  if (d->perm[i] != 255)
++  	    nd.perm[i] = d->perm[i] & mask;
++	  else nd.perm[i] = 255;
++	  if (d->perm[i + 1] != 255)
++	    nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
++	  else nd.perm[i + 1] = 255;
+ 	}
+       if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
+ 				  d->testing_p))
+@@ -19323,10 +19336,18 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
+ 	{
+ 	  for (i = 0; i < nelt; i += 4)
+ 	    {
+-	      nd.perm[i + 0] = d->perm[i + 0] & mask;
+-	      nd.perm[i + 1] = d->perm[i + 1] & mask;
+-	      nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
+-	      nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
++	      if (d->perm[i + 0] != 255)
++	        nd.perm[i + 0] = d->perm[i + 0] & mask;
++	      else nd.perm[i + 0] = 255;
++	      if (d->perm[i + 1] != 255)
++	        nd.perm[i + 1] = d->perm[i + 1] & mask;
++	      else nd.perm[i + 1] = 255;
++	      if (d->perm[i + 2] != 255)
++	        nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
++	      else nd.perm[i + 2] = 255;
++	      if (d->perm[i + 3] != 255)
++	        nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
++	      else nd.perm[i + 3] = 255;
+ 	    }
+ 
+ 	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
+@@ -20747,7 +20768,7 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
+     }
+ 
+   nelt = d->nelt;
+-  eltsz = GET_MODE_UNIT_SIZE (d->vmode);
++  eltsz = GET_MODE_SIZE (d->vmode) / nelt;
+ 
+   /* Generate two permutation masks.  If the required element is within
+      the given vector it is shuffled into the proper lane.  If the required
+@@ -20762,8 +20783,9 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
+ 	e -= nelt;
+ 
+       for (j = 0; j < eltsz; ++j)
+-	{
+-	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
++        {
++	  rperm[which][i*eltsz + j] = (d->perm[i] == 255) ?
++		  GEN_INT(255) : GEN_INT (e*eltsz + j);
+ 	  rperm[1-which][i*eltsz + j] = m128;
+ 	}
+ 
+@@ -21926,7 +21948,8 @@ canonicalize_perm (struct expand_vec_perm_d *d)
+   int i, which, nelt = d->nelt;
+ 
+   for (i = which = 0; i < nelt; ++i)
+-    which |= (d->perm[i] < nelt ? 1 : 2);
++    if (d->perm[i] != 255)
++      which |= (d->perm[i] < nelt ? 1 : 2);
+ 
+   d->one_operand_p = true;
+   switch (which)
+@@ -21948,7 +21971,8 @@ canonicalize_perm (struct expand_vec_perm_d *d)
+ 
+     case 2:
+       for (i = 0; i < nelt; ++i)
+-        d->perm[i] &= nelt - 1;
++	if (d->perm[i] != 255)
++          d->perm[i] &= nelt - 1;
+       d->op0 = d->op1;
+       break;
+ 
+@@ -22098,10 +22122,11 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+   for (i = which = 0; i < nelt; ++i)
+     {
+       unsigned char e = sel[i];
+-      gcc_assert (e < 2 * nelt);
++      gcc_assert (e < 2 * nelt || e == 255);
+       d.perm[i] = e;
+       perm[i] = e;
+-      which |= (e < nelt ? 1 : 2);
++      if (e != 255)
++        which |= (e < nelt ? 1 : 2);
+     }
+ 
+   if (d.testing_p)
+@@ -22109,7 +22134,8 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+       /* For all elements from second vector, fold the elements to first.  */
+       if (which == 2)
+ 	for (i = 0; i < nelt; ++i)
+-	  d.perm[i] -= nelt;
++	  if (d.perm[i] != 255)
++	    d.perm[i] -= nelt;
+ 
+       /* Check whether the mask can be applied to the vector type.  */
+       d.one_operand_p = (which != 3);
+diff --git a/gcc/expr.cc b/gcc/expr.cc
+index e7804d52656..ff126ebf509 100644
+--- a/gcc/expr.cc
++++ b/gcc/expr.cc
+@@ -10148,17 +10148,36 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
+       {
+ 	expand_operands (treeop0, treeop1, target, &op0, &op1, EXPAND_NORMAL);
+ 	vec_perm_builder sel;
++	machine_mode modeop = TYPE_MODE (TREE_TYPE (treeop0));
++
+ 	if (TREE_CODE (treeop2) == VECTOR_CST
+ 	    && tree_to_vec_perm_builder (&sel, treeop2))
+ 	  {
+ 	    machine_mode sel_mode = TYPE_MODE (TREE_TYPE (treeop2));
+-	    temp = expand_vec_perm_const (mode, op0, op1, sel,
+-					  sel_mode, target);
++	    if (modeop != mode && flag_shuffle_fusion)
++	      {
++		temp = expand_vec_perm_const (modeop, op0, op1, sel,
++					      sel_mode, target);
++		rtx tempnew = temp;
++		temp = gen_reg_rtx (mode);
++		convert_move (temp, tempnew, unsignedp);
++	      }
++	    else
++	      temp = expand_vec_perm_const (mode, op0, op1, sel,
++                                            sel_mode, target);
+ 	  }
+ 	else
+ 	  {
+ 	    op2 = expand_normal (treeop2);
+-	    temp = expand_vec_perm_var (mode, op0, op1, op2, target);
++	    if (modeop != mode && flag_shuffle_fusion)
++	      {
++		temp = expand_vec_perm_var (modeop, op0, op1, op2, target);
++		rtx tempnew = temp;
++		temp = gen_reg_rtx (mode);
++		convert_move (temp, tempnew, unsignedp);
++	      }
++	    else
++	      temp = expand_vec_perm_var (mode, op0, op1, op2, target);
+ 	  }
+ 	gcc_assert (temp);
+ 	return temp;
+diff --git a/gcc/gimple.h b/gcc/gimple.h
+index 77a5a07e9b5..df21897baa9 100644
+--- a/gcc/gimple.h
++++ b/gcc/gimple.h
+@@ -2953,6 +2953,27 @@ gimple_clobber_p (const gimple *s, enum clobber_kind kind)
+ static inline bool
+ is_gimple_call (const gimple *gs)
+ {
++  if (gimple_code (gs) == GIMPLE_CALL && flag_accumulation_combine)
++    {
++      const gcall *call=dyn_cast <const gcall *> (gs);
++      if (call -> u.internal_fn == IFN_FMA)
++        return false;
++    }
++  return gimple_code (gs) == GIMPLE_CALL;
++}
++
++/* On i386, IFN_FMA is not a call instruction, although it is represented as a
++   gimple call.  Return true if GS is a non-FMA GIMPLE_CALL.  */
++
++static inline bool
++is_nofma_gimple_call (const gimple *gs)
++{
++  if (gimple_code (gs) == GIMPLE_CALL)
++    {
++      const gcall *call=dyn_cast <const gcall *> (gs);
++      if (call -> u.internal_fn == IFN_FMA)
++        return false;
++    }
+   return gimple_code (gs) == GIMPLE_CALL;
+ }
+ 
+diff --git a/gcc/optabs.cc b/gcc/optabs.cc
+index 3d8fa3abdfe..4ad8fc44ad0 100644
+--- a/gcc/optabs.cc
++++ b/gcc/optabs.cc
+@@ -6194,7 +6194,9 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
+      cases in which the inputs are equal.  Not all backends can cope with
+      the single-input representation when testing for a double-input
+      target instruction.  */
+-  vec_perm_indices indices (sel, 2, GET_MODE_NUNITS (mode));
++  poly_uint16 indice_nunits = flag_shuffle_fusion ? 
++	                      256 : GET_MODE_NUNITS(mode);
++  vec_perm_indices indices (sel, 2, indice_nunits);
+ 
+   /* See if this can be handled with a vec_shr or vec_shl.  We only do this
+      if the second (for vec_shr) or first (for vec_shl) vector is all
+diff --git a/gcc/passes.def b/gcc/passes.def
+index 8dbb7983e3e..1addcf2d11d 100644
+--- a/gcc/passes.def
++++ b/gcc/passes.def
+@@ -297,6 +297,10 @@ along with GCC; see the file COPYING3.  If not see
+ 	  POP_INSERT_PASSES ()
+ 	  NEXT_PASS (pass_parallelize_loops, false /* oacc_kernels_p */);
+ 	  NEXT_PASS (pass_expand_omp_ssa);
++          NEXT_PASS (pass_tree_loop_done);
++          NEXT_PASS (pass_dominator, false);
++          NEXT_PASS (pass_copy_prop);
++          NEXT_PASS (pass_tree_loop_init);
+ 	  NEXT_PASS (pass_ch_vect);
+ 	  NEXT_PASS (pass_if_conversion);
+ 	  /* pass_vectorize must immediately follow pass_if_conversion.
+@@ -330,6 +334,7 @@ along with GCC; see the file COPYING3.  If not see
+       NEXT_PASS (pass_lower_vector_ssa);
+       NEXT_PASS (pass_lower_switch);
+       NEXT_PASS (pass_cse_reciprocals);
++      NEXT_PASS (pass_optimize_widening_mul, false);
+       NEXT_PASS (pass_reassoc, false /* early_p */);
+       NEXT_PASS (pass_strength_reduction);
+       NEXT_PASS (pass_split_paths);
+@@ -353,7 +358,8 @@ along with GCC; see the file COPYING3.  If not see
+       NEXT_PASS (pass_sink_code, true /* unsplit edges */);
+       NEXT_PASS (pass_phiopt, false /* early_p */);
+       NEXT_PASS (pass_fold_builtins);
+-      NEXT_PASS (pass_optimize_widening_mul);
++      NEXT_PASS (pass_shuffle_fusion);
++      NEXT_PASS (pass_optimize_widening_mul, true); 
+       NEXT_PASS (pass_store_merging);
+       NEXT_PASS (pass_tail_calls);
+       /* If DCE is not run before checking for uninitialized uses,
+diff --git a/gcc/shufflefusion.cc b/gcc/shufflefusion.cc
+new file mode 100644
+index 00000000000..f9a475fce5a
+--- /dev/null
++++ b/gcc/shufflefusion.cc
+@@ -0,0 +1,434 @@
++/* Routines for performing shuffle fusion on vector permute expressions.
++   Copyright (C) 2003-2022 Free Software Foundation, Inc.
++   Contributed by Andrew MacLeod  
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 3, or (at your option)
++any later version.
++
++GCC is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++.  */
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "tree.h"
++#include "gimple.h"
++#include "predict.h"
++#include "tree-pass.h"
++#include "ssa.h"
++#include "cgraph.h"
++#include "fold-const.h"
++#include "stor-layout.h"
++#include "gimple-iterator.h"
++#include "gimple-walk.h"
++#include "tree-ssa-loop-manip.h"
++#include "tree-ssa-loop-niter.h"
++#include "tree-cfg.h"
++#include "cfgloop.h"
++#include "tree-vectorizer.h"
++#include "tree-ssa-propagate.h"
++#include "dbgcnt.h"
++#include "tree-scalar-evolution.h"
++#include "stringpool.h"
++#include "attribs.h"
++#include "gimple-pretty-print.h"
++#include "opt-problem.h"
++#include "internal-fn.h"
++#include "tree-ssa-sccvn.h"
++#include "vec-perm-indices.h"
++
++// Operation1:
++// If every use of a VEC_PERM_EXPR is itself a permute-like expression, change
++// each such use into a new VEC_PERM_EXPR and delete the old one.
++// And VEC_PERM_EXPR can use 255 as a number in operand3 which means to put
++// a 0 in the dest operand. As a result, we can turn VEC_UNPACK_LO/HI_EXPR
++// into a VEC_PERM_EXPR.
++//
++// Case 1:
++// vect__2 = VEC_PERM_EXPR ;
++// vect__3 = [vec_unpack_lo_expr] vect__2;
++// vect__4 = [vec_unpack_hi_expr] vect__2;
++// ==>
++// vect__3 = VEC_PERM_EXPR ;
++// vect__4 = VEC_PERM_EXPR ;
++//
++// Case 2:
++// vect__1 = VEC_PERM_EXPR ;
++// vect__2 = VEC_PERM_EXPR ;
++// vect__3 = VEC_PERM_EXPR ;
++// ==>
++// vect__2 = VEC_PERM_EXPR ;
++// vect__3 = VEC_PERM_EXPR ;
++//
++// Operation2:
++// When a VEC_PERM_EXPR has different sources and each of its uses reads only
++// one of them, the same transformation can also be performed.
++//
++// Case:
++// vect__2 = VEC_PERM_EXPR ;
++// vect__3 = [vec_unpack_float_lo_expr] vect__2;
++// vect__4 = [vec_unpack_float_hi_expr] vect__2;
++// ==>
++// _0 = VEC_PERM_EXPR ;
++// vect__3 = [vec_unpack_float_lo_expr] _0;
++// _1 = VEC_PERM_EXPR ;
++// vect__4 = [vec_unpack_float_lo_expr] _1;
++void
++permute_stmt_operation(gimple *stmt)
++{
++  imm_use_iterator iter;
++  gimple *use_stmt;
++  unsigned int perm[256],n;
++
++  n=VECTOR_CST_NELTS (gimple_assign_rhs3(stmt)).to_constant ();
++
++  for (unsigned int i=0; i=256)
++	  {
++	    /* vpshufb for ymm only works intra lanes, it is not
++	       possible to shuffle bytes in between the lanes.  */
++	    for (unsigned int i = 0; i < n; ++i)
++	      if ((permnew[i]!=255) || ((permnew[i] ^ i) & (n / 2)))
++		{
++                  if (dump_enabled_p ())
++                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++		          "Expr for ymm should match only one insn.\n"
++		         );
++		  return;
++		}
++	  }
++
++      tree char_vectype = TREE_TYPE(gimple_assign_rhs3(stmt));
++      tree op0 = gimple_assign_rhs1(stmt);
++      tree op1 = gimple_assign_rhs2(stmt);
++      unsigned int t=0;
++      for (unsigned int i=0; i32) 
++    {
++      if (dump_enabled_p ())
++        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++		          "Shuffle fusion2 failed: expr has too many nelts.\n"
++		         );
++      return false;
++    }
++
++  for (unsigned int i=0; ia = r->a + (*k) * p[u].a;
++      r->b = r->b + (*k) * p[u].b;
++      r->c = r->c + (*k) * p[u].c;
++      r->d = r->d + (*k) * p[u].d;
++    }
++}
+diff --git a/gcc/testsuite/g++.target/i386/shuffle-fusion.C b/gcc/testsuite/g++.target/i386/shuffle-fusion.C
+new file mode 100644
+index 00000000000..dd3a011bff1
+--- /dev/null
++++ b/gcc/testsuite/g++.target/i386/shuffle-fusion.C
+@@ -0,0 +1,28 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=znver1 -O2" } */
++/* { dg-additional-options "-fshuffle-fusion -funsafe-math-optimizations" } */
++/* { dg-final { scan-assembler-not "vpsrl"} } */
++
++typedef struct _Double
++{
++  double
++    a,
++    b;
++} Double;
++
++typedef struct _Unsigned
++{
++  unsigned short
++    a,
++    b;
++} Unsigned;
++
++void S(unsigned n, Double *r,
++       const double *__restrict k, const Unsigned *__restrict p)
++{
++  for (unsigned u = 0; u < n; u++, k--)
++    {
++      r->a = r->a + p[u].a;
++      r->b = r->b + p[u].b;
++    }
++}
+diff --git a/gcc/testsuite/g++.target/i386/simd-coop.C b/gcc/testsuite/g++.target/i386/simd-coop.C
+new file mode 100644
+index 00000000000..60e831be748
+--- /dev/null
++++ b/gcc/testsuite/g++.target/i386/simd-coop.C
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=znver1 -O2 -fdump-tree-vect-details" } */
++/* { dg-additional-options "-floop-slp-coop -funsafe-math-optimizations" } */
++/* { dg-final { scan-tree-dump "Final SLP tree" "vect"} } */
++
++typedef struct _Double
++{
++  double
++    a,
++    b,
++    c,
++    d;
++} Double;
++
++typedef struct _Unsigned
++{
++  unsigned short
++    a,
++    b,
++    c,
++    d;
++} Unsigned;
++
++void S(unsigned n, Double *r,
++       const double *__restrict k, const Unsigned *__restrict p)
++{
++  for (unsigned u = 0; u < n; u++, k--)
++    {
++      r->a = r->a + (*k) * p[u].a;
++      r->b = r->b + (*k) * p[u].b;
++      r->c = r->c + (*k) * p[u].c;
++    }
++}
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr69270.c b/gcc/testsuite/gcc.dg/tree-ssa/pr69270.c
+index 0d66cc4383f..3aba15ee18b 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr69270.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr69270.c
+@@ -1,17 +1,17 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -fsplit-paths -fdump-tree-dom3-details" } */
++/* { dg-options "-O2 -fsplit-paths -fdump-tree-dom4-details" } */
+ 
+ /* There should be two references to bufferstep that turn into
+    constants.  */
+-/* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with constant .0." 1 "dom3"} } */
+-/* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with constant .1." 1 "dom3"} } */
++/* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with constant .0." 1 "dom4"} } */
++/* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with constant .1." 1 "dom4"} } */
+ 
+ /* And some assignments ought to fold down to constants.  */
+-/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 1;" 1 "dom3"} } */
+-/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 0;" 1 "dom3"} } */
++/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 1;" 1 "dom4"} } */
++/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 0;" 1 "dom4"} } */
+ 
+ /* The XOR operations should have been optimized to constants.  */
+-/* { dg-final { scan-tree-dump-not "bit_xor" "dom3"} } */
++/* { dg-final { scan-tree-dump-not "bit_xor" "dom4"} } */
+ 
+ 
+ extern int *stepsizeTable;
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr70232.c b/gcc/testsuite/gcc.dg/tree-ssa/pr70232.c
+index d636672fddc..43809215a1b 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr70232.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr70232.c
+@@ -1,10 +1,10 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -w -fdump-tree-vrp1-details -fdump-tree-vrp2-details -fdump-tree-dom2-details -fdump-tree-dom3-details" } */
++/* { dg-options "-O2 -w -fdump-tree-vrp1-details -fdump-tree-vrp2-details -fdump-tree-dom2-details -fdump-tree-dom4-details" } */
+ 
+ /* All the threads found by the threader should have too many
+    statements to be profitable.  */
+ /* { dg-final { scan-tree-dump-not "Registering jump " "dom2"} } */
+-/* { dg-final { scan-tree-dump-not "Registering jump " "dom3"} } */
++/* { dg-final { scan-tree-dump-not "Registering jump " "dom4"} } */
+ /* { dg-final { scan-tree-dump-not "Registering jump " "vrp1"} } */
+ /* { dg-final { scan-tree-dump-not "Registering jump " "vrp2"} } */
+ 
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr71437.c b/gcc/testsuite/gcc.dg/tree-ssa/pr71437.c
+index eab3a25928e..25bf07947d0 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr71437.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr71437.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-ffast-math -O3 -fdump-tree-dom3-details" } */
++/* { dg-options "-ffast-math -O3 -fdump-tree-dom4-details" } */
+ 
+ int I = 50, J = 50;
+ int S, L;
+@@ -43,4 +43,4 @@ void foo (int K)
+ /* We used to get 1 vrp-thread1 candidates here, but they now get
+    deferred until after loop opts are done, because they were rotating
+    loops.  */
+-/* { dg-final { scan-tree-dump-times "Threaded jump " 2 "dom3" } } */
++/* { dg-final { scan-tree-dump-times "Threaded jump " 2 "dom4" } } */
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c b/gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c
+index c8f8e612da2..979bfed98d8 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/slsr-27.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -fdump-tree-dom3" } */
++/* { dg-options "-O2 -fdump-tree-dom4" } */
+ 
+ struct x
+ {
+@@ -16,8 +16,8 @@ f (struct x *p, unsigned int n)
+   foo (p->a[n], p->c[n], p->b[n]);
+ }
+ 
+-/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom3" { target { int32 } } } } */
+-/* { dg-final { scan-tree-dump-times "\\* 2;" 1 "dom3" { target { int16 } } } } */
+-/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ \[^\r\n\]*_\\d\+;" 1 "dom3" } } */
++/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom4" { target { int32 } } } } */
++/* { dg-final { scan-tree-dump-times "\\* 2;" 1 "dom4" { target { int16 } } } } */
++/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ \[^\r\n\]*_\\d\+;" 1 "dom4" } } */
+ /*
+-  { dg-final { scan-tree-dump-times "MEM *? *\\\[\\(struct x \\*\\)\[^\r\n\]*_\\d\+" 3 "dom3" } } */
++  { dg-final { scan-tree-dump-times "MEM *? *\\\[\\(struct x \\*\\)\[^\r\n\]*_\\d\+" 3 "dom4" } } */
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c b/gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c
+index b18e9c1fe21..2bcafe24000 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/slsr-28.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -fdump-tree-dom3" } */
++/* { dg-options "-O2 -fdump-tree-dom4" } */
+ 
+ struct x
+ {
+@@ -20,7 +20,7 @@ f (struct x *p, unsigned int n)
+     foo (p->b[n], p->a[n], p->c[n]);
+ }
+ 
+-/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom3" { target { int32 } } } } */
+-/* { dg-final { scan-tree-dump-times "\\* 2;" 1 "dom3" { target { int16 } } } } */
+-/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ \[^\r\n\]*_\\d\+" 1 "dom3" } } */
+-/* { dg-final { scan-tree-dump-times "MEM *? *\\\[\\(struct x \\*\\)\[^\r\n\]*_\\d\+" 9 "dom3" } } */
++/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom4" { target { int32 } } } } */
++/* { dg-final { scan-tree-dump-times "\\* 2;" 1 "dom4" { target { int16 } } } } */
++/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ \[^\r\n\]*_\\d\+" 1 "dom4" } } */
++/* { dg-final { scan-tree-dump-times "MEM *? *\\\[\\(struct x \\*\\)\[^\r\n\]*_\\d\+" 9 "dom4" } } */
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c b/gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c
+index 00e8d2b52b3..4db297a2c82 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/slsr-29.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -fdump-tree-dom3" } */
++/* { dg-options "-O2 -fdump-tree-dom4" } */
+ 
+ struct x
+ {
+@@ -22,7 +22,7 @@ f (struct x *p, unsigned int n)
+     }
+ }
+ 
+-/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom3" { target { int32 } } } } */
+-/* { dg-final { scan-tree-dump-times "\\* 2;" 1 "dom3" { target { int16 } } } } */
+-/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ \[^\r\n\]*_\\d\+" 1 "dom3" } } */
+-/* { dg-final { scan-tree-dump-times "MEM *? *\\\[\\(struct x \\*\\)\[^\r\n\]*_\\d\+" 9 "dom3" } } */
++/* { dg-final { scan-tree-dump-times "\\* 4;" 1 "dom4" { target { int32 } } } } */
++/* { dg-final { scan-tree-dump-times "\\* 2;" 1 "dom4" { target { int16 } } } } */
++/* { dg-final { scan-tree-dump-times "p_\\d\+\\(D\\) \\+ \[^\r\n\]*_\\d\+" 1 "dom4" } } */
++/* { dg-final { scan-tree-dump-times "MEM *? *\\\[\\(struct x \\*\\)\[^\r\n\]*_\\d\+" 9 "dom4" } } */
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
+index b64e71dae22..a9cf2578e3c 100644
+--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -fdump-tree-dom2-stats -fdump-tree-thread2-stats -fdump-tree-dom3-stats -fno-guess-branch-probability" } */
++/* { dg-options "-O2 -fdump-tree-dom2-stats -fdump-tree-thread2-stats -fdump-tree-dom4-stats -fno-guess-branch-probability" } */
+ 
+ /* { dg-final { scan-tree-dump-not "Jumps threaded"  "dom2" } } */
+ 
+@@ -10,7 +10,7 @@
+ /* aarch64 has the highest CASE_VALUES_THRESHOLD in GCC.  It's high enough
+    to change decisions in switch expansion which in turn can expose new
+    jump threading opportunities.  Skip the later tests on aarch64.  */
+-/* { dg-final { scan-tree-dump-not "Jumps threaded"  "dom3" { target { ! aarch64*-*-* } } } } */
++/* { dg-final { scan-tree-dump-not "Jumps threaded"  "dom4" { target { ! aarch64*-*-* } } } } */
+ /* { dg-final { scan-tree-dump "Jumps threaded: 7"  "thread2" { target { ! aarch64*-*-* } } } } */
+ /* { dg-final { scan-tree-dump "Jumps threaded: 18"  "thread2" { target { aarch64*-*-* } } } } */
+ 
+diff --git a/gcc/timevar.def b/gcc/timevar.def
+index 794b8017d18..26a5b49d02e 100644
+--- a/gcc/timevar.def
++++ b/gcc/timevar.def
+@@ -309,6 +309,7 @@ DEFTIMEVAR (TV_INITIALIZE_RTL        , "initialize rtl")
+ DEFTIMEVAR (TV_GIMPLE_LADDRESS       , "address lowering")
+ DEFTIMEVAR (TV_TREE_LOOP_IFCVT       , "tree loop if-conversion")
+ DEFTIMEVAR (TV_WARN_ACCESS           , "access analysis")
++DEFTIMEVAR (TV_SHUFFLE_FUSION        , "shuffle fusion")
+ 
+ /* Everything else in rest_of_compilation not included above.  */
+ DEFTIMEVAR (TV_EARLY_LOCAL	     , "early local passes")
+diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
+index 55ee2fe7f9e..cf86f15c501 100644
+--- a/gcc/tree-pass.h
++++ b/gcc/tree-pass.h
+@@ -499,6 +499,7 @@ extern gimple_opt_pass *make_pass_modref (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_coroutine_lower_builtins (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_coroutine_early_expand_ifns (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_adjust_alignment (gcc::context *ctxt);
++extern gimple_opt_pass *make_pass_shuffle_fusion (gcc::context *ctxt);
+ 
+ /* IPA Passes */
+ extern simple_ipa_opt_pass *make_pass_ipa_lower_emutls (gcc::context *ctxt);
+diff --git a/gcc/tree-ssa-loop.cc b/gcc/tree-ssa-loop.cc
+index 73aa46627b4..844df411de4 100644
+--- a/gcc/tree-ssa-loop.cc
++++ b/gcc/tree-ssa-loop.cc
+@@ -339,6 +339,7 @@ public:
+   pass_tree_loop_init (gcc::context *ctxt)
+     : gimple_opt_pass (pass_data_tree_loop_init, ctxt)
+   {}
++  opt_pass * clone () { return new pass_tree_loop_init (m_ctxt); }
+ 
+   /* opt_pass methods: */
+   virtual unsigned int execute (function *);
+@@ -501,6 +502,7 @@ public:
+   pass_tree_loop_done (gcc::context *ctxt)
+     : gimple_opt_pass (pass_data_tree_loop_done, ctxt)
+   {}
++  opt_pass * clone () { return new pass_tree_loop_done (m_ctxt); }
+ 
+   /* opt_pass methods: */
+   virtual unsigned int execute (function *) { return tree_ssa_loop_done (); }
+diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
+index 232e903b0d2..f89d3c3efcc 100644
+--- a/gcc/tree-ssa-math-opts.cc
++++ b/gcc/tree-ssa-math-opts.cc
+@@ -4891,13 +4891,24 @@ public:
+   {}
+ 
+   /* opt_pass methods: */
++  opt_pass * clone () { return new pass_optimize_widening_mul (m_ctxt); }
++  void set_pass_param (unsigned int n, bool param)
++    {
++      gcc_assert (n == 0);
++      ahead = param;
++    }
++
+   virtual bool gate (function *)
+     {
+-      return flag_expensive_optimizations && optimize;
++      return flag_expensive_optimizations && optimize
++	      && (ahead ^ flag_widening_mul_ahead);
+     }
+ 
+   virtual unsigned int execute (function *);
+ 
++  private:
++  /* Determines whether the pass moved ahead.  */
++  bool ahead;
+ }; // class pass_optimize_widening_mul
+ 
+ /* Walker class to perform the transformation in reverse dominance order. */
+@@ -5073,3 +5084,4 @@ make_pass_optimize_widening_mul (gcc::context *ctxt)
+ {
+   return new pass_optimize_widening_mul (ctxt);
+ }
++
+diff --git a/gcc/tree-ssa-ter.cc b/gcc/tree-ssa-ter.cc
+index 4cdad0d2749..d34e435c63a 100644
+--- a/gcc/tree-ssa-ter.cc
++++ b/gcc/tree-ssa-ter.cc
+@@ -685,7 +685,7 @@ find_replaceable_in_bb (temp_expr_table *tab, basic_block bb)
+       /* Increment counter if this is a non BUILT_IN call. We allow
+ 	 replacement over BUILT_IN calls since many will expand to inline
+ 	 insns instead of a true call.  */
+-      if (is_gimple_call (stmt)
++      if (is_nofma_gimple_call (stmt)
+ 	  && !((fndecl = gimple_call_fndecl (stmt))
+ 	       && fndecl_built_in_p (fndecl)))
+ 	cur_call_cnt++;
+diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
+index 4e615b80b3a..e4c8ffd5ff3 100644
+--- a/gcc/tree-vect-data-refs.cc
++++ b/gcc/tree-vect-data-refs.cc
+@@ -425,6 +425,8 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
+       if (apply_safelen ())
+ 	return opt_result::success ();
+ 
++      vect_depandence_issue = true;
++
+       if (dump_enabled_p ())
+ 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
+ 			 "versioning for alias required: "
+@@ -4210,7 +4212,10 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
+       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
+ 				     vectype, memory_type, offtype, scale,
+ 				     &ifn, &offset_vectype))
+-	ifn = IFN_LAST;
++        {     
++          ifn = IFN_LAST;
++          vect_depandence_issue = true;
++	}
+       decl = NULL_TREE;
+     }
+   else
+@@ -4225,6 +4230,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
+ 	  if (targetm.vectorize.builtin_scatter)
+ 	    decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
+ 	}
++      if (!decl) vect_depandence_issue = true;
+       ifn = IFN_LAST;
+       /* The offset vector type will be read from DECL when needed.  */
+       offset_vectype = NULL_TREE;
+diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
+index 3435f9378da..da3434ae43c 100644
+--- a/gcc/tree-vect-loop.cc
++++ b/gcc/tree-vect-loop.cc
+@@ -2374,11 +2374,14 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
+       /* Update the vectorization factor based on the SLP decision.  */
+       vect_update_vf_for_slp (loop_vinfo);
+ 
+-      /* Optimize the SLP graph with the vectorization factor fixed.  */
+-      vect_optimize_slp (loop_vinfo);
++      if (flag_vectorize_slp_optimize)
++	{
++	  /* Optimize the SLP graph with the vectorization factor fixed.  */
++	  vect_optimize_slp (loop_vinfo);
+ 
+-      /* Gather the loads reachable from the SLP graph entries.  */
+-      vect_gather_slp_loads (loop_vinfo);
++	  /* Gather the loads reachable from the SLP graph entries.  */
++	  vect_gather_slp_loads (loop_vinfo);
++	}
+     }
+ 
+   bool saved_can_use_partial_vectors_p
+@@ -3016,6 +3019,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+       /* Set cached VF to -1 prior to analysis, which indicates a mode has
+ 	 failed.  */
+       cached_vf_per_mode[last_mode_i] = -1;
++      vect_depandence_issue = false;
+       opt_loop_vec_info loop_vinfo
+ 	= vect_analyze_loop_1 (loop, shared, &loop_form_info,
+ 			       NULL, vector_modes, mode_i,
+@@ -3128,6 +3132,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+ 			 "mode %s\n", GET_MODE_NAME (vector_modes[mode_i]));
+ 
+       bool fatal;
++      vect_depandence_issue = true;
+       opt_loop_vec_info loop_vinfo
+ 	= vect_analyze_loop_1 (loop, shared, &loop_form_info,
+ 			       first_loop_vinfo,
+@@ -3193,6 +3198,8 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+   return first_loop_vinfo;
+ }
+ 
++bool vect_depandence_issue;
++
+ /* Return true if there is an in-order reduction function for CODE, storing
+    it in *REDUC_FN if so.  */
+ 
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index af477c31aa3..24343ebe597 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -924,6 +924,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
+   bool first_stmt_phi_p = false, phi_p = false;
+   bool maybe_soft_fail = false;
+   tree soft_fail_nunits_vectype = NULL_TREE;
++  bool arraystmt = false;
+ 
+   /* For every stmt in NODE find its def stmt/s.  */
+   stmt_vec_info stmt_info;
+@@ -932,6 +933,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
+       gimple *stmt = stmt_info->stmt;
+       swap[i] = 0;
+       matches[i] = false;
++      arraystmt = false;
+ 
+       if (dump_enabled_p ())
+ 	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
+@@ -1033,6 +1035,20 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
+       else
+ 	{
+ 	  rhs_code = gimple_assign_rhs_code (stmt);
++	  /* Loop-slp-cooperate vectorization.
++	     For this case create a new stmt_info for the array, and perform as
++	     slp vectorize.  Set the size as 1, and loop vectorize will perform
++	     the vectorized stmt as a new element. */
++         if (rhs_code == MEM_REF && !vect_depandence_issue &&
++             !zerop(stmt_info->dr_aux.dr->innermost.step))
++            arraystmt = flag_loop_slp_coop;
++	  if (arraystmt)
++            {
++              if(!stmt_info->first_element)
++                stmt_info->first_element = stmt_info;
++              if(!stmt_info->size)
++                stmt_info->size = 1;
++            }
+ 	  load_p = gimple_vuse (stmt);
+ 	}
+ 
+@@ -1207,7 +1223,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
+ 	}
+ 
+       /* Grouped store or load.  */
+-      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
++      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || arraystmt)
+ 	{
+ 	  if (REFERENCE_CLASS_P (lhs))
+ 	    {
+diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
+index 34920041116..fe28725e5b1 100644
+--- a/gcc/tree-vect-stmts.cc
++++ b/gcc/tree-vect-stmts.cc
+@@ -6448,6 +6448,108 @@ vectorizable_operation (vec_info *vinfo,
+ 
+   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ 		     op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
++
++/* Accumulation Combine
++
++   For loop vectorize, reuse the register after rotation.
++
++   Before rotate:
++   PHI: vx1 = {v0, vy1}
++        vy1 = vx1 + vz[i]
++
++   After rotate:
++   PHI: vx1 = {v0, vy1}
++   PHI: vx2 = {0, vy2}
++        vy1 = vx1 + vz[i]
++        vy2 = vx2 + vz[i+1]	
++
++   After accumulation combine:
++   PHI: vx1 = {v0, vy2}
++        vy1 = vx1 + vz[i]
++        vy2 = vy1 + vz[i+1]
++*/
++
++  bool combined = false;
++
++  if(slp_node && !op2 && op1 && op0 
++	      && ((TREE_CODE(op0) == SSA_NAME 
++		   && SSA_NAME_DEF_STMT(op0)->code == GIMPLE_PHI)
++	      ||  (TREE_CODE(op1) == SSA_NAME
++		   && SSA_NAME_DEF_STMT(op1)->code == GIMPLE_PHI)))
++    {
++      gimple* phi;
++      if (TREE_CODE(op0) == SSA_NAME
++	  && SSA_NAME_DEF_STMT(op0)->code == GIMPLE_PHI)
++        phi = SSA_NAME_DEF_STMT(op0);
++      else
++        phi = SSA_NAME_DEF_STMT(op1);
++      for (i = 0; i < gimple_phi_num_args (phi); i++)
++        {
++          tree arg = gimple_phi_arg_def (phi, i);
++	  if (arg == scalar_dest)
++	    combined = flag_accumulation_combine;
++	}
++      if(SLP_TREE_LANES(slp_node) % 
++ 	 vect_nunits_for_cost(SLP_TREE_VECTYPE(slp_node)) == 0)
++        combined = false;
++      if (vec_num % SLP_TREE_LANES(slp_node) != 0) 
++	combined = false;
++    }
++  if (combined)
++    {
++      tree addvec[vec_num];
++      tree zero_val = build_zero_cst (TREE_TYPE (vectype_out));
++      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
++        {
++          gimple *new_stmt = NULL;
++	  vop1 = vec_oprnds1[i];
++	  if (icode == GIMPLE_PHI)
++	      new_stmt
++		= gimple_build_assign (vec_dest, code,
++		  		       addvec[i-SLP_TREE_LANES(slp_node)], 
++				       vop1, NULL_TREE);
++	    else
++	      new_stmt
++		= gimple_build_assign (vec_dest, code,
++				       addvec[i-SLP_TREE_LANES(slp_node)],
++		 		       vop0, NULL_TREE);
++	  addvec[i] = make_ssa_name (vec_dest, new_stmt);
++	  gimple_assign_set_lhs (new_stmt, addvec[i]);
++	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
++	  if(vec_num - i <= SLP_TREE_LANES(slp_node))
++	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
++	}
++
++      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
++        if (i >= SLP_TREE_LANES(slp_node))
++	  {
++            gimple *new_stmt = NULL;
++	    if (SSA_NAME_DEF_STMT(op0)->code == GIMPLE_PHI)
++	      new_stmt
++		= gimple_build_assign (vec_dest, PLUS_EXPR,
++				       vop0, zero_val, NULL_TREE);
++	    else
++	      new_stmt
++		= gimple_build_assign (vec_dest, PLUS_EXPR,
++	      			       vec_oprnds1[i], zero_val, NULL_TREE);
++	    new_temp = make_ssa_name (vec_dest, new_stmt);
++	    gimple_assign_set_lhs (new_stmt, new_temp);
++	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
++	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
++	  }
++
++      vec_oprnds0.release ();
++      vec_oprnds1.release ();
++      vec_oprnds2.release ();
++
++      return true;
++    }
++
+   /* Arguments are ready.  Create the new vector stmt.  */
+   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
+     {
+diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
+index 642eb0aeb21..dd99f98071e 100644
+--- a/gcc/tree-vectorizer.h
++++ b/gcc/tree-vectorizer.h
+@@ -2558,4 +2558,7 @@ vect_is_integer_truncation (stmt_vec_info stmt_info)
+ 	  && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
+ }
+ 
++/* Flag used to guard the cases handled by loop-SLP-cooperate vectorization.  */
++extern bool vect_depandence_issue;
++
+ #endif  /* GCC_TREE_VECTORIZER_H  */
+-- 
+2.22.0
+
diff --git a/HYGON-0007-padding-slp-optimization.patch b/HYGON-0007-padding-slp-optimization.patch
new file mode 100644
index 0000000..212a1e8
--- /dev/null
+++ b/HYGON-0007-padding-slp-optimization.patch
@@ -0,0 +1,451 @@
+From ead34ebea945263e5a8babb95e0ba8c5d7ee8ed8 Mon Sep 17 00:00:00 2001
+From: zhongyifan 
+Date: Mon, 15 Apr 2024 18:31:26 +0800
+Subject: [PATCH] add padding slp optimization
+
+The original SLP algorithm does not support the case where the root instruction VF is not a power of 2.
+Instruction groups with a VF of 3 or 9 are padded to satisfy the SLP condition.
+---
+ gcc/cfgexpand.cc                              |  19 ++
+ gcc/common.opt                                |   4 +
+ gcc/config/i386/sse.md                        |  15 ++
+ gcc/internal-fn.cc                            |   2 +
+ .../g++.dg/vect/slp-pr-padding-slp.cc         |  20 ++
+ gcc/tree-ssa-ifcombine.cc                     |  12 +-
+ gcc/tree-vect-slp.cc                          | 204 +++++++++++++++++-
+ gcc/tree-vect-stmts.cc                        |  14 +-
+ 8 files changed, 283 insertions(+), 7 deletions(-)
+ create mode 100644 gcc/testsuite/g++.dg/vect/slp-pr-padding-slp.cc
+
+diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
+index 4691355aab0..bb05eb7e0c2 100644
+--- a/gcc/cfgexpand.cc
++++ b/gcc/cfgexpand.cc
+@@ -6136,6 +6136,25 @@ expand_gimple_basic_block (basic_block bb, bool disable_tail_calls)
+ 
+   update_bb_for_insn (bb);
+ 
++  /* Remove the rtl of mov and cmp that appear after the sse4_1_ptestv4si,
++     to ensure the correctness of the vptest instruction. */
++  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
++    if (INSN_P (insn) && recog_memoized (insn) >= 0)
++      {
++        const char *name = get_insn_name (recog_memoized (insn));
++        if (name && strcmp (name, "sse4_1_ptestv4si") == 0)
++          {
++            rtx_insn *next_insn = NEXT_INSN (insn);
++            if (next_insn)
++              {
++                const char *next_name = get_insn_name (recog_memoized (next_insn));
++                if (strcmp (next_name, "*movqi_internal") == 0
++                    || strcmp (next_name, "*cmpqi_ccno_1") == 0)
++                  remove_insn (next_insn);
++              }
++          }
++      }
++
+   return bb;
+ }
+ 
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 1cc920c22b1..d3d3bacdf9c 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3650,4 +3650,8 @@ fshuffle-fusion
+ Target Var(flag_shuffle_fusion) Init(0)
+ Combine the permute and permute-like gimple.
+ 
++fpadding-slp
++Common Var(padding_slp) Init(0) Optimization
++Enable padding SLP optimization.
++
+ ; This comment is to ensure we retain the blank line above.
+diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
+index 82ee1717bdd..6f1e14dbf7c 100644
+--- a/gcc/config/i386/sse.md
++++ b/gcc/config/i386/sse.md
+@@ -22721,6 +22721,21 @@
+    (set_attr "prefix" "vex")
+    (set_attr "mode" "")])
+ 
++;; Implement the construction of reduc_ior_scal_v4si
++;; by using the gen_sse4_1_ptestv4si template.
++(define_expand "reduc_ior_scal_v4si"
++  [(parallel
++   [(set (match_operand:SI 0 "register_operand")
++         (compare
++	 (match_operand:V4SI 1 "register_operand")
++	 (const_int 0)))
++   (clobber (reg:CC FLAGS_REG))])]
++  "TARGET_SSE4_1 && padding_slp"
++{
++  emit_insn (gen_sse4_1_ptestv4si (operands[1], operands[1]));
++  DONE;
++})
++
+ ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
+ ;; But it is not a really compare instruction.
+ (define_insn "_ptest"
+diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
+index 8b1733e20c4..2855a7192ca 100644
+--- a/gcc/internal-fn.cc
++++ b/gcc/internal-fn.cc
+@@ -3837,6 +3837,8 @@ bool
+ direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
+ 				optimization_type opt_type)
+ {
++  if (padding_slp && fn == IFN_REDUC_IOR)
++    return false;
+   switch (fn)
+     {
+ #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
+diff --git a/gcc/testsuite/g++.dg/vect/slp-pr-padding-slp.cc b/gcc/testsuite/g++.dg/vect/slp-pr-padding-slp.cc
+new file mode 100644
+index 00000000000..598fa836dba
+--- /dev/null
++++ b/gcc/testsuite/g++.dg/vect/slp-pr-padding-slp.cc
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-fpadding-slp" } */
++/* { dg-final { scan-tree-dump "Padding slp: Perform padding for the SLP case with a VF of 3" "slp1"} } */
++
++#define M 1000
++
++int foo(float *bb, float *start, float *axis) 
++{
++  for(int i =0; i < M; i++, bb += 3, start += 3,axis += 3)
++    {
++      float t1x = (bb[0] - start[0]) * axis[0];
++      float t1y = (bb[1] - start[1]) * axis[1];
++      float t1z = (bb[2] - start[2]) * axis[2];
++
++      if (t1x  || t1y  || t1z) 
++        return 0;
++    }
++  return 1;
++}
++
+diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
+index ce9bbebf948..27bbe207bee 100644
+--- a/gcc/tree-ssa-ifcombine.cc
++++ b/gcc/tree-ssa-ifcombine.cc
+@@ -574,7 +574,17 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
+ 	    return false;
+ 	  /* Only do this optimization if the inner bb contains only the conditional. */
+ 	  if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb)))
+-	    return false;
++            {
++              if (!padding_slp)
++                return false;
++              /* Relax the conditions of this optimization for padding SLP */
++              gimple_stmt_iterator gsi1 = gsi_start_nondebug_after_labels_bb (inner_cond_bb);
++              gimple *stmt1 = gsi_stmt (gsi1);
++              enum tree_code rhs_code = gimple_assign_rhs_code (stmt1);
++              if (!(rhs_code == LT_EXPR || rhs_code == LE_EXPR
++                    || rhs_code == GT_EXPR || rhs_code == GE_EXPR))
++                return false;
++            }
+ 	  t1 = fold_build2_loc (gimple_location (inner_cond),
+ 				inner_cond_code,
+ 				boolean_type_node,
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index af477c31aa3..5164dad2c70 100644
+--- a/gcc/tree-vect-slp.cc
++++ b/gcc/tree-vect-slp.cc
+@@ -50,6 +50,8 @@ along with GCC; see the file COPYING3.  If not see
+ #include "tree-cfg.h"
+ #include "alloc-pool.h"
+ 
++bool padding_merge = false;
++
+ static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
+ 					  slp_tree, stmt_vector_for_cost *);
+ static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree);
+@@ -3035,9 +3037,137 @@ vect_build_slp_instance (vec_info *vinfo,
+ 	dump_printf_loc (MSG_NOTE, vect_location,
+ 			 "  %G", scalar_stmts[i]->stmt);
+     }
++  /* Padding for root instruction groups of sizes 3 and 9.  */
++  unsigned int group_size = scalar_stmts.length ();
++  bool padding_vf9 = false;
++  enum tree_code reduc_code;
++  enum tree_code scalar_code;
++  vec scalar_stmts1;
++  vec scalar_stmts2;
++  vec scalar_stmts3;
++  vec root_stmt_infos2;
++  vec root_stmt_infos3;
++
++  if (padding_slp && root_stmt_infos.length () > 0)
++    {
++      if (is_gimple_assign (as_a (root_stmt_infos[0]->stmt)))
++        reduc_code = gimple_assign_rhs_code (as_a (root_stmt_infos[0]->stmt));
++      if (is_gimple_assign (as_a (scalar_stmts[0]->stmt)))
++        scalar_code = gimple_assign_rhs_code (as_a (scalar_stmts[0]->stmt));
++      if (group_size == 3 && reduc_code == BIT_IOR_EXPR)
++        {
++          if (dump_enabled_p ())
++            dump_printf_loc (MSG_NOTE, vect_location,
++                             "Padding slp: Perform padding for "
++			     "the SLP case with a VF of 3\n");
++          /*Code 110 represents the comparison type, changing its
++          execution order does not affect the program logic. */
++          if (scalar_code == 110)
++            {
++              vec scalar_stmts_new;
++              scalar_stmts_new.create (4);
++              scalar_stmts_new.safe_push (scalar_stmts[0]);
++              scalar_stmts_new.safe_push (scalar_stmts[1]);
++              scalar_stmts_new.safe_push (scalar_stmts[2]);
++              scalar_stmts_new.safe_push (scalar_stmts[0]);
++              scalar_stmts = scalar_stmts_new;
++              /* Adjust the scalar order to meet the vector
++              reuse conditions. */
++              auto tmp1 = scalar_stmts[1];
++              scalar_stmts[1] = scalar_stmts[2];
++              scalar_stmts[2] = tmp1;
++              group_size++;
++	    }
++        }
++      else if (group_size == 9 && reduc_code == BIT_IOR_EXPR)
++        {
++          if (dump_enabled_p ())
++            dump_printf_loc (MSG_NOTE, vect_location,
++                             "Padding slp: Perform padding for "
++			     "the SLP case with a VF of 9\n");
++          /* For the VF9 case, it needs to be divided into 3
++          groups first and then each padded to VF4. */
++          scalar_stmts1.create (4);
++          scalar_stmts2.create (4);
++          scalar_stmts3.create (4);
++  
++          scalar_stmts1.safe_push (scalar_stmts.pop ());
++          scalar_stmts2.safe_push (scalar_stmts.pop ());
++          scalar_stmts1.safe_push (scalar_stmts.pop ());
++          scalar_stmts2.safe_push (scalar_stmts.pop ());
++          scalar_stmts2.safe_push (scalar_stmts.pop ());
++          scalar_stmts1.safe_push (scalar_stmts.pop ());
++
++          scalar_stmts3.safe_push (scalar_stmts.pop ());
++          scalar_stmts3.safe_push (scalar_stmts.pop ());
++          scalar_stmts3.safe_push (scalar_stmts.pop ());
++          /* Replace the greater-than comparison with a less-than
++          comparison, and swap the source operands. */
++          if (gimple_code (scalar_stmts1[1]->stmt) == GIMPLE_ASSIGN)
++            if (scalar_stmts1[1]->stmt->subcode == GT_EXPR)
++              {
++                tree rhs11 = gimple_assign_rhs1 (scalar_stmts1[1]->stmt);
++                tree rhs12 = gimple_assign_rhs2 (scalar_stmts1[1]->stmt);
++                gimple_assign_set_rhs1 (scalar_stmts1[1]->stmt, rhs12);
++                gimple_assign_set_rhs2 (scalar_stmts1[1]->stmt, rhs11);
++                gimple_assign_set_rhs_code (scalar_stmts1[1]->stmt, LT_EXPR);
++              }
++          if (gimple_code( scalar_stmts2[1]->stmt) == GIMPLE_ASSIGN)
++            if (scalar_stmts2[1]->stmt->subcode == LT_EXPR)
++              {
++                tree rhs21 = gimple_assign_rhs1 (scalar_stmts2[1]->stmt);
++                tree rhs22 = gimple_assign_rhs2 (scalar_stmts2[1]->stmt);
++                gimple_assign_set_rhs1 (scalar_stmts2[1]->stmt, rhs22);
++                gimple_assign_set_rhs2 (scalar_stmts2[1]->stmt, rhs21);
++                gimple_assign_set_rhs_code (scalar_stmts2[1]->stmt, GT_EXPR);
++              }
++          /* Adjust the scalar order to meet the vector
++          reuse conditions. */
++          auto tmp2 = scalar_stmts2[1];
++          scalar_stmts2[1] = scalar_stmts2[0];
++          scalar_stmts2[0] = tmp2;
++
++          scalar_stmts1.safe_push (scalar_stmts1[0]);
++          scalar_stmts2.safe_push (scalar_stmts2[0]);
++          scalar_stmts3.safe_push (scalar_stmts3[0]);
++
++          scalar_stmts.safe_push (scalar_stmts1.pop ());
++          scalar_stmts.safe_push (scalar_stmts1.pop ());
++          scalar_stmts.safe_push (scalar_stmts1.pop ());
++          scalar_stmts.safe_push (scalar_stmts1.pop ());
++
++          if (dump_enabled_p ())
++            {
++              dump_printf_loc (MSG_NOTE, vect_location,
++                              "scalar_stmts1\n");
++              for (unsigned i = 0; i < scalar_stmts.length (); ++i)
++                dump_printf_loc (MSG_NOTE, vect_location,
++                           "  %G", scalar_stmts[i]->stmt);
++              dump_printf_loc (MSG_NOTE, vect_location,
++                              "scalar_stmts2\n");
++              for (unsigned i = 0; i < scalar_stmts.length (); ++i)
++                dump_printf_loc (MSG_NOTE, vect_location,
++                           "  %G", scalar_stmts2[i]->stmt);
++              dump_printf_loc (MSG_NOTE, vect_location,
++                              "scalar_stmts3\n");
++              for (unsigned i = 0; i < scalar_stmts.length (); ++i)
++                dump_printf_loc (MSG_NOTE, vect_location,
++                           "  %G", scalar_stmts3[i]->stmt);
++            }
++
++          root_stmt_infos3.create (4);
++          root_stmt_infos2.create (4);
++          root_stmt_infos3.safe_push (root_stmt_infos.pop ());
++          root_stmt_infos2.safe_push (root_stmt_infos.pop ());
++          root_stmt_infos3.safe_push (root_stmt_infos.pop ());
++          root_stmt_infos2.safe_push (root_stmt_infos.pop ());
+ 
++          padding_vf9 = true;
++          padding_merge = true;
++          group_size = 4;
++        }
++      }
+   /* Build the tree for the SLP instance.  */
+-  unsigned int group_size = scalar_stmts.length ();
+   bool *matches = XALLOCAVEC (bool, group_size);
+   poly_uint64 max_nunits = 1;
+   unsigned tree_size = 0;
+@@ -3045,6 +3175,17 @@ vect_build_slp_instance (vec_info *vinfo,
+   slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+ 				       &max_nunits, matches, limit,
+ 				       &tree_size, bst_map);
++  /* For the VF9 case, three build tree operations are required.  */
++  if (padding_vf9)
++    {
++      vect_build_slp_instance (vinfo, kind, scalar_stmts3,
++                               root_stmt_infos3, max_tree_size,
++                               limit, bst_map, NULL);
++      vect_build_slp_instance (vinfo, kind, scalar_stmts2,
++                               root_stmt_infos2, max_tree_size,
++                               limit, bst_map, NULL);
++    }
++
+   if (node != NULL)
+     {
+       /* Calculate the unrolling factor based on the smallest type.  */
+@@ -4886,7 +5027,23 @@ vectorizable_bb_reduc_epilogue (slp_instance instance,
+     reduc_code = PLUS_EXPR;
+   internal_fn reduc_fn;
+   tree vectype = SLP_TREE_VECTYPE (SLP_INSTANCE_TREE (instance));
+-  if (!reduction_fn_for_scalar_code (reduc_code, &reduc_fn)
++  /* Relax the function support check under the IFN_REDUC_IOR case. */
++  if (padding_merge)
++    {
++      if (!reduction_fn_for_scalar_code (reduc_code, &reduc_fn)
++          || reduc_fn == IFN_LAST)
++        return false;
++      if (!direct_internal_fn_supported_p (reduc_fn, vectype, OPTIMIZE_FOR_BOTH))
++        {
++          if (reduc_fn != IFN_REDUC_IOR)
++            return false;
++          else if (!(TYPE_PRECISION (TREE_TYPE (vectype)) == 32
++                 && maybe_eq (TYPE_VECTOR_SUBPARTS (vectype), 4)))
++            return false;
++	    return true;
++        }
++    }
++  else if (!reduction_fn_for_scalar_code (reduc_code, &reduc_fn)
+       || reduc_fn == IFN_LAST
+       || !direct_internal_fn_supported_p (reduc_fn, vectype, OPTIMIZE_FOR_BOTH)
+       || !useless_type_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
+@@ -6079,6 +6236,49 @@ vect_slp_region (vec bbs, vec datarefs,
+ 			       GET_MODE_NAME (vector_modes[mode_i]));
+ 	    mode_i += 1;
+ 	  }
++      /* Merge three groups of reduce_ior in the same basic block. */
++      if (padding_merge)
++        for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
++          {
++            vec gimple_vec;
++            gimple_vec.create (0);
++            gimple_stmt_iterator gsi;
++            gimple_stmt_iterator gsi1;
++            for (gsi = gsi_start_bb (bb_vinfo->bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
++              {
++                gimple *stmt = gsi_stmt (gsi);
++                if (stmt && gimple_call_internal_p (stmt))
++                  {
++                    gcall *call_stmt = as_a (stmt);
++                    if (call_stmt)
++                      {
++                        enum internal_fn ifn = gimple_call_internal_fn (call_stmt);
++                        if (ifn == IFN_REDUC_IOR)
++                          {
++                            gimple_vec.safe_push (stmt);
++                            gsi1 = gsi;
++                          }
++                      }
++	          }
++	      }
++
++            if (gimple_vec.length () == 3)
++              {
++                tree lhs0 = gimple_call_arg (gimple_vec[0], 0);
++                tree vector_type = TREE_TYPE (lhs0);
++                tree vector1 = create_tmp_var (vector_type, "vector1");
++                tree vector2 = create_tmp_var (vector_type, "vector2");
++                gassign *stmt_to_insert1 = gimple_build_assign (vector1 , 
++                        BIT_IOR_EXPR, gimple_call_arg (gimple_vec[0], 0),
++                        gimple_call_arg (gimple_vec[1], 0));
++                gassign *stmt_to_insert2 = gimple_build_assign (vector2 ,
++                        BIT_IOR_EXPR, vector1, gimple_call_arg (gimple_vec[2], 0));
++                        gimple_call_set_arg (gimple_vec[2], 0, vector2);
++                gsi_insert_before (&gsi1, stmt_to_insert1, GSI_SAME_STMT);
++                gsi_insert_before (&gsi1, stmt_to_insert2, GSI_SAME_STMT);
++              }
++            gimple_vec.release ();
++	  }
+ 
+       delete bb_vinfo;
+ 
+diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
+index 34920041116..f98fbf1e207 100644
+--- a/gcc/tree-vect-stmts.cc
++++ b/gcc/tree-vect-stmts.cc
+@@ -18,7 +18,6 @@ for more details.
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3.  If not see
+ .  */
+-
+ #include "config.h"
+ #include "system.h"
+ #include "coretypes.h"
+@@ -58,7 +57,7 @@ along with GCC; see the file COPYING3.  If not see
+ 
+ /* For lang_hooks.types.type_for_mode.  */
+ #include "langhooks.h"
+-
++static bool gap_load = false;
+ /* Return the vectorized type for the given statement.  */
+ 
+ tree
+@@ -1234,7 +1233,8 @@ vect_model_load_cost (vec_info *vinfo,
+ 					 scalar_to_vec, stmt_info, 0,
+ 					 vect_prologue);
+     }
+-  else
++  /* Correct the cost calculation for load with gap.  */
++  else if (!gap_load)
+     vect_get_load_cost (vinfo, stmt_info, ncopies,
+ 			alignment_support_scheme, misalignment, first_stmt_p,
+ 			&inside_cost, &prologue_cost, 
+@@ -8917,6 +8917,7 @@ vectorizable_load (vec_info *vinfo,
+ 		if (k > maxk)
+ 		  maxk = k;
+ 	      tree vectype = SLP_TREE_VECTYPE (slp_node);
++	      gap_load = false;
+ 	      if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
+ 		  || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
+ 		{
+@@ -8924,7 +8925,12 @@ vectorizable_load (vec_info *vinfo,
+ 		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ 				     "BB vectorization with gaps at the end of "
+ 				     "a load is not supported\n");
+-		  return false;
++                  if (!padding_slp)
++                    return false;
++		  int group_size_rounded_down = DR_GROUP_SIZE (group_info) & ~(nunits - 1);
++		  if ((maxk != 2 && maxk != 1) || group_size_rounded_down != 0)
++		    return false;
++		  gap_load = true;
+ 		}
+ 	    }
+ 
+-- 
+2.22.0
+
diff --git a/gcc.spec b/gcc.spec
index d312181..22d2c85 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -1,4 +1,4 @@
-%define anolis_release 2
+%define anolis_release 3
 
 %global DATE 20221121
 %global gitrev b3f5a0d53b84ed27cf00cfa2b9c3e2c78935c07d
@@ -246,6 +246,15 @@ Patch3124: libsanitizer-add-LoongArch-support.patch
 Patch3125: LoongArch-fix-error-building.patch
 Patch3126: libjccjit-do-not-link-objects-contained-same-element.patch
 
+# Part 5000 ~ 5999
+Patch5001: HYGON-0001-arch-support-for-hygon.patch
+Patch5002: HYGON-0002-array-widen-compare.patch
+Patch5003: HYGON-0003-function-attribute-judgement.patch
+Patch5004: HYGON-0004-struct-data-layout-optimization.patch
+Patch5005: HYGON-0005-basick-block-reorder.patch
+Patch5006: HYGON-0006-coop-vectorize.patch
+Patch5007: HYGON-0007-padding-slp-optimization.patch
+
 %if %{build_go}
 %global __os_install_post \
 chmod 644 %{buildroot}%{_prefix}/%{_lib}/libgo.so.21.* \
@@ -884,6 +893,15 @@ The %{name}-doc package contains documentation files for %{name}.
 %patch3125 -p1
 %patch3126 -p1
 %endif
+%ifarch x86_64
+%patch5001 -p1
+%patch5002 -p1
+%patch5003 -p1
+%patch5004 -p1
+%patch5005 -p1
+%patch5006 -p1
+%patch5007 -p1
+%endif
 
 rm -f libphobos/testsuite/libphobos.gc/forkgc2.d
 
@@ -2340,6 +2358,9 @@ end
 
 
 %changelog
+* Mon Apr 08 2024 Zhaoling Bao  12.3.0-3
+- Hygon: Add supported patches.
+ 
 * Fri Mar 15 2024 Peng Fan  12.3.0-2
 - Type: Sync
 - DESC: Sync patch from gcc upstream
-- 
Gitee