From c3c66c1082ed7845e04ad912e4f2cc4ac43efe8b Mon Sep 17 00:00:00 2001 From: liyancheng <412998149@qq.com> Date: Wed, 7 May 2025 11:30:27 +0800 Subject: [PATCH] [Sync] Sync patches from openeuler/gcc Including the following patches: - 0369-SME-start-za-before-write-address-to-tpidr2.patch - 0370-Add-hip12-core-definition-and-cost-model.patch - 0371-SVE-Fix-std-find-with-sve.patch - 0372-oeAware-Add-.GCC4OE_oeAware-section-for-optimization.patch - 0373-Include-insn-opinit.h-in-PLUGIN_H-PR110610.patch - 0374-Add-hip12-instructions-pipeline.patch (cherry picked from commit 6902ffe2d7efb1d6ce129fbea9ad08225543c32f) --- ...rt-za-before-write-address-to-tpidr2.patch | 64 ++ ...hip12-core-definition-and-cost-model.patch | 421 ++++++++ 0371-SVE-Fix-std-find-with-sve.patch | 70 ++ ...4OE_oeAware-section-for-optimization.patch | 286 ++++++ ...e-insn-opinit.h-in-PLUGIN_H-PR110610.patch | 33 + 0374-Add-hip12-instructions-pipeline.patch | 965 ++++++++++++++++++ gcc.spec | 18 +- 7 files changed, 1856 insertions(+), 1 deletion(-) create mode 100644 0369-SME-start-za-before-write-address-to-tpidr2.patch create mode 100644 0370-Add-hip12-core-definition-and-cost-model.patch create mode 100644 0371-SVE-Fix-std-find-with-sve.patch create mode 100644 0372-oeAware-Add-.GCC4OE_oeAware-section-for-optimization.patch create mode 100644 0373-Include-insn-opinit.h-in-PLUGIN_H-PR110610.patch create mode 100644 0374-Add-hip12-instructions-pipeline.patch diff --git a/0369-SME-start-za-before-write-address-to-tpidr2.patch b/0369-SME-start-za-before-write-address-to-tpidr2.patch new file mode 100644 index 0000000..8bf8eb5 --- /dev/null +++ b/0369-SME-start-za-before-write-address-to-tpidr2.patch @@ -0,0 +1,64 @@ +From cc67c76a448a49783ccb317ada1db6523bd637d7 Mon Sep 17 00:00:00 2001 +From: eastb233 +Date: Mon, 24 Mar 2025 10:13:44 +0800 +Subject: [PATCH] [SME] start za before write address to tpidr2 + +--- + gcc/config/aarch64/aarch64.cc | 1 + + gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c 
| 3 ++- + gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c | 3 ++- + 3 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index a06c2c515..dea9447b4 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -30332,6 +30332,7 @@ aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode, + || prev_mode == aarch64_local_sme_state::ACTIVE_DEAD + || prev_mode == aarch64_local_sme_state::INACTIVE_CALLER) + { ++ emit_insn (gen_aarch64_smstart_za ()); + /* A transition from ACTIVE_LIVE to INACTIVE_LOCAL is the usual + case of setting up a lazy save buffer before a call. + A transition from INACTIVE_CALLER is similar, except that +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c +index a764a7c89..b3c3442fe 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c +@@ -282,12 +282,13 @@ __arm_new("za") void test12(volatile int *ptr) + ** ... + ** bl inout_za + ** ... ++** smstart za ++** ... + ** msr tpidr2_el0, x[0-9]+ + ** ... + ** bl private_za + ** ... + ** cbnz [^\n]+ +-** smstart za + ** msr tpidr2_el0, xzr + ** bl out_za + ** bl in_za +diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c +index d54840d3d..d8f758adc 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c +@@ -276,12 +276,13 @@ void test12(volatile int *ptr) __arm_inout("za") + ** msr tpidr2_el0, xzr + ** bl inout_za + ** ... ++** smstart za ++** ... + ** msr tpidr2_el0, x[0-9]+ + ** ... 
+ ** bl private_za + ** ldr [^\n]+ + ** cbnz [^\n]+ +-** smstart za + ** msr tpidr2_el0, xzr + ** bl out_za + ** bl in_za +-- +2.34.1 + diff --git a/0370-Add-hip12-core-definition-and-cost-model.patch b/0370-Add-hip12-core-definition-and-cost-model.patch new file mode 100644 index 0000000..6b28a0b --- /dev/null +++ b/0370-Add-hip12-core-definition-and-cost-model.patch @@ -0,0 +1,421 @@ +From c5970536c2caa3980bb1fded812ac0dc8ebf3681 Mon Sep 17 00:00:00 2001 +From: liyunfei +Date: Fri, 18 Apr 2025 14:44:24 +0800 +Subject: [PATCH] Add hip12 core definition and cost model + +This adds a cost model and core definition for hip12. + +Signed-off-by: liyunfei +--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 108 ++++++++++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.cc | 209 +++++++++++++++++++++++ + gcc/doc/invoke.texi | 3 +- + 5 files changed, 321 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 8f6210397..97d3c5df9 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -179,4 +179,5 @@ AARCH64_CORE("hip11", hip11, hip11, V8_5A, (SVE, SVE2, F16), hip11, 0x48, 0xd22 + AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) + ++AARCH64_CORE("hip12", hip12, hip12, V9_2A, (SVE, SVE2, SVE2_BITPERM, SVE2_AES, SVE2_SM4, SVE2_SHA3, F16, RCPC, BF16, DOTPROD, LSE, SIMD, PAUTH, RDMA, LS64), hip12, 0x4e, 0xd06, -1) + #undef AARCH64_CORE +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index a39ace9ba..d59ef10be 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -665,6 +665,114 
@@ const struct cpu_cost_table hip11_extra_costs = + } + }; + ++const struct cpu_cost_table hip12_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ 0, /* arith_shift. */ ++ 0, /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ COSTS_N_INSNS (1), /* extend. */ ++ 0, /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ 0, /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (5) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (7) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (5), /* loadf. */ ++ COSTS_N_INSNS (5), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (5), /* loadv. */ ++ COSTS_N_INSNS (2) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (5), /* div. */ ++ COSTS_N_INSNS (2), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (3), /* fma. */ ++ COSTS_N_INSNS (1), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ 0, /* neg. */ ++ COSTS_N_INSNS (1), /* compare. 
*/ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (3), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (7), /* div. */ ++ COSTS_N_INSNS (2), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (3), /* fma. */ ++ COSTS_N_INSNS (1), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ 0, /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (3), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1), /* alu. */ ++ COSTS_N_INSNS (2), /* mult. */ ++ COSTS_N_INSNS (1), /* movi. */ ++ COSTS_N_INSNS (1), /* dup. */ ++ COSTS_N_INSNS (1) /* extract. */ ++ } ++}; ++ + const struct cpu_cost_table a64fx_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 1cfa3559d..488e39b7c 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- 
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,hip10a,hip10c,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,hip10a,hip10c,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2,hip12" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 9f1fbf970..c4e2eba01 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -561,6 +561,24 @@ static const struct cpu_addrcost_table hip11_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table hip12_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di 
*/ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 2, /* post_modify_ld3_st3 */ ++ 2, /* post_modify_ld4_st4 */ ++ 0, /* register_offset */ ++ 0, /* register_sextend */ ++ 0, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -756,6 +774,16 @@ static const struct cpu_regmove_cost hip11_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip12_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 6, /* GP2FP */ ++ 2, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost a64fx_regmove_cost = + { + 1, /* GP2GP */ +@@ -1231,6 +1259,143 @@ static const struct cpu_vector_cost hip11_vector_cost = + nullptr /* issue_info */ + }; + ++static const advsimd_vec_cost hip12_advsimd_vector_cost = ++{ ++ 2, /* int_stmt_cost */ ++ 2, /* fp_stmt_cost */ ++ 2, /* ld2_st2_permute_cost */ ++ 2, /* ld3_st3_permute_cost */ ++ 3, /* ld4_st4_permute_cost */ ++ 2, /* permute_cost */ ++ 9, /* reduc_i8_cost */ ++ 7, /* reduc_i16_cost */ ++ 5, /* reduc_i32_cost */ ++ 3, /* reduc_i64_cost */ ++ 3, /* reduc_f16_cost */ ++ 3, /* reduc_f32_cost */ ++ 3, /* reduc_f64_cost */ ++ 3, /* store_elt_extra_cost */ ++ 2, /* vec_to_scalar_cost */ ++ 5, /* scalar_to_vec_cost */ ++ 8, /* align_load_cost */ ++ 8, /* unalign_load_cost */ ++ 1, /* unalign_store_cost */ ++ 1 /* store_cost */ ++}; ++ ++static const sve_vec_cost hip12_sve_vector_cost = ++{ ++ { ++ 2, /* int_stmt_cost */ ++ 2, /* fp_stmt_cost */ ++ 2, /* ld2_st2_permute_cost */ ++ 3, /* ld3_st3_permute_cost */ ++ 3, /* ld4_st4_permute_cost */ ++ 2, /* permute_cost */ ++ /* Theoretically, a reduction involving 31 scalar ADDs could ++ complete in ~6 cycles and would have a cost of 31. [SU]ADDV ++ completes in 13 cycles, so give it a cost of 31 + 7. 
*/ ++ 38, /* reduc_i8_cost */ ++ /* Likewise for 15 scalar ADDs (~3 cycles) vs. 10: 15 + 7. */ ++ 22, /* reduc_i16_cost */ ++ /* Likewise for 7 scalar ADDs (~2 cycles) vs. 7: 7 + 5. */ ++ 12, /* reduc_i32_cost */ ++ /* Likewise for 3 scalar ADDs (~1 cycles) vs. 4: 3 + 3. */ ++ 6, /* reduc_i64_cost */ ++ /* Theoretically, a reduction involving 15 scalar FADDs could ++ complete in ~8 cycles and would have a cost of 30. FADDV ++ completes in 15 cycles, so give it a cost of 30 + 7. */ ++ 37, /* reduc_f16_cost */ ++ /* Likewise for 7 scalar FADDs (~4 cycles) vs. 12: 14 + 8. */ ++ 22, /* reduc_f32_cost */ ++ /* Likewise for 3 scalar FADDs (~2 cycles) vs. 9: 6 + 7. */ ++ 13, /* reduc_f64_cost */ ++ 2, /* store_elt_extra_cost */ ++ /* This value is just inherited from the Cortex-A57 table. */ ++ 2, /* vec_to_scalar_cost */ ++ /* See the comment above the Advanced SIMD versions. */ ++ 5, /* scalar_to_vec_cost */ ++ 8, /* align_load_cost */ ++ 8, /* unalign_load_cost */ ++ /* Although stores have a latency of 2 and compete for the ++ vector pipes, in practice it's better not to model that. 
*/ ++ 1, /* unalign_store_cost */ ++ 1 /* store_cost */ ++ }, ++ 3, /* clast_cost */ ++ 42, /* fadda_f16_cost */ ++ 26, /* fadda_f32_cost */ ++ 20, /* fadda_f64_cost */ ++ 6, /* gather_load_x32_cost */ ++ 6, /* gather_load_x64_cost */ ++ 1 /* scatter_store_elt_cost */ ++}; ++ ++static const aarch64_scalar_vec_issue_info hip12_scalar_issue_info = ++{ ++ 5, /* loads_stores_per_cycle */ ++ 2, /* stores_per_cycle */ ++ 8, /* general_ops_per_cycle */ ++ 0, /* fp_simd_load_general_ops */ ++ 1 /* fp_simd_store_general_ops */ ++}; ++ ++static const aarch64_advsimd_vec_issue_info hip12_advsimd_issue_info = ++{ ++ { ++ 5, /* loads_stores_per_cycle */ ++ 2, /* stores_per_cycle */ ++ 4, /* general_ops_per_cycle */ ++ 0, /* fp_simd_load_general_ops */ ++ 1 /* fp_simd_store_general_ops */ ++ }, ++ 2, /* ld2_st2_general_ops */ ++ 2, /* ld3_st3_general_ops */ ++ 3 /* ld4_st4_general_ops */ ++}; ++ ++static const aarch64_sve_vec_issue_info hip12_sve_issue_info = ++{ ++ { ++ { ++ 5, /* loads_per_cycle */ ++ 2, /* stores_per_cycle */ ++ 4, /* general_ops_per_cycle */ ++ 0, /* fp_simd_load_general_ops */ ++ 1 /* fp_simd_store_general_ops */ ++ }, ++ 2, /* ld2_st2_general_ops */ ++ 2, /* ld3_st3_general_ops */ ++ 3 /* ld4_st4_general_ops */ ++ }, ++ 2, /* pred_ops_per_cycle */ ++ 1, /* while_pred_ops */ ++ 0, /* int_cmp_pred_ops */ ++ 0, /* fp_cmp_pred_ops */ ++ 1, /* gather_scatter_pair_general_ops */ ++ 1 /* gather_scatter_pair_pred_ops */ ++}; ++ ++static const aarch64_vec_issue_info hip12_vec_issue_info = ++{ ++ &hip12_scalar_issue_info, ++ &hip12_advsimd_issue_info, ++ &hip12_sve_issue_info ++}; ++ ++static const struct cpu_vector_cost hip12_vector_cost = ++{ ++ 1, /* scalar_int_stmt_cost */ ++ 2, /* scalar_fp_stmt_cost */ ++ 4, /* scalar_load_cost */ ++ 1, /* scalar_store_cost */ ++ 1, /* cond_taken_branch_cost */ ++ 1, /* cond_not_taken_branch_cost */ ++ &hip12_advsimd_vector_cost, /* advsimd */ ++ &hip12_sve_vector_cost, /* sve */ ++ &hip12_vec_issue_info /* issue_info */ ++}; 
++ + static const advsimd_vec_cost cortexa57_advsimd_vector_cost = + { + 2, /* int_stmt_cost */ +@@ -1622,6 +1787,17 @@ static const cpu_prefetch_tune hip11_prefetch_tune = + -1 /* default_opt_level */ + }; + ++static const cpu_prefetch_tune hip12_prefetch_tune = ++{ ++ 0, /* num_slots */ ++ 64, /* l1_cache_size */ ++ 64, /* l1_cache_line_size */ ++ 512, /* l2_cache_size */ ++ true, /* prefetch_dynamic_strides */ ++ -1, /* minimum_stride */ ++ -1 /* default_opt_level */ ++}; ++ + static const cpu_prefetch_tune xgene1_prefetch_tune = + { + 8, /* num_slots */ +@@ -2121,6 +2297,39 @@ static const struct tune_params hip11_tunings = + &hip11_prefetch_tune + }; + ++static const struct tune_params hip12_tunings = ++{ ++ &hip12_extra_costs, ++ &hip12_addrcost_table, ++ &hip12_regmove_cost, ++ &hip12_vector_cost, ++ &generic_branch_cost, ++ &generic_approx_modes, ++ SVE_256, /* sve_width */ ++ { 4, /* load_int. */ ++ 1, /* store_int. */ ++ 6, /* load_fp. */ ++ 1, /* store_fp. */ ++ 6, /* load_pred. */ ++ 1 /* store_pred. */ ++ }, /* memmov_cost. */ ++ 16, /* issue_rate */ ++ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH ++ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ ++ "16", /* function_align. */ ++ "4", /* jump_align. */ ++ "8", /* loop_align. */ ++ 4, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 4, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ 0, /* max_case_values. */ ++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ ++ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ ++ &generic_prefetch_tune ++}; ++ + static const struct tune_params xgene1_tunings = + { + &xgene1_extra_costs, +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 90073ac98..e985e6c2c 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19221,7 +19221,8 @@ performance of the code. 
Permissible values for this option are: + @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96} + @samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n}, + @samp{octeontx2f95mm}, +-@samp{a64fx},@samp{hip09},@samp{hip10a},@samp{hip10c},@samp{hip11} ++@samp{a64fx}, ++@samp{hip09},@samp{hip10a},@samp{hip10c},@samp{hip11},@samp{hip12}, + @samp{thunderx}, @samp{thunderxt88}, + @samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110}, + @samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus}, +-- +2.34.1 + diff --git a/0371-SVE-Fix-std-find-with-sve.patch b/0371-SVE-Fix-std-find-with-sve.patch new file mode 100644 index 0000000..a7af818 --- /dev/null +++ b/0371-SVE-Fix-std-find-with-sve.patch @@ -0,0 +1,70 @@ +From 707ec18abb4ec1a8af651197c8898021b999843a Mon Sep 17 00:00:00 2001 +From: blunce +Date: Mon, 21 Apr 2025 14:44:05 +0800 +Subject: [PATCH] [SVE] Fix std::find with sve + +--- + libgcc/config/aarch64/sve_std_find.c | 46 ++++++++++++++++------------ + 1 file changed, 26 insertions(+), 20 deletions(-) + +diff --git a/libgcc/config/aarch64/sve_std_find.c b/libgcc/config/aarch64/sve_std_find.c +index 0caf1f4f6..86ff4cb5a 100644 +--- a/libgcc/config/aarch64/sve_std_find.c ++++ b/libgcc/config/aarch64/sve_std_find.c +@@ -6,27 +6,33 @@ + uint64_t *__sve_optimized_find_u64 (uint64_t *first, uint64_t *last, + uint64_t const *value, uint8_t threshold) + { +- if (first + threshold > last) +- goto Tail; ++ if (first + threshold > last) ++ { ++ goto Tail; ++ } + +- uint64_t m = svcntd (); +- uint64_t n = (last - first) / m; +- svbool_t flag_true = svptrue_b64 (); +- for (; n-- > 0;) +- { +- svuint64_t v3 = svld1_u64 (flag_true, (uint64_t *)first); +- svbool_t v4 = svcmpeq_n_u64 (flag_true, v3, (uint64_t *)value); +- if (svptest_any (flag_true, v4)) +- break; +- first += m; +- } ++ uint64_t m = svcntd (); ++ uint64_t n = (last - first) / m; ++ svbool_t TRUE = svptrue_b64 (); ++ for (; n-- > 0;) ++ { ++ svuint64_t v3 = svld1_u64 (TRUE, (uint64_t 
*)first); ++ svbool_t v4 = svcmpeq_n_u64 (TRUE, v3, (uint64_t)*value); ++ if (svptest_any (TRUE, v4)) ++ { ++ break; ++ } ++ first += m; ++ } + + Tail: +- while (first < last) +- { +- if (*first == *value) +- return first; +- ++first; +- } +- return first; ++ while (first < last) ++ { ++ if (*first == *value) ++ { ++ return first; ++ } ++ ++first; ++ } ++ return last; + } +-- +2.34.1 + diff --git a/0372-oeAware-Add-.GCC4OE_oeAware-section-for-optimization.patch b/0372-oeAware-Add-.GCC4OE_oeAware-section-for-optimization.patch new file mode 100644 index 0000000..564a058 --- /dev/null +++ b/0372-oeAware-Add-.GCC4OE_oeAware-section-for-optimization.patch @@ -0,0 +1,286 @@ +From 3963ed3b7993378d925084acd2adc83ee69e2b80 Mon Sep 17 00:00:00 2001 +From: liyancheng <412998149@qq.com> +Date: Sat, 19 Apr 2025 17:06:14 +0800 +Subject: [PATCH] [oeAware] Add .GCC4OE_oeAware section for optimization policy + tracking + +This commit introduces a mechanism to embed optimization policy metadata +into the compiled binary through a dedicated ELF section. The feature is +controlled by the new -foeaware-policy=[n] command-line option. + +Key components: +- Creates mergeable section .GCC4OE_oeAware with SECTION_STRINGS flag +- Only triggers section creation in translation units containing main() +- Encodes 32-bit oeaware_optimize_policy value in little-endian format +- Uses safe context checks (cfun validation, MAIN_NAME_P predicate) + +Implementation details: +1. Section creation is guarded by function context checks to prevent + redundant section generation in non-main compilation units +2. The SECTION_STRINGS flag allows linker merging of identical policy + values across different translation units +3. Architecture-neutral implementation through byte-wise value emission +4. 
Includes gcc_assert to validate policy value range + +Usage example: +gcc -foeaware-policy=2 -O2 source.c + +This feature will be used by the OpenEuler runtime environment to guide +binary-level optimization decisions. The section content can be verified +with: objdump -s -j .GCC4OE_oeAware a.out +--- + gcc/common.opt | 8 +++ + gcc/doc/invoke.texi | 4 ++ + gcc/final.cc | 3 ++ + gcc/opts.cc | 8 +++ + gcc/testsuite/gcc.dg/dg.exp | 1 + + gcc/testsuite/gcc.dg/oeaware-main.c | 7 +++ + gcc/testsuite/gcc.dg/oeaware-no-main.c | 6 +++ + gcc/testsuite/lib/oeaware.exp | 75 ++++++++++++++++++++++++++ + gcc/varasm.cc | 29 ++++++++++ + gcc/varasm.h | 2 + + 10 files changed, 143 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/oeaware-main.c + create mode 100644 gcc/testsuite/gcc.dg/oeaware-no-main.c + create mode 100644 gcc/testsuite/lib/oeaware.exp + +diff --git a/gcc/common.opt b/gcc/common.opt +index 23544740d..2578c7cd0 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -3130,6 +3130,14 @@ foptimize-strlen + Common Var(flag_optimize_strlen) Optimization + Enable string length optimizations on trees. + ++foeaware-policy ++Common Var(flag_oeaware) Init(0) Optimization ++Perform oeAware-gcc co-optimization. ++ ++foeaware-policy= ++Common RejectNegative Joined UInteger Var(oeaware_optimize_policy) Init(1) IntegerRange(1, 7) ++Select the optimization policy. ++ + fisolate-erroneous-paths-dereference + Common Var(flag_isolate_erroneous_paths_dereference) Optimization + Detect paths that trigger erroneous or undefined behavior due to +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 90073ac98..9a8332b1b 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -22898,6 +22898,10 @@ output file being linked. See man ld(1) for more information. + When passed this option, GCC produces a dynamic library instead of + an executable when linking, using the Darwin @file{libtool} command. 
+ ++@item -foeaware-policy=@var{n} ++@opindex foeaware-policy ++Emit optimization policy value @var{n} into .GCC4OE_oeAware section. ++ + @item -force_cpusubtype_ALL + @opindex force_cpusubtype_ALL + This causes GCC's output file to have the @samp{ALL} subtype, instead of +diff --git a/gcc/final.cc b/gcc/final.cc +index 0252250ba..033d2fff7 100644 +--- a/gcc/final.cc ++++ b/gcc/final.cc +@@ -4699,6 +4699,9 @@ rest_of_handle_final (void) + dump_profile_to_elf_sections (); + } + ++ if (flag_oeaware) ++ create_oeaware_section (); ++ + return 0; + } + +diff --git a/gcc/opts.cc b/gcc/opts.cc +index 162e14bc2..b69c43724 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -3180,6 +3180,14 @@ common_handle_option (struct gcc_options *opts, + } + break; + ++ case OPT_foeaware_policy_: ++ opts->x_oeaware_optimize_policy = value; ++ /* No break here - do -foeaware processing. */ ++ /* FALLTHRU. */ ++ case OPT_foeaware_policy: ++ opts->x_flag_oeaware = value; ++ break; ++ + case OPT_fipa_reorder_fields: + SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_struct_reorg, value); + break; +diff --git a/gcc/testsuite/gcc.dg/dg.exp b/gcc/testsuite/gcc.dg/dg.exp +index 9c8b0eac3..0e93b2098 100644 +--- a/gcc/testsuite/gcc.dg/dg.exp ++++ b/gcc/testsuite/gcc.dg/dg.exp +@@ -18,6 +18,7 @@ + + # Load support procs. + load_lib gcc-dg.exp ++load_lib oeaware.exp + + # If a testcase doesn't have special options, use these. 
+ global DEFAULT_CFLAGS +diff --git a/gcc/testsuite/gcc.dg/oeaware-main.c b/gcc/testsuite/gcc.dg/oeaware-main.c +new file mode 100644 +index 000000000..c3da4a3d3 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/oeaware-main.c +@@ -0,0 +1,7 @@ ++/* { dg-do run { target *-*-linux* *-*-gnu* } } */ ++/* { dg-options "-foeaware-policy=1" } */ ++ ++int main(void) { return 0; } ++ ++/* { dg-final { check-section-exists ".GCC4OE_oeAware" } } */ ++/* { dg-final { check-section-content ".GCC4OE_oeAware" "....01000000" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/oeaware-no-main.c b/gcc/testsuite/gcc.dg/oeaware-no-main.c +new file mode 100644 +index 000000000..79996f6f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/oeaware-no-main.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile { target *-*-linux* *-*-gnu* } } */ ++/* { dg-options "-foeaware-policy=1" } */ ++ ++int test(void) { return 0; } ++ ++/* { dg-final { scan-assembler-not "GCC4OE_oeAware" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/lib/oeaware.exp b/gcc/testsuite/lib/oeaware.exp +new file mode 100644 +index 000000000..d8b9f0a9e +--- /dev/null ++++ b/gcc/testsuite/lib/oeaware.exp +@@ -0,0 +1,75 @@ ++# Copyright (C) 2025-2025 Free Software Foundation, Inc. ++ ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . 
++ ++# gcc/testsuite/gcc.dg/oeaware.exp ++ ++proc check-section-exists { section } { ++ for {set level 1} {$level <= 5} {incr level} { ++ if {[catch {upvar $level output_file output_file}]} { ++ continue; ++ } ++ if {[info exists output_file]} { ++ break ++ } ++ } ++ ++ if {![info exists output_file]} { ++ fail "Cannot find output_file in any parent scope" ++ return ++ } ++ ++ if {![file exists $output_file]} { ++ fail "Output file $output_file does not exist" ++ return ++ } ++ ++ set cmd "objdump -h $output_file | grep -q '$section'" ++ if {[catch {exec sh -c $cmd}]} { ++ fail "Section $section not found" ++ } else { ++ pass "Section $section exists" ++ } ++} ++ ++proc check-section-content { section expected } { ++ for {set level 1} {$level <= 5} {incr level} { ++ if {[catch {upvar $level output_file output_file}]} { continue } ++ if {[info exists output_file]} { break } ++ } ++ ++ if {![info exists output_file]} { ++ fail "Cannot find output_file in any parent scope" ++ return ++ } ++ ++ if {![file exists $output_file]} { ++ fail "Output file $output_file does not exist" ++ return ++ } ++ ++ set cmd "objdump -s -j $section $output_file | tail -n +5 | awk '{ printf \"%s%s%s%s\", \$5, \$4, \$3, \$2 }'" ++ if {[catch {set result [exec sh -c $cmd]} err]} { ++ fail "Failed to read section content: $err" ++ return ++ } ++ ++ set result [string trim $result] ++ if {$result eq $expected} { ++ pass "Section $section content matches" ++ } else { ++ fail "Section $section content mismatch (got '$result', expected '$expected')" ++ } ++} +\ No newline at end of file +diff --git a/gcc/varasm.cc b/gcc/varasm.cc +index d122730b5..bdf02edea 100644 +--- a/gcc/varasm.cc ++++ b/gcc/varasm.cc +@@ -8564,4 +8564,33 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) + switch_to_comdat_section(sect, DECL_NAME (decl)); + } + ++/* Create .GCC4OE_oeAware section with optimization policy value. ++ Only emitted for main function's translation unit. 
The 4-byte ++ value is stored in target-endian format (little-endian here). ++ SECTION_STRINGS allows merging identical policy values. */ ++ ++void ++create_oeaware_section () ++{ ++ /* To prevent inserting repeated segments and data, ++ we only perform the insertion in the file where the main ++ function is located. */ ++ if (!cfun || TREE_CODE (cfun->decl) != FUNCTION_DECL ++ || !DECL_NAME (cfun->decl) || !MAIN_NAME_P (DECL_NAME (cfun->decl))) ++ return; ++ ++ int flags = SECTION_STRINGS; ++ section *oe_section = get_section (".GCC4OE_oeAware", flags, NULL, true); ++ switch_to_section (oe_section); ++ ++ gcc_assert (oeaware_optimize_policy <= UINT8_MAX); ++ uint32_t value = oeaware_optimize_policy; ++ uint8_t *bytes = (uint8_t *)&value; ++ ++ fprintf (asm_out_file, "\t.byte 0x%02x, 0x%02x, 0x%02x, 0x%02x\n", ++ bytes[0], bytes[1], bytes[2], bytes[3]); ++ ++ return; ++} ++ + #include "gt-varasm.h" +diff --git a/gcc/varasm.h b/gcc/varasm.h +index 8ba8374e7..8dec57e0f 100644 +--- a/gcc/varasm.h ++++ b/gcc/varasm.h +@@ -81,4 +81,6 @@ extern rtx assemble_trampoline_template (void); + + extern void switch_to_comdat_section (section *, tree); + ++extern void create_oeaware_section (); ++ + #endif // GCC_VARASM_H +-- +2.34.1 + diff --git a/0373-Include-insn-opinit.h-in-PLUGIN_H-PR110610.patch b/0373-Include-insn-opinit.h-in-PLUGIN_H-PR110610.patch new file mode 100644 index 0000000..0f02406 --- /dev/null +++ b/0373-Include-insn-opinit.h-in-PLUGIN_H-PR110610.patch @@ -0,0 +1,33 @@ +From 284ed9c3f87c71ef98e24b048a9ce6d461e70aa5 Mon Sep 17 00:00:00 2001 +From: Andre Vieira +Date: Mon, 17 Jul 2023 17:00:54 +0100 +Subject: [PATCH] Include insn-opinit.h in PLUGIN_H [PR110610] + +This patch fixes PR110610 by including insn-opinit.h in the INTERNAL_FN_H list, +as insn-opinit.h is now required by internal-fn.h. This will lead to +insn-opinit.h being installed in the plugin directory. + +gcc/ChangeLog: + + PR plugins/110610 + * Makefile.in (INTERNAL_FN_H): Add insn-opinit.h. 
+--- + gcc/Makefile.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index ab6ad8206..c7a503235 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -942,7 +942,7 @@ READ_MD_H = $(OBSTACK_H) $(HASHTAB_H) read-md.h + BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def \ + gtm-builtins.def sanitizer.def + INTERNAL_FN_DEF = internal-fn.def +-INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF) ++INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF) insn-opinit.h + TREE_CORE_H = tree-core.h $(CORETYPES_H) all-tree.def tree.def \ + c-family/c-common.def $(lang_tree_files) \ + $(BUILTINS_DEF) $(INPUT_H) statistics.h \ +-- +2.34.1 + diff --git a/0374-Add-hip12-instructions-pipeline.patch b/0374-Add-hip12-instructions-pipeline.patch new file mode 100644 index 0000000..58cf815 --- /dev/null +++ b/0374-Add-hip12-instructions-pipeline.patch @@ -0,0 +1,965 @@ +From d63119daeb54cd0c387c1b24981c47d795e5a672 Mon Sep 17 00:00:00 2001 +From: liyunfei +Date: Fri, 25 Apr 2025 16:04:53 +0800 +Subject: [PATCH] Add hip12 instructions pipeline + +This adds instructions pipeline for hip12. 
+
+Signed-off-by: liyunfei
+---
+ gcc/config/aarch64/aarch64-cores.def |   2 +-
+ gcc/config/aarch64/aarch64.cc        |   6 +-
+ gcc/config/aarch64/aarch64.md        |   1 +
+ gcc/config/aarch64/hip12.md          | 891 +++++++++++++++++++++++++++
+ 4 files changed, 896 insertions(+), 4 deletions(-)
+ create mode 100644 gcc/config/aarch64/hip12.md
+
+diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
+index 97d3c5df9..2cb752ca1 100644
+--- a/gcc/config/aarch64/aarch64-cores.def
++++ b/gcc/config/aarch64/aarch64-cores.def
+@@ -179,5 +179,5 @@ AARCH64_CORE("hip11", hip11, hip11, V8_5A, (SVE, SVE2, F16), hip11, 0x48, 0xd22
+ AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+ AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
+ 
+-AARCH64_CORE("hip12", hip12, hip12, V9_2A, (SVE, SVE2, SVE2_BITPERM, SVE2_AES, SVE2_SM4, SVE2_SHA3, F16, RCPC, BF16, DOTPROD, LSE, SIMD, PAUTH, RDMA, LS64), hip12, 0x4e, 0xd06, -1)
++AARCH64_CORE("hip12", hip12, hip12, V9_2A, (SVE, SVE2, SVE2_BITPERM, SVE2_AES, SVE2_SM4, SVE2_SHA3, F16, RCPC, BF16, DOTPROD, LSE, SIMD, PAUTH, RDMA, LS64), hip12, 0x48, 0xd06, -1)
+ #undef AARCH64_CORE
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index c4e2eba01..b62ba344f 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -1790,9 +1790,9 @@ static const cpu_prefetch_tune hip11_prefetch_tune =
+ static const cpu_prefetch_tune hip12_prefetch_tune =
+ {
+   0, /* num_slots */
+-  64, /* l1_cache_size */
++  128, /* l1_cache_size */
+   64, /* l1_cache_line_size */
+-  512, /* l2_cache_size */
++  1024, /* l2_cache_size */
+   true, /* prefetch_dynamic_strides */
+   -1, /* minimum_stride */
+   -1 /* default_opt_level */
+@@ -2327,7 +2327,7 @@ static const struct tune_params hip12_tunings =
+   0, /* max_case_values. */
+   tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+   (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+-  &generic_prefetch_tune
++  &hip12_prefetch_tune
+ };
+ 
+ static const struct tune_params xgene1_tunings =
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index 69d296556..74a1a56bc 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -552,6 +552,7 @@
+ (include "hip10a.md")
+ (include "hip10c.md")
+ (include "hip11.md")
++(include "hip12.md")
+ 
+ ;; -------------------------------------------------------------------
+ ;; Jumps and other miscellaneous insns
+diff --git a/gcc/config/aarch64/hip12.md b/gcc/config/aarch64/hip12.md
+new file mode 100644
+index 000000000..031a4c39d
+--- /dev/null
++++ b/gcc/config/aarch64/hip12.md
+@@ -0,0 +1,891 @@
++;; hip12 pipeline description
++;; Copyright (C) 2023 Free Software Foundation, Inc.
++;;
++;;Contributed by liyunfei
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published by
++;; the Free Software Foundation; either version 3, or (at your option)
++;; any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but
++;; WITHOUT ANY WARRANTY; without even the implied warranty of
++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++;; General Public License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3. If not see
++;; .
++
++(define_automaton "hip12")
++(define_automaton "hip12_ldst")
++(define_automaton "hip12_v")
++
++; The hip12 core is modelled as an issue pipeline that has
++; the following functional units.
++; 1. 4 pipelines for single cycle integer micro operations: ALU0, ALU1, ALU3, ALU4
++
++(define_cpu_unit "hip12_alu0" "hip12")
++(define_cpu_unit "hip12_alu1" "hip12")
++(define_cpu_unit "hip12_alu3" "hip12")
++(define_cpu_unit "hip12_alu4" "hip12")
++
++(define_reservation "hip12_alu0134" "hip12_alu0|hip12_alu1|hip12_alu3|hip12_alu4")
++(define_reservation "hip12_alu14" "hip12_alu1|hip12_alu4")
++
++; 2. 2 pipelines for multi cycles integer micro operations: ALU2, ALU5
++
++(define_cpu_unit "hip12_alu2" "hip12")
++(define_cpu_unit "hip12_alu5" "hip12")
++
++(define_reservation "hip12_alu25" "hip12_alu2|hip12_alu5")
++(define_reservation "hip12_alu1425" "hip12_alu1|hip12_alu4|hip12_alu2|hip12_alu5")
++
++; 3. All ALU pipelines
++
++(define_reservation "hip12_alu" "hip12_alu0|hip12_alu1|hip12_alu2|hip12_alu3|hip12_alu4|hip12_alu5")
++
++; 4. 3 pipelines for load micro operations: Load0, Load1, Load2
++
++(define_cpu_unit "hip12_load0" "hip12_ldst")
++(define_cpu_unit "hip12_load1" "hip12_ldst")
++(define_cpu_unit "hip12_load2" "hip12_ldst")
++
++(define_reservation "hip12_ld" "hip12_load0|hip12_load1|hip12_load2")
++
++; 5. 2 pipelines for store micro operations: Store0, Store1
++
++(define_cpu_unit "hip12_store0" "hip12_ldst")
++(define_cpu_unit "hip12_store1" "hip12_ldst")
++
++(define_reservation "hip12_st" "hip12_store0|hip12_store1")
++
++; 6. 2 pipelines for store data micro operations: STD0, STD1
++
++(define_cpu_unit "hip12_store_data0" "hip12_ldst")
++(define_cpu_unit "hip12_store_data1" "hip12_ldst")
++
++(define_reservation "hip12_std" "hip12_store_data0|hip12_store_data1")
++
++; 7. 4 asymmetric pipelines for Asimd/FP/SVE micro operations: V0, V1, V2, V3
++
++(define_cpu_unit "hip12_v0" "hip12_v")
++(define_cpu_unit "hip12_v1" "hip12_v")
++(define_cpu_unit "hip12_v2" "hip12_v")
++(define_cpu_unit "hip12_v3" "hip12_v")
++
++(define_reservation "hip12_v0123" "hip12_v0|hip12_v1|hip12_v2|hip12_v3")
++(define_reservation "hip12_v02" "hip12_v0|hip12_v2")
++
++; 8. 2 pipelines for branch operations: Branch0, Branch1
++
++(define_cpu_unit "hip12_b0" "hip12")
++(define_cpu_unit "hip12_b1" "hip12")
++
++(define_reservation "hip12_b" "hip12_b0|hip12_b1")
++
++;; Block all issue queues.
++
++(define_reservation "hip12_block" "
++  hip12_alu0+hip12_alu1+hip12_alu2+hip12_alu3
++  +hip12_alu4+hip12_alu5+hip12_load0+hip12_load1+hip12_load2+hip12_store0+hip12_store1+hip12_store_data0+hip12_store_data1+hip12_v0+hip12_v1+hip12_v2+hip12_v3")
++
++;; Branch execution Unit
++
++(define_insn_reservation "hip12_branch" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "branch"))
++  "hip12_b")
++
++(define_insn_reservation "hip12_branch_and_link" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "call"))
++  "hip12_b+hip12_alu14")
++
++;; Integer arithmetic/logic instructions.
++
++(define_insn_reservation "hip12_alu_basic" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "alu_imm,alu_sreg,\
++                        adc_reg,adc_imm"))
++  "hip12_alu")
++
++(define_insn_reservation "hip12_alu_basic_flagset" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "alus_imm,alus_sreg,\
++                        adcs_reg,adcs_imm"))
++  "hip12_alu1425")
++
++(define_insn_reservation "hip12_alu_basic_extend" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "alu_ext,alus_ext,\
++                        alu_shift_imm_lsl_1to4,alu_shift_imm_other,\
++                        alu_shift_reg,alus_shift_imm,alus_shift_reg"))
++  "hip12_alu25")
++
++(define_insn_reservation "hip12_alu_logical" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "logic_reg"))
++  "hip12_alu")
++
++(define_insn_reservation "hip12_alu_logical_imm" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "logic_imm"))
++  "hip12_alu14")
++
++(define_insn_reservation "hip12_alu_logical_flagset" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "logics_reg"))
++  "hip12_alu1425")
++
++(define_insn_reservation "hip12_alu_logical_flagset_imm" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "logics_imm"))
++  "hip12_alu25")
++
++(define_insn_reservation "hip12_alu_conditional" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "csel")) ++ "hip12_alu14") ++ ++;; Divide and Multiply instructions. ++ ++(define_insn_reservation "hip12_divide" 8 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "sdiv,udiv")) ++ "hip12_alu25") ++ ++(define_insn_reservation "hip12_multiply" 3 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "mul,muls")) ++ "hip12_alu25") ++ ++(define_insn_reservation "hip12_multiply_long" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "smull,umull,smulls,umulls")) ++ "hip12_alu25") ++ ++(define_insn_reservation "hip12_multiply_accumulate" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "mla,mlas")) ++ "hip12_alu25+hip12_alu0134") ++ ++(define_insn_reservation "hip12_multiply_accumulate_long" 3 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "smlal,umlal")) ++ "hip12_alu25+hip12_alu0134") ++ ++;; no Pointer Authentication instructions in backend types. ++ ++;; Miscellaneous Data-Processing instructions. ++ ++(define_insn_reservation "hip12_address" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "adr")) ++ "hip12_alu14") ++ ++(define_insn_reservation "hip12_bitfield" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "bfm,bfx")) ++ "hip12_alu14") ++ ++;; Todo: Does hip12 have reg move or mvn instructions? ++(define_insn_reservation "hip12_move" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "mov_imm,mov_shift_reg")) ++ "hip12_alu") ++ ++(define_insn_reservation "hip12_count_leading" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "clz")) ++ "hip12_alu14") ++ ++(define_insn_reservation "hip12_reverse_bits_bytes" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "rbit,rev")) ++ "hip12_alu14") ++ ++; Todo: Does hip12 have imm shift instructions? 
++(define_insn_reservation "hip12_variable_shift" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "shift_reg")) ++ "hip12_alu14") ++ ++; Block all issue pipes for a cycle ++(define_insn_reservation "hip12_block" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "block")) ++ "hip12_block") ++ ++;; Load and Store instructions. ++ ++(define_insn_reservation "hip12_load_register" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "load_4,load_8")) ++ "hip12_ld") ++ ++(define_insn_reservation "hip12_load_pair" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "load_16")) ++ "hip12_ld") ++ ++(define_insn_reservation "hip12_store" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "store_4,store_8")) ++ "hip12_st+hip12_std") ++ ++(define_insn_reservation "hip12_store_pair" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "store_16")) ++ "hip12_st+hip12_std") ++ ++;; FP Data Processing instructions. ++; abs/neg/cpy ++(define_insn_reservation "hip12_fp_arith" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "ffariths,ffarithd")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_compare" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fcmpd,fcmps")) ++ "hip12_v02+hip12_alu0134") ++ ++(define_insn_reservation "hip12_fp_conditional_compare" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fccmpd,fccmps")) ++ "hip12_alu14,hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_conditional_select" 6 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fcsel")) ++ "hip12_alu14,hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_divide_single" 6 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fdivs")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_divide_double" 8 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fdivd")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_square_single" 6 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fsqrts")) ++ "hip12_v0123") ++ ++(define_insn_reservation 
"hip12_fp_square_double" 8 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fsqrtd")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_fused_multiply_add" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "ffmad,ffmas")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_max_min" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "f_minmaxd,f_minmaxs")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_add" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fadds,faddd")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_multiply" 3 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fmuld,fmuls")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_round_int" 3 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "f_rintd,f_rints")) ++ "hip12_v0123") ++ ++;; FP Miscellaneous instructions. ++ ++(define_insn_reservation "hip12_fp_covert_i2f" 7 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "f_cvti2f")) ++ "hip12_alu14,hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_covert_f2i" 5 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "f_cvtf2i")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_covert_f2f" 3 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "f_cvt")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_move" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "fmov")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_transfer_arm2vfp" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "f_mcr")) ++ "hip12_alu14") ++ ++; transfer low half + high half ++(define_insn_reservation "hip12_fp_transfer_2arm2vfp" 10 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "f_mcrr")) ++ "hip12_alu14,nothing*3,hip12_alu14,hip12_v0123") ++ ++(define_insn_reservation "hip12_fp_transfer_vfp2arm" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "f_mrc,f_mrrc")) ++ "hip12_v0123") ++ ++;; FP Load instructions. 
++; only basic double/single load
++(define_insn_reservation "hip12_fp_load" 6
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "f_loadd,f_loads"))
++  "hip12_ld")
++
++(define_insn_reservation "hip12_fp_load_vector_pair" 6
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_ldr,neon_ldp,neon_ldp_q"))
++  "hip12_alu+hip12_ld")
++
++;; FP Store instructions.
++; only basic double/single store
++(define_insn_reservation "hip12_fp_store" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "f_stored,f_stores"))
++  "hip12_st+hip12_std")
++
++(define_insn_reservation "hip12_fp_store_vector" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_str"))
++  "hip12_alu+hip12_st+hip12_std")
++
++(define_insn_reservation "hip12_fp_store_pair" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_stp,neon_stp_q"))
++  "hip12_st+hip12_std+hip12_alu")
++
++;; ASIMD Int instructions.
++
++(define_insn_reservation "hip12_neon_absolute_diff" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_abd,neon_abd_q,neon_abd_long"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_arith_basic" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_abs,neon_abs_q,\
++                        neon_add,neon_add_q,\
++                        neon_sub,neon_sub_q,\
++                        neon_neg,neon_neg_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_arith_long" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_add_long,neon_sub_long"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_arith_wide" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_add_widen,neon_sub_widen"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_arith_complex" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_qadd,neon_qadd_q,\
++                        neon_qsub,neon_qsub_q,\
++                        neon_qneg,neon_qneg_q,\
++                        neon_qabs,neon_qabs_q"))
++  "hip12_v0123")
++; arith pair not specified
++
++; neon_reduc_add is used for both addp and [su]adalp
++(define_insn_reservation "hip12_neon_arith_reduce" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_reduc_add,neon_reduc_add_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_arith_cmp" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\
++                        neon_tst,neon_tst_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_arith_dot" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_dot,neon_dot_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_logical" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_logic,neon_logic_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_multiply_accumulate" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_mla_b,neon_mla_b_q,\
++                        neon_mla_h,neon_mla_h_q,\
++                        neon_mla_s,neon_mla_s_q,\
++                        neon_mla_b_long,neon_mla_h_long,\
++                        neon_mla_s_long,neon_mla_h_scalar,\
++                        neon_mla_h_scalar_q,neon_mla_s_scalar,\
++                        neon_mla_s_scalar_q,neon_mla_h_scalar_long,\
++                        neon_mla_s_scalar_long,neon_sat_mla_b_long,\
++                        neon_sat_mla_h_long,neon_sat_mla_s_long,\
++                        neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_minmax" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_minmax,neon_minmax_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_minmax_reduce" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_reduc_minmax,neon_reduc_minmax_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_multiply" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_mul_b,neon_mul_b_q,\
++                        neon_mul_h,neon_mul_h_q,\
++                        neon_mul_s,neon_mul_s_q,\
++                        neon_mul_b_long,neon_mul_h_long,\
++                        neon_mul_s_long,neon_mul_d_long,\
++                        neon_mul_h_scalar,neon_mul_h_scalar_q,\
++                        neon_mul_s_scalar,neon_mul_s_scalar_q,\
++                        neon_mul_h_scalar_long,neon_mul_s_scalar_long,\
++                        neon_sat_mul_b,neon_sat_mul_b_q,\
++                        neon_sat_mul_h,neon_sat_mul_h_q,\
++                        neon_sat_mul_s,neon_sat_mul_s_q,\
++                        neon_sat_mul_b_long,neon_sat_mul_h_long,\
++                        neon_sat_mul_s_long,neon_sat_mul_h_scalar,\
++                        neon_sat_mul_h_scalar_q,neon_sat_mul_s_scalar,\
++                        neon_sat_mul_s_scalar_q,neon_sat_mul_h_scalar_long,\
++                        neon_sat_mul_s_scalar_long"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_shift" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_shift_imm,neon_shift_imm_q,\
++                        neon_shift_imm_narrow_q,neon_shift_imm_long,\
++                        neon_shift_reg,neon_shift_reg_q,\
++                        neon_sat_shift_imm,neon_sat_shift_imm_q,\
++                        neon_sat_shift_imm_narrow_q,neon_sat_shift_reg,\
++                        neon_sat_shift_reg_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_shift_accumulate" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_shift_acc,neon_shift_acc_q"))
++  "hip12_v0123")
++
++;; ASIMD FP instructions.
++
++(define_insn_reservation "hip12_neon_fp_abs" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_s_q,\
++                        neon_fp_abs_d,neon_fp_abs_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_neg" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_neg_s,neon_fp_neg_s_q,\
++                        neon_fp_neg_d,neon_fp_neg_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_abd" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_s_q,\
++                        neon_fp_abd_d,neon_fp_abd_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_arith" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_addsub_s,neon_fp_addsub_s_q,\
++                        neon_fp_addsub_d,neon_fp_addsub_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_compare" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_compare_s,neon_fp_compare_s_q,\
++                        neon_fp_compare_d,neon_fp_compare_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_convert_narrow" 4
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_cvt_narrow_s_q,neon_fp_cvt_narrow_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_convert_2int" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_to_int_s,neon_fp_to_int_d"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_convert_2int_q" 4
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_to_int_s_q,neon_fp_to_int_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_convert_from_int" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_int_to_fp_s,neon_int_to_fp_d"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_convert_from_int_q" 4
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_int_to_fp_s_q,neon_int_to_fp_d_q"))
++  "hip12_v0123")
++
++; D/F32
++(define_insn_reservation "hip12_neon_fp_divide_s" 6
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_div_s"))
++  "hip12_v0123")
++
++; Q/F32
++(define_insn_reservation "hip12_neon_fp_divide_s_q" 7
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_div_s_q"))
++  "hip12_v0123")
++
++; Q/F64
++(define_insn_reservation "hip12_neon_fp_divide_d" 9
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q"))
++  "hip12_v0123")
++
++; D/F32
++(define_insn_reservation "hip12_neon_fp_sqrt_s" 6
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_sqrt_s"))
++  "hip12_v0123")
++
++; Q/F32
++(define_insn_reservation "hip12_neon_fp_sqrt_s_q" 7
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_sqrt_s_q"))
++  "hip12_v0123")
++
++; Q/F64
++(define_insn_reservation "hip12_neon_fp_sqrt_d" 9
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_sqrt_d,neon_fp_sqrt_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_minmax" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_minmax_s,neon_fp_minmax_s_q,\
++                        neon_fp_minmax_d,neon_fp_minmax_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_minmax_reduce" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_s_q,\
++                        neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_multiply" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_s_q,\
++                        neon_fp_mul_s_scalar,neon_fp_mul_s_scalar_q,\
++                        neon_fp_mul_d,neon_fp_mul_d_q,\
++                        neon_fp_mul_d_scalar_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_multiply_add" 4
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_q,\
++                        neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
++                        neon_fp_mla_d,neon_fp_mla_d_q,\
++                        neon_fp_mla_d_scalar_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_round" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_round_s,neon_fp_round_d"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_round_q" 4
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_round_s_q,neon_fp_round_d_q"))
++  "hip12_v0123")
++
++;; ASIMD Miscellaneous instructions
++
++(define_insn_reservation "hip12_neon_bit_reverse" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_rbit,neon_rbit_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_bitwise_insert" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_bsl,neon_bsl_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_count" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_cls,neon_cls_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_count_ds" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_cnt_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_count_bh" 1
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_cnt"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_duplicate" 6
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_dup,neon_dup_q"))
++  "(hip12_v0123)+hip12_alu0134")
++
++(define_insn_reservation "hip12_neon_extract" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_ext,neon_ext_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_insert" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_ins,neon_ins_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_move" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_move,neon_move_q,neon_move_narrow_q"))
++  "hip12_v0123")
++
++; gcc only gen neon fp recp
++(define_insn_reservation "hip12_neon_fp_recp" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_recpe_s,neon_fp_recpe_d,\
++                        neon_fp_rsqrte_s,neon_fp_rsqrte_d"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_recp_q" 4
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_recpe_s_q,neon_fp_recpe_d_q,\
++                        neon_fp_rsqrte_s_q,neon_fp_rsqrte_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_recpx" 3
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_recpx_s,neon_fp_recpx_s_q,\
++                        neon_fp_recpx_d,neon_fp_recpx_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_fp_recps" 4
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\
++                        neon_fp_recps_d,neon_fp_recps_d_q,\
++                        neon_fp_rsqrts_s,neon_fp_rsqrts_s_q,\
++                        neon_fp_rsqrts_d,neon_fp_rsqrts_d_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_rev" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_rev,neon_rev_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_tbl_12" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_tbl1,neon_tbl1_q,\
++                        neon_tbl2,neon_tbl2_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_tbl_3" 4
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_tbl3,\
++                        neon_tbl3_q"))
++  "hip12_v0123")
++
++(define_insn_reservation "hip12_neon_tbl_4" 4
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_tbl4,\
++                        neon_tbl4_q"))
++  "hip12_v0123")
++; gcc only gen neon tbl, no tbx
++
++; no neon transfer specified
++
++(define_insn_reservation "hip12_neon_zip" 2
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_zip,neon_zip_q"))
++  "hip12_v0123")
++
++;; ASIMD Load instructions.
++
++(define_insn_reservation "hip12_neon_ld1_12reg" 6
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
++                        neon_load1_2reg,neon_load1_2reg_q"))
++  "hip12_ld")
++
++(define_insn_reservation "hip12_neon_ld1_34reg" 7
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q,\
++                        neon_load1_4reg,neon_load1_4reg_q"))
++  "hip12_ld")
++
++(define_insn_reservation "hip12_neon_ld1_lane" 8
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q,\
++                        neon_load1_one_lane,neon_load1_one_lane_q"))
++  "hip12_ld+(hip12_v0123)")
++
++(define_insn_reservation "hip12_neon_ld2" 8
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\
++                        neon_load2_4reg,neon_load2_4reg_q,\
++                        neon_load2_all_lanes,neon_load2_all_lanes_q,\
++                        neon_load2_one_lane,neon_load2_one_lane_q"))
++  "(hip12_ld)+(hip12_v0123)")
++
++(define_insn_reservation "hip12_neon_ld3" 8
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
++                        neon_load3_all_lanes,neon_load3_all_lanes_q,\
++                        neon_load3_one_lane,neon_load3_one_lane_q"))
++  "hip12_ld+hip12_v0123")
++
++(define_insn_reservation "hip12_neon_ld4_reg" 10
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q"))
++  "hip12_ld+hip12_v0123")
++
++(define_insn_reservation "hip12_neon_ld4_lane" 8
++  (and (eq_attr "tune" "hip12")
++       (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
++                        neon_load4_one_lane,neon_load4_one_lane_q"))
++  "hip12_ld+hip12_v0123")
++
++;; ASIMD Store instructions.
++ ++(define_insn_reservation "hip12_neon_st1_12reg_4reg" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,\ ++ neon_store1_2reg,neon_store1_2reg_q,\ ++ neon_store1_4reg")) ++ "hip12_st+hip12_std+hip12_v0123") ++ ++(define_insn_reservation "hip12_neon_st1_3reg" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q")) ++ "hip12_st+hip12_std+hip12_v0123") ++ ++(define_insn_reservation "hip12_neon_st1_4reg_q" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "neon_store1_4reg_q")) ++ "hip12_st+hip12_std") ++ ++(define_insn_reservation "hip12_neon_st1_lane_st2" 1 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q,\ ++ neon_store2_2reg,neon_store2_2reg_q,\ ++ neon_store2_4reg,neon_store2_4reg_q,\ ++ neon_store2_one_lane,neon_store2_one_lane_q")) ++ "hip12_st+hip12_std") ++ ++(define_insn_reservation "hip12_neon_st3_st4_q" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q,\ ++ neon_store3_one_lane,neon_store3_one_lane_q,\ ++ neon_store4_4reg_q")) ++ "hip12_v0123+hip12_st+hip12_std") ++ ++(define_insn_reservation "hip12_neon_st4" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "neon_store4_4reg,\ ++ neon_store4_one_lane,neon_store4_one_lane_q")) ++ "hip12_v0123+hip12_st+hip12_std") ++ ++;; Cryptography Extensions ++ ++(define_insn_reservation "hip12_crypto_aes" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_aese,crypto_aesmc")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_crypto_pmull" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_pmull")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_crypto_sha1_fast" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_crypto_sha256_fast" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_sha256_fast")) ++ 
"hip12_v02") ++ ++(define_insn_reservation "hip12_crypto_complex_1" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_sha1_slow")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_crypto_complex_256" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_sha256_slow")) ++ "hip12_v02") ++ ++(define_insn_reservation "hip12_crypto_sha512" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_sha512")) ++ "hip12_v02") ++ ++(define_insn_reservation "hip12_crypto_sha3" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_sha3")) ++ "hip12_v0123") ++ ++(define_insn_reservation "hip12_crypto_sm3" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_sm3")) ++ "hip12_v02") ++ ++(define_insn_reservation "hip12_crypto_sm4" 4 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crypto_sm4")) ++ "hip12_v0123") ++ ++;; CRC instructions ++ ++(define_insn_reservation "hip12_crc" 2 ++ (and (eq_attr "tune" "hip12") ++ (eq_attr "type" "crc")) ++ "hip12_alu25") ++ ++;; Simple execution unit bypasses ++(define_bypass 2 "hip12_fp_fused_multiply_add" ++ "hip12_fp_fused_multiply_add") ++ ++(define_bypass 2 "hip12_neon_arith_dot" ++ "hip12_neon_arith_dot") ++ ++(define_bypass 2 "hip12_neon_multiply_accumulate" ++ "hip12_neon_multiply_accumulate") ++ ++(define_bypass 1 "hip12_neon_shift_accumulate" ++ "hip12_neon_shift_accumulate") ++ ++(define_bypass 2 "hip12_neon_fp_multiply_add" ++ "hip12_neon_fp_multiply_add") ++ ++(define_bypass 2 "hip12_neon_fp_recps" ++ "hip12_neon_fp_recps") +\ No newline at end of file +-- +2.34.1 + diff --git a/gcc.spec b/gcc.spec index fe1b8f1..5fae19a 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 79 +%global gcc_release 80 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -478,6 +478,12 @@ Patch365: 0365-add-llc-allocate-feature.patch Patch366: 0366-fix-prefetch-case-failed.patch Patch367: 0367-llc-feature-bugfix.patch Patch368: 0368-fix-llc-feature-case-failed.patch +Patch369: 0369-SME-start-za-before-write-address-to-tpidr2.patch +Patch370: 0370-Add-hip12-core-definition-and-cost-model.patch +Patch371: 0371-SVE-Fix-std-find-with-sve.patch +Patch372: 0372-oeAware-Add-.GCC4OE_oeAware-section-for-optimization.patch +Patch373: 0373-Include-insn-opinit.h-in-PLUGIN_H-PR110610.patch +Patch374: 0374-Add-hip12-instructions-pipeline.patch # Part 1001-1999 %ifarch sw_64 @@ -1632,6 +1638,12 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch -P366 -p1 %patch -P367 -p1 %patch -P368 -p1 +%patch -P369 -p1 +%patch -P370 -p1 +%patch -P371 -p1 +%patch -P372 -p1 +%patch -P373 -p1 +%patch -P374 -p1 %ifarch sw_64 %patch -P1001 -p1 @@ -4259,6 +4271,10 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Wed May 7 2025 liyancheng <412998149@qq.com> - 12.3.1-80 +- Type: Sync +- DESC: Sync patches from openeuler/gcc. + * Sat May 3 2025 huang-xiaoquan - 12.3.1-79 - Type: Sync - DESC: Sync patches from openeuler/gcc. -- Gitee