From 69467148fc9b42e6c25465d041876d4d571bb5d6 Mon Sep 17 00:00:00 2001 From: liyancheng <412998149@qq.com> Date: Sat, 15 Feb 2025 15:46:12 +0800 Subject: [PATCH] Sync bugfix for ai4c, cspgo and hip09, and add hip10a cpuinfo --- 0338-CSPGO-Update-the-gate-of-cspgo.patch | 88 ++ 0339-Add-hip10a-machine-discription.patch | 877 ++++++++++++++++++ ...-for-hip11-and-hip10c-addrcost_table.patch | 34 + gcc.spec | 14 +- 4 files changed, 1012 insertions(+), 1 deletion(-) create mode 100644 0338-CSPGO-Update-the-gate-of-cspgo.patch create mode 100644 0339-Add-hip10a-machine-discription.patch create mode 100644 0340-Fix-for-hip11-and-hip10c-addrcost_table.patch diff --git a/0338-CSPGO-Update-the-gate-of-cspgo.patch b/0338-CSPGO-Update-the-gate-of-cspgo.patch new file mode 100644 index 0000000..bd5307c --- /dev/null +++ b/0338-CSPGO-Update-the-gate-of-cspgo.patch @@ -0,0 +1,88 @@ +From 25f3b77d288e26b198c7836c3ed9b4fb0a85a48a Mon Sep 17 00:00:00 2001 +From: liyancheng <412998149@qq.com> +Date: Mon, 16 Dec 2024 15:52:22 +0800 +Subject: [PATCH] [CSPGO] Update the gate of cspgo + +Update gate to allow CSPGO to be enabled after PGO +--- + gcc/tree-profile.cc | 59 +++++++++++++++++++++++++++------------------ + 1 file changed, 36 insertions(+), 23 deletions(-) + +diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc +index ace1fe31c..3c57a0a75 100644 +--- a/gcc/tree-profile.cc ++++ b/gcc/tree-profile.cc +@@ -1108,34 +1108,47 @@ public: + /* opt_pass methods: */ + virtual bool gate (function *) + { +- return (flag_csprofile_generate || flag_csprofile_use); +- } +- /* The main process of cspgo is in csprofile_transform, execute does not need +- to do anything. 
*/ +- virtual unsigned int execute (function *) +- { +- if (!profile_data_prefix) +- error ("profile_data_prefix must set when using cspgo."); ++ if (flag_csprofile_generate || flag_csprofile_use) ++ { ++ int ret = true; ++ if (!profile_data_prefix) ++ { ++ error ("pgo profile path must set when using cspgo."); ++ ret = false; ++ } + +- if (!csprofile_data_prefix) +- error ("csprofile_data_prefix must set when using cspgo."); ++ if (!csprofile_data_prefix) ++ { ++ error ("cspgo profile path must set when using cspgo."); ++ ret = false; ++ } + +- if (!flag_cfgo_profile_use) +- error ("cspgo must used with cfgo-pgo."); ++ if (!(flag_cfgo_profile_use || flag_profile_use)) ++ { ++ error ("cspgo must used with cfgo-pgo or pgo."); ++ ret = false; ++ } + +- /* Just compare canonical pathnames. */ +- char* cfgo_pgo_path = lrealpath (profile_data_prefix); +- char* cfgo_cspgo_path = lrealpath (csprofile_data_prefix); +- bool files_differ = filename_cmp (cfgo_pgo_path, cfgo_cspgo_path); +- if (!files_differ) +- { +- error ("pgo and cspgo path must different between %s and %s", +- cfgo_pgo_path, cfgo_cspgo_path); ++ /* pgo and cspgo path must different. */ ++ char* cfgo_pgo_path = lrealpath (profile_data_prefix); ++ char* cfgo_cspgo_path = lrealpath (csprofile_data_prefix); ++ bool files_differ = filename_cmp (cfgo_pgo_path, cfgo_cspgo_path); ++ if (!files_differ) ++ { ++ error ("pgo and cspgo path must different between %s and %s", ++ cfgo_pgo_path, cfgo_cspgo_path); ++ ret = false; ++ } ++ free (cfgo_pgo_path); ++ free (cfgo_cspgo_path); ++ ++ return ret; + } +- free (cfgo_pgo_path); +- free (cfgo_cspgo_path); +- return 0; ++ return false; + } ++ /* The main process of cspgo is in csprofile_transform, execute does not need ++ to do anything. 
*/ ++ virtual unsigned int execute (function *) { return 0; } + + }; // class pass_ipa_csprofile + +-- +2.25.1 + diff --git a/0339-Add-hip10a-machine-discription.patch b/0339-Add-hip10a-machine-discription.patch new file mode 100644 index 0000000..77e8c97 --- /dev/null +++ b/0339-Add-hip10a-machine-discription.patch @@ -0,0 +1,877 @@ +From 2eea7cfbd7128906034e3d3c5a0fe7d05860ba6b Mon Sep 17 00:00:00 2001 +From: liyunfei +Date: Fri, 17 Jan 2025 20:05:33 +0800 +Subject: [PATCH] Add hip10a machine discription + +Here is the patch introducing hip10a machine model +for the scheduler. +--- + gcc/config/aarch64/aarch64-cores.def | 3 +- + gcc/config/aarch64/aarch64-cost-tables.h | 103 +++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.cc | 109 +++++ + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip10a.md | 538 +++++++++++++++++++++++ + gcc/doc/invoke.texi | 2 +- + 7 files changed, 755 insertions(+), 3 deletions(-) + create mode 100644 gcc/config/aarch64/hip10a.md + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 1e8de523c..8f6210397 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -131,7 +131,8 @@ AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1) + /* HiSilicon ('H') cores. 
*/ + AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1) + AARCH64_CORE("hip09", hip09, hip09, V8_5A, (SVE, I8MM, F32MM, F64MM, PROFILE, PREDRES), hip09, 0x48, 0xd02, 0x0) +-AARCH64_CORE("hip10c", hip10c, hip10c, V8_5A, (SVE, I8MM, BF16, F32MM, F64MM, FLAGM, PAUTH, SSBS, SHA3, SM4, PROFILE, PREDRES), hip10c, 0x48, 0xddd, 0x0) ++AARCH64_CORE("hip10a", hip10a, hip10a, V8_5A, (SVE, I8MM, BF16, F32MM, F64MM, SSBS, SHA3, SM4, PREDRES, SVE2, SVE2_BITPERM, DOTPROD, F16FML), hip10a, 0x48, 0xd03, 0x0) ++AARCH64_CORE("hip10c", hip10c, hip10c, V8_5A, (SVE, I8MM, BF16, F32MM, F64MM, FLAGM, PAUTH, SSBS, SHA3, SM4, PROFILE, PREDRES), hip10c, 0x48, 0xd45, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index 06da1b271..a39ace9ba 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -880,6 +880,109 @@ const struct cpu_cost_table hip09_extra_costs = + } + }; + ++const struct cpu_cost_table hip10a_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ 0, /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (7) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. 
*/ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (10) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (6), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (5), /* loadf. */ ++ COSTS_N_INSNS (5), /* loadd. */ ++ COSTS_N_INSNS (3), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (6), /* div. */ ++ COSTS_N_INSNS (2), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (3), /* fma. */ ++ COSTS_N_INSNS (1), /* addsub. */ ++ 0, /* fpconst. */ ++ 0, /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (4), /* toint. */ ++ COSTS_N_INSNS (5), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (9), /* div. */ ++ COSTS_N_INSNS (2), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (3), /* fma. */ ++ COSTS_N_INSNS (1), /* addsub. */ ++ 0, /* fpconst. */ ++ 0, /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (4), /* toint. */ ++ COSTS_N_INSNS (5), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. 
*/ ++ } ++}; ++ + const struct cpu_cost_table hip10c_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index e176a4d70..1cfa3559d 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,hip10c,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,hip10a,hip10c,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2" + (const (symbol_ref "((enum attr_tune) 
aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 65b684ef6..a6ef40a47 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -507,6 +507,24 @@ static const struct cpu_addrcost_table hip09_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table hip10a_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* post_modify_ld3_st3 */ ++ 0, /* post_modify_ld4_st4 */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct cpu_addrcost_table hip10c_addrcost_table = + { + { +@@ -754,6 +772,16 @@ static const struct cpu_regmove_cost hip09_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip10a_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. 
*/ ++ 5, /* GP2FP */ ++ 5, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost hip10c_regmove_cost = + { + 1, /* GP2GP */ +@@ -1088,6 +1116,43 @@ static const struct cpu_vector_cost hip09_vector_cost = + nullptr /* issue_info */ + }; + ++static const advsimd_vec_cost hip10a_advsimd_vector_cost = ++{ ++ 2, /* int_stmt_cost */ ++ 2, /* fp_stmt_cost */ ++ 0, /* ld2_st2_permute_cost */ ++ 0, /* ld3_st3_permute_cost */ ++ 0, /* ld4_st4_permute_cost */ ++ 2, /* permute_cost */ ++ 3, /* reduc_i8_cost */ ++ 3, /* reduc_i16_cost */ ++ 3, /* reduc_i32_cost */ ++ 3, /* reduc_i64_cost */ ++ 3, /* reduc_f16_cost */ ++ 3, /* reduc_f32_cost */ ++ 3, /* reduc_f64_cost */ ++ 3, /* store_elt_extra_cost */ ++ 3, /* vec_to_scalar_cost */ ++ 2, /* scalar_to_vec_cost */ ++ 5, /* align_load_cost */ ++ 5, /* unalign_load_cost */ ++ 1, /* unalign_store_cost */ ++ 1 /* store_cost */ ++}; ++ ++static const struct cpu_vector_cost hip10a_vector_cost = ++{ ++ 1, /* scalar_int_stmt_cost */ ++ 1, /* scalar_fp_stmt_cost */ ++ 5, /* scalar_load_cost */ ++ 1, /* scalar_store_cost */ ++ 1, /* cond_taken_branch_cost */ ++ 1, /* cond_not_taken_branch_cost */ ++ &hip10a_advsimd_vector_cost, /* advsimd */ ++ nullptr, /* sve */ ++ nullptr /* issue_info */ ++}; ++ + static const advsimd_vec_cost hip10c_advsimd_vector_cost = + { + 2, /* int_stmt_cost */ +@@ -1520,6 +1585,17 @@ static const cpu_prefetch_tune hip09_prefetch_tune = + -1 /* default_opt_level */ + }; + ++static const cpu_prefetch_tune hip10a_prefetch_tune = ++{ ++ 0, /* num_slots */ ++ 64, /* l1_cache_size */ ++ 64, /* l1_cache_line_size */ ++ 512, /* l2_cache_size */ ++ true, /* prefetch_dynamic_strides */ ++ -1, /* minimum_stride */ ++ -1 /* default_opt_level */ ++}; ++ + static const cpu_prefetch_tune hip10c_prefetch_tune = + { + 0, /* num_slots */ +@@ -1940,6 +2016,39 @@ static const struct tune_params hip09_tunings = + &hip09_prefetch_tune + }; + ++static const struct tune_params hip10a_tunings = ++{ ++ 
&hip10a_extra_costs, ++ &hip10a_addrcost_table, ++ &hip10a_regmove_cost, ++ &generic_vector_cost, ++ &generic_branch_cost, ++ &generic_approx_modes, ++ SVE_256, /* sve_width */ ++ { 4, /* load_int. */ ++ 4, /* store_int. */ ++ 4, /* load_fp. */ ++ 4, /* store_fp. */ ++ 4, /* load_pred. */ ++ 4 /* store_pred. */ ++ }, /* memmov_cost. */ ++ 8, /* issue_rate */ ++ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH ++ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ ++ "16", /* function_align. */ ++ "4", /* jump_align. */ ++ "8", /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ 0, /* max_case_values. */ ++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ ++ (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags. */ ++ &hip10a_prefetch_tune ++}; ++ + static const struct tune_params hip10c_tunings = + { + &hip10c_extra_costs, +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 96b8ab471..2f46bc793 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -549,6 +549,7 @@ + (include "tsv110.md") + (include "thunderx3t110.md") + (include "hip09.md") ++(include "hip10a.md") + (include "hip10c.md") + (include "hip11.md") + +diff --git a/gcc/config/aarch64/hip10a.md b/gcc/config/aarch64/hip10a.md +new file mode 100644 +index 000000000..3a687e8af +--- /dev/null ++++ b/gcc/config/aarch64/hip10a.md +@@ -0,0 +1,538 @@ ++;; hip10a pipeline description ++;; Copyright (C) 2023 Free Software Foundation, Inc. ++;; ++;;Contributed by liyunfei ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. 
++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++(define_automaton "hip10a") ++(define_automaton "hip10a_ldst") ++(define_automaton "hip10a_fsu") ++ ++(define_attr "hip10a_type" ++ "hip10a_neon_base1, hip10a_neon_base2, hip10a_neon_base3, hip10a_neon_base4, ++ hip10a_neon_load1_12, hip10a_neon_load1_34, hip10a_neon_load1_lanes, hip10a_neon_load2, ++ hip10a_neon_load34_all_lane, hip10a_neon_load34_one_lane, hip10a_neon_load34, hip10a_neon_load34_q, ++ hip10a_neon_store1, hip10a_neon_store2, hip10a_neon_store1_34reg_d, hip10a_neon_store1_12reg_d, ++ hip10a_neon_store34, ++ unknown" ++ (cond [ ++ (eq_attr "type" "neon_abs,neon_abs_q,\ ++ neon_neg,neon_neg_q,\ ++ neon_add,neon_add_q,neon_add_widen,neon_add_long,\ ++ neon_sub,neon_sub_q,neon_sub_widen,neon_sub_long,\ ++ neon_qadd,neon_qadd_q,\ ++ neon_qsub,neon_qsub_q,\ ++ neon_qabs,neon_qabs_q,\ ++ neon_qneg,neon_qneg_q,\ ++ neon_compare,neon_compare_q,neon_compare_zero,neon_compare_zero_q,\ ++ neon_logic,neon_logic_q,\ ++ neon_minmax,neon_minmax_q,\ ++ neon_tst,neon_tst_q,\ ++ neon_bsl,neon_bsl_q,\ ++ neon_cls,neon_cls_q,\ ++ neon_ext,neon_ext_q,\ ++ neon_rev,neon_rev_q,\ ++ neon_fp_abs_s,neon_fp_abs_s_q,neon_fp_abs_d,\ ++ neon_fp_neg_s,neon_fp_neg_s_q,neon_fp_neg_d,neon_fp_neg_d_q,\ ++ neon_move,neon_move_q,\ ++ neon_ins,neon_ins_q") ++ (const_string "hip10a_neon_base1") ++ (eq_attr "type" "neon_abd,neon_abd_q,\ ++ neon_tbl1,neon_tbl1_q,\ ++ neon_arith_acc,neon_arith_acc_q,\ ++ neon_add_halve,neon_add_halve_q,neon_add_halve_narrow_q,\ ++ neon_sub_halve,neon_sub_halve_q,neon_sub_halve_narrow_q,\ ++ neon_sat_shift_imm,neon_sat_shift_imm_q,\ ++ 
neon_shift_imm,neon_shift_imm_q,neon_shift_imm_long,\ ++ neon_shift_imm_narrow_q,\ ++ neon_cnt,neon_cnt_q,\ ++ neon_tbl1,neon_tbl1_q,neon_tbl2,neon_tbl2_q,\ ++ neon_to_gp,neon_to_gp_q,\ ++ neon_fp_recpe_s,neon_fp_recpe_s_q,\ ++ neon_fp_recpe_d,neon_fp_recpe_d_q,\ ++ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\ ++ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\ ++ neon_fp_recpx_s,neon_fp_recpx_s_q,\ ++ neon_fp_recpx_d,neon_fp_recpx_d_q,\ ++ neon_fp_abd_s,neon_fp_abd_s_q,\ ++ neon_fp_abd_d,neon_fp_abd_d_q,\ ++ neon_fp_addsub_s,neon_fp_addsub_s_q,\ ++ neon_fp_addsub_d,neon_fp_addsub_d_q,\ ++ neon_fp_compare_s,neon_fp_compare_s_q,\ ++ neon_fp_compare_d,neon_fp_compare_d_q,\ ++ neon_fp_minmax_s,\ ++ neon_fp_minmax_s_q,neon_fp_minmax_d,\ ++ neon_fp_minmax_d_q,neon_fp_round_s,\ ++ neon_fp_round_s_q,\ ++ neon_fp_round_d,neon_fp_round_d_q") ++ (const_string "hip10a_neon_base2") ++ (eq_attr "type" "neon_dot,neon_dot_q,\ ++ neon_reduc_add,neon_reduc_add_q,\ ++ neon_sat_mul_b,neon_sat_mul_b_q,neon_sat_mul_b_long,\ ++ neon_sat_mul_h,neon_sat_mul_h_q,\ ++ neon_sat_mul_s,neon_sat_mul_s_q,\ ++ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\ ++ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\ ++ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\ ++ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\ ++ neon_mul_b,neon_mul_b_q,neon_mul_b_long,\ ++ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\ ++ neon_mul_s_long,neon_mul_h_scalar_long,\ ++ neon_mul_s_scalar_long,\ ++ neon_mla_b,neon_mla_b_q,neon_mla_b_long,\ ++ neon_mla_h,neon_mla_h_q,neon_mla_h_long,\ ++ neon_mla_h_scalar,neon_mla_h_scalar_q,neon_mla_h_scalar_long,\ ++ neon_mla_s,neon_mla_s_q,neon_mla_s_long,\ ++ neon_mla_s_scalar,neon_mla_s_scalar_q,neon_mla_s_scalar_long,\ ++ neon_sat_mla_b_long,\ ++ neon_sat_mla_h_long,\ ++ neon_sat_mla_h_scalar_long,\ ++ neon_sat_mla_s_long,\ ++ neon_sat_mla_s_scalar_long,\ ++ neon_shift_acc,neon_shift_acc_q,neon_shift_reg,neon_shift_reg_q,\ ++ neon_sat_shift_reg,neon_sat_shift_reg_q,neon_sat_shift_imm_narrow_q,\ ++ 
neon_reduc_minmax,neon_reduc_minmax_q,\ ++ neon_fp_reduc_add_s,neon_fp_reduc_add_s_q,\ ++ neon_fp_reduc_add_d,neon_fp_reduc_add_d_q,\ ++ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_s_q,\ ++ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_d_q,\ ++ neon_fp_mul_s,neon_fp_mul_s_q,neon_fp_mul_s_scalar,\ ++ neon_fp_mul_d,neon_fp_mul_d_q,neon_fp_mul_d_scalar_q,\ ++ neon_fp_mul_s_scalar_q,\ ++ neon_fp_recpe_s,neon_fp_recpe_d,\ ++ neon_fp_recpx_s,neon_fp_recpx_s_q,neon_fp_recpx_d,neon_fp_recpx_d_q,\ ++ neon_fp_to_int_s,neon_fp_to_int_d") ++ (const_string "hip10a_neon_base3") ++ (eq_attr "type" "neon_tbl3,neon_tbl3_q,\ ++ neon_fp_recpe_s_q,neon_fp_recpe_d_q,\ ++ neon_fp_recps_s_q,neon_fp_recps_d,neon_fp_recps_s,neon_fp_recps_d_q,\ ++ neon_fp_to_int_s_q,neon_fp_to_int_d_q,\ ++ neon_fp_cvt_narrow_d_q,neon_fp_cvt_narrow_s_q,\ ++ neon_fp_mla_s,neon_fp_mla_s_q,\ ++ neon_fp_mla_d,neon_fp_mla_d_q,\ ++ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\ ++ neon_fp_mla_d_scalar_q") ++ (const_string "hip10a_neon_base4") ++ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\ ++ neon_load1_2reg,neon_load1_2reg_q") ++ (const_string "hip10a_neon_load1_12") ++ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q,\ ++ neon_load1_4reg,neon_load1_4reg_q") ++ (const_string "hip10a_neon_load1_34") ++ (eq_attr "type" "neon_load1_one_lane,\ ++ neon_load1_one_lane_q,\ ++ neon_load1_all_lanes,neon_load1_all_lanes_q") ++ (const_string "hip10a_neon_load1_lanes") ++ (eq_attr "type" "neon_load2_all_lanes,\ ++ neon_load2_all_lanes_q,\ ++ neon_load2_one_lane,neon_load2_2reg,\ ++ neon_load2_2reg_q,neon_load3_one_lane") ++ (const_string "hip10a_neon_load2") ++ (eq_attr "type" "neon_load4_one_lane,neon_load4_one_lane") ++ (const_string "hip10a_neon_load34_one_lane") ++ (eq_attr "type" "neon_load3_all_lanes,neon_load3_all_lanes_q,\ ++ neon_load4_all_lanes,neon_load4_all_lanes_q") ++ (const_string "hip10a_neon_load34_all_lane") ++ (eq_attr "type" "neon_load3_3reg,neon_load4_4reg") ++ (const_string "hip10a_neon_load34") 
++ (eq_attr "type" "neon_load3_3reg_q,neon_load4_4reg_q") ++ (const_string "hip10a_neon_load34_q") ++ (eq_attr "type" "neon_store1_1reg_q,neon_store1_2reg_q,\ ++ neon_store1_3reg_q,neon_store1_4reg_q,\ ++ neon_store1_one_lane,neon_store1_one_lane_q") ++ (const_string "hip10a_neon_store1") ++ (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q,\ ++ neon_store2_2reg,neon_store2_2reg_q") ++ (const_string "hip10a_neon_store2") ++ (eq_attr "type" "neon_store1_1reg,neon_store1_2reg") ++ (const_string "hip10a_neon_store1_12reg_d") ++ (eq_attr "type" "neon_store1_3reg,neon_store1_4reg") ++ (const_string "hip10a_neon_store1_34reg_d") ++ (eq_attr "type" "neon_store3_one_lane,neon_store3_one_lane_q,\ ++ neon_store4_one_lane,neon_store4_one_lane_q,\ ++ neon_store3_3reg_q,neon_store3_3reg,\ ++ neon_store4_4reg_q,neon_store4_4reg") ++ (const_string "hip10a_neon_store34")] ++ (const_string "unknown"))) ++ ++; The hip10a core is modelled as issues pipeline that has ++; the following functional units. ++; 1. Three pipelines for single cycle integer micro operations: ALUs0, ALUs1, ALUs2 ++ ++(define_cpu_unit "hip10a_alus0" "hip10a") ++(define_cpu_unit "hip10a_alus1" "hip10a") ++(define_cpu_unit "hip10a_alus2" "hip10a") ++ ++(define_reservation "hip10a_alus012" "hip10a_alus0|hip10a_alus1|hip10a_alus2") ++;(define_reservation "hip10a_alus01" "hip10a_alus0|hip10a_alus1") ++;(define_reservation "hip10a_alus23" "hip10a_alus2|hip10a_alus3") ++ ++; 2. Three pipelines for multi cycles integer micro operations: ALUm0, ALUm1, ALUm2 ++ ++(define_cpu_unit "hip10a_alum0" "hip10a") ++(define_cpu_unit "hip10a_alum1" "hip10a") ++(define_cpu_unit "hip10a_alum2" "hip10a") ++ ++(define_reservation "hip10a_alum012" "hip10a_alum0|hip10a_alum1|hip10a_alum2") ++ ++; 3. All ALU pipelines ++ ++(define_reservation "hip10a_alu" "hip10a_alus0|hip10a_alus1|hip10a_alus2|hip10a_alum0|hip10a_alum1|hip10a_alum2") ++ ++; 4. 
 Three pipelines for load micro operations: Load0, Load1, Load2 ++ ++(define_cpu_unit "hip10a_load0" "hip10a_ldst") ++(define_cpu_unit "hip10a_load1" "hip10a_ldst") ++(define_cpu_unit "hip10a_load2" "hip10a_ldst") ++ ++(define_reservation "hip10a_ld012" "hip10a_load0|hip10a_load1|hip10a_load2") ++ ++; 5. Two pipelines for store micro operations: Store0, Store1 ++ ++(define_cpu_unit "hip10a_store0" "hip10a_ldst") ++(define_cpu_unit "hip10a_store1" "hip10a_ldst") ++ ++(define_reservation "hip10a_st01" "hip10a_store0|hip10a_store1") ++ ++; 6. Two pipelines for store data micro operations: STD0, STD1 ++ ++(define_cpu_unit "hip10a_store_data0" "hip10a_ldst") ++(define_cpu_unit "hip10a_store_data1" "hip10a_ldst") ++ ++(define_reservation "hip10a_std01" "hip10a_store_data0|hip10a_store_data1") ++ ++; 7. Four asymmetric pipelines for Asimd and FP micro operations: FSU0, FSU1, FSU2, FSU3 ++ ++(define_cpu_unit "hip10a_fsu0" "hip10a_fsu") ++(define_cpu_unit "hip10a_fsu1" "hip10a_fsu") ++(define_cpu_unit "hip10a_fsu2" "hip10a_fsu") ++(define_cpu_unit "hip10a_fsu3" "hip10a_fsu") ++ ++(define_reservation "hip10a_fsu0123" "hip10a_fsu0|hip10a_fsu1|hip10a_fsu2|hip10a_fsu3") ++(define_reservation "hip10a_fsu02" "hip10a_fsu0|hip10a_fsu2") ++ ++ ++; 8. Two pipelines for SVE operations, shared with fsu1 and fsu3: SVE1, SVE2 ++ ++;; Branch execution Unit ++; ++; Branches take two issue slots. 
++; No latency as there is no result ++(define_insn_reservation "hip10a_branch" 0 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "branch,call")) ++ "hip10a_alus012") ++ ++;; Simple Execution Unit: ++; ++;; Simple ALU without shift ++(define_insn_reservation "hip10a_alu_all" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "alu_imm,\ ++ adc_imm,adc_reg,\ ++ alu_sreg,\ ++ mov_imm,mov_reg")) ++ "hip10a_alu") ++ ++(define_insn_reservation "hip10a_alum" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "logic_imm,logic_reg,\ ++ csel,rotate_imm,bfm,\ ++ clz,rbit,rev")) ++ "hip10a_alum012") ++ ++(define_insn_reservation "hip10a_alus" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "alus_sreg,alus_imm,\ ++ adcs_reg,adcs_imm,\ ++ logics_imm,logics_reg,adr")) ++ "hip10a_alus012") ++ ++;; ALU ops with shift and extend ++(define_insn_reservation "hip10a_alu_ext_shift" 2 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "alu_ext,alus_ext,\ ++ logics_shift_imm,logics_shift_reg,\ ++ logic_shift_reg,logic_shift_imm,\ ++ ")) ++ "hip10a_alum012") ++ ++;; Multiply, multiply accumulate and count leading zeros (NOTE(review): clz is also listed in hip10a_alum above -- confirm which latency is intended) ++(define_insn_reservation "hip10a_mul" 3 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "mul,muls,clz,smull,umull")) ++ "hip10a_alum012") ++ ++(define_insn_reservation "hip10a_mla" 4 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "mla,mlas,smlal,umlal")) ++ "hip10a_alum012|hip10a_alu") ++ ++;; Integer divide ++(define_insn_reservation "hip10a_div" 11 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "udiv,sdiv")) ++ "hip10a_alum0") ++ ++;; Load execution Unit ++; ++; Loads of up to two words. ++(define_insn_reservation "hip10a_load1" 4 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "load_4,load_8,load_16")) ++ "hip10a_ld012") ++ ++; Stores of up to two words. 
++(define_insn_reservation "hip10a_store1" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "store_4,store_8,store_16")) ++ "hip10a_st01") ++ ++;; FP data processing instructions. ++ ++(define_insn_reservation "hip10a_fp_arith" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\ ++ f_mrc")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_fp_cmp" 2 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fcmps,fcmpd")) ++ "hip10a_fsu02+hip10a_alus012") ++ ++(define_insn_reservation "hip10a_fp_ccmp" 6 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fccmps,fccmpd")) ++ "hip10a_fsu0123+hip10a_alus012") ++ ++(define_insn_reservation "hip10a_fp_csel" 6 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fcsel,f_mcr")) ++ "hip10a_fsu0123+hip10a_alus012") ++ ++(define_insn_reservation "hip10a_fp_divs" 7 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fdivs")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_fp_divd" 10 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fdivd")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_fp_sqrts" 9 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fsqrts")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_fp_sqrtd" 15 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fsqrtd")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_fp_mul" 3 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fmuls,fmuld")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_fp_add" 2 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\ ++ f_rints,f_rintd")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_fp_mac" 4 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "fmacs,fmacd")) ++ "hip10a_fsu0123") ++ ++;; FP miscellaneous instructions. 
++ ++(define_insn_reservation "hip10a_fp_cvt" 5 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "f_cvtf2i")) ++ "hip10a_fsu0123+hip10a_alus012") ++ ++(define_insn_reservation "hip10a_fp_cvt2" 6 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "f_cvti2f")) ++ "hip10a_alus012+hip10a_fsu0123") ++ ++;; FP Load Instructions ++ ++(define_insn_reservation "hip10a_fp_load" 8 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "f_loads,f_loadd")) ++ "hip10a_ld012") ++ ++(define_insn_reservation "hip10a_fp_load2" 6 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "neon_ldp_q,neon_ldp")) ++ "hip10a_ld012+hip10a_alu") ++ ++;; FP store instructions ++ ++(define_insn_reservation "hip10a_fp_store" 3 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "f_stores,f_stored")) ++ "hip10a_st01+hip10a_std01") ++ ++(define_insn_reservation "hip10a_fp_store2" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "neon_stp_q,neon_stp")) ++ "hip10a_st01+hip10a_std01+hip10a_alu") ++ ++;; ASIMD integer instructions ++ ++(define_insn_reservation "hip10a_asimd_base1" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_base1")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_base2" 2 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_base2")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_base3" 3 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_base3")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_base4" 4 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_base4")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_base5" 5 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_base6" 6 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "neon_tbl4,neon_tbl4_q")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_base7" 7 ++ (and (eq_attr 
"tune" "hip10a") ++ (eq_attr "type" "neon_fp_div_s,neon_fp_div_d")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_base9" 9 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "neon_fp_div_s_q,neon_fp_sqrt_s,neon_fp_sqrt_d")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_fsqrt_q" 13 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "neon_fp_sqrt_s_q")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_fdiv_f64_q" 15 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "neon_fp_div_d_q")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_fsqrt_f64_q" 25 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "neon_fp_sqrt_d_q")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_dup" 5 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "neon_dup,neon_dup_q")) ++ "hip10a_alus012+hip10a_fsu0123") ++ ++;; ASIMD load instructions ++ ++(define_insn_reservation "hip10a_asimd_ld1_12" 6 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_load1_12")) ++ "hip10a_ld012") ++ ++(define_insn_reservation "hip10a_asimd_ld1_34" 7 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_load1_34")) ++ "hip10a_ld012") ++ ++(define_insn_reservation "hip10a_asimd_ld7" 7 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_load1_lanes,hip10a_neon_load2,hip10a_neon_load34_all_lane,hip10a_neon_load34")) ++ "hip10a_ld012+hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_ld8" 8 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_load34_one_lane,hip10a_neon_load34_q")) ++"hip10a_ld012+hip10a_fsu0123") ++ ++;; ASIMD store instructions ++ ++(define_insn_reservation "hip10a_asimd_st1" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_store1,hip10a_neon_store2")) ++ "hip10a_st01+hip10a_std01") ++ ++(define_insn_reservation "hip10a_asimd_st1_12" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr 
"hip10a_type" "hip10a_neon_store1_12reg_d")) ++ "hip10a_st01+hip10a_std01+hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_st4" 4 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "hip10a_type" "hip10a_neon_store1_34reg_d,hip10a_neon_store34")) ++ "hip10a_fsu0123+hip10a_st01+hip10a_std01") ++ ++;; Cryptography extensions ++ ++ ++(define_insn_reservation "hip10a_asimd_pmull" 2 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "crypto_pmull")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_aes" 2 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "crypto_aese,crypto_aesmc")) ++ "hip10a_fsu0+hip10a_fsu2") ++ ++(define_insn_reservation "hip10a_asimd_sha3" 1 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "crypto_sha3")) ++ "hip10a_fsu0123") ++ ++(define_insn_reservation "hip10a_asimd_sha1" 2 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\ ++ crypto_sha256_fast,crypto_sha512,\ ++ crypto_sm3")) ++ "hip10a_fsu0+hip10a_fsu2") ++ ++(define_insn_reservation "hip10a_asimd_sha1_and256" 4 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\ ++ crypto_sm4")) ++ "hip10a_fsu0+hip10a_fsu2") ++ ++;; CRC extension. ++ ++(define_insn_reservation "hip10a_crc" 2 ++ (and (eq_attr "tune" "hip10a") ++ (eq_attr "type" "crc")) ++ "hip10a_alum012") +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 3eced16e3..2d906ee56 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19220,7 +19220,7 @@ performance of the code. 
Permissible values for this option are: + @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96} + @samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n}, + @samp{octeontx2f95mm}, +-@samp{a64fx},@samp{hip09},@samp{hip10c},@samp{hip11} ++@samp{a64fx},@samp{hip09},@samp{hip10a},@samp{hip10c},@samp{hip11} + @samp{thunderx}, @samp{thunderxt88}, + @samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110}, + @samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus}, +-- +2.25.1 + diff --git a/0340-Fix-for-hip11-and-hip10c-addrcost_table.patch b/0340-Fix-for-hip11-and-hip10c-addrcost_table.patch new file mode 100644 index 0000000..efdf0e9 --- /dev/null +++ b/0340-Fix-for-hip11-and-hip10c-addrcost_table.patch @@ -0,0 +1,34 @@ +From 62bbc7f631a49712903281ad85b62205780d8af7 Mon Sep 17 00:00:00 2001 +From: liyunfei +Date: Tue, 21 Jan 2025 15:16:50 +0800 +Subject: [PATCH] Fix for hip11 and hip10c addrcost_table + +--- + gcc/config/aarch64/aarch64.cc | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index a6ef40a47..52ce7d905 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -535,6 +535,8 @@ static const struct cpu_addrcost_table hip10c_addrcost_table = + }, + 0, /* pre_modify */ + 0, /* post_modify */ ++ 0, /* post_modify_ld3_st3 */ ++ 0, /* post_modify_ld4_st4 */ + 0, /* register_offset */ + 1, /* register_sextend */ + 1, /* register_zextend */ +@@ -551,6 +553,8 @@ static const struct cpu_addrcost_table hip11_addrcost_table = + }, + 0, /* pre_modify */ + 0, /* post_modify */ ++ 0, /* post_modify_ld3_st3 */ ++ 0, /* post_modify_ld4_st4 */ + 0, /* register_offset */ + 1, /* register_sextend */ + 1, /* register_zextend */ +-- +2.25.1 + diff --git a/gcc.spec b/gcc.spec index 1a8e1e7..dd2c749 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, 
append them after %%{gcc_release} on Release: line. -%global gcc_release 69 +%global gcc_release 70 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -443,6 +443,9 @@ Patch334: 0334-Dont-use-local_detect_cpu-when-cross-build.patch Patch335: 0335-fix-costs-for-hip09.patch Patch336: 0336-sfc-Add-struct-static-field-compression-optimization.patch Patch337: 0337-Reduce-ipa-inline-warning-output.patch +Patch338: 0338-CSPGO-Update-the-gate-of-cspgo.patch +Patch339: 0339-Add-hip10a-machine-discription.patch +Patch340: 0340-Fix-for-hip11-and-hip10c-addrcost_table.patch # Part 1001-1999 %ifarch sw_64 @@ -1566,6 +1569,9 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch -P335 -p1 %patch -P336 -p1 %patch -P337 -p1 +%patch -P338 -p1 +%patch -P339 -p1 +%patch -P340 -p1 %ifarch sw_64 %patch -P1001 -p1 @@ -4193,6 +4199,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Sat Feb 15 2025 liyancheng <412998149@qq.com> - 12.3.1-70 +- Type:Sync +- ID:NA +- SUG:NA +- DESC:Sync bugfix for ai4c, cspgo and hip09, and add hip10a cpuinfo. + * Wed Feb 12 2025 huang-xiaoquan - 12.3.1-69 - Type:Bugfix - ID:NA -- Gitee