Ai
23 Star 29 Fork 165

src-openEuler/gcc

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0353-Add-hip10a-machine-discription.patch 33.14 KB
一键复制 编辑 原始数据 按行查看 历史
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877
From 2eea7cfbd7128906034e3d3c5a0fe7d05860ba6b Mon Sep 17 00:00:00 2001
From: liyunfei <liyunfei33@huawei.com>
Date: Fri, 17 Jan 2025 20:05:33 +0800
Subject: [PATCH] Add hip10a machine description
Here is the patch introducing hip10a machine model
for the scheduler.
---
gcc/config/aarch64/aarch64-cores.def | 3 +-
gcc/config/aarch64/aarch64-cost-tables.h | 103 +++++
gcc/config/aarch64/aarch64-tune.md | 2 +-
gcc/config/aarch64/aarch64.cc | 109 +++++
gcc/config/aarch64/aarch64.md | 1 +
gcc/config/aarch64/hip10a.md | 538 +++++++++++++++++++++++
gcc/doc/invoke.texi | 2 +-
7 files changed, 755 insertions(+), 3 deletions(-)
create mode 100644 gcc/config/aarch64/hip10a.md
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 1e8de523c..8f6210397 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -131,7 +131,8 @@ AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1)
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1)
AARCH64_CORE("hip09", hip09, hip09, V8_5A, (SVE, I8MM, F32MM, F64MM, PROFILE, PREDRES), hip09, 0x48, 0xd02, 0x0)
-AARCH64_CORE("hip10c", hip10c, hip10c, V8_5A, (SVE, I8MM, BF16, F32MM, F64MM, FLAGM, PAUTH, SSBS, SHA3, SM4, PROFILE, PREDRES), hip10c, 0x48, 0xddd, 0x0)
+AARCH64_CORE("hip10a", hip10a, hip10a, V8_5A, (SVE, I8MM, BF16, F32MM, F64MM, SSBS, SHA3, SM4, PREDRES, SVE2, SVE2_BITPERM, DOTPROD, F16FML), hip10a, 0x48, 0xd03, 0x0)
+AARCH64_CORE("hip10c", hip10c, hip10c, V8_5A, (SVE, I8MM, BF16, F32MM, F64MM, FLAGM, PAUTH, SSBS, SHA3, SM4, PROFILE, PREDRES), hip10c, 0x48, 0xd45, 0x0)
/* ARMv8.3-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 06da1b271..a39ace9ba 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -880,6 +880,109 @@ const struct cpu_cost_table hip09_extra_costs =
}
};
+const struct cpu_cost_table hip10a_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ 0, /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (2), /* simple. */
+ COSTS_N_INSNS (2), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (2), /* add. */
+ COSTS_N_INSNS (2), /* extend_add. */
+ COSTS_N_INSNS (7) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (3), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (10) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (3), /* load. */
+ COSTS_N_INSNS (6), /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (3), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 2, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (5), /* loadf. */
+ COSTS_N_INSNS (5), /* loadd. */
+ COSTS_N_INSNS (3), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 2, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (4), /* loadv. */
+ COSTS_N_INSNS (4) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (6), /* div. */
+ COSTS_N_INSNS (2), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (3), /* fma. */
+ COSTS_N_INSNS (1), /* addsub. */
+ 0, /* fpconst. */
+ 0, /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (4), /* toint. */
+ COSTS_N_INSNS (5), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (9), /* div. */
+ COSTS_N_INSNS (2), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (3), /* fma. */
+ COSTS_N_INSNS (1), /* addsub. */
+ 0, /* fpconst. */
+ 0, /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (4), /* toint. */
+ COSTS_N_INSNS (5), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
const struct cpu_cost_table hip10c_extra_costs =
{
/* ALU */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index e176a4d70..1cfa3559d 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,hip10c,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,hip10a,hip10c,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 65b684ef6..a6ef40a47 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -507,6 +507,24 @@ static const struct cpu_addrcost_table hip09_addrcost_table =
0, /* imm_offset */
};
+static const struct cpu_addrcost_table hip10a_addrcost_table =
+{
+ {
+ 1, /* hi */
+ 0, /* si */
+ 0, /* di */
+ 1, /* ti */
+ },
+ 0, /* pre_modify */
+ 0, /* post_modify */
+ 0, /* post_modify_ld3_st3 */
+ 0, /* post_modify_ld4_st4 */
+ 0, /* register_offset */
+ 1, /* register_sextend */
+ 1, /* register_zextend */
+ 0, /* imm_offset */
+};
+
static const struct cpu_addrcost_table hip10c_addrcost_table =
{
{
@@ -754,6 +772,16 @@ static const struct cpu_regmove_cost hip09_regmove_cost =
2 /* FP2FP */
};
+static const struct cpu_regmove_cost hip10a_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost. */
+ 5, /* GP2FP */
+ 5, /* FP2GP */
+ 2 /* FP2FP */
+};
+
static const struct cpu_regmove_cost hip10c_regmove_cost =
{
1, /* GP2GP */
@@ -1088,6 +1116,43 @@ static const struct cpu_vector_cost hip09_vector_cost =
nullptr /* issue_info */
};
+static const advsimd_vec_cost hip10a_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 0, /* ld2_st2_permute_cost */
+ 0, /* ld3_st3_permute_cost */
+ 0, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 3, /* reduc_i8_cost */
+ 3, /* reduc_i16_cost */
+ 3, /* reduc_i32_cost */
+ 3, /* reduc_i64_cost */
+ 3, /* reduc_f16_cost */
+ 3, /* reduc_f32_cost */
+ 3, /* reduc_f64_cost */
+ 3, /* store_elt_extra_cost */
+ 3, /* vec_to_scalar_cost */
+ 2, /* scalar_to_vec_cost */
+ 5, /* align_load_cost */
+ 5, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+static const struct cpu_vector_cost hip10a_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 5, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &hip10a_advsimd_vector_cost, /* advsimd */
+ nullptr, /* sve */
+ nullptr /* issue_info */
+};
+
static const advsimd_vec_cost hip10c_advsimd_vector_cost =
{
2, /* int_stmt_cost */
@@ -1520,6 +1585,17 @@ static const cpu_prefetch_tune hip09_prefetch_tune =
-1 /* default_opt_level */
};
+static const cpu_prefetch_tune hip10a_prefetch_tune =
+{
+ 0, /* num_slots */
+ 64, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ 512, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const cpu_prefetch_tune hip10c_prefetch_tune =
{
0, /* num_slots */
@@ -1940,6 +2016,39 @@ static const struct tune_params hip09_tunings =
&hip09_prefetch_tune
};
+static const struct tune_params hip10a_tunings =
+{
+ &hip10a_extra_costs,
+ &hip10a_addrcost_table,
+ &hip10a_regmove_cost,
+ &generic_vector_cost, /* NOTE(review): hip10a_vector_cost is defined in this patch but never referenced; confirm generic is intended. */
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_256, /* sve_width */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
+ 8, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
+ "16", /* function_align. */
+ "4", /* jump_align. */
+ "8", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags. */
+ &hip10a_prefetch_tune
+};
+
static const struct tune_params hip10c_tunings =
{
&hip10c_extra_costs,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 96b8ab471..2f46bc793 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -549,6 +549,7 @@
(include "tsv110.md")
(include "thunderx3t110.md")
(include "hip09.md")
+(include "hip10a.md")
(include "hip10c.md")
(include "hip11.md")
diff --git a/gcc/config/aarch64/hip10a.md b/gcc/config/aarch64/hip10a.md
new file mode 100644
index 000000000..3a687e8af
--- /dev/null
+++ b/gcc/config/aarch64/hip10a.md
@@ -0,0 +1,538 @@
+;; hip10a pipeline description
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;;
+;;Contributed by liyunfei
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "hip10a")
+(define_automaton "hip10a_ldst")
+(define_automaton "hip10a_fsu")
+
+(define_attr "hip10a_type" ; Buckets NEON insn "type" values into the classes tested by the hip10a_asimd_* reservations.
+ "hip10a_neon_base1, hip10a_neon_base2, hip10a_neon_base3, hip10a_neon_base4,
+ hip10a_neon_load1_12, hip10a_neon_load1_34, hip10a_neon_load1_lanes, hip10a_neon_load2,
+ hip10a_neon_load34_all_lane, hip10a_neon_load34_one_lane, hip10a_neon_load34, hip10a_neon_load34_q,
+ hip10a_neon_store1, hip10a_neon_store2, hip10a_neon_store1_34reg_d, hip10a_neon_store1_12reg_d,
+ hip10a_neon_store34,
+ unknown"
+ (cond [
+ (eq_attr "type" "neon_abs,neon_abs_q,\
+ neon_neg,neon_neg_q,\
+ neon_add,neon_add_q,neon_add_widen,neon_add_long,\
+ neon_sub,neon_sub_q,neon_sub_widen,neon_sub_long,\
+ neon_qadd,neon_qadd_q,\
+ neon_qsub,neon_qsub_q,\
+ neon_qabs,neon_qabs_q,\
+ neon_qneg,neon_qneg_q,\
+ neon_compare,neon_compare_q,neon_compare_zero,neon_compare_zero_q,\
+ neon_logic,neon_logic_q,\
+ neon_minmax,neon_minmax_q,\
+ neon_tst,neon_tst_q,\
+ neon_bsl,neon_bsl_q,\
+ neon_cls,neon_cls_q,\
+ neon_ext,neon_ext_q,\
+ neon_rev,neon_rev_q,\
+ neon_fp_abs_s,neon_fp_abs_s_q,neon_fp_abs_d,\
+ neon_fp_neg_s,neon_fp_neg_s_q,neon_fp_neg_d,neon_fp_neg_d_q,\
+ neon_move,neon_move_q,\
+ neon_ins,neon_ins_q")
+ (const_string "hip10a_neon_base1")
+ (eq_attr "type" "neon_abd,neon_abd_q,\
+ neon_tbl1,neon_tbl1_q,\
+ neon_arith_acc,neon_arith_acc_q,\
+ neon_add_halve,neon_add_halve_q,neon_add_halve_narrow_q,\
+ neon_sub_halve,neon_sub_halve_q,neon_sub_halve_narrow_q,\
+ neon_sat_shift_imm,neon_sat_shift_imm_q,\
+ neon_shift_imm,neon_shift_imm_q,neon_shift_imm_long,\
+ neon_shift_imm_narrow_q,\
+ neon_cnt,neon_cnt_q,\
+ neon_tbl2,neon_tbl2_q,\
+ neon_to_gp,neon_to_gp_q,\
+ neon_fp_recpe_s,neon_fp_recpe_s_q,\
+ neon_fp_recpe_d,neon_fp_recpe_d_q,\
+ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\
+ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_s,neon_fp_recpx_s_q,\
+ neon_fp_recpx_d,neon_fp_recpx_d_q,\
+ neon_fp_abd_s,neon_fp_abd_s_q,\
+ neon_fp_abd_d,neon_fp_abd_d_q,\
+ neon_fp_addsub_s,neon_fp_addsub_s_q,\
+ neon_fp_addsub_d,neon_fp_addsub_d_q,\
+ neon_fp_compare_s,neon_fp_compare_s_q,\
+ neon_fp_compare_d,neon_fp_compare_d_q,\
+ neon_fp_minmax_s,\
+ neon_fp_minmax_s_q,neon_fp_minmax_d,\
+ neon_fp_minmax_d_q,neon_fp_round_s,\
+ neon_fp_round_s_q,\
+ neon_fp_round_d,neon_fp_round_d_q")
+ (const_string "hip10a_neon_base2")
+ (eq_attr "type" "neon_dot,neon_dot_q,\
+ neon_reduc_add,neon_reduc_add_q,\
+ neon_sat_mul_b,neon_sat_mul_b_q,neon_sat_mul_b_long,\
+ neon_sat_mul_h,neon_sat_mul_h_q,\
+ neon_sat_mul_s,neon_sat_mul_s_q,\
+ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\
+ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\
+ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\
+ neon_mul_b,neon_mul_b_q,neon_mul_b_long,\
+ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\
+ neon_mul_s_long,neon_mul_h_scalar_long,\
+ neon_mul_s_scalar_long,\
+ neon_mla_b,neon_mla_b_q,neon_mla_b_long,\
+ neon_mla_h,neon_mla_h_q,neon_mla_h_long,\
+ neon_mla_h_scalar,neon_mla_h_scalar_q,neon_mla_h_scalar_long,\
+ neon_mla_s,neon_mla_s_q,neon_mla_s_long,\
+ neon_mla_s_scalar,neon_mla_s_scalar_q,neon_mla_s_scalar_long,\
+ neon_sat_mla_b_long,\
+ neon_sat_mla_h_long,\
+ neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_long,\
+ neon_sat_mla_s_scalar_long,\
+ neon_shift_acc,neon_shift_acc_q,neon_shift_reg,neon_shift_reg_q,\
+ neon_sat_shift_reg,neon_sat_shift_reg_q,neon_sat_shift_imm_narrow_q,\
+ neon_reduc_minmax,neon_reduc_minmax_q,\
+ neon_fp_reduc_add_s,neon_fp_reduc_add_s_q,\
+ neon_fp_reduc_add_d,neon_fp_reduc_add_d_q,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_d_q,\
+ neon_fp_mul_s,neon_fp_mul_s_q,neon_fp_mul_s_scalar,\
+ neon_fp_mul_d,neon_fp_mul_d_q,neon_fp_mul_d_scalar_q,\
+ neon_fp_mul_s_scalar_q,\
+ neon_fp_to_int_s,\
+ neon_fp_to_int_d")
+ ; NOTE(review): neon_fp_recpe_*/neon_fp_recpx_* were repeated in this arm and the next but are already claimed by hip10a_neon_base2 above (first match wins) -- confirm the intended class.
+ (const_string "hip10a_neon_base3")
+ (eq_attr "type" "neon_tbl3,neon_tbl3_q,\
+ neon_fp_recps_s_q,neon_fp_recps_d,\
+ neon_fp_recps_s,neon_fp_recps_d_q,\
+ neon_fp_to_int_s_q,neon_fp_to_int_d_q,\
+ neon_fp_cvt_narrow_d_q,neon_fp_cvt_narrow_s_q,\
+ neon_fp_mla_s,neon_fp_mla_s_q,\
+ neon_fp_mla_d,neon_fp_mla_d_q,\
+ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
+ neon_fp_mla_d_scalar_q")
+ (const_string "hip10a_neon_base4")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
+ neon_load1_2reg,neon_load1_2reg_q")
+ (const_string "hip10a_neon_load1_12")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q,\
+ neon_load1_4reg,neon_load1_4reg_q")
+ (const_string "hip10a_neon_load1_34")
+ (eq_attr "type" "neon_load1_one_lane,\
+ neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q")
+ (const_string "hip10a_neon_load1_lanes")
+ (eq_attr "type" "neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,\
+ neon_load2_one_lane,neon_load2_2reg,\
+ neon_load2_2reg_q,neon_load3_one_lane")
+ (const_string "hip10a_neon_load2")
+ (eq_attr "type" "neon_load4_one_lane") ; NOTE(review): this type was listed twice; a _q variant may have been intended -- confirm.
+ (const_string "hip10a_neon_load34_one_lane")
+ (eq_attr "type" "neon_load3_all_lanes,neon_load3_all_lanes_q,\
+ neon_load4_all_lanes,neon_load4_all_lanes_q")
+ (const_string "hip10a_neon_load34_all_lane")
+ (eq_attr "type" "neon_load3_3reg,neon_load4_4reg")
+ (const_string "hip10a_neon_load34")
+ (eq_attr "type" "neon_load3_3reg_q,neon_load4_4reg_q")
+ (const_string "hip10a_neon_load34_q")
+ (eq_attr "type" "neon_store1_1reg_q,neon_store1_2reg_q,\
+ neon_store1_3reg_q,neon_store1_4reg_q,\
+ neon_store1_one_lane,neon_store1_one_lane_q")
+ (const_string "hip10a_neon_store1")
+ (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q,\
+ neon_store2_2reg,neon_store2_2reg_q")
+ (const_string "hip10a_neon_store2")
+ (eq_attr "type" "neon_store1_1reg,neon_store1_2reg")
+ (const_string "hip10a_neon_store1_12reg_d")
+ (eq_attr "type" "neon_store1_3reg,neon_store1_4reg")
+ (const_string "hip10a_neon_store1_34reg_d")
+ (eq_attr "type" "neon_store3_one_lane,neon_store3_one_lane_q,\
+ neon_store4_one_lane,neon_store4_one_lane_q,\
+ neon_store3_3reg_q,neon_store3_3reg,\
+ neon_store4_4reg_q,neon_store4_4reg")
+ (const_string "hip10a_neon_store34")]
+ (const_string "unknown")))
+
+; The hip10a core is modelled as a multiple-issue pipeline that has
+; the following functional units.
+; 1. Three pipelines for single cycle integer micro operations: ALUs0, ALUs1, ALUs2
+
+(define_cpu_unit "hip10a_alus0" "hip10a")
+(define_cpu_unit "hip10a_alus1" "hip10a")
+(define_cpu_unit "hip10a_alus2" "hip10a")
+
+(define_reservation "hip10a_alus012" "hip10a_alus0|hip10a_alus1|hip10a_alus2")
+;(define_reservation "hip10a_alus01" "hip10a_alus0|hip10a_alus1")
+;(define_reservation "hip10a_alus23" "hip10a_alus2|hip10a_alus3")
+
+; 2. Three pipelines for multi cycles integer micro operations: ALUm0, ALUm1, ALUm2
+
+(define_cpu_unit "hip10a_alum0" "hip10a")
+(define_cpu_unit "hip10a_alum1" "hip10a")
+(define_cpu_unit "hip10a_alum2" "hip10a")
+
+(define_reservation "hip10a_alum012" "hip10a_alum0|hip10a_alum1|hip10a_alum2")
+
+; 3. All ALU pipelines
+
+(define_reservation "hip10a_alu" "hip10a_alus0|hip10a_alus1|hip10a_alus2|hip10a_alum0|hip10a_alum1|hip10a_alum2")
+
+; 4. Three pipelines for load micro operations: Load0, Load1, Load2
+
+(define_cpu_unit "hip10a_load0" "hip10a_ldst")
+(define_cpu_unit "hip10a_load1" "hip10a_ldst")
+(define_cpu_unit "hip10a_load2" "hip10a_ldst")
+
+(define_reservation "hip10a_ld012" "hip10a_load0|hip10a_load1|hip10a_load2")
+
+; 5. Two pipelines for store micro operations: Store0, Store1
+
+(define_cpu_unit "hip10a_store0" "hip10a_ldst")
+(define_cpu_unit "hip10a_store1" "hip10a_ldst")
+
+(define_reservation "hip10a_st01" "hip10a_store0|hip10a_store1")
+
+; 6. Two pipelines for store data micro operations: STD0,STD1
+
+(define_cpu_unit "hip10a_store_data0" "hip10a_ldst")
+(define_cpu_unit "hip10a_store_data1" "hip10a_ldst")
+
+(define_reservation "hip10a_std01" "hip10a_store_data0|hip10a_store_data1")
+
+; 7. Four asymmetric pipelines for Asimd and FP micro operations: FSU0, FSU1, FSU2, FSU3
+
+(define_cpu_unit "hip10a_fsu0" "hip10a_fsu")
+(define_cpu_unit "hip10a_fsu1" "hip10a_fsu")
+(define_cpu_unit "hip10a_fsu2" "hip10a_fsu")
+(define_cpu_unit "hip10a_fsu3" "hip10a_fsu")
+
+(define_reservation "hip10a_fsu0123" "hip10a_fsu0|hip10a_fsu1|hip10a_fsu2|hip10a_fsu3")
+(define_reservation "hip10a_fsu02" "hip10a_fsu0|hip10a_fsu2")
+
+
+; 8. Two pipelines for SVE operations, which are the same as FSU1 and FSU3: SVE1, SVE2
+
+;; Branch execution Unit
+;
+; Branches take two issue slot.
+; No latency as there is no result
+(define_insn_reservation "hip10a_branch" 0
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "branch,call"))
+ "hip10a_alus012")
+
+;; Simple Execution Unit:
+;
+;; Simple ALU without shift
+(define_insn_reservation "hip10a_alu_all" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "alu_imm,\
+ adc_imm,adc_reg,\
+ alu_sreg,\
+ mov_imm,mov_reg"))
+ "hip10a_alu")
+
+(define_insn_reservation "hip10a_alum" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "logic_imm,logic_reg,\
+ csel,rotate_imm,bfm,\
+ clz,rbit,rev"))
+ "hip10a_alum012")
+
+(define_insn_reservation "hip10a_alus" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "alus_sreg,alus_imm,\
+ adcs_reg,adcs_imm,\
+ logics_imm,logics_reg,adr"))
+ "hip10a_alus012")
+
+;; ALU ops with extend and logical ops with shift: latency 2 on the ALUm pipes.
+(define_insn_reservation "hip10a_alu_ext_shift" 2
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "alu_ext,alus_ext,\
+ logics_shift_imm,\
+ logics_shift_reg,\
+ logic_shift_reg,logic_shift_imm"))
+ "hip10a_alum012")
+
+;; Multiply and multiply accumulate and count leading zeros
+(define_insn_reservation "hip10a_mul" 3
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "mul,muls,clz,smull,umull")) ; NOTE(review): clz is also matched by hip10a_alum (latency 1); overlapping reservations are undefined -- confirm which is intended.
+ "hip10a_alum012")
+
+(define_insn_reservation "hip10a_mla" 4
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "mla,mlas,smlal,umlal"))
+ "hip10a_alum012|hip10a_alu")
+
+;; Integer divide
+(define_insn_reservation "hip10a_div" 11
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "udiv,sdiv"))
+ "hip10a_alum0")
+
+;; Load execution Unit
+;
+; Loads of up to two words.
+(define_insn_reservation "hip10a_load1" 4
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "load_4,load_8,load_16"))
+ "hip10a_ld012")
+
+; Integer stores (store_4/store_8/store_16) issue to either store pipeline.
+(define_insn_reservation "hip10a_store1" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "store_4,store_8,store_16"))
+ "hip10a_st01")
+
+;; FP data processing instructions.
+
+(define_insn_reservation "hip10a_fp_arith" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\
+ f_mrc"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_fp_cmp" 2
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "hip10a_fsu02+hip10a_alus012")
+
+(define_insn_reservation "hip10a_fp_ccmp" 6
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fccmps,fccmpd"))
+ "hip10a_fsu0123+hip10a_alus012")
+
+(define_insn_reservation "hip10a_fp_csel" 6
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fcsel,f_mcr"))
+ "hip10a_fsu0123+hip10a_alus012")
+
+(define_insn_reservation "hip10a_fp_divs" 7
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fdivs"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_fp_divd" 10
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fdivd"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_fp_sqrts" 9
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fsqrts"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_fp_sqrtd" 15
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fsqrtd"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_fp_mul" 3
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fmuls,fmuld"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_fp_add" 2
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\
+ f_rints,f_rintd"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_fp_mac" 4
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "fmacs,fmacd"))
+ "hip10a_fsu0123")
+
+;; FP miscellaneous instructions.
+
+(define_insn_reservation "hip10a_fp_cvt" 5
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "f_cvtf2i"))
+ "hip10a_fsu0123+hip10a_alus012")
+
+(define_insn_reservation "hip10a_fp_cvt2" 6
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "f_cvti2f"))
+ "hip10a_alus012+hip10a_fsu0123")
+
+;; FP Load Instructions
+
+(define_insn_reservation "hip10a_fp_load" 8
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "hip10a_ld012")
+
+(define_insn_reservation "hip10a_fp_load2" 6
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "neon_ldp_q,neon_ldp"))
+ "hip10a_ld012+hip10a_alu")
+
+;; FP store instructions
+
+(define_insn_reservation "hip10a_fp_store" 3
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "f_stores,f_stored"))
+ "hip10a_st01+hip10a_std01")
+
+(define_insn_reservation "hip10a_fp_store2" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "neon_stp_q,neon_stp"))
+ "hip10a_st01+hip10a_std01+hip10a_alu")
+
+;; ASIMD integer instructions
+
+(define_insn_reservation "hip10a_asimd_base1" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_base1"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_base2" 2
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_base2"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_base3" 3
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_base3"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_base4" 4
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_base4"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_base5" 5
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "")) ; NOTE(review): empty type list -- this reservation appears to match no insn; placeholder? Confirm or populate.
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_base6" 6
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "neon_tbl4,neon_tbl4_q"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_base7" 7
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_d"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_base9" 9
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "neon_fp_div_s_q,neon_fp_sqrt_s,neon_fp_sqrt_d"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_fsqrt_q" 13
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "neon_fp_sqrt_s_q"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_fdiv_f64_q" 15
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "neon_fp_div_d_q"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_fsqrt_f64_q" 25
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "neon_fp_sqrt_d_q"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_dup" 5
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "neon_dup,neon_dup_q"))
+ "hip10a_alus012+hip10a_fsu0123")
+
+;; ASIMD load instructions
+
+(define_insn_reservation "hip10a_asimd_ld1_12" 6
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_load1_12"))
+ "hip10a_ld012")
+
+(define_insn_reservation "hip10a_asimd_ld1_34" 7
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_load1_34"))
+ "hip10a_ld012")
+
+(define_insn_reservation "hip10a_asimd_ld7" 7
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_load1_lanes,hip10a_neon_load2,hip10a_neon_load34_all_lane,hip10a_neon_load34"))
+ "hip10a_ld012+hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_ld8" 8
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_load34_one_lane,hip10a_neon_load34_q"))
+"hip10a_ld012+hip10a_fsu0123")
+
+;; ASIMD store instructions
+
+(define_insn_reservation "hip10a_asimd_st1" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_store1,hip10a_neon_store2"))
+ "hip10a_st01+hip10a_std01")
+
+(define_insn_reservation "hip10a_asimd_st1_12" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_store1_12reg_d"))
+ "hip10a_st01+hip10a_std01+hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_st4" 4
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "hip10a_type" "hip10a_neon_store1_34reg_d,hip10a_neon_store34"))
+ "hip10a_fsu0123+hip10a_st01+hip10a_std01")
+
+;; Cryptography extensions
+
+
+(define_insn_reservation "hip10a_asimd_pmull" 2
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "crypto_pmull"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_aes" 2
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "hip10a_fsu0+hip10a_fsu2")
+
+(define_insn_reservation "hip10a_asimd_sha3" 1
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "crypto_sha3"))
+ "hip10a_fsu0123")
+
+(define_insn_reservation "hip10a_asimd_sha1" 2
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\
+ crypto_sha256_fast,crypto_sha512,\
+ crypto_sm3"))
+ "hip10a_fsu0+hip10a_fsu2")
+
+(define_insn_reservation "hip10a_asimd_sha1_and256" 4
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\
+ crypto_sm4"))
+ "hip10a_fsu0+hip10a_fsu2")
+
+;; CRC extension.
+
+(define_insn_reservation "hip10a_crc" 2
+ (and (eq_attr "tune" "hip10a")
+ (eq_attr "type" "crc"))
+ "hip10a_alum012")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 3eced16e3..2d906ee56 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -19220,7 +19220,7 @@ performance of the code. Permissible values for this option are:
@samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
@samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},
@samp{octeontx2f95mm},
-@samp{a64fx},@samp{hip09},@samp{hip10c},@samp{hip11}
+@samp{a64fx}, @samp{hip09}, @samp{hip10a}, @samp{hip10c}, @samp{hip11},
@samp{thunderx}, @samp{thunderxt88},
@samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
@samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus},
--
2.25.1
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/src-openeuler/gcc.git
git@gitee.com:src-openeuler/gcc.git
src-openeuler
gcc
gcc
master

搜索帮助