代码拉取完成,页面将自动刷新
From d3a8c59e7eaf99bff77447e08e15898530af8a9e Mon Sep 17 00:00:00 2001
From: liyunfei <liyunfei33@huawei.com>
Date: Tue, 19 Nov 2024 11:10:29 +0800
Subject: [PATCH] Add hip10c machine description
Here is the patch introducing hip10c machine model
for the scheduler.
---
gcc/config/aarch64/aarch64-cores.def | 1 +
gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++
gcc/config/aarch64/aarch64-tune.md | 2 +-
gcc/config/aarch64/aarch64.cc | 108 +++++
gcc/config/aarch64/aarch64.md | 1 +
gcc/config/aarch64/hip10c.md | 562 +++++++++++++++++++++++
gcc/doc/invoke.texi | 2 +-
7 files changed, 778 insertions(+), 2 deletions(-)
create mode 100644 gcc/config/aarch64/hip10c.md
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 3337fd1a0..1e8de523c 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -131,6 +131,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1)
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1)
AARCH64_CORE("hip09", hip09, hip09, V8_5A, (SVE, I8MM, F32MM, F64MM, PROFILE, PREDRES), hip09, 0x48, 0xd02, 0x0)
+AARCH64_CORE("hip10c", hip10c, hip10c, V8_5A, (SVE, I8MM, BF16, F32MM, F64MM, FLAGM, PAUTH, SSBS, SHA3, SM4, PROFILE, PREDRES), hip10c, 0x48, 0xddd, 0x0)
/* ARMv8.3-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 0ee427b61..dc51d9c2c 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -876,6 +876,110 @@ const struct cpu_cost_table hip09_extra_costs =
}
};
+const struct cpu_cost_table hip10c_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (2), /* simple. */
+ COSTS_N_INSNS (2), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (2), /* add. */
+ COSTS_N_INSNS (2), /* extend_add. */
+ COSTS_N_INSNS (11) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (3), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (19) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (3), /* load. */
+ COSTS_N_INSNS (4), /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (3), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 2, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (4), /* loadf. */
+ COSTS_N_INSNS (4), /* loadd. */
+ COSTS_N_INSNS (4), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 2, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (4), /* loadv. */
+ COSTS_N_INSNS (4) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (10), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (4), /* fma. */
+ COSTS_N_INSNS (4), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (17), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (6), /* mult_addsub. */
+ COSTS_N_INSNS (6), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
const struct cpu_cost_table ampere1_extra_costs =
{
/* ALU */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 511422081..e176a4d70 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,hip10c,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 025a3c478..e14d38e78 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -505,6 +505,22 @@ static const struct cpu_addrcost_table hip09_addrcost_table =
0, /* imm_offset */
};
+static const struct cpu_addrcost_table hip10c_addrcost_table =
+{
+ {
+ 1, /* hi */
+ 0, /* si */
+ 0, /* di */
+ 1, /* ti */
+ },
+ 0, /* pre_modify */
+ 0, /* post_modify */
+ 0, /* register_offset */
+ 1, /* register_sextend */
+ 1, /* register_zextend */
+ 0, /* imm_offset */
+};
+
static const struct cpu_addrcost_table hip11_addrcost_table =
{
{
@@ -736,6 +752,16 @@ static const struct cpu_regmove_cost hip09_regmove_cost =
2 /* FP2FP */
};
+static const struct cpu_regmove_cost hip10c_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* NOTE(review): the int<->fp move costs below (2 and 3) are lower than
+ the memmov_cost of 4 used by hip10c_tunings; confirm this is intended. */
+ 2, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
static const struct cpu_regmove_cost neoversen2_regmove_cost =
{
1, /* GP2GP */
@@ -1060,6 +1086,43 @@ static const struct cpu_vector_cost hip09_vector_cost =
nullptr /* issue_info */
};
+static const advsimd_vec_cost hip10c_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 0, /* ld2_st2_permute_cost */
+ 0, /* ld3_st3_permute_cost */
+ 0, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 3, /* reduc_i8_cost */
+ 3, /* reduc_i16_cost */
+ 3, /* reduc_i32_cost */
+ 3, /* reduc_i64_cost */
+ 3, /* reduc_f16_cost */
+ 3, /* reduc_f32_cost */
+ 3, /* reduc_f64_cost */
+ 3, /* store_elt_extra_cost */
+ 3, /* vec_to_scalar_cost */
+ 2, /* scalar_to_vec_cost */
+ 5, /* align_load_cost */
+ 5, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+static const struct cpu_vector_cost hip10c_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 5, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &hip10c_advsimd_vector_cost, /* advsimd */
+ nullptr, /* sve */
+ nullptr /* issue_info */
+};
+
static const advsimd_vec_cost hip11_advsimd_vector_cost =
{
2, /* int_stmt_cost */
@@ -1455,6 +1518,17 @@ static const cpu_prefetch_tune hip09_prefetch_tune =
-1 /* default_opt_level */
};
+static const cpu_prefetch_tune hip10c_prefetch_tune =
+{
+ 0, /* num_slots */
+ 64, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ 512, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const cpu_prefetch_tune hip11_prefetch_tune =
{
0, /* num_slots */
@@ -1865,6 +1939,40 @@ static const struct tune_params hip09_tunings =
&hip09_prefetch_tune
};
+static const struct tune_params hip10c_tunings =
+{
+ &hip10c_extra_costs,
+ &hip10c_addrcost_table,
+ &hip10c_regmove_cost,
+ &hip10c_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_256, /* sve_width */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
+ 4, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
+ "16", /* function_align. */
+ "4", /* jump_align. */
+ "8", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ &hip10c_prefetch_tune
+};
+
static const struct tune_params hip11_tunings =
{
&hip11_extra_costs,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6b4341866..96b8ab471 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -549,6 +549,7 @@
(include "tsv110.md")
(include "thunderx3t110.md")
(include "hip09.md")
+(include "hip10c.md")
(include "hip11.md")
;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/hip10c.md b/gcc/config/aarch64/hip10c.md
new file mode 100644
index 000000000..a4ab2a3e3
--- /dev/null
+++ b/gcc/config/aarch64/hip10c.md
@@ -0,0 +1,562 @@
+;; hip10c pipeline description
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;;
+;;Contributed by liyunfei
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "hip10c")
+(define_automaton "hip10c_ldst")
+(define_automaton "hip10c_fsu")
+
+(define_attr "hip10c_type"
+ "hip10c_neon_abs, hip10c_neon_fp_arith, hip10c_neon_mul, hip10c_neon_mla,
+ hip10c_neon_dot, hip10c_neon_fp_div, hip10c_neon_fp_sqrt,
+ hip10c_neon_ins, hip10c_neon_load1, hip10c_neon_load1_lanes,
+ hip10c_neon_load2and4, hip10c_neon_load3_3reg,
+ hip10c_neon_load4_4reg, hip10c_neon_store1and2,
+ hip10c_neon_store1_1reg, hip10c_neon_store1_2reg,
+ hip10c_neon_store1_3reg, hip10c_neon_store1_4reg,
+ hip10c_neon_store3and4_lane, hip10c_neon_store3_3reg,
+ hip10c_neon_store4_4reg, unknown"
+ (cond [
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add,neon_add_q,\
+ neon_neg,neon_neg_q,neon_sub,neon_sub_q,\
+ neon_qadd,neon_qadd_q,\
+ neon_add_long,neon_sub_long,\
+ neon_qabs,neon_qabs_q,neon_qneg,\
+ neon_qneg_q,neon_qsub,neon_qsub_q,neon_compare,\
+ neon_compare_q,neon_compare_zero,\
+ neon_compare_zero_q,neon_logic,neon_logic_q,\
+ neon_minmax,neon_minmax_q,neon_tst,\
+ neon_tst_q,neon_bsl,neon_bsl_q,\
+ neon_cls,neon_cls_q,neon_ext,\
+ neon_ext_q,neon_rev,neon_rev_q,\
+ neon_tbl1,neon_tbl1_q,neon_tbl2,neon_fp_abs_s,\
+ neon_fp_abs_s_q,neon_fp_abs_d,\
+ neon_fp_neg_s,neon_fp_neg_s_q,\
+ neon_fp_neg_d,neon_fp_neg_d_q,\
+ neon_shift_imm_narrow_q,neon_move,neon_move_q")
+ (const_string "hip10c_neon_abs")
+ (eq_attr "type" "neon_abd,neon_abd_q,\
+ neon_add_widen,neon_sub_widen,\
+ neon_arith_acc,neon_arith_acc_q,\
+ neon_add_halve,neon_add_halve_q,\
+ neon_sub_halve,neon_sub_halve_q,\
+ neon_add_halve_narrow_q,\
+ neon_sub_halve_narrow_q,neon_reduc_add,\
+ neon_reduc_add_q,\
+ neon_sat_mla_b_long,\
+ neon_sat_shift_imm,\
+ neon_sat_shift_imm_q,neon_shift_imm_long,\
+ neon_shift_imm,neon_shift_imm_q,neon_cnt,\
+ neon_cnt_q,neon_fp_recpe_s,\
+ neon_fp_recpe_d,\
+ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\
+ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_s,\
+ neon_fp_recpx_d,\
+ neon_tbl3,neon_tbl2_q,neon_to_gp,\
+ neon_to_gp_q,neon_fp_abd_s,neon_fp_abd_s_q,\
+ neon_fp_abd_d,neon_fp_abd_d_q,\
+ neon_fp_addsub_s,neon_fp_addsub_s_q,\
+ neon_fp_addsub_d,neon_fp_addsub_d_q,\
+ neon_fp_compare_s,neon_fp_compare_s_q,\
+ neon_fp_compare_d,neon_fp_compare_d_q,\
+ neon_fp_cvt_widen_s,neon_fp_to_int_s,\
+ neon_fp_to_int_s_q,neon_fp_to_int_d,\
+ neon_fp_to_int_d_q,neon_fp_minmax_s,\
+ neon_fp_minmax_s_q,neon_fp_minmax_d,\
+ neon_fp_minmax_d_q,neon_fp_round_s,\
+ neon_fp_round_s_q,neon_fp_cvt_narrow_d_q,\
+ neon_fp_round_d,neon_fp_round_d_q,\
+ neon_fp_cvt_narrow_s_q")
+ (const_string "hip10c_neon_fp_arith")
+ (eq_attr "type" "neon_reduc_minmax_q,neon_reduc_minmax,\
+ neon_sat_mul_h,neon_sat_mul_h_q,\
+ neon_sat_mul_s,neon_sat_mul_s_q,\
+ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\
+ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\
+ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\
+ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\
+ neon_mul_s_long,neon_mul_h_scalar_long,\
+ neon_mul_s_scalar_long,\
+ neon_sat_mul_b,neon_sat_mul_b_q,\
+ neon_sat_mul_b_long,neon_mul_b,neon_mul_b_q,\
+ neon_mul_b_long,\
+ neon_mla_b,neon_mla_b_q,neon_mla_b_long,\
+ neon_mla_h,neon_mla_h_q,\
+ neon_mla_s,neon_mla_h_scalar,\
+ neon_mla_h_scalar_q,neon_mla_s_scalar,\
+ neon_mla_h_long,\
+ neon_mla_s_long,neon_sat_mla_h_long,\
+ neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long,neon_mla_s_scalar_long,\
+ neon_mla_h_scalar_long,neon_mla_s_scalar_q,\
+ neon_shift_acc,neon_shift_acc_q,\
+ neon_sat_shift_imm_narrow_q,\
+ neon_tbl4,neon_tbl3_q,neon_fp_reduc_add_s,\
+ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d,\
+ neon_fp_reduc_add_d_q,neon_fp_reduc_minmax_s,\
+ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_mul_s_q,\
+ neon_fp_mul_d,neon_fp_mul_d_q,\
+ neon_fp_mul_d_scalar_q,neon_fp_mul_s_scalar,\
+ neon_fp_mul_s_scalar_q,\
+ neon_fp_recpe_s_q,neon_fp_recpe_d_q,\
+ neon_fp_recpx_s_q,neon_fp_recpx_d_q")
+ (const_string "hip10c_neon_mul")
+ (eq_attr "type" "neon_mla_s_q,\
+ neon_fp_recps_s,\
+ neon_fp_recps_s_q,neon_fp_recps_d,\
+ neon_fp_recps_d_q,neon_tbl4_q,\
+ neon_fp_mla_s,\
+ neon_fp_mla_d,neon_fp_mla_d_q,\
+ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
+ neon_fp_mla_d_scalar_q,\
+ neon_shift_reg,neon_shift_reg_q,\
+ neon_sat_shift_reg,neon_sat_shift_reg_q")
+ (const_string "hip10c_neon_mla")
+ (eq_attr "type" "neon_dot,neon_dot_q")
+ (const_string "hip10c_neon_dot")
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\
+ neon_fp_div_d,neon_fp_div_d_q")
+ (const_string "hip10c_neon_fp_div")
+ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
+ neon_fp_sqrt_d,neon_fp_sqrt_d_q")
+ (const_string "hip10c_neon_fp_sqrt")
+ (eq_attr "type" "neon_dup,neon_dup_q,\
+ neon_ins,neon_ins_q")
+ (const_string "hip10c_neon_ins")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
+ neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load1_3reg,neon_load1_3reg_q,\
+ neon_load1_4reg,neon_load1_4reg_q")
+ (const_string "hip10c_neon_load1")
+ (eq_attr "type" "neon_load1_one_lane,\
+ neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q")
+ (const_string "hip10c_neon_load1_lanes")
+ (eq_attr "type" "neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,\
+ neon_load2_one_lane,neon_load2_2reg,\
+ neon_load2_2reg_q,neon_load3_one_lane,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_all_lanes,\
+ neon_load4_all_lanes_q")
+ (const_string "hip10c_neon_load2and4")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q")
+ (const_string "hip10c_neon_load3_3reg")
+ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q")
+ (const_string "hip10c_neon_load4_4reg")
+ (eq_attr "type" "neon_store1_one_lane,\
+ neon_store1_one_lane_q,neon_store2_one_lane,\
+ neon_store2_one_lane_q,neon_store2_2reg,\
+ neon_store2_2reg_q")
+ (const_string "hip10c_neon_store1and2")
+ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q")
+ (const_string "hip10c_neon_store1_1reg")
+ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q")
+ (const_string "hip10c_neon_store1_2reg")
+ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q")
+ (const_string "hip10c_neon_store1_3reg")
+ (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q")
+ (const_string "hip10c_neon_store1_4reg")
+ (eq_attr "type" "neon_store3_one_lane,\
+ neon_store3_one_lane_q,neon_store4_one_lane,\
+ neon_store4_one_lane_q")
+ (const_string "hip10c_neon_store3and4_lane")
+ (eq_attr "type" "neon_store3_3reg,\
+ neon_store3_3reg_q")
+ (const_string "hip10c_neon_store3_3reg")
+ (eq_attr "type" "neon_store4_4reg,\
+ neon_store4_4reg_q")
+ (const_string "hip10c_neon_store4_4reg")]
+ (const_string "unknown")))
+
+; The hip10c core is modelled as a multiple-issue pipeline that has
+; the following functional units.
+; 1. Two pipelines for branch micro operations: BRU1, BRU2
+
+(define_cpu_unit "hip10c_bru0" "hip10c")
+(define_cpu_unit "hip10c_bru1" "hip10c")
+
+(define_reservation "hip10c_bru01" "hip10c_bru0|hip10c_bru1")
+
+; 2. Four pipelines for single cycle integer micro operations: ALUs1, ALUs2, ALUs3, ALUs4
+
+(define_cpu_unit "hip10c_alus0" "hip10c")
+(define_cpu_unit "hip10c_alus1" "hip10c")
+(define_cpu_unit "hip10c_alus2" "hip10c")
+(define_cpu_unit "hip10c_alus3" "hip10c")
+
+(define_reservation "hip10c_alus0123" "hip10c_alus0|hip10c_alus1|hip10c_alus2|hip10c_alus3")
+(define_reservation "hip10c_alus01" "hip10c_alus0|hip10c_alus1")
+(define_reservation "hip10c_alus23" "hip10c_alus2|hip10c_alus3")
+
+; 3. Two pipelines for multi-cycle integer micro operations: ALUm1, ALUm2
+
+(define_cpu_unit "hip10c_alum0" "hip10c")
+(define_cpu_unit "hip10c_alum1" "hip10c")
+
+(define_reservation "hip10c_alum01" "hip10c_alum0|hip10c_alum1")
+
+; 4. Two pipelines for load micro operations: Load1, Load2
+
+(define_cpu_unit "hip10c_load0" "hip10c_ldst")
+(define_cpu_unit "hip10c_load1" "hip10c_ldst")
+
+(define_reservation "hip10c_ld01" "hip10c_load0|hip10c_load1")
+
+; 5. Two pipelines for store micro operations: Store1, Store2
+
+(define_cpu_unit "hip10c_store0" "hip10c_ldst")
+(define_cpu_unit "hip10c_store1" "hip10c_ldst")
+
+(define_reservation "hip10c_st01" "hip10c_store0|hip10c_store1")
+
+; 6. Two pipelines for store data micro operations: STD0,STD1
+
+(define_cpu_unit "hip10c_store_data0" "hip10c_ldst")
+(define_cpu_unit "hip10c_store_data1" "hip10c_ldst")
+
+(define_reservation "hip10c_std01" "hip10c_store_data0|hip10c_store_data1")
+
+; 7. Two asymmetric pipelines for ASIMD and FP micro operations: FSU1, FSU2
+
+(define_cpu_unit "hip10c_fsu0" "hip10c_fsu")
+(define_cpu_unit "hip10c_fsu1" "hip10c_fsu")
+
+(define_reservation "hip10c_fsu01" "hip10c_fsu0|hip10c_fsu1")
+
+
+; 8. Two pipelines for SVE operations, shared with fsu0 and fsu1: SVE1, SVE2
+
+;; Simple Execution Unit:
+;
+;; Simple ALU without shift
+(define_insn_reservation "hip10c_alu" 1
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ adc_imm,adc_reg,\
+ alu_sreg,logic_reg,\
+ mov_imm,mov_reg,\
+ csel,rotate_imm,bfm,\
+ clz,rbit,rev"))
+ "hip10c_alus0123")
+
+(define_insn_reservation "hip10c_alus" 1
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "alus_sreg,alus_imm,\
+ adcs_reg,adcs_imm,\
+ logics_imm,logics_reg,adr"))
+ "hip10c_alus23")
+
+;; ALU ops with shift and extend
+(define_insn_reservation "hip10c_alu_ext_shift" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "alu_ext,alus_ext,\
+ logics_shift_imm,logics_shift_reg,\
+ logic_shift_reg,\
+ logic_shift_imm"))
+ "hip10c_alum01")
+
+;; Multiplies instructions
+(define_insn_reservation "hip10c_mult" 3
+ (and (eq_attr "tune" "hip10c")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "hip10c_alum01")
+
+;; Integer divide
+(define_insn_reservation "hip10c_div" 10
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "udiv,sdiv"))
+ "hip10c_alum0")
+
+;; Branch execution Unit
+;
+; Branches take two issue slots.
+; No latency as there is no result
+(define_insn_reservation "hip10c_branch" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "branch,call"))
+ "hip10c_bru01 + hip10c_alus23")
+
+;; Load execution Unit
+;
+; Loads of up to two words.
+(define_insn_reservation "hip10c_load1" 4
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "load_4,load_8"))
+ "hip10c_ld01")
+
+; Stores of up to two words.
+(define_insn_reservation "hip10c_store1" 1
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "store_4,store_8"))
+ "hip10c_st01")
+
+;; FP data processing instructions.
+
+(define_insn_reservation "hip10c_fp_arith" 1
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\
+ f_mrc"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_fp_cmp" 4
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "hip10c_fsu01+hip10c_alus23")
+
+(define_insn_reservation "hip10c_fp_ccmp" 7
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fccmps,fccmpd"))
+ "hip10c_alus01+hip10c_fsu01+hip10c_alus23")
+
+(define_insn_reservation "hip10c_fp_csel" 4
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fcsel,f_mcr"))
+ "hip10c_alus01+hip10c_fsu01")
+
+(define_insn_reservation "hip10c_fp_divs" 7
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fdivs"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_fp_divd" 10
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fdivd"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_fp_sqrts" 9
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fsqrts"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_fp_sqrtd" 15
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fsqrtd"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_fp_mul" 3
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fmuls,fmuld"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_fp_add" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\
+ f_rints,f_rintd"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_fp_mac" 4
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "fmacs,fmacd"))
+ "hip10c_fsu01")
+
+;; FP miscellaneous instructions.
+
+(define_insn_reservation "hip10c_fp_cvt" 5
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "f_cvtf2i"))
+ "hip10c_fsu01+hip10c_alus23")
+
+(define_insn_reservation "hip10c_fp_cvt2" 5
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "f_cvti2f"))
+ "hip10c_alus01+hip10c_fsu01")
+
+;; FP Load Instructions
+
+(define_insn_reservation "hip10c_fp_load" 7
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "hip10c_ld01")
+
+(define_insn_reservation "hip10c_fp_load2" 6
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "neon_ldp_q,neon_ldp"))
+ "hip10c_ld01+hip10c_alus01")
+
+;; FP store instructions
+
+(define_insn_reservation "hip10c_fp_store" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "f_stores,f_stored"))
+ "hip10c_st01+hip10c_std01")
+
+;; ASIMD integer and fp instructions
+
+(define_insn_reservation "hip10c_asimd_base1" 1
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_abs"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_base2" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_fp_arith"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_base3" 3
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_mul"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_base4" 4
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_mla"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_base5" 5
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "neon_fp_mul_s"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_dot" 3
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_dot"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_bfmmla" 9
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "neon_fp_mla_s_q"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_fdiv" 15
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_fp_div"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_fsqrt" 25
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_fp_sqrt"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_pmull" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "crypto_pmull"))
+ "hip10c_fsu1")
+
+(define_insn_reservation "hip10c_asimd_dup" 4
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_ins"))
+ "hip10c_alus01+hip10c_fsu01")
+
+;; ASIMD load instructions
+
+(define_insn_reservation "hip10c_asimd_ld1_reg" 6
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_load1"))
+ "hip10c_ld01")
+
+(define_insn_reservation "hip10c_asimd_ld1_lane" 7
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_load1_lanes"))
+ "hip10c_ld01+hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_ld23" 8
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_load2and4"))
+ "hip10c_ld01+hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_ld3_mtp" 9
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_load3_3reg"))
+ "hip10c_ld01+hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_ld4_mtp" 14
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_load4_4reg"))
+ "hip10c_ld01+hip10c_fsu01")
+
+;; ASIMD store instructions
+
+(define_insn_reservation "hip10c_asimd_st12" 1
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_store1and2"))
+ "hip10c_st01+hip10c_std01")
+
+(define_insn_reservation "hip10c_asimd_st1_1reg" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_store1_1reg"))
+ "hip10c_st01+hip10c_std01")
+
+(define_insn_reservation "hip10c_asimd_st1_2reg" 3
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_store1_2reg"))
+ "hip10c_st01+hip10c_std01")
+
+(define_insn_reservation "hip10c_asimd_st1_3reg" 4
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_store1_3reg"))
+ "hip10c_st01+hip10c_std01")
+
+(define_insn_reservation "hip10c_asimd_st1_4reg" 5
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_store1_4reg"))
+ "hip10c_st01+hip10c_std01")
+
+(define_insn_reservation "hip10c_asimd_st34_lane" 4
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_store3and4_lane"))
+ "hip10c_fsu01+hip10c_st01+hip10c_std01")
+
+(define_insn_reservation "hip10c_asimd_st3_mtp" 7
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_store3_3reg"))
+ "hip10c_fsu01+hip10c_st01+hip10c_std01")
+
+(define_insn_reservation "hip10c_asimd_st4_mtp" 10
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "hip10c_type" "hip10c_neon_store4_4reg"))
+ "hip10c_fsu01+hip10c_st01+hip10c_std01")
+
+;; Cryptography extensions
+
+(define_insn_reservation "hip10c_asimd_aes" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "hip10c_fsu01")
+
+(define_insn_reservation "hip10c_asimd_sha3" 1
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "crypto_sha3"))
+ "hip10c_fsu1")
+
+(define_insn_reservation "hip10c_asimd_sha1" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\
+ crypto_sha256_fast,crypto_sha512,\
+ crypto_sm3"))
+ "hip10c_fsu1")
+
+(define_insn_reservation "hip10c_asimd_sha1_and256" 4
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\
+ crypto_sm4"))
+ "hip10c_fsu1")
+
+;; CRC extension.
+
+(define_insn_reservation "hip10c_crc" 2
+ (and (eq_attr "tune" "hip10c")
+ (eq_attr "type" "crc"))
+ "hip10c_alum01")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 2ff7d860d..3eced16e3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -19220,7 +19220,7 @@ performance of the code. Permissible values for this option are:
@samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
@samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},
@samp{octeontx2f95mm},
-@samp{a64fx},@samp{hip11}
+@samp{a64fx}, @samp{hip09}, @samp{hip10c}, @samp{hip11},
@samp{thunderx}, @samp{thunderxt88},
@samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
@samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus},
--
2.25.1
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。