diff --git a/add-tsv110-pipeline-scheduling.patch b/Backport-AArch64-Add-tsv110-pipeline-scheduling.patch similarity index 88% rename from add-tsv110-pipeline-scheduling.patch rename to Backport-AArch64-Add-tsv110-pipeline-scheduling.patch index 508de27e056b9686971b5f3fdc5e30c968c866e2..6354bfb17f657918e7e4de0a846998885981d771 100644 --- a/add-tsv110-pipeline-scheduling.patch +++ b/Backport-AArch64-Add-tsv110-pipeline-scheduling.patch @@ -1,53 +1,34 @@ -diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2019-04-15 14:50:25.866378665 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2019-04-15 14:49:21.986376983 +0800 -@@ -554,6 +554,31 @@ static const struct tune_params generic_ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ - }; +From a29529542553e0c49cf1efe0808fc4a4733dc674 Mon Sep 17 00:00:00 2001 +From: xiezhiheng +Date: Wed, 22 Nov 2023 17:18:35 +0800 +Subject: [PATCH 2/4] [Backport][AArch64] Add tsv110 pipeline scheduling + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8108dfde82ad6ec43613107b2c156999e6a5cbe7 + +Committed on behalf of Wu Yuan. +--- + gcc/config/aarch64/aarch64-cores.def | 2 +- + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/tsv110.md | 708 +++++++++++++++++++++++++++ + 3 files changed, 710 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/aarch64/tsv110.md + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index ea7052388..6911f9704 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -88,6 +88,6 @@ AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH + /* V8.2 Architecture Processors. 
*/ -+static const struct tune_params tsv110_tunings = -+{ -+ &cortexa57_extra_costs, -+ &generic_addrcost_table, -+ &generic_regmove_cost, -+ &generic_vector_cost, -+ &generic_branch_cost, -+ &generic_approx_modes, -+ 4, /* memmov_cost */ -+ 4, /* issue_rate */ -+ AARCH64_FUSE_NOTHING, /* fusible_ops */ -+ 16, /* function_align. */ -+ 16, /* jump_align. */ -+ 8, /* loop_align. */ -+ 2, /* int_reassoc_width. */ -+ 4, /* fp_reassoc_width. */ -+ 1, /* vec_reassoc_width. */ -+ 2, /* min_div_recip_mul_sf. */ -+ 2, /* min_div_recip_mul_df. */ -+ 0, /* max_case_values. */ -+ 0, /* cache_line_size. */ -+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ -+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ -+}; -+ - static const struct tune_params cortexa35_tunings = - { - &cortexa53_extra_costs, -diff -urpN a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def ---- a/gcc/config/aarch64/aarch64-cores.def 2017-02-15 08:09:28.845771000 +0800 -+++ b/gcc/config/aarch64/aarch64-cores.def 2019-04-15 14:49:21.986376983 +0800 -@@ -78,6 +78,8 @@ AARCH64_CORE("xgene1", xgene1, x - AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) - AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) + /* HiSilicon ('H') cores. */ +-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16, tsv110, 0x48, 0xd01, -1) -+AARCH64_CORE("tsv110", tsv110, tsv110, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, tsv110, 0x48, 0xd01, -1) -+ - /* V8 big.LITTLE implementations. 
*/ - - AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) -diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md ---- a/gcc/config/aarch64/aarch64.md 2019-04-15 14:50:25.870378665 +0800 -+++ b/gcc/config/aarch64/aarch64.md 2019-04-15 14:49:21.986376983 +0800 + #undef AARCH64_CORE +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 5b5c401f8..0d6ed8c5c 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md @@ -226,6 +226,7 @@ (include "thunderx.md") (include "../arm/xgene1.md") @@ -56,19 +37,11 @@ diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md ;; ------------------------------------------------------------------- ;; Jumps and other miscellaneous insns -diff -urpN a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md ---- a/gcc/config/aarch64/aarch64-tune.md 2017-02-15 08:09:28.845771000 +0800 -+++ b/gcc/config/aarch64/aarch64-tune.md 2019-04-15 14:49:21.986376983 +0800 -@@ -1,5 +1,5 @@ - ;; -*- buffer-read-only: t -*- - ;; Generated automatically by gentune.sh from aarch64-cores.def - (define_attr "tune" -- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53" -+ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,tsv110,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53" - (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) -diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md ---- a/gcc/config/aarch64/tsv110.md 1970-01-01 08:00:00.000000000 +0800 -+++ 
b/gcc/config/aarch64/tsv110.md 2019-04-15 14:55:30.420081420 +0800 +diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md +new file mode 100644 +index 000000000..33fc72ab4 +--- /dev/null ++++ b/gcc/config/aarch64/tsv110.md @@ -0,0 +1,708 @@ +;; tsv110 pipeline description +;; Copyright (C) 2018 Free Software Foundation, Inc. @@ -688,7 +661,7 @@ diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md + +(define_insn_reservation "tsv110_fp_add_sub" 5 + (and (eq_attr "tune" "tsv110") -+ (eq_attr "type" "fadds,faddd,fmuls,fmuld")) ++ (eq_attr "type" "fadds,faddd,fmuls,fmuld")) + "tsv110_fsu1|tsv110_fsu2") + +(define_insn_reservation "tsv110_fp_mac" 7 @@ -778,3 +751,6 @@ diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md +;; help. +(define_bypass 1 "tsv110_*" + "tsv110_call,tsv110_branch") +-- +2.19.1 + diff --git a/Backport-AArch64-Fix-longbranch-test.patch b/Backport-AArch64-Fix-longbranch-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d26cb71e69823148230a1ffac3ae3df6c9834bf --- /dev/null +++ b/Backport-AArch64-Fix-longbranch-test.patch @@ -0,0 +1,39 @@ +From 312e8086a6a1164e8f16aff68ca175f32b3185ee Mon Sep 17 00:00:00 2001 +From: xiezhiheng +Date: Sat, 25 Nov 2023 10:50:11 +0800 +Subject: [PATCH] [Backport][AArch64] Fix longbranch test + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c7fd21762de653a19dabf837917a8ad6f9491bc0 + +Fix longbranch test so it still generates long tbz branches. 
+--- + gcc/testsuite/gcc.target/aarch64/long_branch_1.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/gcc/testsuite/gcc.target/aarch64/long_branch_1.c b/gcc/testsuite/gcc.target/aarch64/long_branch_1.c +index 46f500d36..49d8b6a22 100644 +--- a/gcc/testsuite/gcc.target/aarch64/long_branch_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/long_branch_1.c +@@ -54,10 +54,6 @@ test_and_branch (int selector, int addend, int cond) + { + start0: + return sum - 1; +-start1: +- return sum + 1; +-start2: +- return sum; + start3: + return sum - 2; + } +@@ -65,6 +61,8 @@ start3: + { + switch (selector) + { ++start1: ++start2: + CASE_ENTRY128 (1) + CASE_ENTRY64 (129) + CASE_ENTRY16 (193) +-- +2.19.1 + diff --git a/Backport-Learn-GIMPLE-pretty-printer-to-produce-nice.patch b/Backport-Learn-GIMPLE-pretty-printer-to-produce-nice.patch new file mode 100644 index 0000000000000000000000000000000000000000..12b7f6abb82606dcd2dd1c87777378aef781f9f9 --- /dev/null +++ b/Backport-Learn-GIMPLE-pretty-printer-to-produce-nice.patch @@ -0,0 +1,205 @@ +From d290efa0319b0327a6dc804a4b9ecad0e8b5a5bb Mon Sep 17 00:00:00 2001 +From: xiezhiheng +Date: Thu, 23 Nov 2023 11:01:45 +0800 +Subject: [PATCH 4/4] [Backport] Learn GIMPLE pretty printer to produce nicer + dump output. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5edb1c48f8e526a2b8f7f9d03fce9a7fdcb14b88 + +But we only port part of changes about attr-hotcold-2.c because +the rest is irrelevant. + +[Backport] Recover GOTO predictor. 
+ +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7fef86d3486c9f4208a111a41a2cc66b7328b6d9 +--- + gcc/c/c-typeck.c | 1 + + gcc/cp/constexpr.c | 1 + + gcc/cp/pt.c | 2 ++ + gcc/cp/semantics.c | 2 ++ + gcc/gimplify.c | 4 +++- + gcc/predict.def | 5 ++--- + gcc/testsuite/gcc.dg/predict-15.c | 17 +++++++++++++++++ + gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c | 13 ++++++------- + gcc/testsuite/gcc.dg/tree-ssa/vrp24.c | 10 ++++------ + 9 files changed, 38 insertions(+), 17 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/predict-15.c + +diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c +index ee365313c..cf8463da7 100644 +--- a/gcc/c/c-typeck.c ++++ b/gcc/c/c-typeck.c +@@ -9816,6 +9816,7 @@ c_finish_goto_label (location_t loc, tree label) + return NULL_TREE; + TREE_USED (decl) = 1; + { ++ add_stmt (build_predict_expr (PRED_GOTO, NOT_TAKEN)); + tree t = build1 (GOTO_EXPR, void_type_node, decl); + SET_EXPR_LOCATION (t, loc); + return add_stmt (t); +diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c +index 9082230b9..adae14b4f 100644 +--- a/gcc/cp/constexpr.c ++++ b/gcc/cp/constexpr.c +@@ -5827,6 +5827,7 @@ potential_constant_expression_1 (tree t, bool want_rval, bool strict, + + case CLEANUP_STMT: + case EMPTY_CLASS_EXPR: ++ case PREDICT_EXPR: + return false; + + case GOTO_EXPR: +diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c +index 5687bb212..b536a54af 100644 +--- a/gcc/cp/pt.c ++++ b/gcc/cp/pt.c +@@ -15161,6 +15161,8 @@ tsubst_copy (tree t, tree args, tsubst_flags_t complain, tree in_decl) + return tsubst_binary_left_fold (t, args, complain, in_decl); + case BINARY_RIGHT_FOLD_EXPR: + return tsubst_binary_right_fold (t, args, complain, in_decl); ++ case PREDICT_EXPR: ++ return t; + + default: + /* We shouldn't get here, but keep going if !flag_checking. */ +diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c +index e06637646..840d193b9 100644 +--- a/gcc/cp/semantics.c ++++ b/gcc/cp/semantics.c +@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. 
If not see + #include "omp-general.h" + #include "convert.h" + #include "gomp-constants.h" ++#include "predict.h" + + /* There routines provide a modular interface to perform many parsing + operations. They may therefore be used during actual parsing, or +@@ -628,6 +629,7 @@ finish_goto_stmt (tree destination) + + check_goto (destination); + ++ add_stmt (build_predict_expr (PRED_GOTO, NOT_TAKEN)); + return add_stmt (build_stmt (input_location, GOTO_EXPR, destination)); + } + +diff --git a/gcc/gimplify.c b/gcc/gimplify.c +index c6a06d014..dfc2fddd8 100644 +--- a/gcc/gimplify.c ++++ b/gcc/gimplify.c +@@ -2023,7 +2023,9 @@ should_warn_for_implicit_fallthrough (gimple_stmt_iterator *gsi_p, tree label) + gsi = *gsi_p; + + /* Skip all immediately following labels. */ +- while (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL) ++ while (!gsi_end_p (gsi) ++ && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL ++ || gimple_code (gsi_stmt (gsi)) == GIMPLE_PREDICT)) + gsi_next (&gsi); + + /* { ... something; default:; } */ +diff --git a/gcc/predict.def b/gcc/predict.def +index e96be12be..d7048e433 100644 +--- a/gcc/predict.def ++++ b/gcc/predict.def +@@ -141,9 +141,8 @@ DEF_PREDICTOR (PRED_RECURSIVE_CALL, "recursive call", HITRATE (75), 0) + this from FE or retire the predictor. */ + DEF_PREDICTOR (PRED_TREE_EARLY_RETURN, "early return (on trees)", HITRATE (54), 0) + +-/* Branch containing goto is probably not taken. +- FIXME: Currently not used. */ +-DEF_PREDICTOR (PRED_GOTO, "goto", HITRATE (70), 0) ++/* Branch containing goto is probably not taken. */ ++DEF_PREDICTOR (PRED_GOTO, "goto", HITRATE (66), 0) + + /* Branch ending with return constant is probably not taken. 
*/ + DEF_PREDICTOR (PRED_CONST_RETURN, "const return", HITRATE (69), 0) +diff --git a/gcc/testsuite/gcc.dg/predict-15.c b/gcc/testsuite/gcc.dg/predict-15.c +new file mode 100644 +index 000000000..2a8c3ea85 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/predict-15.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-profile_estimate" } */ ++ ++int main(int argc, char **argv) ++{ ++ if (argc == 123) ++ goto exit; ++ else ++ { ++ return 0; ++ } ++ ++exit: ++ return 1; ++} ++ ++/* { dg-final { scan-tree-dump "goto heuristics of edge" "profile_estimate"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c +index 184dd10dd..17526113d 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c +@@ -1,8 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-profile_estimate-blocks-details" } */ ++/* { dg-options "-O2 -fdump-tree-profile_estimate" } */ + +-void g(void); +-void h(void); ++int v1, v2; + void f(int x, int y) + { + if (x) goto A; +@@ -10,19 +9,19 @@ void f(int x, int y) + return; + + A: __attribute__((cold)) +- g(); ++ v1 = x; + return; + + B: __attribute__((hot)) +- h(); ++ v2 = y; + return; + } + + /* { dg-final { scan-tree-dump-times "hot label heuristics" 1 "profile_estimate" } } */ + /* { dg-final { scan-tree-dump-times "cold label heuristics" 1 "profile_estimate" } } */ +-/* { dg-final { scan-tree-dump "A \\\[0\\\..*\\\]" "profile_estimate" } } */ ++/* { dg-final { scan-tree-dump-times "combined heuristics: 0\\\..*" 1 "profile_estimate" } } */ + + /* Note: we're attempting to match some number > 6000, i.e. > 60%. + The exact number ought to be tweekable without having to juggle + the testcase around too much. 
*/ +-/* { dg-final { scan-tree-dump "B \\\[\[6-9\]\[0-9\]\\\..*\\\]" "profile_estimate" } } */ ++/* { dg-final { scan-tree-dump-times "combined heuristics: \[6-9\]\[0-9\]\\\..*" 1 "profile_estimate" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c +index 853ee21bb..ed49e25f8 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-vrp1-details" } */ ++/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-vrp1-details -fdump-tree-optimized" } */ + + + struct rtx_def; +@@ -86,10 +86,8 @@ L7: + + /* The first n_sets > 0 test can be simplfiied into n_sets == 1 since + n_sets can only have the values [0, 1] as it's the result of a +- boolean operation. ++ boolean operation. */ + +- The second n_sets > 0 test can also be simplified into n_sets == 1 +- as the only way to reach the tests is when n_sets <= 1 and the only +- value which satisfies both conditions is n_sets == 1. */ +-/* { dg-final { scan-tree-dump-times "Simplified relational" 2 "vrp1" } } */ ++/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */ ++/* { dg-final { scan-tree-dump-times "if " 4 "optimized" } } */ + +-- +2.19.1 + diff --git a/Backport-Rework-cold-and-hot-label-attributes-in-pre.patch b/Backport-Rework-cold-and-hot-label-attributes-in-pre.patch new file mode 100644 index 0000000000000000000000000000000000000000..aec1f1b0c8e4de8ea9ccf88bc18d227cd2ad4d26 --- /dev/null +++ b/Backport-Rework-cold-and-hot-label-attributes-in-pre.patch @@ -0,0 +1,90 @@ +From 96b5d381d4a670fd3f2fed4ca73ded601ec779e9 Mon Sep 17 00:00:00 2001 +From: xiezhiheng +Date: Fri, 24 Nov 2023 15:47:33 +0800 +Subject: [PATCH 3/4] [Backport] Rework cold and hot label attributes in + predict.c. 
+ +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=93c18375823fdd0e384f673f75e39136719135dd +--- + gcc/gimplify.c | 10 +++++++- + gcc/predict.c | 23 ------------------- + .../gcc.dg/tree-ssa/attr-hotcold-2.c | 4 ++-- + 3 files changed, 11 insertions(+), 26 deletions(-) + +diff --git a/gcc/gimplify.c b/gcc/gimplify.c +index 5264a4f3d..c6a06d014 100644 +--- a/gcc/gimplify.c ++++ b/gcc/gimplify.c +@@ -2340,10 +2340,18 @@ gimplify_label_expr (tree *expr_p, gimple_seq *pre_p) + gcc_assert (decl_function_context (LABEL_EXPR_LABEL (*expr_p)) + == current_function_decl); + +- glabel *label_stmt = gimple_build_label (LABEL_EXPR_LABEL (*expr_p)); ++ tree label = LABEL_EXPR_LABEL (*expr_p); ++ glabel *label_stmt = gimple_build_label (label); + gimple_set_location (label_stmt, EXPR_LOCATION (*expr_p)); + gimplify_seq_add_stmt (pre_p, label_stmt); + ++ if (lookup_attribute ("cold", DECL_ATTRIBUTES (label))) ++ gimple_seq_add_stmt (pre_p, gimple_build_predict (PRED_COLD_LABEL, ++ NOT_TAKEN)); ++ else if (lookup_attribute ("hot", DECL_ATTRIBUTES (label))) ++ gimple_seq_add_stmt (pre_p, gimple_build_predict (PRED_HOT_LABEL, ++ TAKEN)); ++ + return GS_ALL_DONE; + } + +diff --git a/gcc/predict.c b/gcc/predict.c +index fa4e626fa..d68fb17ba 100644 +--- a/gcc/predict.c ++++ b/gcc/predict.c +@@ -2701,29 +2701,6 @@ tree_estimate_probability_bb (basic_block bb) + + FOR_EACH_EDGE (e, ei, bb->succs) + { +- /* Predict edges to user labels with attributes. */ +- if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) +- { +- gimple_stmt_iterator gi; +- for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi)) +- { +- glabel *label_stmt = dyn_cast (gsi_stmt (gi)); +- tree decl; +- +- if (!label_stmt) +- break; +- decl = gimple_label_label (label_stmt); +- if (DECL_ARTIFICIAL (decl)) +- continue; +- +- /* Finally, we have a user-defined label. 
*/ +- if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl))) +- predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN); +- else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl))) +- predict_edge_def (e, PRED_HOT_LABEL, TAKEN); +- } +- } +- + /* Predict early returns to be probable, as we've already taken + care for error returns and other cases are often used for + fast paths through function. +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c +index 13d2916c4..184dd10dd 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c +@@ -20,9 +20,9 @@ void f(int x, int y) + + /* { dg-final { scan-tree-dump-times "hot label heuristics" 1 "profile_estimate" } } */ + /* { dg-final { scan-tree-dump-times "cold label heuristics" 1 "profile_estimate" } } */ +-/* { dg-final { scan-tree-dump-times "block 4, loop depth 0, count 0, freq \[1-4\]\[^0-9\]" 3 "profile_estimate" } } */ ++/* { dg-final { scan-tree-dump "A \\\[0\\\..*\\\]" "profile_estimate" } } */ + + /* Note: we're attempting to match some number > 6000, i.e. > 60%. + The exact number ought to be tweekable without having to juggle + the testcase around too much. 
*/ +-/* { dg-final { scan-tree-dump-times "block 5, loop depth 0, count 0, freq \[6-9\]\[0-9\]\[0-9\]\[0-9\]" 3 "profile_estimate" } } */ ++/* { dg-final { scan-tree-dump "B \\\[\[6-9\]\[0-9\]\\\..*\\\]" "profile_estimate" } } */ +-- +2.19.1 + diff --git a/Backport-aarch64-Add-HiSilicon-tsv110-CPU-support.patch b/Backport-aarch64-Add-HiSilicon-tsv110-CPU-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..8a16ec1a876beb0243f158a1095476d36a672e3e --- /dev/null +++ b/Backport-aarch64-Add-HiSilicon-tsv110-CPU-support.patch @@ -0,0 +1,281 @@ +From a9c1a43518391483789e3b036b1d7242b7576c4e Mon Sep 17 00:00:00 2001 +From: xiezhiheng +Date: Wed, 22 Nov 2023 16:46:58 +0800 +Subject: [PATCH 1/4] [Backport][aarch64] Add HiSilicon tsv110 CPU support + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=910f72e78fe76993b8a9bcc9e385a788446f1f10 + +This patch adds an mcpu for HiSilicon's tsv110, which supports v8_4A. +It has been tested on aarch64 with no regressions from this patch. + +[aarch64] Correct architecture for tsv110. + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5a8d95cc43f3ff425fa58bf4025a8527320fb46c + +For HiSilicon's tsv110 cpu core, it supports some v8_4A features, but +some mandatory features are not implemented. 
+--- + gcc/config/aarch64/aarch64-cores.def | 5 ++ + gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++++++++++++++++++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.c | 71 ++++++++++++++++ + gcc/doc/invoke.texi | 2 +- + 5 files changed, 182 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 92b57cffb..ea7052388 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -85,4 +85,9 @@ AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH + AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) + AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) + ++/* V8.2 Architecture Processors. */ ++ ++/* HiSilicon ('H') cores. */ ++AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16, tsv110, 0x48, 0xd01, -1) ++ + #undef AARCH64_CORE +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index 070c083be..4595c5e71 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -334,4 +334,108 @@ const struct cpu_cost_table thunderx2t99_extra_costs = + } + }; + ++const struct cpu_cost_table tsv110_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. 
*/ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. 
*/ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; ++ + #endif +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index c948846af..4231e56ec 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53" ++ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,tsv110" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 22e76e083..58e91ecce 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -254,6 +254,22 @@ static const struct cpu_addrcost_table xgene1_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table tsv110_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -362,6 +378,16 @@ static const struct cpu_regmove_cost thunderx2t99_regmove_cost = + 4 /* FP2FP 
*/ + }; + ++static const struct cpu_regmove_cost tsv110_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + /* Generic costs for vector insn classes. */ + static const struct cpu_vector_cost generic_vector_cost = + { +@@ -402,6 +428,25 @@ static const struct cpu_vector_cost thunderx_vector_cost = + 3 /* cond_not_taken_branch_cost */ + }; + ++static const struct cpu_vector_cost tsv110_vector_cost = ++{ ++ 1, /* scalar_int_stmt_cost */ ++ 1, /* scalar_fp_stmt_cost */ ++ 5, /* scalar_load_cost */ ++ 1, /* scalar_store_cost */ ++ 2, /* vec_int_stmt_cost */ ++ 2, /* vec_fp_stmt_cost */ ++ 2, /* vec_permute_cost */ ++ 3, /* vec_to_scalar_cost */ ++ 2, /* scalar_to_vec_cost */ ++ 5, /* vec_align_load_cost */ ++ 5, /* vec_unalign_load_cost */ ++ 1, /* vec_unalign_store_cost */ ++ 1, /* vec_store_cost */ ++ 1, /* cond_taken_branch_cost */ ++ 1 /* cond_not_taken_branch_cost */ ++}; ++ + /* Generic costs for vector insn classes. */ + static const struct cpu_vector_cost cortexa57_vector_cost = + { +@@ -731,6 +776,32 @@ static const struct tune_params thunderx_tunings = + (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */ + }; + ++static const struct tune_params tsv110_tunings = ++{ ++ &tsv110_extra_costs, ++ &tsv110_addrcost_table, ++ &tsv110_regmove_cost, ++ &tsv110_vector_cost, ++ &generic_branch_cost, ++ &generic_approx_modes, ++ 4, /* memmov_cost */ ++ 4, /* issue_rate */ ++ (AARCH64_FUSE_AES_AESMC ++ | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ ++ 16, /* function_align. */ ++ 4, /* jump_align. */ ++ 8, /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ 0, /* max_case_values. */ ++ 0, /* cache_line_size. */ ++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. 
*/ ++ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ ++}; ++ + static const struct tune_params xgene1_tunings = + { + &xgene1_extra_costs, +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 808ebe57f..430c0d5db 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -14028,7 +14028,7 @@ performance of the code. Permissible values for this option are: + @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a57}, + @samp{cortex-a72}, @samp{cortex-a73}, @samp{exynos-m1}, @samp{falkor}, + @samp{qdf24xx}, @samp{xgene1}, @samp{vulcan}, @samp{thunderx}, +-@samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81}, ++@samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110}, + @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53}, + @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35}, + @samp{cortex-a73.cortex-a53}, @samp{native}. +-- +2.19.1 + diff --git a/gcc.spec b/gcc.spec index 94273a73227f73bc8c37d4fcd33a0855662319e9..46cea26bdb21f66dfa5ccf39838205102086ee27 100644 --- a/gcc.spec +++ b/gcc.spec @@ -41,7 +41,7 @@ Version: 7.3.0 # number 2020033101 meaning the openEuler 20.03 release date plus 01 to # replace DATE and will never change it in the future. 
%global openEulerDATE 2020033101 -Release: %{openEulerDATE}.55 +Release: %{openEulerDATE}.56 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD Group: Development/Languages #Source0: hcc-aarch64-linux-release.tar.bz2 @@ -73,7 +73,6 @@ Patch2: gcc-adapt-to-isl.patch Patch3: sanitizer-pr-85835.patch Patch4: CVE-2018-12886.patch Patch5: CVE-2019-15847.patch -Patch7: add-tsv110-pipeline-scheduling.patch Patch12: aarch64-fix-tls-negative-offset.patch Patch14: arm-fix-push-minipool.patch Patch22: arm-bigendian-disable-interleaved-LS-vectorize.patch @@ -100,6 +99,11 @@ Patch49: aarch64-Rename-hard_fp_offset-to-bytes_above_hard_fp.patch Patch50: aarch64-Tweak-frame_size-comment.patch Patch51: Backport-check-function-bodies-support.patch Patch52: aarch64-Make-stack-smash-canary-protect-saved-registers.patch +Patch53: Backport-aarch64-Add-HiSilicon-tsv110-CPU-support.patch +Patch54: Backport-AArch64-Add-tsv110-pipeline-scheduling.patch +Patch55: Backport-Rework-cold-and-hot-label-attributes-in-pre.patch +Patch56: Backport-Learn-GIMPLE-pretty-printer-to-produce-nice.patch +Patch57: Backport-AArch64-Fix-longbranch-test.patch #AutoReqProv: off AutoReq: true @@ -557,39 +561,7 @@ package or when debugging this package. %setup -q -n gcc-%{version} -a 1 -a 2 /bin/pwd - -%patch1 -p1 -%patch2 -p1 -%patch3 -p1 -%patch4 -p1 -%patch5 -p1 -%patch7 -p1 -%patch12 -p1 -%patch14 -p1 -%patch22 -p1 -%patch23 -p1 -%patch25 -p1 -%patch29 -p1 -%patch31 -p1 -%patch34 -p1 -%patch35 -p1 -%patch36 -p1 -%patch37 -p1 -%patch38 -p1 -%patch39 -p1 -%patch40 -p1 -%patch41 -p1 -%patch42 -p1 -%patch43 -p1 -%patch44 -p1 -%patch45 -p1 -%patch46 -p1 -%patch47 -p1 -%patch48 -p1 -%patch49 -p1 -%patch50 -p1 -%patch51 -p1 -%patch52 -p1 +%autopatch -p1 %if 0%{?_enable_debug_packages} cat > split-debuginfo.sh <<\EOF @@ -3364,6 +3336,9 @@ fi %changelog +* Mon Nov 27 2023 eastb233 - 7.3.0-2020033101.56 +- Fix incorrect port of tsv110 pipeline. 
+ * Fri Nov 24 2023 zhaoshujian - 7.3.0-2020033101.55 - Change isl version from isl-0.21 to isl-0.16.1 to fix the deja test ice.