diff --git a/0196-fix-bugs-in-loop-detections-add-filter-to-SSA-statem.patch b/0196-fix-bugs-in-loop-detections-add-filter-to-SSA-statem.patch new file mode 100644 index 0000000000000000000000000000000000000000..2d38f8069f97eb2fb4863aecbc88461b0201be88 --- /dev/null +++ b/0196-fix-bugs-in-loop-detections-add-filter-to-SSA-statem.patch @@ -0,0 +1,563 @@ +From 591a1ed8489e2c230f19220599b4ce8bb89d6148 Mon Sep 17 00:00:00 2001 +From: yzyssdd +Date: Thu, 6 Jun 2024 15:42:52 +0800 +Subject: [PATCH 2/2] fix bugs in loop detections, add filter to SSA statement + and corresponding deja cases. Fix bugs so llc pass can detect it when going + back into a loop after jumping out of a loop. Return directly from processing + a non-ssa statement when looking for references in a gimple call. + +--- + .../gcc.dg/llc-allocate/llc-filter-ssa.c | 30 ++++ + .../gcc.dg/llc-allocate/llc-loop-generate.c | 168 ++++++++++++++++++ + .../aarch64/sve/acle/general-c/prefetch_1.c | 10 +- + .../acle/general-c/prefetch_gather_index_1.c | 8 +- + .../acle/general-c/prefetch_gather_index_2.c | 8 +- + .../acle/general-c/prefetch_gather_offset_1.c | 7 +- + .../acle/general-c/prefetch_gather_offset_2.c | 7 +- + .../acle/general-c/prefetch_gather_offset_3.c | 7 +- + .../acle/general-c/prefetch_gather_offset_4.c | 7 +- + gcc/tree-ssa-llc-allocate.c | 70 +++++--- + 10 files changed, 277 insertions(+), 45 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-filter-ssa.c + create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-loop-generate.c + +diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-filter-ssa.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-filter-ssa.c +new file mode 100644 +index 000000000..4478f7531 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-filter-ssa.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-parallelize-loops=2 -fipa-pta -fllc-allocate -S -fdump-tree-llc_allocate-details-lineno" } */ ++ ++int a, b; ++int *d; ++void f(void) ++{ 
++ int c; ++ b %= 1; ++ ++ if(1 - (b < 1)) ++ { ++ int *q = 0; ++ ++ if(a) ++ { ++ c = 0; ++lbl: ++ for(*d; *d; ++*d) ++ if(c ? : a ? : (c = 1) ? : 0) ++ *q &= 1; ++ return; ++ } ++ ++ q = (int *)1; ++ } ++ goto lbl; ++} ++ ++/* { dg-final { scan-tree-dump "Unhandled scenario for non-ssa pointer." "llc-allocate" } } */ +diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-loop-generate.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-loop-generate.c +new file mode 100644 +index 000000000..dc1f0eadc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-loop-generate.c +@@ -0,0 +1,168 @@ ++/* { dg-require-effective-target label_values } */ ++/* { dg-require-stack-size "4000" } */ ++/* { dg-options "-O2 -fllc-allocate" } */ ++ ++#include <stdlib.h> ++ ++#if __INT_MAX__ >= 2147483647 ++typedef unsigned int uint32; ++typedef signed int sint32; ++ ++typedef uint32 reg_t; ++ ++typedef unsigned long int host_addr_t; ++typedef uint32 target_addr_t; ++typedef sint32 target_saddr_t; ++ ++typedef union ++{ ++ struct ++ { ++ signed int offset:18; ++ unsigned int ignore:4; ++ unsigned int s1:8; ++ int :2; ++ signed int simm:14; ++ unsigned int s3:8; ++ unsigned int s2:8; ++ int pad2:2; ++ } f1; ++ long long ll; ++ double d; ++} insn_t; ++ ++typedef struct ++{ ++ target_addr_t vaddr_tag; ++ unsigned long int rigged_paddr; ++} tlb_entry_t; ++ ++typedef struct ++{ ++ insn_t *pc; ++ reg_t registers[256]; ++ insn_t *program; ++ tlb_entry_t tlb_tab[0x100]; ++} environment_t; ++ ++enum operations ++{ ++ LOAD32_RR, ++ METAOP_DONE ++}; ++ ++host_addr_t ++f () ++{ ++ abort (); ++} ++ ++reg_t ++simulator_kernel (int what, environment_t *env) ++{ ++ register insn_t *pc = env->pc; ++ register reg_t *regs = env->registers; ++ register insn_t insn; ++ register int s1; ++ register reg_t r2; ++ register void *base_addr = &&sim_base_addr; ++ register tlb_entry_t *tlb = env->tlb_tab; ++ ++ if (what != 0) ++ { ++ int i; ++ static void *op_map[] = ++ { ++ &&L_LOAD32_RR, ++ &&L_METAOP_DONE, ++ }; ++ insn_t *program 
= env->program; ++ for (i = 0; i < what; i++) ++ program[i].f1.offset = op_map[program[i].f1.offset] - base_addr; ++ } ++ ++ sim_base_addr:; ++ ++ insn = *pc++; ++ r2 = (*(reg_t *) (((char *) regs) + (insn.f1.s2 << 2))); ++ s1 = (insn.f1.s1 << 2); ++ goto *(base_addr + insn.f1.offset); ++ ++ L_LOAD32_RR: ++ { ++ target_addr_t vaddr_page = r2 / 4096; ++ unsigned int x = vaddr_page % 0x100; ++ insn = *pc++; ++ ++ for (;;) ++ { ++ target_addr_t tag = tlb[x].vaddr_tag; ++ host_addr_t rigged_paddr = tlb[x].rigged_paddr; ++ ++ if (tag == vaddr_page) ++ { ++ *(reg_t *) (((char *) regs) + s1) = *(uint32 *) (rigged_paddr + r2); ++ r2 = *(reg_t *) (((char *) regs) + (insn.f1.s2 << 2)); ++ s1 = insn.f1.s1 << 2; ++ goto *(base_addr + insn.f1.offset); ++ } ++ ++ if (((target_saddr_t) tag < 0)) ++ { ++ *(reg_t *) (((char *) regs) + s1) = *(uint32 *) f (); ++ r2 = *(reg_t *) (((char *) regs) + (insn.f1.s2 << 2)); ++ s1 = insn.f1.s1 << 2; ++ goto *(base_addr + insn.f1.offset); ++ } ++ ++ x = (x - 1) % 0x100; ++ } ++ ++ L_METAOP_DONE: ++ return (*(reg_t *) (((char *) regs) + s1)); ++ } ++} ++ ++insn_t program[2 + 1]; ++ ++void *malloc (); ++ ++int ++main () ++{ ++ environment_t env; ++ insn_t insn; ++ int i, res; ++ host_addr_t a_page = (host_addr_t) malloc (2 * 4096); ++ target_addr_t a_vaddr = 0x123450; ++ target_addr_t vaddr_page = a_vaddr / 4096; ++ a_page = (a_page + 4096 - 1) & -4096; ++ ++ env.tlb_tab[((vaddr_page) % 0x100)].vaddr_tag = vaddr_page; ++ env.tlb_tab[((vaddr_page) % 0x100)].rigged_paddr = a_page - vaddr_page * 4096; ++ insn.f1.offset = LOAD32_RR; ++ env.registers[0] = 0; ++ env.registers[2] = a_vaddr; ++ *(sint32 *) (a_page + a_vaddr % 4096) = 88; ++ insn.f1.s1 = 0; ++ insn.f1.s2 = 2; ++ ++ for (i = 0; i < 2; i++) ++ program[i] = insn; ++ ++ insn.f1.offset = METAOP_DONE; ++ insn.f1.s1 = 0; ++ program[2] = insn; ++ ++ env.pc = program; ++ env.program = program; ++ ++ res = simulator_kernel (2 + 1, &env); ++ ++ if (res != 88) ++ abort (); ++ exit (0); ++} ++#else 
++main(){ exit (0); } ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c +index 316f77fc7..fba3b7447 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c +@@ -10,8 +10,12 @@ f1 (svbool_t pg, int32_t *s32_ptr, enum svprfop op) + svprfb (pg, s32_ptr, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ + svprfb (pg, s32_ptr, (enum svprfop) 0); + svprfb (pg, s32_ptr, (enum svprfop) 5); +- svprfb (pg, s32_ptr, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ +- svprfb (pg, s32_ptr, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ ++ svprfb (pg, s32_ptr, (enum svprfop) 6); ++ svprfb (pg, s32_ptr, (enum svprfop) 7); + svprfb (pg, s32_ptr, (enum svprfop) 8); +- svprfb (pg, s32_ptr, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ ++ svprfb (pg, s32_ptr, (enum svprfop) 14); ++ svprfb (pg, s32_ptr, (enum svprfop) 15); ++ svprfb (pg, s32_ptr, (enum svprfop) 16); /* { dg-error {passing 16 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ ++ ++ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c +index c33c95440..cf387bf92 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c +@@ -46,8 +46,10 @@ f1 (svbool_t pg, int32_t *s32_ptr, void *void_ptr, void **ptr_ptr, + svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) -1); /* 
{ dg-error {passing 4294967295 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ + svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 0); + svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 5); +- svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ +- svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 6); ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 7); + svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 8); +- svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 14); ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 15); ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c +index 3d7797305..bc99b29d1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c +@@ -10,8 +10,10 @@ f1 (svbool_t pg, int32_t *s32_ptr, svint32_t s32, enum svprfop op) + svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ + svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 0); + svprfh_gather_s32index 
(pg, s32_ptr, s32, (enum svprfop) 5); +- svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ +- svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 6); ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 7); + svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 8); +- svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 14); ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ ++ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c +index cc61901cb..8b304ed89 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c +@@ -46,8 +46,9 @@ f1 (svbool_t pg, int32_t *s32_ptr, void *void_ptr, void **ptr_ptr, + svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ + svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 0); + svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 5); +- svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ +- 
svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 6); ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 7); + svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 8); +- svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 14); ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c +index b74721fad..64e55dd76 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c +@@ -30,8 +30,9 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, + svprfb_gather (pg, u32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ + svprfb_gather (pg, u32, (enum svprfop) 0); + svprfb_gather (pg, u32, (enum svprfop) 5); +- svprfb_gather (pg, u32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ +- svprfb_gather (pg, u32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather (pg, u32, (enum svprfop) 6); ++ svprfb_gather (pg, u32, (enum svprfop) 7); + svprfb_gather (pg, u32, (enum svprfop) 8); +- svprfb_gather (pg, u32, (enum svprfop) 14); /* { dg-error {passing 14 
to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather (pg, u32, (enum svprfop) 14); ++ svprfb_gather (pg, u32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c +index 24b4aa190..f400e91e8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c +@@ -10,8 +10,9 @@ f1 (svbool_t pg, int32_t *s32_ptr, svint32_t s32, enum svprfop op) + svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */ + svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 0); + svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 5); +- svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */ +- svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 6); ++ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 7); + svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 8); +- svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 14); ++ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 4 of 
'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c +index 63ccdc5a4..7b91dbd2e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c +@@ -10,8 +10,9 @@ f1 (svbool_t pg, svuint32_t u32, enum svprfop op) + svprfb_gather_u32base (pg, u32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */ + svprfb_gather_u32base (pg, u32, (enum svprfop) 0); + svprfb_gather_u32base (pg, u32, (enum svprfop) 5); +- svprfb_gather_u32base (pg, u32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */ +- svprfb_gather_u32base (pg, u32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather_u32base (pg, u32, (enum svprfop) 6); ++ svprfb_gather_u32base (pg, u32, (enum svprfop) 7); + svprfb_gather_u32base (pg, u32, (enum svprfop) 8); +- svprfb_gather_u32base (pg, u32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather_u32base (pg, u32, (enum svprfop) 14); ++ svprfb_gather_u32base (pg, u32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */ + } +diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c +index 75501f41c..3f6ff3623 100644 +--- a/gcc/tree-ssa-llc-allocate.c ++++ b/gcc/tree-ssa-llc-allocate.c +@@ -1020,6 +1020,14 @@ trace_ptr_mem_ref (data_ref &mem_ref, std::set &traced_ref_stmt, + { + if 
(dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Unhandled scenario for non-constant offset.\n"); ++ ++ return false; ++ } ++ if (TREE_CODE (pointer) != SSA_NAME) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Unhandled scenario for non-ssa pointer.\n"); ++ + return false; + } + +@@ -2330,7 +2338,7 @@ enum bb_traversal_state + bool + revisit_bb_abnormal_p (basic_block bb, std::vector &bb_visited, + const std::set &header_bb_idx_set, +- std::set > &backedges, ++ std::set > &unused_edges, + int src_bb_idx) + { + /* If the header bb has been already fully traversed, early exit +@@ -2340,19 +2348,20 @@ revisit_bb_abnormal_p (basic_block bb, std::vector &bb_visited, + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Already visited bb index %d. Abort.\n", + bb->index); ++ unused_edges.insert (std::make_pair (src_bb_idx, bb->index)); + return true; + } + + /* If we revisit a non-header bb during next-bb traversal, we detect + an inner-loop cycle and dump warning info. Record this abnormal edge +- in `backedges` for special treatment in path weight update. */ ++ in `unused_edges` for special treatment in path weight update. */ + if (!header_bb_idx_set.count (bb->index) + && bb_visited[bb->index] == UNDER_TRAVERSAL) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Warning: Find cycle at bb index %d. Abort.\n", + bb->index); +- backedges.insert (std::make_pair (src_bb_idx, bb->index)); ++ unused_edges.insert (std::make_pair (src_bb_idx, bb->index)); + return true; + } + +@@ -2397,7 +2406,7 @@ void + get_next_toposort_bb (basic_block bb, std::vector &bb_visited, + std::list &bb_topo_order, + const std::set &header_bb_idx_set, +- std::set > &backedges, ++ std::set > &unused_edges, + int src_bb_idx) + { + /* 1) Before bb returns to the loop header, bb will not go to the outer loop. 
+@@ -2412,7 +2421,7 @@ get_next_toposort_bb (basic_block bb, std::vector &bb_visited, + if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) + return; + +- if (revisit_bb_abnormal_p (bb, bb_visited, header_bb_idx_set, backedges, ++ if (revisit_bb_abnormal_p (bb, bb_visited, header_bb_idx_set, unused_edges, + src_bb_idx)) + return; + +@@ -2431,7 +2440,7 @@ get_next_toposort_bb (basic_block bb, std::vector &bb_visited, + FOR_EACH_VEC_ELT (exits, i, e) + { + get_next_toposort_bb (e->dest, bb_visited, bb_topo_order, +- header_bb_idx_set, backedges, bb->index); ++ header_bb_idx_set, unused_edges, src_bb_idx); + } + return; + } +@@ -2447,7 +2456,7 @@ get_next_toposort_bb (basic_block bb, std::vector &bb_visited, + continue; + + get_next_toposort_bb (e->dest, bb_visited, bb_topo_order, +- header_bb_idx_set, backedges, bb->index); ++ header_bb_idx_set, unused_edges, bb->index); + } + + /* bb is marked as fully traversed and all its descendents have been +@@ -2526,7 +2535,8 @@ check_null_info_in_path_update (basic_block bb, edge e) + to header bb using a backedge. */ + + void +-update_backedge_path_weight (std::vector &bb_weights, basic_block bb) ++update_backedge_path_weight (std::vector &bb_weights, basic_block bb, ++ const std::set > &unused_edges) + { + unsigned i; + edge e_exit; +@@ -2542,6 +2552,11 @@ update_backedge_path_weight (std::vector &bb_weights, basic_block bb) + continue; + } + ++ if (unused_edges.count (std::make_pair (bb->index, e_exit->dest->index))) ++ { ++ /* Inner-loop-cycle backedge case. 
*/ ++ continue; ++ } + update_path_weight (bb_weights, bb->index, e_exit->dest->index, + e_exit->dest->count.to_gcov_type ()); + } +@@ -2553,7 +2568,7 @@ void + update_max_length_of_path (std::vector &bb_weights, + std::list &bb_topo_order, + const std::set &header_bb_idx_set, +- const std::set > &backedges) ++ const std::set > &unused_edges) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Start update weight traversal:\n"); +@@ -2573,22 +2588,22 @@ update_max_length_of_path (std::vector &bb_weights, + if (check_null_info_in_path_update (bb, e)) + continue; + +- if (header_bb_idx_set.count (e->dest->index) +- && bb->loop_father == e->dest->loop_father) ++ if (unused_edges.count (std::make_pair (bb->index, e->dest->index))) + { +- /* Backedge case. */ +- update_backedge_path_weight (bb_weights, bb); ++ /* Inner-loop-cycle backedge case. */ ++ continue; + } +- else if (bb->loop_father->num != 0 ++ else if (bb->loop_father->num != 0 + && !flow_bb_inside_loop_p (bb->loop_father, e->dest)) + { + /* Outer-loop edge case. */ + continue; + } +- else if (backedges.count (std::make_pair (bb->index, e->dest->index))) ++ else if (header_bb_idx_set.count (e->dest->index) ++ && bb->loop_father == e->dest->loop_father) + { +- /* Inner-loop-cycle backedge case. */ +- continue; ++ /* Backedge case. */ ++ update_backedge_path_weight (bb_weights, bb, unused_edges); + } + else + { +@@ -2676,9 +2691,9 @@ filter_and_sort_kernels_feedback (std::vector &sorted_kernel, + basic_block bb_start = ENTRY_BLOCK_PTR_FOR_FN (cfun); + + /* Step 1: Get topological order of bb during traversal. 
*/ +- std::set > backedges; ++ std::set > unused_edges; + get_next_toposort_bb (bb_start, bb_visited, bb_topo_order, header_bb_idx_set, +- backedges, -1); ++ unused_edges, -1); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nCheck bbs in topological order:\n"); +@@ -2693,7 +2708,7 @@ filter_and_sort_kernels_feedback (std::vector &sorted_kernel, + std::vector bb_weights = std::vector(bb_num_max, weight_init); + bb_weights[0].bb_count = 0; /* ENTRY bb has count 0 and prev bb as -1. */ + update_max_length_of_path (bb_weights, bb_topo_order, header_bb_idx_set, +- backedges); ++ unused_edges); + + /* Step 3: Backtrack a path from EXIT bb to ENTRY bb. */ + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -2706,6 +2721,13 @@ filter_and_sort_kernels_feedback (std::vector &sorted_kernel, + tmp_bb_idx = bb_weights[tmp_bb_idx].prev_bb_idx; + while (tmp_bb_idx > 0 && tmp_bb_idx < bb_num_max) + { ++ if (bb_pathset.count (tmp_bb_idx)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf(dump_file, "ERROR: already seen bb index %d\n", ++ tmp_bb_idx); ++ return false; ++ } + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "%d: %ld, ", tmp_bb_idx, + bb_weights[tmp_bb_idx].bb_count); +@@ -3398,14 +3420,14 @@ issue_builtin_prefetch (data_ref &mem_ref) + if (param_llc_level == 3) + { + /* for simulation. +- BUILT_IN_PREFETCH (addr, rw, locality). */ ++ BUILT_IN_PREFETCH (addr, rw, locality). 
*/ + call = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), +- 3, addr, integer_zero_node, integer_one_node); ++ 3, addr, integer_zero_node, integer_one_node); + } + else if (param_llc_level == 4) + { +- tree prfop = build_int_cst (TREE_TYPE (integer_zero_node), 6); +- call = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH_FULL), ++ tree prfop = build_int_cst (TREE_TYPE (integer_zero_node), 6); ++ call = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH_FULL), + 3, addr, integer_zero_node, prfop); + } + else +-- +2.33.0 + diff --git a/0197-Add-hip09-machine-discribtion.patch b/0197-Add-hip09-machine-discribtion.patch new file mode 100644 index 0000000000000000000000000000000000000000..0ce10ac75d8b0eb23e58d704e9028cec9679141b --- /dev/null +++ b/0197-Add-hip09-machine-discribtion.patch @@ -0,0 +1,873 @@ +From 8953134c07329ff0841f2fa18da0db07c94e0167 Mon Sep 17 00:00:00 2001 +From: xingyushuai +Date: Fri, 3 Mar 2023 09:31:04 +0800 +Subject: [PATCH 04/10] Add hip09 machine description + +Here is the patch introducing hip09 machine model +for the scheduler. 
+--- + gcc/ChangeLog | 8 + + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.c | 83 ++++ + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip09.md | 558 +++++++++++++++++++++++ + 7 files changed, 756 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/aarch64/hip09.md + +diff --git a/gcc/ChangeLog b/gcc/ChangeLog +index 07aea9b86..59e5b8a3d 100644 +--- a/gcc/ChangeLog ++++ b/gcc/ChangeLog +@@ -1,3 +1,11 @@ ++2023-03-17 xingyushuai ++ * config/aarch64/aarch64-cores.def: Add support for hip09 CPU ++ * config/aarch64/aarch64-cost-tables.h: Add cost tables for hip09 CPU ++ * config/aarch64/aarch64-tune.md: Regenerated ++ * config/aarch64/aarch64.c: Add tuning table for hip09 CPU ++ * config/aarch64/aarch64.md: Include the new model of hip09 CPU ++ * config/aarch64/hip09.md: New file to support hip09 CPU ++ + 2020-05-12 Richard Sandiford + + PR tree-optimization/94980 +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 53125f6bd..eb1c6c894 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -124,6 +124,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0x1, 0xd01) + + /* ARMv8.3-A Architecture Processors. 
*/ + +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index 377650be0..89b3c180f 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -645,6 +645,110 @@ const struct cpu_cost_table hip11_extra_costs = + } + }; + ++const struct cpu_cost_table hip09_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. 
*/ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. 
*/ ++ } ++}; ++ + const struct cpu_cost_table a64fx_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index f33a3330d..1349ec04b 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,hip11,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,saphira,neoversen2,hip11,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 938948f29..4ef7bd8b3 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -464,6 +464,22 @@ static const struct cpu_addrcost_table hip11_addrcost_table = + 0, /* imm_offset */ + }; 
+ ++static const struct cpu_addrcost_table hip09_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -601,6 +617,16 @@ static const struct cpu_regmove_cost hip11_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip09_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Int<->fp move costs should exceed memmov_cost to deter spilling via ++ them. NOTE(review): 2 and 3 are below hip09's memmov_cost (4). */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost a64fx_regmove_cost = + { + 1, /* GP2GP */ +@@ -709,6 +735,25 @@ static const struct cpu_vector_cost hip11_vector_cost = + 1 /* cond_not_taken_branch_cost */ + }; + ++static const struct cpu_vector_cost hip09_vector_cost = ++{ ++ 1, /* scalar_int_stmt_cost */ ++ 1, /* scalar_fp_stmt_cost */ ++ 5, /* scalar_load_cost */ ++ 1, /* scalar_store_cost */ ++ 2, /* vec_int_stmt_cost */ ++ 2, /* vec_fp_stmt_cost */ ++ 2, /* vec_permute_cost */ ++ 3, /* vec_to_scalar_cost */ ++ 2, /* scalar_to_vec_cost */ ++ 5, /* vec_align_load_cost */ ++ 5, /* vec_unalign_load_cost */ ++ 1, /* vec_unalign_store_cost */ ++ 1, /* vec_store_cost */ ++ 1, /* cond_taken_branch_cost */ ++ 1 /* cond_not_taken_branch_cost */ ++}; ++ + /* Generic costs for vector insn classes. 
*/ + static const struct cpu_vector_cost cortexa57_vector_cost = + { +@@ -958,6 +1003,17 @@ static const cpu_prefetch_tune hip11_prefetch_tune = + -1 /* default_opt_level */ + }; + ++static const cpu_prefetch_tune hip09_prefetch_tune = ++{ ++ 0, /* num_slots */ ++ 64, /* l1_cache_size */ ++ 64, /* l1_cache_line_size */ ++ 512, /* l2_cache_size */ ++ true, /* prefetch_dynamic_strides */ ++ -1, /* minimum_stride */ ++ -1 /* default_opt_level */ ++}; ++ + static const cpu_prefetch_tune xgene1_prefetch_tune = + { + 8, /* num_slots */ +@@ -1252,6 +1308,33 @@ static const struct tune_params tsv110_tunings = + &tsv110_prefetch_tune + }; + ++static const struct tune_params hip09_tunings = ++{ ++ &hip09_extra_costs, ++ &hip09_addrcost_table, ++ &hip09_regmove_cost, ++ &hip09_vector_cost, ++ &generic_branch_cost, ++ &generic_approx_modes, ++ SVE_128, /* sve_width */ ++ 4, /* memmov_cost */ ++ 4, /* issue_rate */ ++ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH ++ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ ++ "16", /* function_align. */ ++ "4", /* jump_align. */ ++ "8", /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ 0, /* max_case_values. */ ++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ ++ (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags. 
*/ ++ &hip09_prefetch_tune ++}; ++ + static const struct tune_params hip11_tunings = + { + &hip11_extra_costs, +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 38af8d000..04d1e4ead 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -459,6 +459,7 @@ + (include "thunderx2t99.md") + (include "tsv110.md") + (include "thunderx3t110.md") ++(include "hip09.md") + + ;; ------------------------------------------------------------------- + ;; Jumps and other miscellaneous insns +diff --git a/gcc/config/aarch64/hip09.md b/gcc/config/aarch64/hip09.md +new file mode 100644 +index 000000000..25428de9a +--- /dev/null ++++ b/gcc/config/aarch64/hip09.md +@@ -0,0 +1,558 @@ ++;; hip09 pipeline description ++;; Copyright (C) 2023 Free Software Foundation, Inc. ++;; ++;;Contributed by Yushuai Xing ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . 
++ ++(define_automaton "hip09") ++(define_automaton "hip09_ldst") ++(define_automaton "hip09_fsu") ++ ++(define_attr "hip09_type" ++ "hip09_neon_abs, hip09_neon_fp_arith, hip09_neon_mul, hip09_neon_mla, ++ hip09_neon_dot, hip09_neon_fp_div, hip09_neon_fp_sqrt, ++ hip09_neon_ins, hip09_neon_load1, hip09_neon_load1_lanes, ++ hip09_neon_load2and4, hip09_neon_load3_3reg, ++ hip09_neon_load4_4reg, hip09_neon_store1and2, ++ hip09_neon_store1_1reg, hip09_neon_store1_2reg, ++ hip09_neon_store1_3reg, hip09_neon_store1_4reg, ++ hip09_neon_store3and4_lane, hip09_neon_store3_3reg, ++ hip09_neon_store4_4reg, unknown" ++ (cond [ ++ (eq_attr "type" "neon_abs,neon_abs_q,neon_add,neon_add_q,\ ++ neon_neg,neon_neg_q,neon_sub,neon_sub_q,neon_add_widen,\ ++ neon_sub_widen,neon_qadd,neon_qadd_q,\ ++ neon_add_long,neon_sub_long,\ ++ neon_qabs,neon_qabs_q,neon_qneg,\ ++ neon_qneg_q,neon_qsub,neon_qsub_q,neon_compare,\ ++ neon_compare_q,neon_compare_zero,\ ++ neon_compare_zero_q,neon_logic,neon_logic_q,\ ++ neon_minmax,neon_minmax_q,neon_tst,\ ++ neon_tst_q,neon_bsl,neon_bsl_q,\ ++ neon_cls,neon_cls_q,neon_ext,\ ++ neon_ext_q,neon_rev,neon_rev_q,\ ++ neon_tbl1,neon_tbl1_q,neon_fp_abs_s,\ ++ neon_fp_abs_s_q,neon_fp_abs_d,\ ++ neon_fp_neg_s,neon_fp_neg_s_q,\ ++ neon_fp_neg_d,neon_fp_neg_d_q,\ ++ neon_shift_imm_narrow_q,neon_move,neon_move_q") ++ (const_string "hip09_neon_abs") ++ (eq_attr "type" "neon_abd,neon_abd_q,\ ++ neon_arith_acc,neon_arith_acc_q,\ ++ neon_add_halve,neon_add_halve_q,\ ++ neon_sub_halve,neon_sub_halve_q,\ ++ neon_add_halve_narrow_q,\ ++ neon_sub_halve_narrow_q,neon_reduc_add,\ ++ neon_reduc_add_q,\ ++ neon_sat_mul_b,neon_sat_mul_b_q,\ ++ neon_sat_mul_b_long,neon_mul_b,neon_mul_b_q,\ ++ neon_mul_b_long,neon_mla_b,neon_mla_b_q,\ ++ neon_mla_b_long,neon_sat_mla_b_long,\ ++ neon_sat_shift_imm,\ ++ neon_sat_shift_imm_q,neon_shift_imm_long,\ ++ neon_shift_imm,neon_shift_imm_q,neon_cnt,\ ++ neon_cnt_q,neon_fp_recpe_s,neon_fp_recpe_s_q,\ ++ neon_fp_recpe_d,neon_fp_recpe_d_q,\ 
++ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\ ++ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\ ++ neon_fp_recpx_s,neon_fp_recpx_s_q,\ ++ neon_fp_recpx_d,neon_fp_recpx_d_q,\ ++ neon_tbl2,neon_tbl2_q,neon_to_gp,\ ++ neon_to_gp_q,neon_fp_abd_s,neon_fp_abd_s_q,\ ++ neon_fp_abd_d,neon_fp_abd_d_q,\ ++ neon_fp_addsub_s,neon_fp_addsub_s_q,\ ++ neon_fp_addsub_d,neon_fp_addsub_d_q,\ ++ neon_fp_compare_s,neon_fp_compare_s_q,\ ++ neon_fp_compare_d,neon_fp_compare_d_q,\ ++ neon_fp_cvt_widen_s,neon_fp_to_int_s,\ ++ neon_fp_to_int_s_q,neon_fp_to_int_d,\ ++ neon_fp_to_int_d_q,neon_fp_minmax_s,\ ++ neon_fp_minmax_s_q,neon_fp_minmax_d,\ ++ neon_fp_minmax_d_q,neon_fp_round_s,\ ++ neon_fp_round_s_q,neon_fp_cvt_narrow_d_q,\ ++ neon_fp_round_d,neon_fp_round_d_q,\ ++ neon_fp_cvt_narrow_s_q") ++ (const_string "hip09_neon_fp_arith") ++ (eq_attr "type" "neon_sat_mul_h,neon_sat_mul_h_q,\ ++ neon_sat_mul_s,neon_sat_mul_s_q,\ ++ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\ ++ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\ ++ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\ ++ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\ ++ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\ ++ neon_mul_s_long,neon_mul_h_scalar_long,\ ++ neon_mul_s_scalar_long,neon_mla_h,neon_mla_h_q,\ ++ neon_mla_s,neon_mla_h_scalar,\ ++ neon_mla_h_scalar_q,neon_mla_s_scalar,\ ++ neon_mla_h_long,\ ++ neon_mla_s_long,neon_sat_mla_h_long,\ ++ neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,\ ++ neon_sat_mla_s_scalar_long,neon_mla_s_scalar_long,\ ++ neon_mla_h_scalar_long,neon_mla_s_scalar_q,\ ++ neon_shift_acc,neon_shift_acc_q,neon_shift_reg,\ ++ neon_shift_reg_q,neon_sat_shift_reg,\ ++ neon_sat_shift_reg_q,neon_sat_shift_imm_narrow_q,\ ++ neon_tbl3,neon_tbl3_q,neon_fp_reduc_add_s,\ ++ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d,\ ++ neon_fp_reduc_add_d_q,neon_fp_reduc_minmax_s,\ ++ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_s_q,\ ++ neon_fp_reduc_minmax_d_q,\ ++ neon_fp_mul_s_q,\ ++ neon_fp_mul_d,neon_fp_mul_d_q,\ ++ 
neon_fp_mul_d_scalar_q,neon_fp_mul_s_scalar,\ ++ neon_fp_mul_s_scalar_q") ++ (const_string "hip09_neon_mul") ++ (eq_attr "type" "neon_mla_s_q,neon_reduc_minmax,\ ++ neon_reduc_minmax_q,neon_fp_recps_s,\ ++ neon_fp_recps_s_q,neon_fp_recps_d,\ ++ neon_fp_recps_d_q,neon_tbl4,neon_tbl4_q,\ ++ neon_fp_mla_s,\ ++ neon_fp_mla_d,neon_fp_mla_d_q,\ ++ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\ ++ neon_fp_mla_d_scalar_q") ++ (const_string "hip09_neon_mla") ++ (eq_attr "type" "neon_dot,neon_dot_q") ++ (const_string "hip09_neon_dot") ++ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\ ++ neon_fp_div_d,neon_fp_div_d_q") ++ (const_string "hip09_neon_fp_div") ++ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\ ++ neon_fp_sqrt_d,neon_fp_sqrt_d_q") ++ (const_string "hip09_neon_fp_sqrt") ++ (eq_attr "type" "neon_dup,neon_dup_q,\ ++ neon_ins,neon_ins_q") ++ (const_string "hip09_neon_ins") ++ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\ ++ neon_load1_2reg,neon_load1_2reg_q,\ ++ neon_load1_3reg,neon_load1_3reg_q,\ ++ neon_load1_4reg,neon_load1_4reg_q") ++ (const_string "hip09_neon_load1") ++ (eq_attr "type" "neon_load1_one_lane,\ ++ neon_load1_one_lane_q,\ ++ neon_load1_all_lanes,neon_load1_all_lanes_q") ++ (const_string "hip09_neon_load1_lanes") ++ (eq_attr "type" "neon_load2_all_lanes,\ ++ neon_load2_all_lanes_q,\ ++ neon_load2_one_lane,neon_load2_2reg,\ ++ neon_load2_2reg_q,neon_load3_one_lane,\ ++ neon_load3_all_lanes,neon_load3_all_lanes_q,\ ++ neon_load4_one_lane,neon_load4_all_lanes,\ ++ neon_load4_all_lanes_q") ++ (const_string "hip09_neon_load2and4") ++ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q") ++ (const_string "hip09_neon_load3_3reg") ++ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q") ++ (const_string "hip09_neon_load4_4reg") ++ (eq_attr "type" "neon_store1_one_lane,\ ++ neon_store1_one_lane_q,neon_store2_one_lane,\ ++ neon_store2_one_lane_q,neon_store2_2reg,\ ++ neon_store2_2reg_q") ++ (const_string "hip09_neon_store1and2") ++ (eq_attr "type" 
"neon_store1_1reg,neon_store1_1reg_q") ++ (const_string "hip09_neon_store1_1reg") ++ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q") ++ (const_string "hip09_neon_store1_2reg") ++ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q") ++ (const_string "hip09_neon_store1_3reg") ++ (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q") ++ (const_string "hip09_neon_store1_4reg") ++ (eq_attr "type" "neon_store3_one_lane,\ ++ neon_store3_one_lane_q,neon_store4_one_lane,\ ++ neon_store4_one_lane_q") ++ (const_string "hip09_neon_store3and4_lane") ++ (eq_attr "type" "neon_store3_3reg,\ ++ neon_store3_3reg_q") ++ (const_string "hip09_neon_store3_3reg") ++ (eq_attr "type" "neon_store4_4reg,\ ++ neon_store4_4reg_q") ++ (const_string "hip09_neon_store4_4reg")] ++ (const_string "unknown"))) ++ ++; The hip09 core is modelled as issues pipeline that has ++; the following functional units. ++; 1. Two pipelines for branch micro operations: BRU1, BRU2 ++ ++(define_cpu_unit "hip09_bru0" "hip09") ++(define_cpu_unit "hip09_bru1" "hip09") ++ ++(define_reservation "hip09_bru01" "hip09_bru0|hip09_bru1") ++ ++; 2. Four pipelines for single cycle integer micro operations: ALUs1, ALUs2, ALUs3, ALUs4 ++ ++(define_cpu_unit "hip09_alus0" "hip09") ++(define_cpu_unit "hip09_alus1" "hip09") ++(define_cpu_unit "hip09_alus2" "hip09") ++(define_cpu_unit "hip09_alus3" "hip09") ++ ++(define_reservation "hip09_alus0123" "hip09_alus0|hip09_alus1|hip09_alus2|hip09_alus3") ++(define_reservation "hip09_alus01" "hip09_alus0|hip09_alus1") ++(define_reservation "hip09_alus23" "hip09_alus2|hip09_alus3") ++ ++; 3. Two pipelines for multi cycles integer micro operations: ALUm1, ALUm2 ++ ++(define_cpu_unit "hip09_alum0" "hip09") ++(define_cpu_unit "hip09_alum1" "hip09") ++ ++(define_reservation "hip09_alum01" "hip09_alum0|hip09_alum1") ++ ++; 4. 
Two pipelines for load micro operations: Load1, Load2 ++ ++(define_cpu_unit "hip09_load0" "hip09_ldst") ++(define_cpu_unit "hip09_load1" "hip09_ldst") ++ ++(define_reservation "hip09_ld01" "hip09_load0|hip09_load1") ++ ++; 5. Two pipelines for store micro operations: Store1, Store2 ++ ++(define_cpu_unit "hip09_store0" "hip09_ldst") ++(define_cpu_unit "hip09_store1" "hip09_ldst") ++ ++(define_reservation "hip09_st01" "hip09_store0|hip09_store1") ++ ++; 6. Two pipelines for store data micro operations: STD0,STD1 ++ ++(define_cpu_unit "hip09_store_data0" "hip09_ldst") ++(define_cpu_unit "hip09_store_data1" "hip09_ldst") ++ ++(define_reservation "hip09_std01" "hip09_store_data0|hip09_store_data1") ++ ++; 7. Four asymmetric pipelines for Asimd and FP micro operations: FSU1, FSU2, FSU3, FSU4 ++ ++(define_cpu_unit "hip09_fsu0" "hip09_fsu") ++(define_cpu_unit "hip09_fsu1" "hip09_fsu") ++(define_cpu_unit "hip09_fsu2" "hip09_fsu") ++(define_cpu_unit "hip09_fsu3" "hip09_fsu") ++ ++(define_reservation "hip09_fsu0123" "hip09_fsu0|hip09_fsu1|hip09_fsu2|hip09_fsu3") ++(define_reservation "hip09_fsu02" "hip09_fsu0|hip09_fsu2") ++ ++ ++; 8. 
Two pipelines for SVE operations, the same as fsu1 and fsu3: SVE1, SVE2 ++ ++;; Simple Execution Unit: ++; ++;; Simple ALU without shift ++(define_insn_reservation "hip09_alu" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alu_imm,logic_imm,\ ++ adc_imm,adc_reg,\ ++ alu_sreg,logic_reg,\ ++ mov_imm,mov_reg,\ ++ csel,rotate_imm,bfm,mov_imm,\ ++ clz,rbit,rev")) ++ "hip09_alus0123") ++ ++(define_insn_reservation "hip09_alus" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alus_sreg,alus_imm,\ ++ adcs_reg,adcs_imm,\ ++ logics_imm,logics_reg,adr")) ++ "hip09_alus23") ++ ++;; ALU ops with shift and extend ++(define_insn_reservation "hip09_alu_ext_shift" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alu_ext,alus_ext,\ ++ logics_shift_imm,logics_shift_reg,\ ++ logic_shift_reg,logic_shift_imm,\ ++ ")) ++ "hip09_alum01") ++ ++;; Multiply instructions ++(define_insn_reservation "hip09_mult" 3 ++ (and (eq_attr "tune" "hip09") ++ (ior (eq_attr "mul32" "yes") ++ (eq_attr "widen_mul64" "yes"))) ++ "hip09_alum01") ++ ++;; Integer divide ++(define_insn_reservation "hip09_div" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "udiv,sdiv")) ++ "hip09_alum0") ++ ++;; Branch execution Unit ++; ++; Branches take two issue slots. ++; No latency as there is no result ++(define_insn_reservation "hip09_branch" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "branch,call")) ++ "hip09_bru01 + hip09_alus23") ++ ++;; Load execution Unit ++; ++; Loads of up to two words. ++(define_insn_reservation "hip09_load1" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "load_4,load_8")) ++ "hip09_ld01") ++ ++; Stores of up to two words. ++(define_insn_reservation "hip09_store1" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "store_4,store_8")) ++ "hip09_st01") ++ ++;; FP data processing instructions. 
++ ++(define_insn_reservation "hip09_fp_arith" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\ ++ f_mrc")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_cmp" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fcmps,fcmpd")) ++ "hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_ccmp" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fccmps,fccmpd")) ++ "hip09_alus01+hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_csel" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fcsel,f_mcr")) ++ "hip09_alus01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_divs" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fdivs")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_divd" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fdivd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_sqrts" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fsqrts")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_sqrtd" 15 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fsqrtd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_mul" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fmuls,fmuld")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_add" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\ ++ f_rints,f_rintd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_mac" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fmacs,fmacd")) ++ "hip09_fsu0123") ++ ++;; FP miscellaneous instructions. 
++ ++(define_insn_reservation "hip09_fp_cvt" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_cvtf2i")) ++ "hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_cvt2" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_cvti2f")) ++ "hip09_alus01+hip09_fsu0123") ++ ++;; FP Load Instructions ++ ++(define_insn_reservation "hip09_fp_load" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_loads,f_loadd")) ++ "hip09_ld01") ++ ++(define_insn_reservation "hip09_fp_load2" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_ldp_q,neon_ldp")) ++ "hip09_ld01+hip09_alus01") ++ ++;; FP store instructions ++ ++(define_insn_reservation "hip09_fp_store" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_stores,f_stored")) ++ "hip09_st01+hip09_std01") ++ ++;; ASIMD integer instructions ++ ++(define_insn_reservation "hip09_asimd_base1" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_abs")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base2" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_arith")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base3" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_mul")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base4" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_mla")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base5" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_fp_mul_s")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_dot" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_dot")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_bfmmla" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_fp_mla_s_q")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_fdiv" 15 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_div")) ++ 
"hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_fsqrt" 25 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_sqrt")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_pmull" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_pmull")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_dup" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_ins")) ++ "hip09_alus01+hip09_fsu0123") ++ ++;; ASIMD load instructions ++ ++(define_insn_reservation "hip09_asimd_ld1_reg" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load1")) ++ "hip09_ld01") ++ ++(define_insn_reservation "hip09_asimd_ld1_lane" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load1_lanes")) ++ "hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld23" 8 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load2and4")) ++"hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld3_mtp" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load3_3reg")) ++ "hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld4_mtp" 13 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load4_4reg")) ++ "hip09_ld01+hip09_fsu0123") ++ ++;; ASIMD store instructions ++ ++(define_insn_reservation "hip09_asimd_st12" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1and2")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_1reg" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_1reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_2reg" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_2reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_3reg" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_3reg")) 
++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_4reg" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_4reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st34_lane" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store3and4_lane")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st3_mtp" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store3_3reg")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st4_mtp" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store4_4reg")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++;; Cryptography extensions ++ ++(define_insn_reservation "hip09_asimd_aes" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_aese,crypto_aesmc")) ++ "hip09_fsu02") ++ ++(define_insn_reservation "hip09_asimd_sha3" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha3")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_sha1" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\ ++ crypto_sha256_fast,crypto_sha512,\ ++ crypto_sm3")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_sha1_and256" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\ ++ crypto_sm4")) ++ "hip09_fsu2") ++ ++;; CRC extension. 
++ ++(define_insn_reservation "hip09_crc" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crc")) ++ "hip09_alum01") +-- +2.33.0 + diff --git a/0198-bugfix-Modify-the-hip09-CPU-information.patch b/0198-bugfix-Modify-the-hip09-CPU-information.patch new file mode 100644 index 0000000000000000000000000000000000000000..ff39eb9fbb56f1735753122cd0af1ecc6d70a0a9 --- /dev/null +++ b/0198-bugfix-Modify-the-hip09-CPU-information.patch @@ -0,0 +1,39 @@ +From 8bfb0125f6c2aed9b1f5c2cd43563ce403c00d71 Mon Sep 17 00:00:00 2001 +From: XingYuShuai <1150775134@qq.com> +Date: Tue, 11 Jun 2024 20:39:48 +0800 +Subject: [PATCH 1/2] [bugfix] Modify the hip09 CPU information. + +--- + gcc/config/aarch64/aarch64-cores.def | 2 +- + gcc/config/aarch64/aarch64.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index eb1c6c894..a8f3376d4 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -124,7 +124,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) +-AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0x1, 0xd01) ++AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. 
*/ + +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 4ef7bd8b3..2117326ba 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -1316,7 +1316,7 @@ static const struct tune_params hip09_tunings = + &hip09_vector_cost, + &generic_branch_cost, + &generic_approx_modes, +- SVE_128, /* sve_width */ ++ SVE_256, /* sve_width */ + 4, /* memmov_cost */ + 4, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH +-- +2.33.0 + diff --git a/gcc.spec b/gcc.spec index ce1c29e546768cdae05d621d195ccf95989750cd..d794101f7738ddb2627a677a06386e42f4e16490 100644 --- a/gcc.spec +++ b/gcc.spec @@ -61,7 +61,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: gcc Version: %{gcc_version} -Release: 57 +Release: 58 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org @@ -303,7 +303,9 @@ Patch192: 0192-Add-feedback-directed-filter_and_sort_kernels-in-Pha.patch Patch193: 0193-Add-prefetch-level-parameter-to-specify-the-last-lev.patch Patch194: 0194-AutoFDO-avoid-accessing-dump_file-null-pointer.patch Patch195: 0195-add-whitelist-feature-for-OneProfile.patch - +Patch196: 0196-fix-bugs-in-loop-detections-add-filter-to-SSA-statem.patch +Patch197: 0197-Add-hip09-machine-discribtion.patch +Patch198: 0198-bugfix-Modify-the-hip09-CPU-information.patch %global gcc_target_platform %{_arch}-linux-gnu %if %{build_go} @@ -951,6 +953,9 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch193 -p1 %patch194 -p1 %patch195 -p1 +%patch196 -p1 +%patch197 -p1 +%patch198 -p1 %build @@ -2985,6 +2990,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Fri Jun 14 2024 zhenyu zhao - 10.3.1-58 +- Type:Sync +- ID:NA +- SUG:NA +- DESC: Sync patch from openeuler/gcc + * Thu Jun 13 2024 zhenyu zhao - 10.3.1-57 - Type:Sync - ID:NA