diff --git a/0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch b/0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch new file mode 100644 index 0000000000000000000000000000000000000000..590bec6b171ce2326c0b6bd58b019233a97cdec9 --- /dev/null +++ b/0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch @@ -0,0 +1,2201 @@ +From 1560015fbbd8cd73f31c8573c44dcd987a803aef Mon Sep 17 00:00:00 2001 +From: xiajingze +Date: Thu, 24 Oct 2024 10:29:47 +0800 +Subject: [PATCH] [AArch64] Support HiSilicon's HIP09 sched model + +Signed-off-by: xiajingze +--- + llvm/lib/Target/AArch64/AArch64.td | 4 +- + llvm/lib/Target/AArch64/AArch64SchedHIP09.td | 2158 ++++++++++++++++++ + 2 files changed, 2160 insertions(+), 2 deletions(-) + create mode 100644 llvm/lib/Target/AArch64/AArch64SchedHIP09.td + +diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td +index fdb931a0fe6c..edd5b91e3ad1 100644 +--- a/llvm/lib/Target/AArch64/AArch64.td ++++ b/llvm/lib/Target/AArch64/AArch64.td +@@ -768,6 +768,7 @@ include "AArch64SchedThunderX2T99.td" + include "AArch64SchedA64FX.td" + include "AArch64SchedThunderX3T110.td" + include "AArch64SchedTSV110.td" ++include "AArch64SchedHIP09.td" + include "AArch64SchedAmpere1.td" + include "AArch64SchedNeoverseN1.td" + include "AArch64SchedNeoverseN2.td" +@@ -1491,8 +1492,7 @@ def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, + // HiSilicon Processors. + def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, + [TuneTSV110]>; +-// FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57. +-def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09, ++def : ProcessorModel<"hip09", HIP09Model, ProcessorFeatures.HIP09, + [TuneHIP09]>; + + // Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. 
+diff --git a/llvm/lib/Target/AArch64/AArch64SchedHIP09.td b/llvm/lib/Target/AArch64/AArch64SchedHIP09.td +new file mode 100644 +index 000000000000..11cd250f6c7f +--- /dev/null ++++ b/llvm/lib/Target/AArch64/AArch64SchedHIP09.td +@@ -0,0 +1,2158 @@ ++//=- AArch64SchedHIP09.td - Huawei HIP09 Scheduling Defs ---*- tablegen -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the machine model for Huawei HIP09 to support instruction ++// scheduling and other instruction cost heuristics. ++// ++//===----------------------------------------------------------------------===// ++ ++def HIP09Model : SchedMachineModel { ++ let IssueWidth = 6; // HIP09 can dispatch 6 micro-ops per cycle. ++ let MicroOpBufferSize = 88; // Based on the reorder buffer. ++ let LoadLatency = 4; // Basic latency for most load instructions. ++ let MispredictPenalty = 14; // Based on ALU pipeline depth. ++ let LoopMicroOpBufferSize = 16; // Based on the instruction queue size. ++ let CompleteModel = 1; ++ ++ list UnsupportedFeatures = !listconcat(PAUnsupported.F, ++ SMEUnsupported.F, ++ SVE2Unsupported.F, ++ [HasMTE, HasSVE2p1_or_HasSME]); ++} ++ ++let SchedModel = HIP09Model in { ++ ++// HIP09 has 18 pipelines. The 4 Advanced SIMD&FP units handle different ++// sets of operations, of which 2 can also handle SVE. ++ ++// These are also defined in the upstream AArch64SchedHIP09.td. ++// In our implementation, HIP09UnitAB is called HIP09UnitBRU instead. 
++def HIP09UnitBRU : ProcResource<2>; // Branch 0/1 ++def HIP09UnitALUS0 : ProcResource<1>; // Integer ALU single cycle 0 ++def HIP09UnitALUS1 : ProcResource<1>; // Integer ALU single cycle 1 ++def HIP09UnitALUS23 : ProcResource<2>; // Integer ALU single cycle 2/3 ++def HIP09UnitALUM0 : ProcResource<1>; // Integer ALU multi cycle 0 ++def HIP09UnitALUM1 : ProcResource<1>; // Integer ALU multi cycle 1 ++def HIP09UnitLD : ProcResource<2>; // Load address generation and special memory 0/1 ++def HIP09UnitST : ProcResource<2>; // Store address generation and special memory 0/1 ++def HIP09UnitFSU0 : ProcResource<1>; // SIMD&FP 0, can handle SVE ++def HIP09UnitFSU2 : ProcResource<1>; // SIMD&FP 2, can handle SVE ++def HIP09UnitFSU13 : ProcResource<2>; // SIMD&FP 1/3 ++def HIP09UnitSTD : ProcResource<2>; // Store data 0/1 ++ ++def HIP09UnitALUS01 : ProcResGroup<[HIP09UnitALUS0, HIP09UnitALUS1]>; ++def HIP09UnitALUS : ProcResGroup<[HIP09UnitALUS0, HIP09UnitALUS1, HIP09UnitALUS23]>; ++def HIP09UnitALUM : ProcResGroup<[HIP09UnitALUM0, HIP09UnitALUM1]>; ++def HIP09UnitFSU02 : ProcResGroup<[HIP09UnitFSU0, HIP09UnitFSU2]>; ++def HIP09UnitFSU : ProcResGroup<[HIP09UnitFSU0, HIP09UnitFSU2, HIP09UnitFSU13]>; ++ ++//===----------------------------------------------------------------------===// ++// ++// Contains all of the HIP09-specific SchedWriteRes types. The approach below ++// is to define a generic SchedWriteRes for every combination of latency and ++// micro-ops. The naming convention is to use a prefix, one field for latency, ++// and one or more microOp count/type designators. ++// ++// Prefix: HIP09Write ++// Latency: #cyc ++// Micro-op Count/Types: #(BRU|ALUS01|ALUS23|ALUS|ALUM0|ALUM1|ALUM|LD|ST|FSU0|FSU2|FSU02|FSU|STD) ++// ++// e.g. 
HIP09Write_6cyc_1ALUS_6LD_4FSU means the total latency is 6 cycles, ++// and 11 micro-ops are issued down 1 ALUS pipe, 6 LD pipes, and 4 FSU ++// pipes ++ ++def HIP09Write_0cyc : SchedWriteRes<[]> { let Latency = 0; } ++ ++def HIP09Write_1cyc_1BRU : SchedWriteRes<[HIP09UnitBRU]> { let Latency = 1; } ++ ++def HIP09Write_1cyc_1ALUS : SchedWriteRes<[HIP09UnitALUS]> { let Latency = 1; } ++def HIP09Write_1cyc_1ALUS1 : SchedWriteRes<[HIP09UnitALUS1]> { let Latency = 1; } ++def HIP09Write_1cyc_1ALUS01 : SchedWriteRes<[HIP09UnitALUS01]> { let Latency = 1; } ++def HIP09Write_2cyc_1ALUS01 : SchedWriteRes<[HIP09UnitALUS01]> { let Latency = 2; } ++def HIP09Write_3cyc_1ALUS01 : SchedWriteRes<[HIP09UnitALUS01]> { let Latency = 3; } ++def HIP09Write_1cyc_1ALUS23 : SchedWriteRes<[HIP09UnitALUS23]> { let Latency = 1; } ++def HIP09Write_2cyc_1ALUS23 : SchedWriteRes<[HIP09UnitALUS23]> { let Latency = 2; } ++ ++def HIP09Write_2cyc_1ALUM : SchedWriteRes<[HIP09UnitALUM]> { let Latency = 2; } ++def HIP09Write_3cyc_1ALUM : SchedWriteRes<[HIP09UnitALUM]> { let Latency = 3; } ++def HIP09Write_5cyc_1ALUM1 : SchedWriteRes<[HIP09UnitALUM1]> { let Latency = 5; } ++def HIP09Write_12cyc_1ALUM0_12RC : SchedWriteRes<[HIP09UnitALUM0]> { let Latency = 12; let ResourceCycles = [12]; } ++def HIP09Write_20cyc_1ALUM0_20RC : SchedWriteRes<[HIP09UnitALUM0]> { let Latency = 20; let ResourceCycles = [20]; } ++ ++def HIP09Write_1cyc_1ST : SchedWriteRes<[HIP09UnitST]> { let Latency = 1; } ++ ++def HIP09Write_1cyc_1FSU : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 1; } ++def HIP09Write_2cyc_1FSU : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 2; } ++def HIP09Write_3cyc_1FSU : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 3; } ++def HIP09Write_4cyc_1FSU : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 4; } ++def HIP09Write_5cyc_1FSU : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 5; } ++def HIP09Write_5cyc_1FSU_3RC : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 5; let ResourceCycles = [3]; } ++def 
HIP09Write_7cyc_1FSU_3RC : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 7; let ResourceCycles = [3]; } ++def HIP09Write_9cyc_1FSU_5RC : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 9; let ResourceCycles = [5]; } ++def HIP09Write_9cyc_1FSU_8RC : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 9; let ResourceCycles = [8]; } ++def HIP09Write_10cyc_1FSU_6RC : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 10; let ResourceCycles = [6]; } ++def HIP09Write_13cyc_1FSU_9RC : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 13; let ResourceCycles = [9]; } ++def HIP09Write_15cyc_1FSU_11RC : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 15; let ResourceCycles = [11]; } ++def HIP09Write_21cyc_1FSU_17RC : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 21; let ResourceCycles = [17]; } ++def HIP09Write_25cyc_1FSU_21RC : SchedWriteRes<[HIP09UnitFSU]> { let Latency = 25; let ResourceCycles = [21]; } ++def HIP09Write_1cyc_1FSU02 : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 1; } ++def HIP09Write_2cyc_1FSU02 : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 2; } ++def HIP09Write_3cyc_1FSU02 : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 3; } ++def HIP09Write_4cyc_1FSU02 : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 4; } ++def HIP09Write_4cyc_1FSU02_4RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 4; let ResourceCycles = [4]; } ++def HIP09Write_5cyc_1FSU02 : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 5; } ++def HIP09Write_7cyc_1FSU02_3RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 7; let ResourceCycles = [3]; } ++def HIP09Write_9cyc_1FSU02_3RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 9; let ResourceCycles = [3]; } ++def HIP09Write_12cyc_1FSU02_4RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 12; let ResourceCycles = [4]; } ++def HIP09Write_12cyc_1FSU02_8RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 12; let ResourceCycles = [8]; } ++def HIP09Write_13cyc_1FSU02_9RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 13; let ResourceCycles = [9]; } 
++def HIP09Write_15cyc_1FSU02_11RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 15; let ResourceCycles = [11]; } ++def HIP09Write_17cyc_1FSU02_13RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 17; let ResourceCycles = [13]; } ++def HIP09Write_20cyc_1FSU02_16RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 20; let ResourceCycles = [16]; } ++def HIP09Write_21cyc_1FSU02_17RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 21; let ResourceCycles = [17]; } ++def HIP09Write_25cyc_1FSU02_21RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 25; let ResourceCycles = [21]; } ++def HIP09Write_36cyc_1FSU02_32RC : SchedWriteRes<[HIP09UnitFSU02]> { let Latency = 36; let ResourceCycles = [32]; } ++def HIP09Write_1cyc_1FSU2 : SchedWriteRes<[HIP09UnitFSU2]> { let Latency = 1; } ++def HIP09Write_2cyc_1FSU2 : SchedWriteRes<[HIP09UnitFSU2]> { let Latency = 2; } ++def HIP09Write_4cyc_1FSU2 : SchedWriteRes<[HIP09UnitFSU2]> { let Latency = 4; } ++def HIP09Write_4cyc_1LD : SchedWriteRes<[HIP09UnitLD]> { let Latency = 4; } ++def HIP09Write_5cyc_1LD : SchedWriteRes<[HIP09UnitLD]> { let Latency = 5; } ++def HIP09Write_6cyc_1LD : SchedWriteRes<[HIP09UnitLD]> { let Latency = 6; } ++def HIP09Write_6cyc_1LD_3RC : SchedWriteRes<[HIP09UnitLD]> { let Latency = 6; let ResourceCycles = [3]; } ++def HIP09Write_6cyc_1LD_4RC : SchedWriteRes<[HIP09UnitLD]> { let Latency = 6; let ResourceCycles = [4]; } ++def HIP09Write_16cyc_1LD_4RC : SchedWriteRes<[HIP09UnitLD]> { let Latency = 16; let ResourceCycles = [4]; } ++def HIP09Write_18cyc_1LD_4RC : SchedWriteRes<[HIP09UnitLD]> { let Latency = 18; let ResourceCycles = [4]; } ++ ++def HIP09Write_1cyc_2FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 1; ++ let NumMicroOps = 2; ++} ++def HIP09Write_2cyc_2FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 2; ++ let NumMicroOps = 2; ++} ++def HIP09Write_2cyc_2FSU02 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02]> ++{ ++ let Latency = 2; ++ let NumMicroOps 
= 2; ++} ++ ++def HIP09Write_3cyc_2FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 3; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_3cyc_2FSU02 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02]> ++{ ++ let Latency = 3; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_4cyc_2FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 4; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_4cyc_2FSU02 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02]> ++{ ++ let Latency = 4; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_4cyc_4FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 4; ++ let NumMicroOps = 4; ++} ++ ++def HIP09Write_5cyc_2FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 5; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_6cyc_2FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_6cyc_2FSU02 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_6cyc_4FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 4; ++} ++ ++def HIP09Write_7cyc_4FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 7; ++ let NumMicroOps = 4; ++} ++ ++def HIP09Write_9cyc_4FSU : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 9; ++ let NumMicroOps = 4; ++} ++ ++def HIP09Write_6cyc_1BRU_1ALUM1 : SchedWriteRes<[HIP09UnitBRU, HIP09UnitALUM1]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_1cyc_1ST_1STD : SchedWriteRes<[HIP09UnitST, HIP09UnitSTD]> ++{ ++ let Latency = 1; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_1cyc_2ST_2STD : SchedWriteRes<[HIP09UnitST, HIP09UnitST, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 1; ++ let NumMicroOps = 4; ++} ++ ++def 
HIP09Write_2cyc_1ST_1STD : SchedWriteRes<[HIP09UnitST, HIP09UnitSTD]> ++{ ++ let Latency = 2; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_2cyc_2ST_2STD : SchedWriteRes<[HIP09UnitST, HIP09UnitST, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 2; ++ let NumMicroOps = 4; ++} ++ ++def HIP09Write_2cyc_4ST_4STD : SchedWriteRes<[HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 2; ++ let NumMicroOps = 8; ++} ++ ++def HIP09Write_3cyc_2ST_2STD : SchedWriteRes<[HIP09UnitST, HIP09UnitST, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 3; ++ let NumMicroOps = 4; ++} ++ ++def HIP09Write_4cyc_3ST_3STD : SchedWriteRes<[HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 4; ++ let NumMicroOps = 6; ++} ++ ++def HIP09Write_4cyc_8ST_8STD : SchedWriteRes<[HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 4; ++ let NumMicroOps = 16; ++} ++ ++ ++def HIP09Write_5cyc_4ST_4STD : SchedWriteRes<[HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 5; ++ let NumMicroOps = 8; ++} ++ ++def HIP09Write_1cyc_1ST_1STD_1ALUS : SchedWriteRes<[HIP09UnitST, HIP09UnitSTD, HIP09UnitALUS]> ++{ ++ let Latency = 1; ++ let NumMicroOps = 3; ++} ++ ++def HIP09Write_2cyc_1ST_1STD_1ALUS : SchedWriteRes<[HIP09UnitST, HIP09UnitSTD, HIP09UnitALUS]> ++{ ++ let Latency = 2; ++ let NumMicroOps = 3; ++} ++ ++def HIP09Write_2cyc_2ST_2STD_2ALUS : SchedWriteRes<[HIP09UnitST, HIP09UnitST, HIP09UnitSTD, ++ HIP09UnitSTD, HIP09UnitALUS, HIP09UnitALUS]> ++{ ++ let Latency = 2; ++ let NumMicroOps = 6; ++} ++ ++def HIP09Write_2cyc_1BRU_1ALUS23 : SchedWriteRes<[HIP09UnitBRU, HIP09UnitALUS23]> ++{ ++ let Latency = 
2; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_4cyc_1LD_1ALUS : SchedWriteRes<[HIP09UnitLD, HIP09UnitALUS]> ++{ ++ let Latency = 4; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_6cyc_1LD_1ALUS : SchedWriteRes<[HIP09UnitLD, HIP09UnitALUS]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_9cyc_1LD_1ALUM1 : SchedWriteRes<[HIP09UnitLD, HIP09UnitALUM1]> ++{ ++ let Latency = 9; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_1cyc_1ST_1ALUM : SchedWriteRes<[HIP09UnitST, HIP09UnitALUM]> ++{ ++ let Latency = 1; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_2cyc_1ST_1ALUM : SchedWriteRes<[HIP09UnitST, HIP09UnitALUM]> ++{ ++ let Latency = 2; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_4cyc_1ALUS01_1FSU : SchedWriteRes<[HIP09UnitALUS01, HIP09UnitFSU]> ++{ ++ let Latency = 4; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_1cyc_1ST_1ALUS_1ALUM : SchedWriteRes<[HIP09UnitST, HIP09UnitALUS, HIP09UnitALUM]> ++{ ++ let Latency = 1; ++ let NumMicroOps = 3; ++} ++ ++def HIP09Write_3cyc_1ST_1ALUS_1ALUM : SchedWriteRes<[HIP09UnitST, HIP09UnitALUS, HIP09UnitALUM]> ++{ ++ let Latency = 3; ++ let NumMicroOps = 3; ++} ++ ++def HIP09Write_4cyc_1FSU_1ALUS23 : SchedWriteRes<[HIP09UnitFSU, HIP09UnitALUS23]> ++{ ++ let Latency = 4; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_7cyc_1ALUS01_1FSU_1ALUS23 : SchedWriteRes<[HIP09UnitALUS01, HIP09UnitFSU, HIP09UnitALUS23]> ++{ ++ let Latency = 7; ++ let NumMicroOps = 3; ++} ++ ++def HIP09Write_5cyc_1FSU02_1ALUS : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitALUS]> ++{ ++ let Latency = 5; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_6cyc_1FSU02_1ALUS : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitALUS]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_5cyc_1ALUS01_1FSU : SchedWriteRes<[HIP09UnitALUS01, HIP09UnitFSU]> ++{ ++ let Latency = 5; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_5cyc_1FSU_1ALUS23 : SchedWriteRes<[HIP09UnitFSU, HIP09UnitALUS23]> ++{ ++ let Latency = 5; ++ let NumMicroOps = 2; 
++} ++ ++def HIP09Write_5cyc_1FSU02_1ALUS23 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitALUS23]> ++{ ++ let Latency = 5; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_6cyc_4FSU02_4ALUS23 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 8; ++} ++ ++def HIP09Write_7cyc_4FSU02_4ALUS23 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23]> ++{ ++ let Latency = 7; ++ let NumMicroOps = 8; ++} ++ ++def HIP09Write_6cyc_6FSU02_6ALUS23 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 12; ++} ++ ++def HIP09Write_7cyc_6FSU02_6ALUS23 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23]> ++{ ++ let Latency = 7; ++ let NumMicroOps = 12; ++} ++ ++def HIP09Write_8cyc_6FSU02_6ALUS23 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23]> ++{ ++ let Latency = 8; ++ let NumMicroOps = 12; ++} ++ ++def HIP09Write_9cyc_6FSU02_6ALUS23 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23, ++ HIP09UnitALUS23, HIP09UnitALUS23, HIP09UnitALUS23]> ++{ ++ let Latency = 9; ++ let NumMicroOps = 12; ++} ++ ++def HIP09Write_9cyc_18FSU02_9ALUM1 : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, 
HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitALUM1, HIP09UnitALUM1, ++ HIP09UnitALUM1, HIP09UnitALUM1, HIP09UnitALUM1, HIP09UnitALUM1, ++ HIP09UnitALUM1, HIP09UnitALUM1, HIP09UnitALUM1]> ++{ ++ let Latency = 9; ++ let NumMicroOps = 27; ++} ++ ++def HIP09Write_6cyc_2LD : SchedWriteRes<[HIP09UnitLD, HIP09UnitLD]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_7cyc_1LD_1FSU : SchedWriteRes<[HIP09UnitLD, HIP09UnitFSU]> ++{ ++ let Latency = 7; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_8cyc_1LD_1FSU : SchedWriteRes<[HIP09UnitLD, HIP09UnitFSU]> ++{ ++ let Latency = 8; ++ let NumMicroOps = 2; ++} ++ ++def HIP09Write_8cyc_2LD_2FSU : SchedWriteRes<[HIP09UnitLD, HIP09UnitLD, HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 8; ++ let NumMicroOps = 4; ++} ++ ++def HIP09Write_8cyc_2LD_2FSU02 : SchedWriteRes<[HIP09UnitLD, HIP09UnitLD, HIP09UnitFSU02, HIP09UnitFSU02]> ++{ ++ let Latency = 8; ++ let NumMicroOps = 4; ++} ++ ++def HIP09Write_9cyc_3LD_3FSU : SchedWriteRes<[HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 9; ++ let NumMicroOps = 6; ++} ++ ++def HIP09Write_9cyc_4LD_4FSU02 : SchedWriteRes<[HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02]> ++{ ++ let Latency = 9; ++ let NumMicroOps = 8; ++} ++ ++def HIP09Write_11cyc_6LD_6FSU02 : SchedWriteRes<[HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02]> ++{ ++ let Latency = 11; ++ let NumMicroOps = 12; ++} ++ ++def HIP09Write_16cyc_16LD_16FSU02 : SchedWriteRes<[HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ 
HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02]> ++{ ++ let Latency = 16; ++ let NumMicroOps = 32; ++} ++ ++def HIP09Write_12cyc_8LD_8FSU : SchedWriteRes<[HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, ++ HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 12; ++ let NumMicroOps = 16; ++} ++ ++def HIP09Write_13cyc_8LD_8FSU : SchedWriteRes<[HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, HIP09UnitLD, ++ HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, ++ HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU]> ++{ ++ let Latency = 13; ++ let NumMicroOps = 16; ++} ++ ++def HIP09Write_3cyc_1FSU02_1ST_1STD : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitST, HIP09UnitSTD]> ++{ ++ let Latency = 3; ++ let NumMicroOps = 3; ++} ++ ++def HIP09Write_4cyc_1FSU_1ST_1STD : SchedWriteRes<[HIP09UnitFSU, HIP09UnitST, HIP09UnitSTD]> ++{ ++ let Latency = 4; ++ let NumMicroOps = 3; ++} ++ ++def HIP09Write_6cyc_2FSU_2ST_2STD : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU, ++ HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 6; ++} ++ ++def HIP09Write_6cyc_3FSU02_3ST_3STD : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 9; ++} ++ ++def HIP09Write_6cyc_4FSU02_4ST_4STD : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, 
HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 6; ++ let NumMicroOps = 12; ++} ++ ++def HIP09Write_7cyc_3FSU_3ST_3STD : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 7; ++ let NumMicroOps = 9; ++} ++ ++def HIP09Write_8cyc_16FSU02_16ST_16STD : SchedWriteRes<[HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, HIP09UnitFSU02, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 8; ++ let NumMicroOps = 48; ++} ++ ++def HIP09Write_10cyc_6FSU_6ST_6STD : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, ++ HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 10; ++ let NumMicroOps = 18; ++} ++ ++def HIP09Write_10cyc_8FSU_8ST_8STD : SchedWriteRes<[HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, ++ HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, HIP09UnitFSU, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitST, HIP09UnitST, HIP09UnitST, HIP09UnitST, ++ HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, ++ 
HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD, HIP09UnitSTD]> ++{ ++ let Latency = 10; ++ let NumMicroOps = 24; ++} ++ ++//===----------------------------------------------------------------------===// ++// Map the target-defined scheduler read/write resources and latency for HIP09. ++// The aliases are sufficient for creating a coarse, working model. As the model ++// evolves, InstRWs will be used to override some of these SchedAliases. ++// ++// WARNING: Using SchedAliases is convenient and works well for latency and ++// resource lookup for instructions. However, this creates an entry in ++// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking ++// any SchedReadAdvance since the lookup will fail. ++ ++def : SchedAlias; ++def : SchedAlias; ++def : SchedAlias; ++def : SchedAlias; ++ ++def : WriteRes { let Unsupported = 1; } ++def : WriteRes { let Latency = 1; } ++def : WriteRes { let Latency = 1; } ++def : WriteRes { let Latency = 4; } ++def : WriteRes { let Latency = 3; } ++def : WriteRes { let Latency = 4; } ++ ++// Forwarding logic is only modeled for multiply and accumulate. ++def : ReadAdvance; ++def : ReadAdvance; ++def : ReadAdvance; ++def : ReadAdvance; ++def : ReadAdvance; ++def : ReadAdvance; ++def : ReadAdvance; ++def : ReadAdvance; ++def : ReadAdvance; ++def : ReadAdvance; ++ ++ ++//===----------------------------------------------------------------------===// ++// Specialize the coarse model by associating instruction groups with the ++// subtarget-defined types. As the model is refined, this will override most ++// of the above SchedAlias mappings. 
++ ++// Miscellaneous ++// ----------------------------------------------------------------------------- ++ ++def : InstRW<[WriteI], (instrs COPY)>; ++ ++// Branch Instructions ++// ----------------------------------------------------------------------------- ++ ++def : SchedAlias; ++def : SchedAlias; ++ ++// Branch, immed ++def : InstRW<[HIP09Write_1cyc_1BRU], (instrs B, Bcc)>; ++ ++// Branch, register ++// Compare and branch ++def : InstRW<[HIP09Write_1cyc_1BRU], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ))$")>; ++ ++// Branch and link, immed ++// Branch and link, register ++def : InstRW<[HIP09Write_2cyc_1BRU_1ALUS23], (instrs BL, BLR)>; ++ ++// Arithmetic and Logical Instructions ++// ----------------------------------------------------------------------------- ++def : SchedAlias; ++def : SchedAlias; ++def : SchedAlias; ++ ++// Convert floating-point condition flags ++// Flag manipulation instructions ++def : WriteRes { let Latency = 1; } ++ ++// ALU, basic ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^(ADD|AND|EOR|ORR|SUB)[WX]r(r|i)$")>; ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^(ADC|SBC)[WX]r$")>; ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^(BIC|EON|ORN)[WX]rr$")>; ++ ++// ALU, basic, flagset ++def : InstRW<[HIP09Write_1cyc_1ALUS23], (instregex "^(ADD|AND|SUB)S[WX]r(r|i)$")>; ++def : InstRW<[HIP09Write_1cyc_1ALUS23], (instregex "^(ADC|SBC)S[WX]r$")>; ++def : InstRW<[HIP09Write_1cyc_1ALUS23], (instregex "^BICS[WX]rr$")>; ++ ++// Shifted Register with Shift == 0 ++def HIP09WriteISReg : SchedWriteVariant<[ ++ SchedVar, ++ SchedVar]>; ++def : InstRW<[HIP09WriteISReg], (instregex "^(ADD|AND|EON|EOR|ORN|ORR|SUB)[WX]rs$")>; ++ ++def HIP09WrISReg23 : SchedWriteVariant<[ ++ SchedVar, ++ SchedVar]>; ++def : InstRW<[HIP09WrISReg23], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>; ++ ++// Extended Register with Extend == 0 ++def HIP09WrIEReg : SchedWriteVariant<[ ++ SchedVar, ++ SchedVar]>; ++def : InstRW<[HIP09WrIEReg], (instregex 
"^(ADD|SUB)[WX]r(x|x64)$")>; ++ ++def HIP09WrIEReg23 : SchedWriteVariant<[ ++ SchedVar, ++ SchedVar]>; ++def : InstRW<[HIP09WrIEReg23], (instregex "^(ADD|SUB)S[WX]r(x|x64)$")>; ++ ++// Conditional compare ++def : InstRW<[HIP09Write_1cyc_1ALUS23], (instregex "^(CCMN|CCMP)[WX](r|i)$")>; ++ ++// Conditional select ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^(CSEL|CSINC|CSINV|CSNEG)[WX]r$")>; ++ ++// Convert floating-point condition flags ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^(AX|XA)FLAG$")>; ++ ++// Flag manipulation instructions ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instrs SETF8, SETF16, RMIF, CFINV)>; ++ ++// Logical, shift no flagset ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^BIC[WX]rs$")>; ++ ++// Divide and Multiply Instructions ++// ----------------------------------------------------------------------------- ++ ++def : SchedAlias; ++def : SchedAlias; ++ ++// Divide, W-form ++def : InstRW<[HIP09Write_12cyc_1ALUM0_12RC], (instregex "^(S|U)DIVWr$")>; ++ ++// Divide, X-form ++def : InstRW<[HIP09Write_20cyc_1ALUM0_20RC], (instregex "^(S|U)DIVXr$")>; ++ ++// Multiply, W-form ++// Multiply accumulate, W-form ++def HIP09ReadMAW : SchedReadAdvance<2, [HIP09Write_2cyc_1ALUM]>; ++def : InstRW<[HIP09Write_2cyc_1ALUM, HIP09ReadMAW], (instrs MADDWrrr, MSUBWrrr)>; ++ ++// Multiply, X-form ++// Multiply accumulate, X-form ++def HIP09ReadMAQ : SchedReadAdvance<3, [HIP09Write_3cyc_1ALUM]>; ++def : InstRW<[HIP09Write_3cyc_1ALUM, HIP09ReadMAQ], (instrs MADDXrrr, MSUBXrrr)>; ++ ++// Multiply accumulate long ++// Multiply long ++def : InstRW<[HIP09Write_2cyc_1ALUM, HIP09ReadMAW], (instregex "(S|U)(MADDL|MSUBL)rrr")>; ++ ++// Multiply high ++def : InstRW<[HIP09Write_3cyc_1ALUM], (instregex "^(S|U)MULHrr$")>; ++ ++// Pointer Authentication Instructions ++// ----------------------------------------------------------------------------- ++ ++// Bitfield move, basic ++def : SchedAlias; ++ ++// Authenticate data address ++def : 
InstRW<[HIP09Write_5cyc_1ALUM1], (instregex "^AUTDZ?[AB]$")>; ++ ++// Authenticate instruction address ++def : InstRW<[HIP09Write_5cyc_1ALUM1], (instregex "^AUTI[AB](1716|SP|Z)?$", "^AUTIZ[AB]$")>; ++ ++// Branch and link, register, with pointer authentication ++def : InstRW<[HIP09Write_6cyc_1BRU_1ALUM1], (instregex "^BLRA[AB]Z?$")>; ++ ++// Branch, register, with pointer authentication ++def : InstRW<[HIP09Write_6cyc_1BRU_1ALUM1], (instregex "^BRA[AB]Z?$")>; ++ ++// Branch, return, with pointer authentication ++def : InstRW<[HIP09Write_6cyc_1BRU_1ALUM1], (instregex "^RETA[AB]$")>; ++ ++// Compute pointer authentication code for data address ++def : InstRW<[HIP09Write_5cyc_1ALUM1], (instregex "^PACDZ?[AB]$")>; ++ ++// Compute pointer authentication code, using generic key ++def : InstRW<[HIP09Write_5cyc_1ALUM1], (instrs PACGA)>; ++ ++// Compute pointer authentication code for instruction address ++def : InstRW<[HIP09Write_5cyc_1ALUM1], (instregex "^PACI[AB](1716|SP|Z)?$", "^PACIZ[AB]$")>; ++ ++// Load register, with pointer authentication ++def : InstRW<[HIP09Write_9cyc_1LD_1ALUM1], (instregex "^LDRA[AB](indexed|writeback)$")>; ++ ++// Strip pointer authentication code ++def : InstRW<[HIP09Write_1cyc_1ALUS1], (instrs XPACD, XPACI, XPACLRI)>; ++ ++// Exception return, with pointer authentication ++def : InstRW<[HIP09Write_5cyc_1ALUM1], (instregex "^ERETA[AB]$")>; ++ ++// Load Instructions ++// ----------------------------------------------------------------------------- ++ ++def : WriteRes { let Latency = 4; } ++def : WriteRes { let Latency = 4; } ++ ++// Pre/Post Indexing ++def : WriteRes { let Latency = 1; } ++ ++// Load register, literal ++def : InstRW<[HIP09Write_4cyc_1LD], (instregex "^LDR(W|X)l$")>; ++def : InstRW<[HIP09Write_4cyc_1LD], (instrs LDRSWl)>; ++def : InstRW<[HIP09Write_4cyc_1LD], (instrs PRFMl)>; ++ ++// Load register, unscaled immed ++def : InstRW<[HIP09Write_4cyc_1LD], (instregex "^LDUR(W|X|BB|HH)i$")>; ++def : InstRW<[HIP09Write_4cyc_1LD], 
(instregex "^LDURS(BW|BX|HW|HX|W)i$")>; ++def : InstRW<[HIP09Write_4cyc_1LD], (instrs PRFUMi)>; ++ ++// Load register, immed post-index ++// Load register, immed pre-index ++def : InstRW<[WriteAdr, HIP09Write_4cyc_1LD], (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; ++def : InstRW<[WriteAdr, HIP09Write_4cyc_1LD], (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; ++ ++// Load register, immed unprivileged ++def : InstRW<[HIP09Write_4cyc_1LD], (instregex "^LDTR(W|X|B|H)i$")>; ++def : InstRW<[HIP09Write_4cyc_1LD], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; ++ ++// Load register, unsigned immed ++def : InstRW<[HIP09Write_4cyc_1LD], (instregex "^LDR(W|X|BB|HH)ui$")>; ++def : InstRW<[HIP09Write_4cyc_1LD], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; ++def : InstRW<[HIP09Write_4cyc_1LD], (instrs PRFMui)>; ++ ++// Load register, register offset ++def : InstRW<[HIP09Write_5cyc_1LD], (instregex "^LDR(W|X|BB)ro(W|X)$")>; ++def : InstRW<[HIP09Write_5cyc_1LD], (instregex "^LDRS(BW|BX|W)ro(W|X)$")>; ++def : InstRW<[HIP09Write_5cyc_1LD], (instregex "^PRFMro(W|X)$")>; ++ ++// Load register, register offset, extend, scale by 2 ++def : InstRW<[HIP09Write_6cyc_1LD_1ALUS], (instregex "^LDR(HH|SHW|SHX)ro(W|X)$")>; ++ ++// Load pair, immed offset ++def : InstRW<[HIP09Write_4cyc_1LD, WriteLDHi], (instregex "^LDP(W|X|SW)i$")>; ++def : InstRW<[HIP09Write_4cyc_1LD, WriteLDHi], (instregex "^LDNP(W|X)i$")>; ++ ++// Load pair, immed post-index ++def : InstRW<[WriteAdr, HIP09Write_4cyc_1LD_1ALUS, WriteLDHi], (instregex "^LDP[WX]post$")>; ++def : InstRW<[WriteAdr, HIP09Write_4cyc_1LD_1ALUS, WriteLDHi], (instrs LDPSWpost)>; ++ ++// Load pair, immed pre-index ++def : InstRW<[WriteAdr, HIP09Write_4cyc_1LD_1ALUS, WriteLDHi], (instregex "^LDP[WX]pre$")>; ++def : InstRW<[WriteAdr, HIP09Write_4cyc_1LD_1ALUS, WriteLDHi], (instrs LDPSWpre)>; ++ ++// Miscellaneous Data-Processing Instructions ++// ----------------------------------------------------------------------------- ++ ++def : SchedAlias; ++def : SchedAlias; ++ 
++// Address generation ++def : InstRW<[HIP09Write_1cyc_1ALUS23], (instrs ADR, ADRP)>; ++ ++// Bitfield extract, one reg ++// Bitfield extract, two reg ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^EXTR(W|X)rri$")>; ++ ++// Bitfield move, basic ++// Bitfield move, insert ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^(S|U)?BFM(W|X)ri$")>; ++ ++// Move immed ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^MOV[NZK][WX]i$")>; ++ ++// Count leading ++// Reverse bit/bytes ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^(CLS|CLZ|RBIT|REV(16|32)?)(W|X)r$")>; ++ ++// Variable shift ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^(ASRV|LSLV|LSRV|RORV)(W|X)r$")>; ++ ++// Store instructions ++// ----------------------------------------------------------------------------- ++def : WriteRes { let Latency = 1; } ++def : WriteRes { let Latency = 1; } ++def : WriteRes { let Latency = 1; } ++ ++// Store register, unscaled immed ++def : InstRW<[HIP09Write_1cyc_1ST_1ALUM], (instregex "^STUR(BB|HH|W|X)i$")>; ++ ++// Store register, immed post-index ++// Store register, immed pre-index ++def : InstRW<[WriteAdr, HIP09Write_1cyc_1ST_1ALUS_1ALUM], (instregex "^STR(BB|HH|W|X)(post|pre)$")>; ++ ++// Store register, immed unprivileged ++def : InstRW<[HIP09Write_1cyc_1ST_1ALUM], (instregex "^STTR(B|H|W|X)i$")>; ++ ++// Store register, unsigned immed ++def : InstRW<[HIP09Write_1cyc_1ST_1ALUM], (instregex "^STR(BB|HH|W|X)ui$")>; ++ ++// Store register, register offset ++def : InstRW<[HIP09Write_2cyc_1ST_1ALUM], (instregex "^STR(BB|W|X)ro(W|X)$")>; ++ ++// Store register offset, no-extend, scaled by 2 ++// def : InstRW<[HIP09Write_3cyc_1ST_1ALUS_1ALUM], (instregex "^STRHHro(W|X)$")>; ++ ++// Store pair, immed offset ++def : InstRW<[HIP09Write_1cyc_1ST_1ALUM], (instregex "^STN?P(W|X)i$")>; ++ ++// Store pair, immed post-index ++// Store pair, immed pre-index ++def : InstRW<[WriteAdr, HIP09Write_1cyc_1ST_1ALUS_1ALUM], (instregex "^STP(W|X)(post|pre)$")>; ++ ++// FP data 
processing instructions ++// ----------------------------------------------------------------------------- ++ ++def : SchedAlias; ++def : SchedAlias; ++def : SchedAlias; ++ ++// FP absolute value ++// FP negate ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(FABS|FNEG)[HSD]r$")>; ++ ++// FP absolute difference (entry currently commented out) ++// def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^FABD$")>; ++ ++// FP compare ++def : InstRW<[HIP09Write_4cyc_1FSU_1ALUS23], (instregex "^FCMPE?[HSD]r[ri]$")>; ++ ++// FP conditional compare ++def : InstRW<[HIP09Write_7cyc_1ALUS01_1FSU_1ALUS23], (instregex "^FCCMPE?[HSD]rr$")>; ++ ++// FP conditional select ++def : InstRW<[HIP09Write_4cyc_1ALUS01_1FSU], (instregex "^FCSEL[HSD]rrr$")>; ++ ++// FP divide, H-form ++def : InstRW<[HIP09Write_7cyc_1FSU_3RC], (instrs FDIVHrr)>; ++// FP divide, S-form ++def : InstRW<[HIP09Write_7cyc_1FSU_3RC], (instrs FDIVSrr)>; ++// FP divide, D-form ++def : InstRW<[HIP09Write_10cyc_1FSU_6RC], (instrs FDIVDrr)>; ++ ++// FP square root, H-form ++def : InstRW<[HIP09Write_7cyc_1FSU_3RC], (instrs FSQRTHr)>; ++// FP square root, S-form ++def : InstRW<[HIP09Write_9cyc_1FSU_5RC], (instrs FSQRTSr)>; ++// FP square root, D-form ++def : InstRW<[HIP09Write_15cyc_1FSU_11RC], (instrs FSQRTDr)>; ++ ++// FP fused multiply-add ++def : InstRW<[HIP09Write_4cyc_1FSU], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; ++ ++// FP max/min ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^FM(AX|IN)(NM)?[HSD]rr$")>; ++ ++// FP add/subtract ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^F(ADD|SUB)[HSD]rr")>; ++ ++// FP multiply ++def : WriteRes { let Latency = 3; } ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FN?MUL[HSD]rr")>; ++ ++// FP round to FP integral ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FRINT[AIMNPXZ][HSD]r$", ++ "^FRINT(32|64)[XZ][SD]r$")>; ++ ++// FP convert to FP ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FCVT(HD|SD|DH|SH|DS|HS)r")>; ++ ++// FP miscellaneous instructions ++// 
----------------------------------------------------------------------------- ++ ++def : SchedAlias; ++ ++ ++// FP convert, from vec to vec reg ++def : SchedAlias; ++ ++// Integer/fixed-point convert to FP ++def : InstRW<[HIP09Write_5cyc_1ALUS01_1FSU], (instregex "^[SU]CVTF[SU][WX][SHD]ri")>; ++ ++// FP convert, from vec to gen reg ++def : InstRW<[HIP09Write_5cyc_1FSU_1ALUS23], (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(S|D|H)r$")>; ++def : InstRW<[HIP09Write_5cyc_1FSU_1ALUS23], (instregex "^FCVTZ[SU][SU][WX](S|D|H)ri?$")>; ++ ++// FP convert, JavaScript, from vec to gen reg ++def : InstRW<[HIP09Write_5cyc_1FSU_1ALUS23], (instrs FJCVTZS)>; ++ ++// FP move, immed ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^FMOV[HSD]i$")>; ++ ++// FP move, register ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^FMOV[HSD]r$")>; ++ ++// FP transfer, from gen to low half of vec reg ++def : InstRW<[HIP09Write_3cyc_1ALUS01], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr, ++ FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>; ++ ++// FP transfer, from gen to high half of vec reg ++def : InstRW<[HIP09Write_4cyc_1ALUS01_1FSU], (instrs FMOVXDHighr)>; ++ ++// FP transfer, from vec to gen reg ++def : SchedAlias; ++ ++// FP load instructions ++// ----------------------------------------------------------------------------- ++ ++// Load vector reg, literal ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LDR[SDQ]l$")>; ++ ++// Load vector reg, unscaled immed ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LDUR[BHSDQ]i")>; ++ ++// Load vector reg, immed post-index ++// Load vector reg, immed pre-index ++def : InstRW<[WriteAdr, HIP09Write_6cyc_1LD], (instregex "^LDR[BHSDQ](post|pre)")>; ++ ++// Load vector reg, unsigned immed ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LDR[BHSDQ]ui")>; ++ ++// Load vector reg, register offset ++def : InstRW<[HIP09Write_6cyc_1LD, ReadAdrBase], (instregex "^LDR[BHSDQ]ro(W|X)$")>; ++ ++// Load vector pair, immed offset ++def : InstRW<[HIP09Write_6cyc_1LD, WriteLDHi], 
(instregex "^LDN?P[SDQ]i$")>; ++ ++// Load vector pair, immed post-index ++// Load vector pair, immed pre-index ++def : InstRW<[WriteAdr, HIP09Write_6cyc_1LD, WriteLDHi], (instregex "^LDP[SDQ](post|pre)$")>; ++ ++// FP store instructions ++// ----------------------------------------------------------------------------- ++ ++// Store vector reg, unscaled immed ++def : InstRW<[HIP09Write_1cyc_1ST_1STD], (instregex "^STUR[BHSDQ]i$")>; ++ ++// Store vector reg, immed post-index ++// Store vector reg, immed pre-index (NOTE(review): unlike the other pre/post-index entries, no WriteAdr is listed here - confirm this is intended) ++def : InstRW<[HIP09Write_1cyc_1ST_1STD_1ALUS, ReadAdrBase], (instregex "^STR[BHSDQ](post|pre)$")>; ++ ++// Store vector reg, immed unprivileged ++// Store vector reg, unsigned immed ++def : InstRW<[HIP09Write_1cyc_1ST_1STD], (instregex "^STR[BHSDQ]ui$")>; ++ ++// Store vector reg, reg offset, no-extend ++// Store vector reg, reg offset, extend ++def : InstRW<[HIP09Write_2cyc_1ST_1STD_1ALUS, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]$")>; ++ ++// Store vector pair, immed offset, S/D-form ++def : InstRW<[HIP09Write_1cyc_1ST_1STD], (instregex "^STN?P[SD]i$")>; ++ ++// Store vector pair, immed offset, Q-form ++def : InstRW<[HIP09Write_2cyc_2ST_2STD], (instregex "^STN?PQi$")>; ++ ++// Store vector pair, immed post-index ++// Store vector pair, immed pre-index ++def : InstRW<[WriteAdr, HIP09Write_1cyc_1ST_1STD_1ALUS], (instregex "^STP[SD](post|pre)$")>; ++def : InstRW<[WriteAdr, HIP09Write_2cyc_2ST_2STD_2ALUS], (instregex "^STPQ(post|pre)$")>; ++ ++// ASIMD integer Instructions ++// ----------------------------------------------------------------------------- ++ ++// ASIMD absolute diff ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^[SU]ABDv")>; ++ ++// ASIMD absolute diff accum ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^[SU]ABAL?v")>; ++ ++// ASIMD arith, basic ++// ASIMD arith wide ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(ABS|NEG|ADD|SUB)v")>; ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^[SU]ADD(L|W)v")>; ++def : InstRW<[HIP09Write_1cyc_1FSU], 
(instregex "^[SU]SUB[LW]v")>; ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(SH|UH)(ADD|SUB)v")>; ++ ++// Integer SIMD complex arithmetic ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(SU|US)QADDv")>; ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^SQ(ABS|NEG)v")>; ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(SQ|UQ)(ADD|SUB)v")>; ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(ADD|SUB)HNv")>; ++ ++// Integer SIMD complex arithmetic, rounding ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^R(ADD|SUB)HNv")>; ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^[SU]RHADDv")>; ++ ++// ASIMD arith, pair-wise ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^ADDPv")>; ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^[SU]ADDLPv")>; ++ ++// ASIMD arith, reduce ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^(ADDV|[SU]ADDLV)v")>; ++ ++// ASIMD compare ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^CM(GT|EQ|GE|LT|LE|TST|HI|HS)v")>; ++ ++// ASIMD dot product ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^[SU]DOT(lane)?(v8|v16)i8$")>; ++ ++// ASIMD dot product using signed and unsigned integers ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^(SU|US)DOT(lane)?(v8|v16)i8$")>; ++ ++// ASIMD logical ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(AND|NOT|ORN|ORR|BIC|EOR)v")>; ++ ++// ASIMD matrix multiply-accumulate ++def : InstRW<[HIP09Write_4cyc_4FSU], (instregex "^(S|U|US)MMLA$")>; ++ ++// ASIMD max,min ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^[SU](MAX|MIN)v")>; ++ ++// ASIMD max/min pair-wise ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^[SU](MAX|MIN)Pv")>; ++ ++// ASIMD max/min, reduce, S form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^[SU](MAX|MIN)V(v4|v2)i32v$")>; ++ ++// ASIMD max/min, reduce, B/H form ++def : InstRW<[HIP09Write_4cyc_2FSU], (instregex "^[SU](MAX|MIN)V(v4i16|v8i8|v8i16|v16i8)v$")>; ++ ++// Integer SIMD multiply(accumulate), B form ++def : InstRW<[HIP09Write_2cyc_2FSU], (instregex 
"^M(UL|LA|LS)(v8|v16)i8$")>; ++ ++// Integer SIMD multiply(accumulate), H/S form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^M(UL|LA|LS)(v4i16|v8i16|v4i32|v2i32)(_indexed)?$", ++ "^SQR?DMULH(v4|v8|v1)i16(_indexed)?$", ++ "^SQR?DMULH(v4|v2|v1)i32(_indexed)?$")>; ++// ASIMD multiply accumulate high, H/S form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^SQRDML[AS]H(v4|v8|v1)?i16(_indexed)?$", ++ "^SQRDML[AS]H(v4|v2|v1)?i32(_indexed)?$")>; ++ ++// ASIMD multiply(accumulate) long B form ++def : InstRW<[HIP09Write_2cyc_2FSU], (instregex "^[SU]M(LA|LS|UL)L(v8|v16)i8_v8i16$")>; ++ ++// Integer SIMD multiply(accumulate) long H/S form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^(S|U|SQD)M(LA|LS|UL)L(v4|v8)i16", ++ "^(S|U|SQD)M(LA|LS|UL)L(v2|v4)i32", ++ "^SQDM(LA|LS|UL)L(i16|i32)$", ++ "^SQDM(LA|LS|UL)Lv1(i32|i64)_indexed$")>; ++ ++// ASIMD multiply/multiply long (8x8) polynomial ++def : InstRW<[HIP09Write_2cyc_1FSU2], (instregex "^PMULL?(v8i8|v16i8)$")>; ++ ++// ASIMD pairwise add and accumulate long ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^[SU]ADALPv")>; ++ ++// ASIMD shift accumulate ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^[SU]R?SRA(d|v)")>; ++ ++// ASIMD shift by immed, basic ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^SHL(v|d)", "^SH(LL|RN)v", ++ "^[SU]SHLLv", "^[SU]SHR(d|v)")>; ++ ++// ASIMD shift by immed and insert, basic ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "SLI(d|v)", "^SRI(d|v)")>; ++ ++// ASIMD shift by immed, complex ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^RSHRNv", "^[SU]QRSHRU?N(b|h|s|v)", ++ "^[SU]RSHR(d|v)")>; ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^SQSHLU(b|h|s|d|v)", "^[SU]QSHRU?N(b|h|s|v)")>; ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^[SU]QSHL(b|h|s|d|v)")>; ++ ++// ASIMD shift by register, basic ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^[SU]SHLv")>; ++ ++// ASIMD shift by register, complex ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex 
"^[SU]QRSHLv", "^[SU]RSHL(d|v)")>; ++ ++// ASIMD floating-point instructions ++// ----------------------------------------------------------------------------- ++ ++// Reference for forms in this group ++// D form - v2f32 ++// Q form - v4f32, v2f64 ++// D form - 32, 64 ++// D form - v1i32, v1i64 ++// D form - v2i32 ++// Q form - v4i32, v2i64 ++ ++// FP SIMD sign manipulation ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^F(ABS|NEG)v")>; ++ ++// ASIMD FP absolute difference ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^FABDv")>; ++ ++// ASIMD FP arith ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^F(ADD|SUB)v")>; ++ ++// ASIMD FP add pairwise ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FADDPv")>; ++ ++// ASIMD FP compare ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^FACG[ET]v")>; ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "FCM(EQ|GE|GT|LE|LT)v")>; ++ ++// ASIMD FP convert long ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^FCVTLv")>; ++ ++// ASIMD FP convert narrow ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^FCVTX?Nv")>; ++ ++// ASIMD FP convert to Integer/Fixed point, D-form ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FCVT[NMAPZ][SU](v4f16|v2f32|v1f16|v1i64|v1i32)")>; ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FCVTZ[SU](h|s|v4i16_shift|v2i32_shift)")>; ++ ++// ASIMD FP convert to Integer/Fixed point, Q-form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^FCVT[NMAPZ][SU](v8f16|v4f32|v2f64)")>; ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FCVTZ[SU](d|v4i32_shift|v2i64_shift)")>; ++ ++// ASIMD FP convert from Integer/Fixed-point to FP, D-form ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^[SU]CVTF(h|s|v4f16|v2f32|v1i64|v1i32|v1i16|v4i16_shift|v2i32_shift)$")>; ++ ++// ASIMD FP convert from Integer/Fixed-point to FP, Q-form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^[SU]CVTF(d|v8f16|v4f32|v2f64|v8i16_shift|v4i32_shift|v2i64_shift)$")>; ++ ++// ASIMD FP divide, D-form, F16 ++def 
: InstRW<[HIP09Write_9cyc_1FSU_5RC], (instregex "^FDIVv4f16$")>; ++ ++// ASIMD FP divide, D-form, F32 ++def : InstRW<[HIP09Write_9cyc_1FSU_5RC], (instregex "^FDIVv2f32$")>; ++ ++// ASIMD FP divide, Q-form, F16 ++def : InstRW<[HIP09Write_13cyc_1FSU_9RC], (instregex "^FDIVv8f16$")>; ++ ++// ASIMD FP divide, Q-form, F32 ++def : InstRW<[HIP09Write_13cyc_1FSU_9RC], (instregex "^FDIVv4f32$")>; ++ ++// ASIMD FP divide, Q-form, F64 ++def : InstRW<[HIP09Write_15cyc_1FSU_11RC], (instregex "^FDIVv2f64$")>; ++ ++// ASIMD FP square root, D-form, F16 ++def : InstRW<[HIP09Write_13cyc_1FSU_9RC], (instregex "^FSQRTv4f16$")>; ++ ++// ASIMD FP square root, D-form, F32 ++def : InstRW<[HIP09Write_13cyc_1FSU_9RC], (instregex "^FSQRTv2f32$")>; ++ ++// ASIMD FP square root, Q-form, F16 ++def : InstRW<[HIP09Write_21cyc_1FSU_17RC], (instregex "^FSQRTv8f16$")>; ++ ++// ASIMD FP square root, Q-form, F32 ++def : InstRW<[HIP09Write_21cyc_1FSU_17RC], (instregex "^FSQRTv4f32$")>; ++ ++// ASIMD FP square root, Q-form, F64 ++def : InstRW<[HIP09Write_25cyc_1FSU_21RC], (instregex "^FSQRTv2f64$")>; ++ ++// ASIMD FP max/min, pairwise ++def : InstRW<[HIP09Write_2cyc_1FSU], (instregex "^F(MAX|MIN)(NM)?v")>; ++ ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^F(MAX|MIN)(NM)?Pv")>; ++ ++// FP SIMD max,min reduce HP-form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^F(MAX|MIN)(NM)?V(v4|v8)i16v")>; ++ ++// FP SIMD max,min reduce SP/DP-form ++def : InstRW<[HIP09Write_2cyc_2FSU], (instregex "^F(MAX|MIN)(NM)?Vv4i32v")>; ++ ++// ASIMD FP multiply ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FMULX?v")>; ++ ++// ASIMD FP fused multiply-add ++def : InstRW<[HIP09Write_4cyc_1FSU], (instregex "^FML[AS]v")>; ++ ++// ASIMD FP fused multiply-add long ++def : InstRW<[HIP09Write_5cyc_1FSU], (instregex "^FML[AS]L2?v")>; ++ ++// ASIMD FP round to FP integral, D-form ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FRINT(N|M|P|Z|A|X|I)(v4f16|v2f32)")>; ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex 
"^FRINT(32|64)[ZX]v2f32")>; ++ ++// ASIMD FP round to FP integral, Q-form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^FRINT(N|M|P|Z|A|X|I)(v8f16|v4f32|v2f64)")>; ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^FRINT(32|64)[ZX](v4f32|v2f64)")>; ++ ++// ASIMD Bfloat16 (BF16) Instructions ++// ----------------------------------------------------------------------------- ++ ++// ASIMD convert, F32 to BF16 ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^BFCVTN2?$")>; ++ ++// ASIMD dot product ++def : InstRW<[HIP09Write_6cyc_2FSU], (instregex "^(BFDOT|BF16DOTlane)v")>; ++ ++// ASIMD matrix multiply accumulate ++def : InstRW<[HIP09Write_9cyc_4FSU], (instrs BFMMLA)>; ++ ++// ASIMD multiply accumulate long ++def : InstRW<[HIP09Write_5cyc_1FSU], (instregex "^BFMLAL[BT](Idx)?$")>; ++ ++// Scalar convert, F32 to BF16 ++def : InstRW<[HIP09Write_3cyc_1FSU], (instrs BFCVT)>; ++ ++// ASIMD Miscellaneous Instructions ++// ----------------------------------------------------------------------------- ++ ++// Reference for forms in this group ++// D form - v8i8, v4i16, v2i32 ++// Q form - v16i8, v8i16, v4i32 ++// D form - v1i8, v1i16, v1i32, v1i64 ++// Q form - v16i8, v8i16, v4i32, v2i64 ++ ++// ASIMD bit reverse ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^RBITv")>; ++ ++// ASIMD bitwise insert ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(BIF|BIT|BSL)v")>; ++ ++// ASIMD count ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(CLS|CLZ)v")>; ++ ++// TODO: CNT only supports B element sizes now. 
++// ASIMD count, D ++// ASIMD count, B/H/S ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^CNT(v8i8|v16i8)")>; ++ ++// ASIMD duplicate, gen reg ++// Integer SIMD complex move general register to FP ++def : InstRW<[HIP09Write_4cyc_1ALUS01_1FSU], (instregex "^DUPv.+gpr")>; ++ ++// ASIMD duplicate, element ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^DUP(i8|i16|i32|i64)$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^DUPv.+lane")>; ++ ++// ASIMD extract ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^EXTv", "^XTNv")>; ++ ++// ASIMD extract narrow, saturating ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^[SU]QXTU?Nv")>; ++ ++// ASIMD insert, element to element ++def : InstRW<[HIP09Write_4cyc_1ALUS01_1FSU], (instregex "^INSv")>; ++ ++// ASIMD FP move, immed ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^FMOVv")>; ++ ++// ASIMD move, integer immediate ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^MOVIv", "^MOVID$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^MVNIv")>; ++ ++// ASIMD reciprocal and square root estimate, D-form ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^URECPEv2i32", "^URSQRTEv2i32")>; ++ ++// ASIMD reciprocal and square root estimate, Q-form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^URECPEv4i32", "^URSQRTEv4i32")>; ++ ++// ASIMD FP reciprocal and square root estimate, D-form ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^(FRECPE|FRSQRTE)(v2f32|v4f16|v1)")>; ++ ++// ASIMD FP reciprocal and square root estimate, Q-form ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^(FRECPE|FRSQRTE)(v8f16|v4f32|v2f64)")>; ++ ++// ASIMD FP reciprocal exponent ++def : InstRW<[HIP09Write_3cyc_1FSU], (instregex "^FRECPXv")>; ++ ++// ASIMD FP reciprocal step ++def : InstRW<[HIP09Write_4cyc_1FSU], (instregex "^FR(ECP|SQRT)S(v|32|64)")>; ++ ++// ASIMD reverse ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^REV(16|32|64)v")>; ++ ++// ASIMD table lookup, 1 or 2 table RegS ++def : 
InstRW<[HIP09Write_1cyc_1FSU], (instregex "^TBL(v8|v16)i8(One|Two)$")>; ++ ++// ASIMD table lookup, 3 table RegS ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^TBL(v8|v16)i8Three$")>; ++ ++// ASIMD table lookup, 4 table RegS ++def : InstRW<[HIP09Write_5cyc_1FSU_3RC], (instregex "^TBL(v8|v16)i8Four$")>; ++ ++// ASIMD table lookup extension, 1 table reg ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^TBX(v8|v16)i8One$")>; ++ ++// ASIMD table lookup extension, 2 table reg ++def : InstRW<[HIP09Write_3cyc_2FSU], (instregex "^TBX(v8|v16)i8Two$")>; ++ ++// ASIMD table lookup extension, 3 table reg ++def : InstRW<[HIP09Write_5cyc_1FSU_3RC], (instregex "^TBX(v8|v16)i8Three$")>; ++ ++// ASIMD table lookup extension, 4 table reg ++def : InstRW<[HIP09Write_7cyc_4FSU], (instregex "^TBX(v8|v16)i8Four$")>; ++ ++// ASIMD move FP to general register ++def : InstRW<[HIP09Write_2cyc_2FSU], (instregex "^[SU]MOV")>; ++ ++// ASIMD transpose ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^TRN[12]v")>; ++ ++// ASIMD uzip/zip ++def : InstRW<[HIP09Write_1cyc_1FSU], (instregex "^(UZP|ZIP)[12]v")>; ++ ++// ASIMD load instructions ++// ----------------------------------------------------------------------------- ++ ++// SIMD load, 1-element, multiple, 1-reg ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LD1Onev(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_6cyc_1LD], (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>; ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LD1Onev(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_6cyc_1LD], (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD load, 1-element, multiple, 2-reg ++def : InstRW<[HIP09Write_6cyc_2LD], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_6cyc_2LD], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; ++def : InstRW<[HIP09Write_6cyc_2LD], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_6cyc_2LD], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; ++ ++// 
SIMD load, 1-element, multiple, 3-reg ++def : InstRW<[HIP09Write_6cyc_1LD_3RC], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_6cyc_1LD_3RC], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; ++def : InstRW<[HIP09Write_6cyc_1LD_3RC], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_6cyc_1LD_3RC], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD load, 1-element, multiple, 4-reg ++def : InstRW<[HIP09Write_6cyc_1LD_4RC], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_6cyc_1LD_4RC], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; ++def : InstRW<[HIP09Write_6cyc_1LD_4RC], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_6cyc_1LD_4RC], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; ++ ++ ++// SIMD load, 1-element, single, 1 lane ++def : InstRW<[HIP09Write_7cyc_1LD_1FSU], (instregex "^LD1i(8|16|32|64)$")>; ++def : InstRW<[WriteAdr, HIP09Write_7cyc_1LD_1FSU], (instregex "^LD1i(8|16|32|64)_POST$")>; ++ ++// SIMD load, 1-element, single, replicate to all lanes ++def : InstRW<[HIP09Write_7cyc_1LD_1FSU], (instregex "^LD1Rv(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_7cyc_1LD_1FSU], (instregex "^LD1Rv(8b|4h|2s|1d)_POST$")>; ++def : InstRW<[HIP09Write_7cyc_1LD_1FSU], (instregex "^LD1Rv(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_7cyc_1LD_1FSU], (instregex "^LD1Rv(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD load, 2-elements, multiple, Q-form ++def : InstRW<[HIP09Write_8cyc_2LD_2FSU], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_2LD_2FSU], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD load, 2-elements, multiple, other form ++def : InstRW<[HIP09Write_8cyc_1LD_1FSU], (instregex "^LD2Twov(8b|4h|2s)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_1LD_1FSU], (instregex "^LD2Twov(8b|4h|2s)_POST$")>; ++ ++// SIMD load, 2-element, single, 1 lane ++def : InstRW<[HIP09Write_8cyc_1LD_1FSU], (instregex 
"^LD2i(8|16|32|64)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_1LD_1FSU], (instregex "^LD2i(8|16|32|64)_POST$")>; ++ ++// SIMD load LD3 (multiple structures) ++def : InstRW<[HIP09Write_9cyc_3LD_3FSU], (instregex "^LD3Threev(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_9cyc_3LD_3FSU], (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; ++def : InstRW<[HIP09Write_9cyc_3LD_3FSU], (instregex "^LD3Threev(8b|4h|2s)$")>; ++def : InstRW<[WriteAdr, HIP09Write_9cyc_3LD_3FSU], (instregex "^LD3Threev(8b|4h|2s)_POST$")>; ++ ++// SIMD load, 3-element, single, 1 lane ++def : InstRW<[HIP09Write_8cyc_2LD_2FSU], (instregex "^LD3i(8|16|32|64)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_2LD_2FSU], (instregex "^LD3i(8|16|32|64)_POST$")>; ++ ++// SIMD load, 4-element, multiple, Q-form ++def : InstRW<[HIP09Write_13cyc_8LD_8FSU], (instregex "^LD4Fourv(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_13cyc_8LD_8FSU], (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD load, 4-element, multiple, D-form ++def : InstRW<[HIP09Write_12cyc_8LD_8FSU], (instregex "^LD4Fourv(8b|4h|2s)$")>; ++def : InstRW<[WriteAdr, HIP09Write_12cyc_8LD_8FSU], (instregex "^LD4Fourv(8b|4h|2s)_POST$")>; ++ ++// SIMD load LD4 (single structure) ++def : InstRW<[HIP09Write_8cyc_2LD_2FSU], (instregex "^LD4i(8|16|32|64)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_2LD_2FSU], (instregex "^LD4i(8|16|32|64)_POST$")>; ++ ++// SIMD load, 2-element, single, replicate to all lanes ++def : InstRW<[HIP09Write_8cyc_1LD_1FSU], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_1LD_1FSU], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; ++def : InstRW<[HIP09Write_8cyc_1LD_1FSU], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_1LD_1FSU], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD load, 3-element, single, replicate to all lanes ++def : InstRW<[HIP09Write_8cyc_2LD_2FSU], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, 
HIP09Write_8cyc_2LD_2FSU], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; ++def : InstRW<[HIP09Write_8cyc_2LD_2FSU], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_2LD_2FSU], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD load, 4-element, single, replicate to all lanes ++def : InstRW<[HIP09Write_8cyc_2LD_2FSU], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_2LD_2FSU], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; ++def : InstRW<[HIP09Write_8cyc_2LD_2FSU], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_8cyc_2LD_2FSU], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; ++ ++// ASIMD Store Instructions ++// ----------------------------------------------------------------------------- ++ ++// SIMD store, 1-element, multiple, 1 reg, Q-form ++def : InstRW<[HIP09Write_2cyc_1ST_1STD], (instregex "ST1Onev(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_2cyc_1ST_1STD], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD store, 1-element, multiple, 2 reg, Q-form ++def : InstRW<[HIP09Write_3cyc_2ST_2STD], (instregex "ST1Twov(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_3cyc_2ST_2STD], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD store, 1-element, multiple, 3 reg, Q-form ++def : InstRW<[HIP09Write_4cyc_3ST_3STD], (instregex "ST1Threev(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_4cyc_3ST_3STD], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD store, 1-element, multiple, 4 reg, Q-form ++def : InstRW<[HIP09Write_5cyc_4ST_4STD], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_5cyc_4ST_4STD], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD store ST1 (multiple structure) Q=0, n=1/2 ++def : InstRW<[HIP09Write_2cyc_1ST_1STD], (instregex "ST1(One|Two)v(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_2cyc_1ST_1STD], (instregex "ST1(One|Two)v(8b|4h|2s|1d)_POST$")>; ++ ++// SIMD store ST1 (multiple 
structure) Q=0, n=3/4 ++def : InstRW<[HIP09Write_3cyc_2ST_2STD], (instregex "ST1(Three|Four)v(8b|4h|2s|1d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_3cyc_2ST_2STD], (instregex "ST1(Three|Four)v(8b|4h|2s|1d)_POST$")>; ++ ++// SIMD store, 1-element, single, 1 lane ++// SIMD store, 2-element, single, 1 lane ++def : InstRW<[HIP09Write_1cyc_1ST_1STD], (instregex "ST[12]i(8|16|32|64)$")>; ++def : InstRW<[WriteAdr, HIP09Write_1cyc_1ST_1STD], (instregex "ST[12]i(8|16|32|64)_POST$")>; ++ ++// SIMD store, 2-element, multiple, Q-form ++def : InstRW<[HIP09Write_1cyc_2ST_2STD], (instregex "ST2Twov(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_1cyc_2ST_2STD], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD store, 2-element, multiple, D-form ++def : InstRW<[HIP09Write_1cyc_1ST_1STD], (instregex "ST2Twov(8b|4h|2s)$")>; ++def : InstRW<[WriteAdr, HIP09Write_1cyc_1ST_1STD], (instregex "ST2Twov(8b|4h|2s)_POST$")>; ++ ++// SIMD store, 3-element, multiple, Q-form ++def : InstRW<[HIP09Write_7cyc_3FSU_3ST_3STD], (instregex "ST3Threev(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_7cyc_3FSU_3ST_3STD], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD store, 3-element, multiple, D-form ++def : InstRW<[HIP09Write_6cyc_2FSU_2ST_2STD], (instregex "ST3Threev(8b|4h|2s)$")>; ++def : InstRW<[WriteAdr, HIP09Write_6cyc_2FSU_2ST_2STD], (instregex "ST3Threev(8b|4h|2s)_POST$")>; ++ ++// SIMD store, 4-element, multiple, Q-form ++def : InstRW<[HIP09Write_10cyc_8FSU_8ST_8STD], (instregex "ST4Fourv(16b|8h|4s|2d)$")>; ++def : InstRW<[WriteAdr, HIP09Write_10cyc_8FSU_8ST_8STD], (instregex "ST4Fourv(16b|8h|4s|2d)_POST$")>; ++ ++// SIMD store, 4-element, multiple, D-form ++def : InstRW<[HIP09Write_10cyc_6FSU_6ST_6STD], (instregex "ST4Fourv(8b|4h|2s)$")>; ++def : InstRW<[WriteAdr, HIP09Write_10cyc_6FSU_6ST_6STD], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; ++ ++// SIMD store, 3-element, single, 1 lane ++// SIMD store, 4-element, single, 1 lane ++def : 
InstRW<[HIP09Write_4cyc_1FSU_1ST_1STD], (instregex "ST[34]i(8|16|32|64)$")>; ++def : InstRW<[WriteAdr, HIP09Write_4cyc_1FSU_1ST_1STD], (instregex "ST[34]i(8|16|32|64)_POST$")>; ++ ++// Cryptography Extensions v8.0 ++// ----------------------------------------------------------------------------- ++ ++// Crypto AES ops ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^AES[DE]rr$", "^AESI?MCrr$")>; ++ ++// Crypto polynomial (64x64) multiply long ++def : InstRW<[HIP09Write_2cyc_1FSU2], (instregex "^PMULL(v1|v2)i64$")>; ++ ++// Crypto SHA1 hash acceleration ops ++// Crypto SHA1 schedule acceleration ops ++def : InstRW<[HIP09Write_2cyc_1FSU2], (instregex "^SHA1(H|SU0|SU1)")>; ++ ++// Crypto SHA1 hash acceleration ops ++def : InstRW<[HIP09Write_4cyc_1FSU2], (instregex "^SHA1[CMP]")>; ++ ++// Crypto SHA256 schedule acceleration ops ++def : InstRW<[HIP09Write_2cyc_1FSU2], (instregex "^SHA256SU[01]")>; ++ ++// Crypto SHA256 hash acceleration ops ++def : InstRW<[HIP09Write_4cyc_1FSU2], (instregex "^SHA256H2?rrr")>; ++ ++// Cryptography Extensions v8.2 ++// ----------------------------------------------------------------------------- ++// v8.2 SHA512 hash acceleration ops ++def : InstRW<[HIP09Write_2cyc_1FSU2], (instregex "^SHA512(H|H2|SU0|SU1)")>; ++ ++// v8.2 SHA3 ops ++def : InstRW<[HIP09Write_1cyc_1FSU2], (instrs BCAX, EOR3, RAX1, XAR)>; ++ ++// v8.2 SM/SM3 ops ++def : InstRW<[HIP09Write_2cyc_1FSU2], (instregex "^SM3SS1$", "^SM3TT[12][AB]$" , ++ "^SM3PARTW[12]$")>; ++ ++// v8.2 SM/SM4 ops ++def : InstRW<[HIP09Write_4cyc_1FSU2], (instregex "^SM4E(NCKEY)?$")>; ++ ++// CRC ++// ----------------------------------------------------------------------------- ++ ++// CRC checksum ops ++def : InstRW<[HIP09Write_2cyc_1ALUM], (instregex "^CRC32C?[BHWX]rr$")>; ++ ++// 3.22 SVE Predicate instructions ++// ----------------------------------------------------------------------------- ++ ++// Loop control, based on predicate ++// Loop control, based on predicate and flag setting 
++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^BRK[AB]S?_PPzP", "^BRK[AB]_PPmP")>; ++ ++// Loop control, propagating ++// Loop control, propagating and flag setting ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^BRKNS?_PPzP$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^BRKP[AB]S?_PPzPP$")>; ++ ++// Loop control, based on GPR ++def : InstRW<[HIP09Write_2cyc_1ALUS01], (instregex "^WHILEL(E|O|S|T)_P(WW|XX)_[BHSD]$")>; ++ ++// Loop terminate ++def : InstRW<[HIP09Write_1cyc_1ALUS23], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; ++ ++// Predicate counting scalar ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^ADD(PL|VL)_XXI$")>; ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instregex "^(CNT|DEC|INC)[BHWD]_XPiI$")>; ++def : InstRW<[HIP09Write_1cyc_1ALUS], (instrs RDVLI_XI)>; ++ ++// Predicate counting scalar ++def : InstRW<[HIP09Write_2cyc_1ALUS23], (instregex "^SQ(DEC|INC)[BHWD]_(XPiWdI|XPiI)$")>; ++def : InstRW<[HIP09Write_2cyc_1ALUS23], (instregex "^UQ(DEC|INC)[BHWD]_(WPiI|XPiI)$")>; ++ ++// Predicate counting scalar, active predicate ++def : InstRW<[HIP09Write_5cyc_1FSU02_1ALUS], (instregex "^(INCP|DECP)_XP_[BHSD]$")>; ++ ++// Predicate counting scalar, active predicate ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^CNTP_XPP_[BHSD]$")>; ++ ++// Predicate counting scalar, active predicate (SQ/UQ forms) ++def : InstRW<[HIP09Write_6cyc_1FSU02_1ALUS], (instregex "^SQ(INCP|DECP)_XPWd_[BHSD]$", ++ "^(SQ|UQ)(INCP|DECP)_[XW]P_[BHSD]$")>; ++ ++// Predicate counting vector, active predicate ++def : InstRW<[HIP09Write_4cyc_2FSU02], (instregex "^(SQ|UQ)?(INCP|DECP)_ZP_[HSD]$")>; ++ ++// Predicate logical ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(ORR|EOR|AND|BIC|NOT)_ZPmZ_[BHSD]$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(AND|ORR|EOR|BIC|NAND|NOR|ORN)_PPzPP$")>; ++ ++// Predicate logical, flag setting ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(AND|BIC|EOR|ORR|ORN|NOR|NAND)S_PPzPP$")>; ++ ++// Predicate reverse ++def : 
InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^REV_PP_[BHSD]$")>; ++ ++// Select, vector form ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^SEL_ZPZZ_[BHSD]$")>; ++ ++// Predicate set ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instrs PFALSE)>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^PTRUE_[BHSD]$")>; ++ ++// Predicate set/initialize, set flags ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^PTRUES_[BHSD]$")>; ++ ++// Predicate find first/next ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^PFIRST_B$", ++ "^PNEXT_[BHSD]$")>; ++ ++// Predicate test ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^PTEST_PP")>; ++ ++// Predicate transpose ++// Predicate zip/unzip ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(ZIP|UZP|TRN)[12]_PPP_[BHSD]$")>; ++ ++// Predicate unpack and widen ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(PUNPKHI|PUNPKLO)_PP$")>; ++ ++ ++// 3.23 SVE Integer Instructions ++// ----------------------------------------------------------------------------- ++ ++// Arithmetic, absolute diff SABD, UABD ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>; ++ ++// Arithmetic, address generation ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^ADR_[SU]XTW_ZZZ_D_[0123]$", "^ADR_LSL_ZZZ_[SD]_[0123]$")>; ++ ++// Arithmetic, basic ++// Arithmetic, complex ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(ABS|ADD|SUBR?|NEG|CNOT)_ZPmZ")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(SQ|UQ)?(ADD|SUBR?)_(ZZZ|ZI)_[BHSD]$")>; ++ ++// Arithmetic, shift ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^(ASR|LSR|LSL)_ZPmI_[BHSD]$")>; ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^(ASR|LSR|LSL)_ZZI_[BHSD]$")>; ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^(ASR|LSR|LSL)_ZPZ[IZ]")>; ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^(ASR|LSR|LSL)R?_ZPmZ_[BHSD]")>; ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^(ASR|LSR|LSL)_WIDE_(ZPmZ|ZZZ)_[BHS]")>; ++ 
++// Arithmetic, shift right for divide ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^ASRD_ZPmI")>; ++ ++// Count/reverse bits ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(CLS|CLZ)_ZPmZ_[BHSD]_UNDEF$")>; ++ ++// Count/reverse bits, B H S form ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^CNT_ZPmZ_[BHS]$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^CNT_ZPmZ_[BHS]_UNDEF$")>; ++ ++// Count/reverse bits, D form ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^CNT_ZPmZ_D$")>; ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^CNT_ZPmZ_D_UNDEF$")>; ++ ++// Broadcast logical bitmask immediate to vector ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^DUPM_ZI$")>; ++ ++// Compare and set flags ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^CMP(GE|GT|LT|LE|HS|HI|LO|LS|EQ|NE)_PPzZ[ZI]_[BHSD]$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^CMP(GE|GT|LT|LE|HS|HI|LO|LS|EQ|NE)_WIDE_PPzZZ_[BHS]$")>; ++ ++// Conditional extract operations, scalar form ++def : InstRW<[HIP09Write_5cyc_1FSU02_1ALUS23], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; ++ ++// Conditional extract operations, SIMD&FP scalar and vector forms ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$")>; ++ ++// Conditional extract operations, SIMD&FP scalar and vector forms ++def : InstRW<[HIP09Write_7cyc_1FSU02_3RC], (instregex "^SPLICE_ZPZZ?_[BHSD]$")>; ++def : InstRW<[HIP09Write_5cyc_1FSU02], (instregex "^COMPACT_ZPZ_[SD]$")>; ++ ++// Convert to floating point ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^[SU]CVTF_ZPmZ_(HtoH|StoS|StoD|StoH|DtoS|DtoH|DtoD)$")>; ++ ++// SVE copy general register to vector (predicated) ++def : InstRW<[HIP09Write_5cyc_1FSU02_1ALUS23], (instregex "^CPY_ZPmV_[BHSD]$")>; ++ ++// SVE copy integer immediate (predicated) ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^CPY_(ZPmI|ZPzI)_[BHSD]$")>; ++ 
++// SVE copy element from SIMD&FP scalar register ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^CPY_ZPmR_[BHSD]$")>; ++ ++// SVE integer divide vectors (predicated) 32-bit ++def : InstRW<[HIP09Write_17cyc_1FSU02_13RC], (instregex "^[SU](DIV)R?_ZPmZ_S$")>; ++ ++// SVE integer divide vectors (predicated) 64-bit ++def : InstRW<[HIP09Write_17cyc_1FSU02_13RC], (instregex "^[SU](DIV)R?_ZPmZ_D$")>; ++ ++// Dot product, 8-bit ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^[SU]DOT_ZZZI?_S$")>; ++ ++// Dot product, 16-bit ++def : InstRW<[HIP09Write_3cyc_2FSU02], (instregex "^[SU]DOT_ZZZI?_D$")>; ++ ++// Dot product, 16-bit, using signed and unsigned integers ++def : InstRW<[HIP09Write_3cyc_2FSU02], (instregex "^(SU|US)DOT_ZZZI?$")>; ++ ++// Duplicate, indexed ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^DUP_ZZI_[BHSDQ]$")>; ++ ++// Duplicate, immediate ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^DUP_ZI_[BHSD]$")>; ++ ++// Duplicate, scalar ++def : InstRW<[HIP09Write_5cyc_1FSU02_1ALUS23], (instregex "^DUP_ZR_[BHSD]$")>; ++ ++// Extend, sign or zero ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^[SU]XTB_ZPmZ_[HSD]$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^[SU]XTH_ZPmZ_[SD]$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^[SU]XTW_ZPmZ_D$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^[SU]XTB_ZPmZ_[HSD]_UNDEF$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^[SU]XTH_ZPmZ_[SD]_UNDEF$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^[SU]XTW_ZPmZ_D_UNDEF$")>; ++ ++// Extract ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^EXT_ZZI(_B)?$")>; ++ ++// Insert operation, scalar ++def : InstRW<[HIP09Write_5cyc_1FSU02_1ALUS23], (instregex "^INSR_ZR_[BHSD]$")>; ++ ++// Insert operation, SIMD and FP scalar ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^INSR_ZV_[BHSD]$")>; ++ ++// Extract operation, SIMD and FP scalar ++// Extract operation, scalar ++def : 
InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^LAST[AB]_[RV]PZ_[BHSD]$")>; ++ ++// Horizontal operations, B-form, immediate operands only ++def : InstRW<[HIP09Write_2cyc_2FSU02], (instregex "^INDEX_II_B$")>; ++ ++// Horizontal operations, H,S,D-form, immediate operands only ++def : InstRW<[HIP09Write_3cyc_2FSU02], (instregex "^INDEX_II_[HSD]$")>; ++ ++// Horizontal operations, B-form, scalar start, immediate increment ++def : InstRW<[HIP09Write_8cyc_6FSU02_6ALUS23], (instregex "^INDEX_RI_B$")>; ++ ++// Horizontal operations, H,S,D-form, scalar start, immediate increment ++def : InstRW<[HIP09Write_9cyc_6FSU02_6ALUS23], (instregex "^INDEX_RI_[HSD]$")>; ++ ++// Horizontal operations, B-form, immediate start, scalar increment ++def : InstRW<[HIP09Write_6cyc_4FSU02_4ALUS23], (instregex "^INDEX_IR_B$")>; ++ ++// Horizontal operations, H,S,D-form, immediate start, scalar increment ++def : InstRW<[HIP09Write_7cyc_4FSU02_4ALUS23], (instregex "^INDEX_IR_[HSD]$")>; ++ ++// Horizontal operations, B-form, scalar ++def : InstRW<[HIP09Write_6cyc_6FSU02_6ALUS23], (instregex "^INDEX_RR_B$")>; ++ ++// Horizontal operations, H,S,D-form, scalar ++def : InstRW<[HIP09Write_7cyc_6FSU02_6ALUS23], (instregex "^INDEX_RR_[HSD]$")>; ++ ++// Logical ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(AND|ORR|EOR|BIC)_(ZZZ|ZI)$")>; ++ ++// Max/min, basic and pairwise ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^[SU](MAX|MIN)_(ZPmZ|ZI)_[BHSD]$")>; ++ ++// Matrix multiply-accumulate ++def : InstRW<[HIP09Write_4cyc_1FSU02_4RC], (instregex "^(S|U|US)MMLA_ZZZ$")>; ++ ++// Move prefix ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^MOVPRFX")>; ++ ++// Multiply, B element size ++def : InstRW<[HIP09Write_2cyc_2FSU02], (instregex "^MUL_ZI_B$", ++ "^(MUL|SMULH|UMULH)_ZPmZ_B$")>; ++ ++// Multiply, H, S, D element size ++def : InstRW<[HIP09Write_3cyc_2FSU02], (instregex "^MUL_ZI_[HSD]$", ++ "^(MUL|SMULH|UMULH)_ZPmZ_[HSD]$")>; ++ ++// Multiply accumulate, B element size ++def : 
InstRW<[HIP09Write_2cyc_2FSU02], (instregex "^(MLA|MLS|MAD|MSB)_ZPmZZ_B$")>; ++ ++// Multiply accumulate, H, S, D element size ++def : InstRW<[HIP09Write_3cyc_2FSU02], (instregex "^(MLA|MLS|MAD|MSB)_ZPmZZ_[HSD]$", ++ "^(MLA|MLS)_ZZZI_[HSD]$")>; ++ ++// Predicate counting vector ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^(SQ|UQ)?(DEC|INC)[HWD]_ZPiI$")>; ++ ++// Reduction, arithmetic ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^[SU]ADDV_VPZ_[BHSD]$")>; ++ ++// Reduction, arithmetic, D element size (NOTE(review): label previously said B H - confirm latency) ++def : InstRW<[HIP09Write_4cyc_1FSU02], (instregex "^[SU](MAX|MIN)V_VPZ_D$")>; ++ ++// Reduction, arithmetic, B H S element size (NOTE(review): label previously said S D - confirm latency) ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^[SU](MAX|MIN)V_VPZ_[BHS]$")>; ++ ++// Reduction, logical ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>; ++ ++// Reverse, vector ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^REV_ZZ_[BHSD]$")>; ++ ++// Reverse within elements ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^REV[BHW]_ZPmZ_[HSD]$")>; ++ ++// Predicate select ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^SEL_PPPP$")>; ++ ++// Table lookup ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^TBL_ZZZ_[BHSD]$")>; ++ ++// Transpose, vector form ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>; ++ ++// Unpack and extend ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>; ++ ++// Zip/unzip ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; ++ ++// 3.24 SVE Floating-point Instructions ++// ----------------------------------------------------------------------------- ++ ++// Floating point absolute value ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^FABS_ZPmZ_[HSD]$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^FABS_ZPmZ_[HSD]_UNDEF$")>; ++ ++// Floating point negative value ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex 
"^FNEG_ZPmZ_[HSD]$")>; ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^FNEG_ZPmZ_[HSD]_UNDEF$")>; ++ ++// Floating point absolute difference ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^FABD_ZPmZ_[HSD]$")>; ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^FABD_ZPZZ")>; ++ ++// Floating point arithmetic ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^F(ADD|SUB|SUBR)_ZPm[IZ]_[HSD]$")>; ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^F(ADD|SUB)_ZZZ_[HSD]$")>; ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^F(ADD|SUB|SUBR)_ZPZ[IZ]")>; ++ ++ ++// Floating point compare ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^FACG[ET]_PPzZZ_[HSD]$", ++ "^FCM(GE|GT|EQ|NE|UO)_PPzZZ_[HSD]$", ++ "^FCM(GE|GT|LT|LE|EQ|NE)_PPzZ0_[HSD]$")>; ++ ++// Floating point complex add ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^FCADD_ZPmZ_[HSD]$")>; ++ ++// Floating point complex multiply add ++def : InstRW<[HIP09Write_5cyc_1FSU02], (instregex "^FCMLA_ZPmZZ_[HSD]$", ++ "^FCMLA_ZZZI_[HS]$")>; ++ ++// Floating point convert, long or narrow ++def : InstRW<[HIP09Write_3cyc_2FSU02], (instregex "^FCVT_ZPmZ")>; ++ ++// Floating point convert to integer ++def : InstRW<[HIP09Write_3cyc_2FSU02], (instregex "^FCVTZ[SU]_ZPmZ")>; ++ ++// Floating point copy ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^FCPY_ZPmI_[HSD]$", "^FDUP_ZI_[HSD]$")>; ++ ++// Floating point divide, F16 / f32 ++def : InstRW<[HIP09Write_13cyc_1FSU02_9RC], (instregex "^FDIVR?_ZPmZ_[HS]$")>; ++def : InstRW<[HIP09Write_13cyc_1FSU02_9RC], (instregex "^FDIVR?_ZPZZ_[HS]_(UNDEF|ZERO)$")>; ++ ++// Floating point divide, F64 ++def : InstRW<[HIP09Write_15cyc_1FSU02_11RC], (instregex "^FDIVR?_ZPmZ_D$")>; ++def : InstRW<[HIP09Write_15cyc_1FSU02_11RC], (instregex "^FDIVR?_ZPZZ_D_(UNDEF|ZERO)$")>; ++ ++// Floating point min/max ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^F(MAX|MIN)(NM)?_ZPm[ZI]_[HSD]$")>; ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex 
"^F(MAX|MIN)(NM)?_ZPZ[ZI]")>; ++ ++// Floating point multiply ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^FMUL_(ZPmI|ZPmZ|ZZZI?)_[HSD]$")>; ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^FMUL_ZPZ[ZI]")>; ++ ++// Floating point multiply ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$")>; ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^FMULX_ZPZZ")>; ++ ++// Floating point multiply accumulate ++def : InstRW<[HIP09Write_4cyc_1FSU02], (instregex "^FN?(MLA|MLS|MAD|MSB)_ZPmZZ_[HSD]$")>; ++def : InstRW<[HIP09Write_4cyc_1FSU02], (instregex "^FML[AS]_ZZZI_[HSD]$")>; ++def : InstRW<[HIP09Write_4cyc_1FSU02], (instregex "^FN?ML[AS]_ZPZZZ_[HSD]_UNDEF$")>; ++ ++// Floating point reciprocal estimate ++def : InstRW<[HIP09Write_2cyc_2FSU02], (instregex "^FR(ECPE|SQRTE)_ZZ_[HSD]$")>; ++def : InstRW<[HIP09Write_2cyc_2FSU02], (instregex "^FRECPX_ZPmZ_[HSD]$")>; ++def : InstRW<[HIP09Write_2cyc_2FSU02], (instregex "^FRECPX_ZPmZ_[HSD]_UNDEF$")>; ++ ++// Floating point reciprocal step ++def : InstRW<[HIP09Write_4cyc_1FSU02], (instregex "^FR(ECPS|SQRTS)_ZZZ_[HSD]$")>; ++ ++// Floating point reduction, F16 ++def : InstRW<[HIP09Write_12cyc_1FSU02_4RC], (instregex "^FADDV_VPZ_H$")>; ++ ++// Floating point reduction, F32 ++def : InstRW<[HIP09Write_9cyc_1FSU02_3RC], (instregex "^FADDV_VPZ_S$")>; ++ ++// Floating point reduction, F64 ++def : InstRW<[HIP09Write_6cyc_2FSU02], (instregex "^FADDV_VPZ_D$")>; ++ ++// Floating point reduction, F16, F32 ++def : InstRW<[HIP09Write_3cyc_1FSU02], (instregex "^F(MAX|MIN)(NM)?V_VPZ_[HS]$")>; ++ ++// Floating point reduction, F64 ++def : InstRW<[HIP09Write_2cyc_1FSU02], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D$")>; ++ ++// Floating point round to integral ++def : InstRW<[HIP09Write_2cyc_2FSU02], (instregex "^FRINT[AMNPXZI]_ZPmZ_[HSD]$")>; ++def : InstRW<[HIP09Write_2cyc_2FSU02], (instregex "^FRINT[AMNPXZI]_ZPmZ_[HSD]_UNDEF$")>; ++ ++// Floating point square root, F16 / F32 ++def : 
InstRW<[HIP09Write_21cyc_1FSU02_17RC], (instregex "^FSQRT_ZPmZ_[HS]$")>; ++def : InstRW<[HIP09Write_21cyc_1FSU02_17RC], (instregex "^FSQRT_ZPmZ_[HS]_UNDEF$")>; ++ ++// Floating point square root, F64 ++def : InstRW<[HIP09Write_25cyc_1FSU02_21RC], (instregex "^FSQRT_ZPmZ_D$")>; ++def : InstRW<[HIP09Write_25cyc_1FSU02_21RC], (instregex "^FSQRT_ZPmZ_D_UNDEF")>; ++ ++// Floating point trigonometric exponentiation ++def : InstRW<[HIP09Write_2cyc_2FSU02], (instregex "^FEXPA_ZZ_[HSD]$")>; ++ ++// Floating point trigonometric multiply add ++def : InstRW<[HIP09Write_4cyc_1FSU02], (instregex "^FTMAD_ZZI_[HSD]$")>; ++ ++// Floating point trigonometric, miscellaneous ++def : InstRW<[HIP09Write_4cyc_1FSU02], (instregex "^FTSMUL_ZZZ_[HSD]$")>; ++ ++// Floating point trigonometric, miscellaneous ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^FTSSEL_ZZZ_[HSD]$")>; ++ ++// Floating point associative add, F16 ++def : InstRW<[HIP09Write_36cyc_1FSU02_32RC], (instrs FADDA_VPZ_H)>; ++ ++// Floating point associative add, F32 ++def : InstRW<[HIP09Write_20cyc_1FSU02_16RC], (instrs FADDA_VPZ_S)>; ++ ++// Floating point associative add, F64 ++def : InstRW<[HIP09Write_12cyc_1FSU02_8RC], (instrs FADDA_VPZ_D)>; ++ ++// SVE BFloat16 (BF16) Instructions ++// ----------------------------------------------------------------------------- ++ ++// Convert, F32 to BF16 ++def : InstRW<[HIP09Write_3cyc_2FSU02], (instregex "^BFCVT(NT)?_ZPmZ$")>; ++ ++// Dot product ++def : InstRW<[HIP09Write_6cyc_4FSU], (instregex "^BFDOT_ZZ[ZI]$")>; ++ ++// Matrix multiply accumulate ++def : InstRW<[HIP09Write_9cyc_1FSU_8RC], (instregex "^BFMMLA_ZZZ$")>; ++ ++// Multiply accumulate long ++def : InstRW<[HIP09Write_5cyc_2FSU], (instregex "^BFMLAL[BT]_ZZZI?$")>; ++ ++// SVE Load Instructions ++// ----------------------------------------------------------------------------- ++ ++// Load vector ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LDR_ZXI$")>; ++ ++// Load predicate ++def : 
InstRW<[HIP09Write_8cyc_1LD_1FSU], (instregex "^LDR_PXI$")>; ++ ++// Contiguous load, scalar + imm ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LD1(B|H|W|D)_IMM_REAL", ++ "^LD1(B|H|W|SB|SH|SW)_[HSD]_IMM_REAL")>; ++ ++// Contiguous load, scalar + scalar ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LD1(B|H|W|D|SB|SH|SW)(_[HSD])?$")>; ++ ++// Contiguous load broadcast, scalar + imm ++def : InstRW<[HIP09Write_8cyc_2LD_2FSU02], (instregex "^LD1R(B|H|W|D|SB|SH|SW|Q)_IMM$", ++ "^LD1R(B|H|W|D|SB|SH|SW|Q)_[BHSWD]_IMM$")>; ++ ++// Contiguous load broadcast, scalar + scalar ++def : InstRW<[HIP09Write_8cyc_2LD_2FSU02], (instregex "^LD1RQ_[BHWD]$")>; ++ ++// Non-temporal load, scalar + imm ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LDNT1[BHWD]_ZRI$")>; ++ ++// Non-temporal load, scalar + scalar ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LDNT1[BHWD]_ZRR$")>; ++ ++// Contiguous first faulting load, scalar + scalar ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LDFF1(B|H|W|D|SB|SH|SW)_REAL$", ++ "^LDFF1(B|H|W|D|SB|SH|SW)_[HSD]_REAL$")>; ++ ++// Contiguous non-faulting load, scalar + imm ++def : InstRW<[HIP09Write_6cyc_1LD], (instregex "^LDNF1(B|H|W|D|SB|SH|SW)_IMM", ++ "^LDNF1(B|H|W|D|SB|SH|SW)_[HSD]_IMM")>; ++ ++// Contiguous Load two structures to two vectors, scalar + imm ++// Contiguous Load two structures to two vectors, scalar + scalar ++def : InstRW<[HIP09Write_9cyc_4LD_4FSU02], (instregex "^LD2[BHWD](_IMM)?$")>; ++ ++// Contiguous Load three structures to three vectors, scalar + imm ++// Contiguous Load three structures to three vectors, scalar + scalar ++def : InstRW<[HIP09Write_11cyc_6LD_6FSU02], (instregex "^LD3[BHWD](_IMM)?$")>; ++ ++// Contiguous Load four structures to four vectors, scalar + imm ++// Contiguous Load four structures to four vectors, scalar + scalar ++def : InstRW<[HIP09Write_16cyc_16LD_16FSU02], (instregex "^LD4[BHWD](_IMM)?$")>; ++ ++// Gather load, vector + imm, 32-bit element size ++def : 
InstRW<[HIP09Write_18cyc_1LD_4RC], (instregex "^GLD(FF)?1S?[BH]_S_(IMM|[SU]XTW)(_REAL)?$", ++ "^GLD(FF)?1W_(IMM|[SU]XTW)(_REAL)?")>; ++ ++// Gather load, vector + imm, 64-bit element size ++def : InstRW<[HIP09Write_16cyc_1LD_4RC], (instregex "^GLD(FF)?1S?[BHW]_D_(IMM|REAL|SCALED)", ++ "^GLD(FF)?1D_(IMM|REAL|SCALED)")>; ++ ++// Gather load, 32-bit scaled offset ++def : InstRW<[HIP09Write_18cyc_1LD_4RC], (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED(_REAL)?$")>; ++ ++// Gather load, 32-bit unpacked unscaled offset ++def : InstRW<[HIP09Write_18cyc_1LD_4RC], (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW", ++ "^GLD(FF)?1D_[SU]XTW")>; ++ ++// Prefetch ++def : InstRW<[HIP09Write_18cyc_1LD_4RC], (instregex "^PRF[BHWD]_PRI")>; ++def : InstRW<[HIP09Write_18cyc_1LD_4RC], (instregex "^PRF[BHWD]_PRR")>; ++def : InstRW<[HIP09Write_18cyc_1LD_4RC], (instregex "^PRF[BHW]_[SD]")>; ++def : InstRW<[HIP09Write_18cyc_1LD_4RC], (instregex "^PRFD_[SD]")>; ++ ++// SVE Store Instructions ++// ----------------------------------------------------------------------------- ++ ++// Store from predicate reg ++def : InstRW<[HIP09Write_3cyc_1FSU02_1ST_1STD], (instregex "^STR_PXI$")>; ++ ++// Store from vector reg ++def : InstRW<[HIP09Write_2cyc_2ST_2STD], (instregex "^STR_ZXI$")>; ++ ++// SVE contiguous store (scalar plus immediate) ++def : InstRW<[HIP09Write_2cyc_2ST_2STD], (instregex "^ST1[BHWD]_IMM$", ++ "^ST1B_[HSD]_IMM$", ++ "^ST1H_[SD]_IMM$", ++ "^ST1W_D_IMM$")>; ++ ++// SVE contiguous store (scalar plus scalar) ++def : InstRW<[HIP09Write_2cyc_2ST_2STD], (instregex "^ST1[BHWD]$", ++ "^ST1B_[HSD]$", ++ "^ST1H_[SD]$", ++ "^ST1W_D$")>; ++ ++// Contiguous store two structures from two vectors ++def : InstRW<[HIP09Write_6cyc_3FSU02_3ST_3STD], (instregex "^ST2[BHWD](_IMM)?$")>; ++ ++// Contiguous store three structures from three vectors ++def : InstRW<[HIP09Write_6cyc_4FSU02_4ST_4STD], (instregex "^ST3[BHWD](_IMM)?$")>; ++ ++// Contiguous store four structures from four vectors ++def : 
InstRW<[HIP09Write_8cyc_16FSU02_16ST_16STD], (instregex "^ST4[BHWD](_IMM)?$")>; ++ ++// Non-temporal store, scalar + imm ++def : InstRW<[HIP09Write_2cyc_2ST_2STD], (instregex "^STNT1[BHWD]_ZRI$")>; ++ ++// Non-temporal store, scalar + scalar ++def : InstRW<[HIP09Write_2cyc_2ST_2STD], (instregex "^STNT1[BHWD]_ZRR$")>; ++ ++// Scatter store vector + imm 32-bit element size ++def : InstRW<[HIP09Write_4cyc_8ST_8STD], (instregex "^SST1[BH]_S_IMM$", ++ "^SST1W_IMM$")>; ++ ++// Scatter store vector + imm 64-bit element size ++def : InstRW<[HIP09Write_2cyc_4ST_4STD], (instregex "^SST1[BHW]_D_IMM$", ++ "^SST1D_IMM$")>; ++ ++// Scatter store, 32-bit scaled offset ++def : InstRW<[HIP09Write_4cyc_8ST_8STD], (instregex "^SST1H_S_[SU]XTW_SCALED$", ++ "^SST1W_[SU]XTW_SCALED$")>; ++ ++// Scatter store, 32-bit unpacked unscaled offset ++def : InstRW<[HIP09Write_4cyc_8ST_8STD], (instregex "^SST1[BHW]_D_[SU]XTW$", ++ "^SST1D_[SU]XTW$")>; ++ ++// Scatter store, 32-bit unpacked scaled offset ++def : InstRW<[HIP09Write_4cyc_8ST_8STD], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$", ++ "^SST1D_[SU]XTW_SCALED$")>; ++ ++// Scatter store, 32-bit unscaled offset ++def : InstRW<[HIP09Write_4cyc_8ST_8STD], (instregex "^SST1[BH]_S_[SU]XTW$", ++ "^SST1W_[SU]XTW$")>; ++ ++// Scatter store, 64-bit scaled offset ++def : InstRW<[HIP09Write_2cyc_4ST_4STD], (instregex "^SST1[HW]_D_SCALED", ++ "^SST1D_SCALED")>; ++ ++// Scatter store, 64-bit unscaled offset ++def : InstRW<[HIP09Write_2cyc_4ST_4STD], (instregex "^SST1[BHW]_D$", ++ "^SST1D$")>; ++ ++// SVE Miscellaneous Instructions ++// ----------------------------------------------------------------------------- ++ ++// Read first fault register, unpredicated ++// Read first fault register, predicated ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^RDFFR_P(Pz)?_REAL$")>; ++ ++// Read first fault register and set flags ++def : InstRW<[HIP09Write_1cyc_1FSU02], (instregex "^RDFFRS_PPz$")>; ++ ++// Set first fault register ++def : InstRW<[HIP09Write_0cyc], 
(instregex "^SETFFR$")>; ++ ++// Write to first fault register ++def : InstRW<[HIP09Write_9cyc_18FSU02_9ALUM1], (instrs WRFFR)>; ++ ++ ++// ----------------------------------------------------------------------------- ++} // SchedModel = HIP09Model ++ +-- +2.43.0 + diff --git a/llvm.spec b/llvm.spec index 0e22159f1cd14239fd7cdc926a5553b69a79c41e..7de81b30a1c2852a4fae3607c696b5cfecb3c102 100644 --- a/llvm.spec +++ b/llvm.spec @@ -45,7 +45,7 @@ Name: %{pkg_name} Version: %{maj_ver}.%{min_ver}.%{patch_ver} -Release: 27 +Release: 28 Summary: The Low Level Virtual Machine License: NCSA @@ -89,6 +89,7 @@ Patch30: 0030-LICM-Solve-runtime-error-caused-by-the-signal-functi.patch Patch31: 0031-ACPO-ACPO-Infrastructure.patch Patch32: 0032-ACPO-Introduce-MLInliner-using-ACPO-infrastructure.patch Patch33: 0033-Find-Python3-in-default-env-PATH-for-ACPO.patch +Patch34: 0034-AArch64-Support-HiSilicon-s-HIP09-sched-model.patch BuildRequires: binutils-devel BuildRequires: cmake @@ -391,6 +392,9 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C ./_build %{install_includedir}/llvm-gmock %changelog +* Fri Nov 22 2024 xiajingze - 17.0.6-28 +- [AArch64] Support HiSilicon's HIP09 sched model + * Wed Nov 20 2024 eastb233 - 17.0.6-27 - Find Python3 in default env PATH for ACPO