From a8cfa61489c2e71e85214bb2682fe5fc9ec2fa53 Mon Sep 17 00:00:00 2001 From: xiajingze Date: Wed, 11 Sep 2024 10:54:40 +0800 Subject: [PATCH] [AArch64] Support HiSilicon's HIP09 Processor (cherry picked from commit 95487e968ff91c07708ed07075820405d5a8b960) --- ...-Support-HiSilicon-s-HIP09-Processor.patch | 517 ++++++++++++++++++ llvm.spec | 6 +- 2 files changed, 522 insertions(+), 1 deletion(-) create mode 100644 0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch diff --git a/0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch b/0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch new file mode 100644 index 0000000..d759ab1 --- /dev/null +++ b/0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch @@ -0,0 +1,517 @@ +From cac43828d26b178807d194b4bd7c5df69603df29 Mon Sep 17 00:00:00 2001 +From: xiajingze +Date: Wed, 31 Jul 2024 18:37:29 +0800 +Subject: [PATCH] [AArch64] Support HiSilicon's HIP09 Processor + +Signed-off-by: xiajingze +--- + llvm/cmake/modules/HandleLLVMOptions.cmake | 8 ++ + .../llvm/TargetParser/AArch64TargetParser.h | 7 ++ + llvm/lib/Target/AArch64/AArch64.td | 36 +++++++ + .../lib/Target/AArch64/AArch64MacroFusion.cpp | 55 +++++++++++ + llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 9 ++ + llvm/lib/Target/AArch64/AArch64Subtarget.h | 9 +- + llvm/lib/Target/CMakeLists.txt | 4 + + llvm/lib/TargetParser/Host.cpp | 3 + + llvm/test/CodeGen/AArch64/cpus-hip09.ll | 11 +++ + .../CodeGen/AArch64/macro-fusion-mvnclz.mir | 20 ++++ + .../AArch64/misched-fusion-lit-hip09.ll | 73 ++++++++++++++ + llvm/test/CodeGen/AArch64/remat-hip09.ll | 18 ++++ + llvm/test/lit.site.cfg.py.in | 4 + + llvm/unittests/TargetParser/Host.cpp | 5 + + .../TargetParser/TargetParserTest.cpp | 16 +++ + 15 files changed, 277 insertions(+), 1 deletion(-) + create mode 100644 llvm/test/CodeGen/AArch64/cpus-hip09.ll + create mode 100644 llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir + create mode 100644 llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll + create mode 100644 
llvm/test/CodeGen/AArch64/remat-hip09.ll + +diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake +index 8be5d4ba5..74e68e25d 100644 +--- a/llvm/cmake/modules/HandleLLVMOptions.cmake ++++ b/llvm/cmake/modules/HandleLLVMOptions.cmake +@@ -112,6 +112,14 @@ else() + set(LLVM_ENABLE_AUTOTUNER 0) + endif() + ++option(LLVM_ENABLE_AARCH64_HIP09 "Enable HIP09 Processor" ON) ++if(LLVM_ENABLE_AARCH64_HIP09) ++ set(LLVM_ENABLE_AARCH64_HIP09 1) ++ add_definitions( -DENABLE_AARCH64_HIP09 ) ++else() ++ set(LLVM_ENABLE_AARCH64_HIP09 0) ++endif() ++ + if(LLVM_ENABLE_EXPENSIVE_CHECKS) + add_compile_definitions(EXPENSIVE_CHECKS) + +diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h +index dc4cdfa8e..07cd2fcbb 100644 +--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h ++++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h +@@ -542,6 +542,13 @@ inline constexpr CpuInfo CpuInfos[] = { + (AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 | + AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES | + AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)}, ++#if defined(ENABLE_AARCH64_HIP09) ++ {"hip09", ARMV8_5A, ++ (AArch64::AEK_AES | AArch64::AEK_SM4 | AArch64::AEK_SHA2 | ++ AArch64::AEK_SHA3 | AArch64::AEK_FP16 | AArch64::AEK_PROFILE | ++ AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM | ++ AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16)}, ++#endif + }; + + // An alias for a CPU. 
+diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td +index 8f50af4b7..c8bfd770f 100644 +--- a/llvm/lib/Target/AArch64/AArch64.td ++++ b/llvm/lib/Target/AArch64/AArch64.td +@@ -296,6 +296,12 @@ def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature< + "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true", + "CPU fuses (a + b + 1) and (a - b - 1)">; + ++#ifdef ENABLE_AARCH64_HIP09 ++def FeatureFuseMvnClz : SubtargetFeature< ++ "fuse-mvn-clz", "HasFuseMvnClz", "true", ++ "CPU fuses mvn+clz operations">; ++#endif ++ + def FeatureDisableLatencySchedHeuristic : SubtargetFeature< + "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", + "Disable latency scheduling heuristic">; +@@ -1205,6 +1211,21 @@ def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", + FeatureFuseAES, + FeaturePostRAScheduler]>; + ++#ifdef ENABLE_AARCH64_HIP09 ++def TuneHIP09 : SubtargetFeature<"hip09", "ARMProcFamily", "HIP09", ++ "HiSilicon HIP-09 processors", [ ++ FeatureCustomCheapAsMoveHandling, ++ FeatureExperimentalZeroingPseudos, ++ FeatureFuseAES, ++ FeatureLSLFast, ++ FeatureAscendStoreAddress, ++ FeatureCmpBccFusion, ++ FeatureArithmeticBccFusion, ++ FeatureFuseLiterals, ++ FeatureFuseMvnClz, ++ FeaturePostRAScheduler]>; ++#endif ++ + def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", + "Ampere Computing Ampere-1 processors", [ + FeaturePostRAScheduler, +@@ -1359,6 +1380,14 @@ def ProcessorFeatures { + list TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSPE, + FeatureFullFP16, FeatureFP16FML, FeatureDotProd]; ++#ifdef ENABLE_AARCH64_HIP09 ++ list HIP09 = [HasV8_5aOps, FeatureBF16, FeatureCrypto, FeatureFPARMv8, ++ FeatureMatMulInt8, FeatureMatMulFP32, FeatureMatMulFP64, ++ FeatureNEON, FeaturePerfMon, FeatureRandGen, FeatureSPE, ++ FeatureFullFP16, FeatureFP16FML, FeatureDotProd, ++ FeatureJS, FeatureComplxNum, FeatureSHA3, FeatureSM4, ++ 
FeatureSVE]; ++#endif + list Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon, + FeatureSSBS, FeatureRandGen, FeatureSB, + FeatureSHA2, FeatureSHA3, FeatureAES]; +@@ -1464,8 +1493,15 @@ def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, + // Marvell ThunderX3T110 Processors. + def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, + ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>; ++ ++// HiSilicon Processors. + def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110, + [TuneTSV110]>; ++#ifdef ENABLE_AARCH64_HIP09 ++// FIXME: HiSilicon HIP09 is currently modeled as a Cortex-A57. ++def : ProcessorModel<"hip09", CortexA57Model, ProcessorFeatures.HIP09, ++ [TuneHIP09]>; ++#endif + + // Support cyclone as an alias for apple-a7 so we can still LTO old bitcode. + def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7, +diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +index 05d60872b..4963ec350 100644 +--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp ++++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +@@ -51,6 +51,12 @@ static bool isArithmeticBccPair(const MachineInstr *FirstMI, + case AArch64::SUBSXrr: + case AArch64::BICSWrr: + case AArch64::BICSXrr: ++#if defined(ENABLE_AARCH64_HIP09) ++ case AArch64::ADCSWr: ++ case AArch64::ADCSXr: ++ case AArch64::SBCSWr: ++ case AArch64::SBCSXr: ++#endif + return true; + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: +@@ -183,6 +189,20 @@ static bool isLiteralsPair(const MachineInstr *FirstMI, + SecondMI.getOperand(3).getImm() == 16)) + return true; + ++#if defined(ENABLE_AARCH64_HIP09) ++ // 32 bit immediate. ++ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNWi) && ++ (SecondMI.getOpcode() == AArch64::MOVKWi && ++ SecondMI.getOperand(3).getImm() == 16)) ++ return true; ++ ++ // Lower half of 64 bit immediate. 
++ if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVNXi) && ++ (SecondMI.getOpcode() == AArch64::MOVKXi && ++ SecondMI.getOperand(3).getImm() == 16)) ++ return true; ++#endif ++ + // Upper half of 64 bit immediate. + if ((FirstMI == nullptr || + (FirstMI->getOpcode() == AArch64::MOVKXi && +@@ -437,6 +457,37 @@ static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI, + return false; + } + ++#if defined(ENABLE_AARCH64_HIP09) ++static bool isMvnClzPair(const MachineInstr *FirstMI, ++ const MachineInstr &SecondMI) { ++ // HIP09 supports fusion of MVN + CLZ. ++ // The CLZ can be fused with MVN and make execution faster. ++ // And the fusion is not allowed for shifted forms. ++ // ++ // Instruction alias info: ++ // 1. MVN <Wd>, <Wm>{, <shift> #<amount>} is equivalent to ++ // ORN <Wd>, WZR, <Wm>{, <shift> #<amount>} ++ // 2. MVN <Xd>, <Xm>{, <shift> #<amount>} is equivalent to ++ // ORN <Xd>, XZR, <Xm>{, <shift> #<amount>} ++ // Assume the 1st instr to be a wildcard if it is unspecified. ++ if ((FirstMI == nullptr || ++ ((FirstMI->getOpcode() == AArch64::ORNWrs) && ++ (FirstMI->getOperand(1).getReg() == AArch64::WZR) && ++ (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) && ++ (SecondMI.getOpcode() == AArch64::CLZWr)) ++ return true; ++ ++ if ((FirstMI == nullptr || ++ ((FirstMI->getOpcode() == AArch64::ORNXrs) && ++ (FirstMI->getOperand(1).getReg() == AArch64::XZR) && ++ (!AArch64InstrInfo::hasShiftedReg(*FirstMI)))) && ++ (SecondMI.getOpcode() == AArch64::CLZXr)) ++ return true; ++ ++ return false; ++} ++#endif ++ + /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused + /// together. Given SecondMI, when FirstMI is unspecified, then check if + /// SecondMI may be part of a fused pair at all.
+@@ -472,6 +523,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + if (ST.hasFuseAddSub2RegAndConstOne() && + isAddSub2RegAndConstOnePair(FirstMI, SecondMI)) + return true; ++#if defined(ENABLE_AARCH64_HIP09) ++ if (ST.hasFuseMvnClz() && isMvnClzPair(FirstMI, SecondMI)) ++ return true; ++#endif + + return false; + } +diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +index 450e27b8a..ddf22364c 100644 +--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp ++++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +@@ -266,6 +266,15 @@ void AArch64Subtarget::initializeProperties() { + PrefFunctionAlignment = Align(16); + PrefLoopAlignment = Align(4); + break; ++#if defined(ENABLE_AARCH64_HIP09) ++ case HIP09: ++ CacheLineSize = 64; ++ PrefFunctionAlignment = Align(16); ++ PrefLoopAlignment = Align(4); ++ VScaleForTuning = 2; ++ DefaultSVETFOpts = TailFoldingOpts::Simple; ++ break; ++#endif + case ThunderX3T110: + CacheLineSize = 64; + PrefFunctionAlignment = Align(16); +diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h +index 5e20d1646..5f481f4f9 100644 +--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h ++++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h +@@ -87,7 +87,10 @@ public: + ThunderXT83, + ThunderXT88, + ThunderX3T110, +- TSV110 ++ TSV110, ++#if defined(ENABLE_AARCH64_HIP09) ++ HIP09 ++#endif + }; + + protected: +@@ -239,7 +242,11 @@ public: + bool hasFusion() const { + return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || + hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() || ++#if defined(ENABLE_AARCH64_HIP09) ++ hasFuseAdrpAdd() || hasFuseLiterals() || hasFuseMvnClz(); ++#else + hasFuseAdrpAdd() || hasFuseLiterals(); ++#endif + } + + unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } +diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt +index 2739233f9..501ce1f2f 100644 +--- 
a/llvm/lib/Target/CMakeLists.txt ++++ b/llvm/lib/Target/CMakeLists.txt +@@ -2,6 +2,10 @@ list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen) + + list(APPEND LLVM_TABLEGEN_FLAGS -I ${LLVM_MAIN_SRC_DIR}/lib/Target) + ++if(LLVM_ENABLE_AARCH64_HIP09) ++ list(APPEND LLVM_TABLEGEN_FLAGS "-DENABLE_AARCH64_HIP09") ++endif() ++ + add_llvm_component_library(LLVMTarget + Target.cpp + TargetIntrinsicInfo.cpp +diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp +index d11dc605e..8b23be02e 100644 +--- a/llvm/lib/TargetParser/Host.cpp ++++ b/llvm/lib/TargetParser/Host.cpp +@@ -257,6 +257,9 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { + // contents are specified in the various processor manuals. + return StringSwitch(Part) + .Case("0xd01", "tsv110") ++#if defined(ENABLE_AARCH64_HIP09) ++ .Case("0xd02", "hip09") ++#endif + .Default("generic"); + + if (Implementer == "0x51") // Qualcomm Technologies, Inc. +diff --git a/llvm/test/CodeGen/AArch64/cpus-hip09.ll b/llvm/test/CodeGen/AArch64/cpus-hip09.ll +new file mode 100644 +index 000000000..dcf32e4dc +--- /dev/null ++++ b/llvm/test/CodeGen/AArch64/cpus-hip09.ll +@@ -0,0 +1,11 @@ ++; REQUIRES: enable_enable_aarch64_hip09 ++; This tests that llc accepts all valid AArch64 CPUs ++ ++; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=hip09 2>&1 | FileCheck %s ++ ++; CHECK-NOT: {{.*}} is not a recognized processor for this target ++; INVALID: {{.*}} is not a recognized processor for this target ++ ++define i32 @f(i64 %z) { ++ ret i32 0 ++} +diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir +new file mode 100644 +index 000000000..64bf15937 +--- /dev/null ++++ b/llvm/test/CodeGen/AArch64/macro-fusion-mvnclz.mir +@@ -0,0 +1,20 @@ ++# REQUIRES: enable_enable_aarch64_hip09 ++# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION ++# RUN: llc -o - %s 
-mtriple=aarch64-- -mattr=-fuse-mvn-clz -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION ++--- ++# CHECK-LABEL: name: fuse-mvn-clz ++# CHECK: $w2 = ORNWrs $wzr, $w1, 0 ++# FUSION: $w0 = CLZWr killed renamable $w2 ++# CHECK: $w3 = ADDWri killed renamable $w1, 1, 0 ++# NOFUSION: $w0 = CLZWr killed renamable $w2 ++name: fuse-mvn-clz ++tracksRegLiveness: true ++body: | ++ bb.0: ++ liveins: $w0, $w1, $w2, $w3 ++ ++ $w2 = ORNWrs $wzr, $w1, 0 ++ $w3 = ADDWri killed renamable $w1, 1, 0 ++ $w0 = CLZWr killed renamable $w2 ++ RET undef $lr, implicit $w0 ++... +diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll +new file mode 100644 +index 000000000..d67fa5b43 +--- /dev/null ++++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit-hip09.ll +@@ -0,0 +1,73 @@ ++; REQUIRES: enable_enable_aarch64_hip09 ++; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=hip09 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE-HIP09 ++ ++@g = common local_unnamed_addr global ptr null, align 8 ++ ++define dso_local ptr @litp(i32 %a, i32 %b) { ++entry: ++ %add = add nsw i32 %b, %a ++ %idx.ext = sext i32 %add to i64 ++ %add.ptr = getelementptr i8, ptr @litp, i64 %idx.ext ++ store ptr %add.ptr, ptr @g, align 8 ++ ret ptr %add.ptr ++ ++; CHECK-LABEL: litp: ++; CHECK: adrp [[R:x[0-9]+]], litp ++; CHECKFUSE-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp ++} ++ ++define dso_local ptr @litp_tune_generic(i32 %a, i32 %b) "tune-cpu"="generic" { ++entry: ++ %add = add nsw i32 %b, %a ++ %idx.ext = sext i32 %add to i64 ++ %add.ptr = getelementptr i8, ptr @litp_tune_generic, i64 %idx.ext ++ store ptr %add.ptr, ptr @g, align 8 ++ ret ptr %add.ptr ++ ++; CHECK-LABEL: litp_tune_generic: ++; CHECK: adrp [[R:x[0-9]+]], litp_tune_generic ++; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic ++} ++ ++define dso_local i32 @liti(i32 %a, i32 %b) { ++entry: ++ %add = add i32 %a, -262095121 ++ %add1 = add i32 %add, %b ++ ret 
i32 %add1 ++ ++; CHECK-LABEL: liti: ++; CHECK: mov [[R:w[0-9]+]], {{#[0-9]+}} ++; CHECKDONT-NEXT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} ++; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16 ++; CHECKFUSE-HIP09: movk [[R]], {{#[0-9]+}}, lsl #16 ++} ++ ++; Function Attrs: norecurse nounwind readnone ++define dso_local i64 @litl(i64 %a, i64 %b) { ++entry: ++ %add = add i64 %a, 2208998440489107183 ++ %add1 = add i64 %add, %b ++ ret i64 %add1 ++ ++; CHECK-LABEL: litl: ++; CHECK: mov [[R:x[0-9]+]], {{#[0-9]+}} ++; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} ++; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #16 ++; CHECK: movk [[R]], {{#[0-9]+}}, lsl #32 ++; CHECK-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48 ++} ++ ++; Function Attrs: norecurse nounwind readnone ++define dso_local double @litf() { ++entry: ++ ret double 0x400921FB54442D18 ++ ++; CHECK-LABEL: litf: ++; CHECK-DONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]] ++; CHECK-DONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}} ++; CHECKFUSE-HIP09: mov [[R:x[0-9]+]], #11544 ++; CHECKFUSE-HIP09: movk [[R]], #21572, lsl #16 ++; CHECKFUSE-HIP09: movk [[R]], #8699, lsl #32 ++; CHECKFUSE-HIP09: movk [[R]], #16393, lsl #48 ++; CHECKFUSE-HIP09: fmov {{d[0-9]+}}, [[R]] ++} +diff --git a/llvm/test/CodeGen/AArch64/remat-hip09.ll b/llvm/test/CodeGen/AArch64/remat-hip09.ll +new file mode 100644 +index 000000000..aec0d18ae +--- /dev/null ++++ b/llvm/test/CodeGen/AArch64/remat-hip09.ll +@@ -0,0 +1,18 @@ ++; REQUIRES: enable_enable_aarch64_hip09 ++; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=hip09 -o - %s | FileCheck %s ++ ++%X = type { i64, i64, i64 } ++declare void @f(ptr) ++define void @t() { ++entry: ++ %tmp = alloca %X ++ call void @f(ptr %tmp) ++; CHECK: add x0, sp, #8 ++; CHECK-NOT: mov ++; CHECK-NEXT: bl f ++ call void @f(ptr %tmp) ++; CHECK: add x0, sp, #8 ++; CHECK-NOT: mov ++; CHECK-NEXT: bl f ++ ret void ++} +diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in +index 
20c1ecca1..6145a514f 100644 +--- a/llvm/test/lit.site.cfg.py.in ++++ b/llvm/test/lit.site.cfg.py.in +@@ -64,9 +64,13 @@ config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ + config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ + config.enable_enable_autotuner = @LLVM_ENABLE_AUTOTUNER@ ++config.enable_enable_aarch64_hip09 = @LLVM_ENABLE_AARCH64_HIP09@ + + import lit.llvm + lit.llvm.initialize(lit_config, config) + ++if config.enable_enable_aarch64_hip09: ++ config.available_features.add("enable_enable_aarch64_hip09") ++ + # Let the main config do the real work. + lit_config.load_config( + config, os.path.join(config.llvm_src_root, "test/lit.cfg.py")) +diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp +index 452d0326c..4b4c81514 100644 +--- a/llvm/unittests/TargetParser/Host.cpp ++++ b/llvm/unittests/TargetParser/Host.cpp +@@ -250,6 +250,11 @@ CPU part : 0x0a1 + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n" + "CPU part : 0xd01"), + "tsv110"); ++#if defined(ENABLE_AARCH64_HIP09) ++ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x48\n" ++ "CPU part : 0xd02"), ++ "hip09"); ++#endif + + // Verify A64FX. 
+ const std::string A64FXProcCpuInfo = R"( +diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp +index 741d5a2d4..94e0047e5 100644 +--- a/llvm/unittests/TargetParser/TargetParserTest.cpp ++++ b/llvm/unittests/TargetParser/TargetParserTest.cpp +@@ -1421,6 +1421,18 @@ INSTANTIATE_TEST_SUITE_P( + AArch64::AEK_PROFILE | AArch64::AEK_FP16 | + AArch64::AEK_FP16FML | AArch64::AEK_DOTPROD, + "8.2-A"), ++#if defined(ENABLE_AARCH64_HIP09) ++ ARMCPUTestParams( ++ "hip09", "armv8.5-a", "crypto-neon-fp-armv8", ++ AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_SIMD | ++ AArch64::AEK_RAS | AArch64::AEK_LSE | AArch64::AEK_RDM | ++ AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_AES | ++ AArch64::AEK_SM4 | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 | ++ AArch64::AEK_FP16 | AArch64::AEK_PROFILE | ++ AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_I8MM | ++ AArch64::AEK_F32MM | AArch64::AEK_F64MM | AArch64::AEK_BF16, ++ "8.5-A"), ++#endif + ARMCPUTestParams("a64fx", "armv8.2-a", "crypto-neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_AES | + AArch64::AEK_SHA2 | AArch64::AEK_FP | +@@ -1437,7 +1449,11 @@ INSTANTIATE_TEST_SUITE_P( + "8.2-A"))); + + // Note: number of CPUs includes aliases. 
++#if defined(ENABLE_AARCH64_HIP09) ++static constexpr unsigned NumAArch64CPUArchs = 63; ++#else + static constexpr unsigned NumAArch64CPUArchs = 62; ++#endif + + TEST(TargetParserTest, testAArch64CPUArchList) { + SmallVector List; +-- +2.19.1 + diff --git a/llvm.spec b/llvm.spec index f97805b..0e0db4c 100644 --- a/llvm.spec +++ b/llvm.spec @@ -44,7 +44,7 @@ Name: %{pkg_name} Version: %{maj_ver}.%{min_ver}.%{patch_ver} -Release: 17 +Release: 18 Summary: The Low Level Virtual Machine License: NCSA @@ -77,6 +77,7 @@ Patch19: 0019-Backport-LoongArch-Improve-the-support-for-atomic-and-clear_cache Patch20: 0020-Update-llvm-lit-config-to-support-build_for_openeule.patch Patch21: 0021-Add-BiSheng-Autotuner-support-for-LLVM-compiler.patch Patch22: 0022-Prevent-environment-variables-from-exceeding-NAME_MA.patch +Patch23: 0023-AArch64-Support-HiSilicon-s-HIP09-Processor.patch BuildRequires: binutils-devel BuildRequires: cmake @@ -373,6 +374,9 @@ LD_LIBRARY_PATH=%{buildroot}/%{install_libdir} %{__ninja} check-all -C ./_build %{install_includedir}/llvm-gmock %changelog +* Wed Sep 11 2024 xiajingze - 17.0.6-18 +- [AArch64] Support HiSilicon's HIP09 Processor + * Wed Sep 11 2024 hongjinghao - 17.0.6-17 - doc add Provides llvm-help -- Gitee