From bd5b02f5ab28a82003c94966f429e35df1e3fbe1 Mon Sep 17 00:00:00 2001 From: swcompiler Date: Thu, 23 Jan 2025 14:45:22 +0800 Subject: [PATCH 1/3] [Sw64] Add Sw64 target support for llvm --- llvm/CMakeLists.txt | 3 +- llvm/cmake/config-ix.cmake | 4 + llvm/cmake/config.guess | 9 + llvm/include/llvm/BinaryFormat/ELF.h | 34 + .../llvm/BinaryFormat/ELFRelocs/Sw64.def | 44 + .../llvm/CodeGen/MachineCombinerPattern.h | 10 + .../llvm/ExecutionEngine/Orc/OrcABISupport.h | 40 + llvm/include/llvm/IR/CMakeLists.txt | 1 + llvm/include/llvm/IR/Intrinsics.td | 1 + llvm/include/llvm/IR/IntrinsicsSw64.td | 651 +++ llvm/include/llvm/MC/MCAsmInfo.h | 4 + llvm/include/llvm/MC/MCExpr.h | 26 + llvm/include/llvm/Object/ELFObjectFile.h | 5 + llvm/include/llvm/Support/Sw64ABIFlags.h | 39 + .../include/llvm/Support/Sw64TargetParser.def | 28 + llvm/include/llvm/Support/Sw64TargetParser.h | 53 + llvm/include/llvm/TargetParser/Host.h | 1 + llvm/include/llvm/TargetParser/Triple.h | 25 +- llvm/include/module.modulemap | 1 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 81 + .../CodeGen/TargetLoweringObjectFileImpl.cpp | 7 + .../Orc/EPCIndirectionUtils.cpp | 3 + .../ExecutionEngine/Orc/IndirectionUtils.cpp | 10 + llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 4 + .../lib/ExecutionEngine/Orc/LazyReexports.cpp | 2 + .../lib/ExecutionEngine/Orc/OrcABISupport.cpp | 262 ++ .../RuntimeDyld/CMakeLists.txt | 1 + .../RuntimeDyld/RuntimeDyldELF.cpp | 59 + .../RuntimeDyld/RuntimeDyldELF.h | 7 + .../Targets/RuntimeDyldELFSw64.cpp | 217 + .../RuntimeDyld/Targets/RuntimeDyldELFSw64.h | 61 + llvm/lib/IR/Function.cpp | 1 + llvm/lib/MC/ELFObjectWriter.cpp | 33 + llvm/lib/MC/MCAsmStreamer.cpp | 11 +- llvm/lib/MC/MCELFStreamer.cpp | 10 + llvm/lib/MC/MCExpr.cpp | 50 + llvm/lib/MC/MCObjectFileInfo.cpp | 3 + llvm/lib/MC/MCSectionELF.cpp | 4 + llvm/lib/Object/ELF.cpp | 24 + llvm/lib/Object/RelocationResolver.cpp | 27 + llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/Sw64TargetParser.cpp | 96 + llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt | 13 + .../Target/Sw64/AsmParser/Sw64AsmParser.cpp | 2005 +++++++++ llvm/lib/Target/Sw64/CMakeLists.txt | 64 + .../Target/Sw64/Disassembler/CMakeLists.txt | 11 + .../Sw64/Disassembler/Sw64Disassembler.cpp | 390 ++ .../Target/Sw64/InstPrinter/CMakeLists.txt | 10 + .../Sw64/InstPrinter/Sw64InstPrinter.cpp | 148 + .../Target/Sw64/InstPrinter/Sw64InstPrinter.h | 57 + .../Target/Sw64/MCTargetDesc/CMakeLists.txt | 22 + .../Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp | 31 + .../Sw64/MCTargetDesc/Sw64ABIFlagsSection.h | 127 + .../Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp | 29 + .../Target/Sw64/MCTargetDesc/Sw64ABIInfo.h | 77 + .../Sw64/MCTargetDesc/Sw64AsmBackend.cpp | 317 ++ .../Target/Sw64/MCTargetDesc/Sw64AsmBackend.h | 96 + .../Target/Sw64/MCTargetDesc/Sw64BaseInfo.h | 146 + .../Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp | 463 ++ .../Sw64/MCTargetDesc/Sw64ELFStreamer.cpp | 108 + .../Sw64/MCTargetDesc/Sw64ELFStreamer.h | 83 + .../Target/Sw64/MCTargetDesc/Sw64FixupKinds.h | 174 + .../Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp | 42 + .../Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h | 32 + .../Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp | 451 ++ .../Sw64/MCTargetDesc/Sw64MCCodeEmitter.h | 111 + .../Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp | 176 + .../lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h | 97 + .../Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp | 189 + .../Sw64/MCTargetDesc/Sw64MCTargetDesc.h | 66 + .../Sw64/MCTargetDesc/Sw64OptionRecord.cpp | 32 + .../Sw64/MCTargetDesc/Sw64TargetStreamer.cpp | 388 ++ llvm/lib/Target/Sw64/README.txt | 7 + 
llvm/lib/Target/Sw64/Sw64.h | 56 + llvm/lib/Target/Sw64/Sw64.td | 154 + llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp | 308 ++ llvm/lib/Target/Sw64/Sw64BranchSelector.cpp | 81 + llvm/lib/Target/Sw64/Sw64CallingConv.td | 72 + llvm/lib/Target/Sw64/Sw64CombineLS.cpp | 63 + llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp | 1141 +++++ llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp | 334 ++ llvm/lib/Target/Sw64/Sw64FrameLowering.cpp | 456 ++ llvm/lib/Target/Sw64/Sw64FrameLowering.h | 82 + llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp | 138 + llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp | 1016 +++++ llvm/lib/Target/Sw64/Sw64ISelLowering.cpp | 3984 +++++++++++++++++ llvm/lib/Target/Sw64/Sw64ISelLowering.h | 476 ++ llvm/lib/Target/Sw64/Sw64InstrFormats.td | 452 ++ llvm/lib/Target/Sw64/Sw64InstrFormatsV.td | 389 ++ llvm/lib/Target/Sw64/Sw64InstrInfo.cpp | 1012 +++++ llvm/lib/Target/Sw64/Sw64InstrInfo.h | 143 + llvm/lib/Target/Sw64/Sw64InstrInfo.td | 2084 +++++++++ llvm/lib/Target/Sw64/Sw64InstrVector.td | 1767 ++++++++ llvm/lib/Target/Sw64/Sw64LLRP.cpp | 475 ++ llvm/lib/Target/Sw64/Sw64MCInstLower.cpp | 281 ++ llvm/lib/Target/Sw64/Sw64MCInstLower.h | 44 + .../Target/Sw64/Sw64MachineFunctionInfo.cpp | 33 + .../lib/Target/Sw64/Sw64MachineFunctionInfo.h | 69 + llvm/lib/Target/Sw64/Sw64MacroFusion.cpp | 65 + llvm/lib/Target/Sw64/Sw64MacroFusion.h | 28 + llvm/lib/Target/Sw64/Sw64OptionRecord.h | 67 + .../Target/Sw64/Sw64PreLegalizerCombiner.cpp | 96 + llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp | 296 ++ llvm/lib/Target/Sw64/Sw64RegisterInfo.h | 79 + llvm/lib/Target/Sw64/Sw64RegisterInfo.td | 306 ++ llvm/lib/Target/Sw64/Sw64Relocations.h | 30 + llvm/lib/Target/Sw64/Sw64SchedCore3.td | 213 + llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td | 57 + llvm/lib/Target/Sw64/Sw64SchedCore4.td | 75 + llvm/lib/Target/Sw64/Sw64Schedule.td | 86 + llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp | 54 + llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h | 34 + llvm/lib/Target/Sw64/Sw64Subtarget.cpp | 117 + llvm/lib/Target/Sw64/Sw64Subtarget.h | 163 + llvm/lib/Target/Sw64/Sw64TargetMachine.cpp | 193 + llvm/lib/Target/Sw64/Sw64TargetMachine.h | 61 + llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp | 121 + llvm/lib/Target/Sw64/Sw64TargetObjectFile.h | 49 + llvm/lib/Target/Sw64/Sw64TargetStreamer.h | 150 + .../Target/Sw64/Sw64TargetTransformInfo.cpp | 787 ++++ .../lib/Target/Sw64/Sw64TargetTransformInfo.h | 137 + llvm/lib/Target/Sw64/Sw64VectorVarDefine.td | 317 ++ .../lib/Target/Sw64/TargetInfo/CMakeLists.txt | 10 + .../Target/Sw64/TargetInfo/Sw64TargetInfo.cpp | 24 + .../Target/Sw64/TargetInfo/Sw64TargetInfo.h | 12 + llvm/lib/TargetParser/Host.cpp | 10 + llvm/lib/TargetParser/Triple.cpp | 22 + llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll | 2 +- llvm/test/ExecutionEngine/MCJIT/lit.local.cfg | 2 + .../MCJIT/remote/lit.local.cfg | 4 + .../ExecutionEngine/OrcLazy/lit.local.cfg | 1 + .../tools/llvm-reduce/file-output-type.test | 1 + third-party/benchmark/src/cycleclock.h | 5 + 133 files changed, 26692 insertions(+), 7 deletions(-) create mode 100644 llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def create mode 100644 llvm/include/llvm/IR/IntrinsicsSw64.td create mode 100644 llvm/include/llvm/Support/Sw64ABIFlags.h create mode 100644 llvm/include/llvm/Support/Sw64TargetParser.def create mode 100644 llvm/include/llvm/Support/Sw64TargetParser.h create mode 100644 llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp create mode 100644 llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h create mode 100644 llvm/lib/Support/Sw64TargetParser.cpp 
create mode 100644 llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp create mode 100644 llvm/lib/Target/Sw64/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp create mode 100644 llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp create mode 100644 llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp create mode 100644 llvm/lib/Target/Sw64/README.txt create mode 100644 llvm/lib/Target/Sw64/Sw64.h create mode 100644 llvm/lib/Target/Sw64/Sw64.td create mode 100644 llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64BranchSelector.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64CallingConv.td create mode 100644 llvm/lib/Target/Sw64/Sw64CombineLS.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64FrameLowering.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64FrameLowering.h create mode 100644 llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ISelLowering.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ISelLowering.h create mode 100644 llvm/lib/Target/Sw64/Sw64InstrFormats.td create mode 100644 llvm/lib/Target/Sw64/Sw64InstrFormatsV.td create mode 100644 llvm/lib/Target/Sw64/Sw64InstrInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64InstrInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64InstrInfo.td create mode 100644 llvm/lib/Target/Sw64/Sw64InstrVector.td create mode 100644 llvm/lib/Target/Sw64/Sw64LLRP.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64MCInstLower.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64MCInstLower.h create mode 100644 
llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64MacroFusion.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64MacroFusion.h create mode 100644 llvm/lib/Target/Sw64/Sw64OptionRecord.h create mode 100644 llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64RegisterInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64RegisterInfo.td create mode 100644 llvm/lib/Target/Sw64/Sw64Relocations.h create mode 100644 llvm/lib/Target/Sw64/Sw64SchedCore3.td create mode 100644 llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td create mode 100644 llvm/lib/Target/Sw64/Sw64SchedCore4.td create mode 100644 llvm/lib/Target/Sw64/Sw64Schedule.td create mode 100644 llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64Subtarget.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64Subtarget.h create mode 100644 llvm/lib/Target/Sw64/Sw64TargetMachine.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64TargetMachine.h create mode 100644 llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64TargetObjectFile.h create mode 100644 llvm/lib/Target/Sw64/Sw64TargetStreamer.h create mode 100644 llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64VectorVarDefine.td create mode 100644 llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp create mode 100644 llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index b0afb47a7243..3b69d027e006 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -441,6 +441,7 @@ set(LLVM_ALL_TARGETS PowerPC RISCV Sparc + Sw64 SystemZ VE WebAssembly @@ -458,7 +459,7 @@ set(LLVM_ALL_EXPERIMENTAL_TARGETS ) # List of targets with JIT support: -set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ) +set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ Sw64) set(LLVM_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index f63c3f1a351f..5d430f974bbf 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -512,6 +512,10 @@ elseif (LLVM_NATIVE_ARCH STREQUAL "m68k") set(LLVM_NATIVE_ARCH M68k) elseif (LLVM_NATIVE_ARCH MATCHES "loongarch") set(LLVM_NATIVE_ARCH LoongArch) +elseif (LLVM_NATIVE_ARCH MATCHES "sw64") + set(LLVM_NATIVE_ARCH Sw64) +elseif (LLVM_NATIVE_ARCH MATCHES "sw_64") + set(LLVM_NATIVE_ARCH Sw64) else () message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") endif () diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess index 71abbf939f97..121b903140eb 100644 --- a/llvm/cmake/config.guess +++ b/llvm/cmake/config.guess @@ -880,6 +880,15 @@ EOF if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} exit ;; + sw_64:Linux:*:* | sw_64:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + SW6) UNAME_MACHINE=sw_64 ;; + esac + UNAME_MACHINE=sw_64 + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-sunway-linux-gnu${LIBC} + exit ;; arm*:Linux:*:*) eval $set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index f5a7cdb387a6..bf13694cf142 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -320,6 +320,7 @@ enum { EM_VE = 251, // NEC SX-Aurora VE EM_CSKY = 252, // C-SKY 32-bit processor EM_LOONGARCH = 258, // LoongArch + EM_SW64 = 0x9916, // SW64 }; // Object file classes. @@ -604,6 +605,31 @@ enum { ODK_PAGESIZE = 11 // Page size information }; +// SW64 Specific e_flags +enum { + + EF_SW64_NOREORDER = 0x00000001, // Don't reorder instructions + EF_SW64_PIC = 0x00000002, // Position independent code + EF_SW64_CPIC = 0x00000004, // Call object with Position independent code + EF_SW64_ABI2 = 0x00000020, // File uses N32 ABI + EF_SW64_32BITMODE = 0x00000100, // Code compiled for a 64-bit machine + // in 32-bit mode + EF_SW64_FP64 = 0x00000200, // Code compiled for a 32-bit machine + // but uses 64-bit FP registers + EF_SW64_NAN2008 = 0x00000400, // Uses IEE 754-2008 NaN encoding + // ABI flags + EF_SW64_ABI_EABI64 = 0x00004000, // EABI in 64 bit mode. + EF_SW64_ABI = 0x0000f000, // Mask for selecting EF_SW64_ABI_ variant. + EF_SW64_32BIT = 0x00000001, // All addresses must be below 2GB. + EF_SW64_CANRELAX = 0x00000002 // All relocations needed for relaxation with + // code movement are present. +}; + +// ELF Relocation types for Sw64. +enum { +#include "ELFRelocs/Sw64.def" +}; + // Hexagon-specific e_flags enum { // Object processor version flags, bits[11:0] @@ -1075,6 +1101,11 @@ enum : unsigned { SHT_CSKY_ATTRIBUTES = 0x70000001U, + SHT_SW64_ABIFLAGS = 0x7000002a, // ABI information. + SHT_SW64_REGINFO = 0x70000002, // Register usage information + SHT_SW64_OPTIONS = 0x7000000d, // General options + SHT_SW64_DWARF = 0x7000001e, // DWARF debugging section. + SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type. SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. SHT_HIUSER = 0xffffffff // Highest type reserved for applications. @@ -1180,6 +1211,9 @@ enum : unsigned { // Section data is string data by default. SHF_MIPS_STRING = 0x80000000, + // Do not strip this section. 
+ SHF_SW64_NOSTRIP = 0x8000000, + // Make code section unreadable when in execute-only mode SHF_ARM_PURECODE = 0x20000000 }; diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def new file mode 100644 index 000000000000..0edecd02be6d --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def @@ -0,0 +1,44 @@ +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_SW_64_NONE, 0) +ELF_RELOC(R_SW_64_REFLONG, 1) +ELF_RELOC(R_SW_64_REFQUAD, 2) +ELF_RELOC(R_SW_64_GPREL32, 3) +ELF_RELOC(R_SW_64_LITERAL, 4) +ELF_RELOC(R_SW_64_LITUSE, 5) +ELF_RELOC(R_SW_64_GPDISP, 6) +ELF_RELOC(R_SW_64_BRADDR, 7) +ELF_RELOC(R_SW_64_HINT, 8) +ELF_RELOC(R_SW_64_SREL16, 9) +ELF_RELOC(R_SW_64_SREL32, 10) +ELF_RELOC(R_SW_64_SREL64, 11) +ELF_RELOC(R_SW_64_GPRELHIGH, 17) +ELF_RELOC(R_SW_64_GPRELLOW, 18) +ELF_RELOC(R_SW_64_GPREL16, 19) +ELF_RELOC(R_SW_64_COPY, 24) +ELF_RELOC(R_SW_64_GLOB_DAT, 25) +ELF_RELOC(R_SW_64_JMP_SLOT, 26) +ELF_RELOC(R_SW_64_RELATIVE, 27) +ELF_RELOC(R_SW_64_BRSGP, 28) +ELF_RELOC(R_SW_64_TLSGD, 29) +ELF_RELOC(R_SW_64_TLSLDM, 30) +ELF_RELOC(R_SW_64_DTPMOD64, 31) +ELF_RELOC(R_SW_64_GOTDTPREL, 32) +ELF_RELOC(R_SW_64_DTPREL64, 33) +ELF_RELOC(R_SW_64_DTPRELHI, 34) +ELF_RELOC(R_SW_64_DTPRELLO, 35) +ELF_RELOC(R_SW_64_DTPREL16, 36) +ELF_RELOC(R_SW_64_GOTTPREL, 37) +ELF_RELOC(R_SW_64_TPREL64, 38) +ELF_RELOC(R_SW_64_TPRELHI, 39) +ELF_RELOC(R_SW_64_TPRELLO, 40) +ELF_RELOC(R_SW_64_TPREL16, 41) +ELF_RELOC(R_SW_64_NUM, 42) +ELF_RELOC(R_SW_64_LITERAL_GOT, 43) +ELF_RELOC(R_SW_64_TLSREL_GOT, 44) +ELF_RELOC(R_SW_64_PC32, 48) +ELF_RELOC(R_SW_64_EH, 49) +ELF_RELOC(R_SW_64_DUMMY_LITERAL, 98) +ELF_RELOC(R_SW_64_DUMMY_LITUSE, 99) diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index 89eed7463bd7..f73715f6d30a 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -180,6 +180,16 @@ enum class MachineCombinerPattern { DPWSSD, FNMADD, + + // SW64 + VMULADDS_OP1, + VMULADDS_OP2, + VMULADDD_OP1, + VMULADDD_OP2, + VMULSUBS_OP1, + VMULSUBS_OP2, + VMULSUBD_OP1, + VMULSUBD_OP2, }; } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h index 5d25a3e85464..e514b0a46f52 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -416,6 +416,46 @@ public: unsigned NumStubs); }; +// @brief Sw64 support. +// +// Sw64 supports lazy JITing. +class OrcSw64 { +public: + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 40; + static constexpr unsigned StubSize = 32; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0x218; + + /// Write the resolver code into the given memory. The user is + /// responsible for allocating the memory and setting permissions. + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. 
+ static void writeResolverCode(char *ResolverWorkingMem, + ExecutorAddr ResolverTargetAddress, + ExecutorAddr ReentryFnAddr, + ExecutorAddr ReentryCtxAddr); + + /// Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(char *TrampolineBlockWorkingMem, + ExecutorAddr TrampolineBlockTargetAddress, + ExecutorAddr ResolverFnAddr, + unsigned NumTrampolines); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock(char *StubsBlockWorkingMem, + ExecutorAddr StubsBlockTargetAddress, + ExecutorAddr PointersBlockTargetAddress, + unsigned NumStubs); +}; + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt index 468d663796ed..7a1343b14dfe 100644 --- a/llvm/include/llvm/IR/CMakeLists.txt +++ b/llvm/include/llvm/IR/CMakeLists.txt @@ -22,4 +22,5 @@ tablegen(LLVM IntrinsicsWebAssembly.h -gen-intrinsic-enums -intrinsic-prefix=was tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86) tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore) tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve) +tablegen(LLVM IntrinsicsSw64.h -gen-intrinsic-enums -intrinsic-prefix=sw64) add_public_tablegen_target(intrinsics_gen) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e51c04fbad2f..20a8fa419465 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2535,6 +2535,7 @@ include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" include "llvm/IR/IntrinsicsRISCV.td" include "llvm/IR/IntrinsicsSPIRV.td" +include "llvm/IR/IntrinsicsSw64.td" include "llvm/IR/IntrinsicsVE.td" include "llvm/IR/IntrinsicsDirectX.td" include "llvm/IR/IntrinsicsLoongArch.td" diff --git a/llvm/include/llvm/IR/IntrinsicsSw64.td b/llvm/include/llvm/IR/IntrinsicsSw64.td new file mode 100644 index 000000000000..1d5671885a94 --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsSw64.td @@ -0,0 +1,651 @@ +//==- IntrinsicsSw64.td - Sw64 intrinsics -*- tablegen -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the Sw64-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "sw64" in { // All intrinsics start with "llvm.sw64.". 
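(Editorial note, not part of the submitted patch.) Each definition in this TableGen block pairs an llvm.sw64.* intrinsic with a ClangBuiltin name, so once the matching Clang builtin is wired up in the companion front-end patch, ordinary C/C++ code can reach the intrinsic directly. A minimal hedged sketch follows; the wrapper name swumulh is invented for illustration, and only __builtin_sw_64_umulh itself comes from the defs below.

    /* Sketch only: assumes a Clang build carrying the Sw64 builtins from this series. */
    unsigned long swumulh(unsigned long a, unsigned long b) {
      /* Expected to select the llvm.sw64.umulh intrinsic defined just below. */
      return __builtin_sw_64_umulh(a, b);
    }
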
+def int_sw64_umulh : ClangBuiltin<"__builtin_sw_64_umulh">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + +def int_sw64_crc32b : ClangBuiltin<"__builtin_sw64_crc32b">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32h : ClangBuiltin<"__builtin_sw64_crc32h">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32w : ClangBuiltin<"__builtin_sw64_crc32w">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32l : ClangBuiltin<"__builtin_sw64_crc32l">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32cb : ClangBuiltin<"__builtin_sw64_crc32cb">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32ch : ClangBuiltin<"__builtin_sw64_crc32ch">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32cw : ClangBuiltin<"__builtin_sw64_crc32cw">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32cl : ClangBuiltin<"__builtin_sw64_crc32cl">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_sbt : ClangBuiltin<"__builtin_sw64_sbt">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_cbt : ClangBuiltin<"__builtin_sw64_cbt">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_ctpopow : ClangBuiltin<"__builtin_sw_ctpopow">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_ctlzow : ClangBuiltin<"__builtin_sw_ctlzow">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_reduc_plusw : ClangBuiltin<"__builtin_sw_reduc_plusw">, + Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_reduc_pluss : ClangBuiltin<"__builtin_sw_reduc_pluss">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_reduc_plusd : ClangBuiltin<"__builtin_sw_reduc_plusd">, + Intrinsic<[llvm_double_ty], [llvm_v4f64_ty], + [IntrNoMem]>; + +// SIMD Intrincs +def int_sw64_vaddw : ClangBuiltin<"__builtin_sw_vaddw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddw : ClangBuiltin<"__builtin_sw_vucaddw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddwi : ClangBuiltin<"__builtin_sw_vucaddwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vucsubw : ClangBuiltin<"__builtin_sw_vucsubw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucsubwi : ClangBuiltin<"__builtin_sw_vucsubwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddh : ClangBuiltin<"__builtin_sw_vucaddh">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddhi : ClangBuiltin<"__builtin_sw_vucaddhi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vucsubh : ClangBuiltin<"__builtin_sw_vucsubh">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucsubhi : ClangBuiltin<"__builtin_sw_vucsubhi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vucaddb : ClangBuiltin<"__builtin_sw_vucaddb">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddbi : 
ClangBuiltin<"__builtin_sw_vucaddbi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vucsubb : ClangBuiltin<"__builtin_sw_vucsubb">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucsubbi : ClangBuiltin<"__builtin_sw_vucsubbi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vslls : ClangBuiltin<"__builtin_sw_vslls">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vslld : ClangBuiltin<"__builtin_sw_vslld">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrls : ClangBuiltin<"__builtin_sw_vsrls">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrld : ClangBuiltin<"__builtin_sw_vsrld">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; + +class sw64VectorIntArg + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>; +def int_sw64_vsll : sw64VectorIntArg; +def int_sw64_vsrl : sw64VectorIntArg; +def int_sw64_vsra : sw64VectorIntArg; +def int_sw64_vrol : sw64VectorIntArg; + +def int_sw64_vsllw : ClangBuiltin<"__builtin_sw_vsllw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsllh : ClangBuiltin<"__builtin_sw_vsllh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsllb : ClangBuiltin<"__builtin_sw_vslln">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vslll : ClangBuiltin<"__builtin_sw_vslll">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrlw : ClangBuiltin<"__builtin_sw_vsrlw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrlh : ClangBuiltin<"__builtin_sw_vsrlh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrlb : ClangBuiltin<"__builtin_sw_vsrlb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrll : ClangBuiltin<"__builtin_sw_vsrll">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsraw : ClangBuiltin<"__builtin_sw_vsraw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrah : ClangBuiltin<"__builtin_sw_vsrah">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrab : ClangBuiltin<"__builtin_sw_vsrab">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsral : ClangBuiltin<"__builtin_sw_vsral">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_vrolw : ClangBuiltin<"__builtin_sw_vrolw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vrolwi : ClangBuiltin<"__builtin_sw_vrolwi">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vrolb : ClangBuiltin<"__builtin_sw_vrolb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vrolbi : ClangBuiltin<"__builtin_sw_vrolbi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vrolh : ClangBuiltin<"__builtin_sw_vrolh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vrolhi : ClangBuiltin<"__builtin_sw_vrolhi">, + 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vroll : ClangBuiltin<"__builtin_sw_vroll">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vrolli : ClangBuiltin<"__builtin_sw_vrolli">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_sw64_sllow : ClangBuiltin<"__builtin_sw_sllow">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_srlow : ClangBuiltin<"__builtin_sw_srlow">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_sllowi : ClangBuiltin<"__builtin_sw_sllowi">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_srlowi : ClangBuiltin<"__builtin_sw_srlowi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_sw64_vsqrts : ClangBuiltin<"__builtin_sw_vsqrts">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vsqrtd : ClangBuiltin<"__builtin_sw_vsqrtd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; + +def int_sw64_vornotw : ClangBuiltin<"__builtin_sw_vornotw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_veqvw : ClangBuiltin<"__builtin_sw_veqvw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +def int_sw64_vfcmpeqs : ClangBuiltin<"__builtin_sw_vfcmpeqs">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcmples : ClangBuiltin<"__builtin_sw_vfcmples">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcmplts : ClangBuiltin<"__builtin_sw_vfcmplts">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcmpuns : ClangBuiltin<"__builtin_sw_vfcmpuns">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcmpeqd : ClangBuiltin<"__builtin_sw_vfcmpeqd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcmpled : ClangBuiltin<"__builtin_sw_vfcmpled">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcmpltd : ClangBuiltin<"__builtin_sw_vfcmpltd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcmpund : ClangBuiltin<"__builtin_sw_vfcmpund">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vcpyss : ClangBuiltin<"__builtin_sw_vcpyss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vcpysns : ClangBuiltin<"__builtin_sw_vcpysns">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vcpyses : ClangBuiltin<"__builtin_sw_vcpyses">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vcpysd : ClangBuiltin<"__builtin_sw_vcpysd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vcpysnd : ClangBuiltin<"__builtin_sw_vcpysnd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vcpysed : ClangBuiltin<"__builtin_sw_vcpysed">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vseleqw : ClangBuiltin<"__builtin_sw_vseleqw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vseleqwi : 
ClangBuiltin<"__builtin_sw_vseleqwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vselltw : ClangBuiltin<"__builtin_sw_vselltw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vselltwi : ClangBuiltin<"__builtin_sw_vselltwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsellew : ClangBuiltin<"__builtin_sw_vsellew">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vsellewi : ClangBuiltin<"__builtin_sw_vsellewi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsellbcw : ClangBuiltin<"__builtin_sw_vsellbcw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vsellbcwi : ClangBuiltin<"__builtin_sw_vsellbcwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vfseleqs : ClangBuiltin<"__builtin_sw_vfseleqs">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfselles : ClangBuiltin<"__builtin_sw_vfselles">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfsellts : ClangBuiltin<"__builtin_sw_vfsellts">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfseleqd : ClangBuiltin<"__builtin_sw_vfseleqd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfselled : ClangBuiltin<"__builtin_sw_vfselled">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfselltd : ClangBuiltin<"__builtin_sw_vfselltd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +// Multiply-add instructions +// FIXME +def int_sw64_vnmsd : ClangBuiltin<"__builtin_sw_vnmsd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +// Vector Insert Intrinsic +def int_sw64_vinsfs : ClangBuiltin<"__builtin_sw_vinsfs">, + Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsfd : ClangBuiltin<"__builtin_sw_vinsfd">, + Intrinsic<[llvm_v4f64_ty], [llvm_double_ty, llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsw : ClangBuiltin<"__builtin_sw_vinsw">, + Intrinsic<[llvm_v8i32_ty], [llvm_i64_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsb : ClangBuiltin<"__builtin_sw_vinsb">, + Intrinsic<[llvm_v32i8_ty], [llvm_i64_ty, llvm_v32i8_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsh : ClangBuiltin<"__builtin_sw_vinsh">, + Intrinsic<[llvm_v16i16_ty], [llvm_i64_ty, llvm_v16i16_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsl : ClangBuiltin<"__builtin_sw_vinsl">, + Intrinsic<[llvm_v4i64_ty], [llvm_i64_ty, llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_vextfs : ClangBuiltin<"__builtin_sw_vextfs">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vextfd : ClangBuiltin<"__builtin_sw_vextfd">, + Intrinsic<[llvm_double_ty], [llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vextw : ClangBuiltin<"__builtin_sw_vextw">, + Intrinsic<[llvm_i64_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vextl : 
ClangBuiltin<"__builtin_sw_vextl">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vconw : ClangBuiltin<"__builtin_sw_vconw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_ptr_ty], + [IntrNoMem, IntrArgMemOnly]>; +def int_sw64_vconl : ClangBuiltin<"__builtin_sw_vconl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_ptr_ty], + [IntrNoMem, IntrArgMemOnly]>; +def int_sw64_vcons : ClangBuiltin<"__builtin_sw_vcons">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_ptr_ty], + [IntrNoMem, IntrArgMemOnly]>; +def int_sw64_vcond : ClangBuiltin<"__builtin_sw_vcond">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_ptr_ty], + [IntrNoMem, IntrArgMemOnly]>; + +def int_sw64_vlogzz : ClangBuiltin<"__builtin_sw_vlogzz">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_vshfw : ClangBuiltin<"__builtin_sw_vshfw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vucaddh_v16hi : ClangBuiltin<"__builtin_sw_vucaddh_v16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vucaddhi_v16hi : ClangBuiltin<"__builtin_sw_vucaddhi_v16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vucsubh_v16hi : ClangBuiltin<"__builtin_sw_vucsubh_v16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vucsubhi_v16hi : ClangBuiltin<"__builtin_sw_vucsubhi_v16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vucaddb_v32qi : ClangBuiltin<"__builtin_sw_vucaddb_v32qi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vucaddbi_v32qi : ClangBuiltin<"__builtin_sw_vucaddbi_v32qi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vucsubb_v32qi : ClangBuiltin<"__builtin_sw_vucsubb_v32qi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vucsubbi_v32qi : ClangBuiltin<"__builtin_sw_vucsubbi_v32qi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vmaxb : ClangBuiltin<"__builtin_sw_vmaxb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vminb : ClangBuiltin<"__builtin_sw_vminb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vmaxh : ClangBuiltin<"__builtin_sw_vmaxh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vminh : ClangBuiltin<"__builtin_sw_vminh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vmaxw : ClangBuiltin<"__builtin_sw_vmaxw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vminw : ClangBuiltin<"__builtin_sw_vminw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vmaxl : ClangBuiltin<"__builtin_sw_vmaxl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vminl : ClangBuiltin<"__builtin_sw_vminl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vumaxb : ClangBuiltin<"__builtin_sw_vumaxb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vuminb 
: ClangBuiltin<"__builtin_sw_vuminb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vumaxh : ClangBuiltin<"__builtin_sw_vumaxh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vuminh : ClangBuiltin<"__builtin_sw_vuminh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vumaxw : ClangBuiltin<"__builtin_sw_vumaxw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vuminw : ClangBuiltin<"__builtin_sw_vuminw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vumaxl : ClangBuiltin<"__builtin_sw_vumaxl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vuminl : ClangBuiltin<"__builtin_sw_vuminl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_sraow : ClangBuiltin<"__builtin_sw_sraow">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_sraowi : ClangBuiltin<"__builtin_sw_sraowi">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_sw64_vcmpgew : ClangBuiltin<"__builtin_sw_vcmpgew">, + Intrinsic<[llvm_i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmpeqw : ClangBuiltin<"__builtin_sw_vcmpeqw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmplew : ClangBuiltin<"__builtin_sw_vcmplew">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmpltw : ClangBuiltin<"__builtin_sw_vcmpltw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmpulew : ClangBuiltin<"__builtin_sw_vcmpulew">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmpultw : ClangBuiltin<"__builtin_sw_vcmpultw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +def int_sw64_vcmpueqb : ClangBuiltin<"__builtin_sw_vcmpueqb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vcmpugtb : ClangBuiltin<"__builtin_sw_vcmpugtb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vsumw : ClangBuiltin<"__builtin_sw_vsumw">, + Intrinsic<[llvm_i64_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vsuml : ClangBuiltin<"__builtin_sw_vsuml">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vbinvw : ClangBuiltin<"__builtin_sw_vbinvw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vwinv : ClangBuiltin<"__builtin_sw_vwinv">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vseleql : ClangBuiltin<"__builtin_sw_vseleql">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_veqvb : ClangBuiltin<"__builtin_sw_veqvb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_veqvh : ClangBuiltin<"__builtin_sw_veqvh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_veqvl : ClangBuiltin<"__builtin_sw_veqvl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vsums : ClangBuiltin<"__builtin_sw_vsums">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vsumd : ClangBuiltin<"__builtin_sw_vsumd">, + 
Intrinsic<[llvm_double_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrecs : ClangBuiltin<"__builtin_sw_vfrecs">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfrecd : ClangBuiltin<"__builtin_sw_vfrecd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfris : ClangBuiltin<"__builtin_sw_vfris">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfris_g : ClangBuiltin<"__builtin_sw_vfris_g">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfris_p : ClangBuiltin<"__builtin_sw_vfris_p">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfris_z : ClangBuiltin<"__builtin_sw_vfris_z">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfris_n : ClangBuiltin<"__builtin_sw_vfris_n">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfrid : ClangBuiltin<"__builtin_sw_vfrid">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrid_g : ClangBuiltin<"__builtin_sw_vfrid_g">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrid_p : ClangBuiltin<"__builtin_sw_vfrid_p">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrid_z : ClangBuiltin<"__builtin_sw_vfrid_z">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrid_n : ClangBuiltin<"__builtin_sw_vfrid_n">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vmaxs : ClangBuiltin<"__builtin_sw_vmaxs">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vmins : ClangBuiltin<"__builtin_sw_vmins">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vmaxd : ClangBuiltin<"__builtin_sw_vmaxd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vmind : ClangBuiltin<"__builtin_sw_vmind">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtsd : ClangBuiltin<"__builtin_sw_vfcvtsd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcvtds : ClangBuiltin<"__builtin_sw_vfcvtds">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtsh : ClangBuiltin<"__builtin_sw_vfcvtsh">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vfcvths : ClangBuiltin<"__builtin_sw_vfcvths">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtls : ClangBuiltin<"__builtin_sw_vfcvtls">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtld : ClangBuiltin<"__builtin_sw_vfcvtld">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl : ClangBuiltin<"__builtin_sw_vfcvtdl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl_g : ClangBuiltin<"__builtin_sw_vfcvtdl_g">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl_p : ClangBuiltin<"__builtin_sw_vfcvtdl_p">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl_z : ClangBuiltin<"__builtin_sw_vfcvtdl_z">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl_n : ClangBuiltin<"__builtin_sw_vfcvtdl_n">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtsl : 
ClangBuiltin<"__builtin_sw_vfcvtsl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + +def int_sw64_vshfq : ClangBuiltin<"__builtin_sw_vshfq">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vshfqb : ClangBuiltin<"__builtin_sw_vshfqb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; + +def int_sw64_vsm3msw : ClangBuiltin<"__builtin_sw_vsm3msw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vsm3r : ClangBuiltin<"__builtin_sw_vsm3r">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vsm4key : ClangBuiltin<"__builtin_sw_vsm4key">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vsm4r : ClangBuiltin<"__builtin_sw_vsm4r">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +def int_sw64_vldw : ClangBuiltin<"__builtin_sw_vldw">, + Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vldl : ClangBuiltin<"__builtin_sw_vldl">, + Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vlds : ClangBuiltin<"__builtin_sw_vlds">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vldd : ClangBuiltin<"__builtin_sw_vldd">, + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; + +def int_sw64_vload : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vloadu : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vload_u : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vloade : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vloadnc : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vstore : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstoreu : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstore_u : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstoreuh : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstoreul : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstorenc : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; + +def int_sw64_loadu : ClangBuiltin<"__builtin_sw_loadu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_loadu_i : ClangBuiltin<"__builtin_sw_loadu_i">, + Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_loadu_l : ClangBuiltin<"__builtin_sw_loadu_l">, + Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_loadu_f : ClangBuiltin<"__builtin_sw_loadu_f">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_loadu_d : ClangBuiltin<"__builtin_sw_loadu_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_load_u : 
ClangBuiltin<"__builtin_sw_load_u">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +} diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index c28cd1211235..125fdefd094f 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -536,6 +536,9 @@ protected: // %hi(), and similar unary operators. bool HasMipsExpressions = false; + // If true, then the assembler supports the .set directive. + bool HasSw64SetDirective = false; + // If true, use Motorola-style integers in Assembly (ex. $0ac). bool UseMotorolaIntegers = false; @@ -881,6 +884,7 @@ public: bool canRelaxRelocations() const { return RelaxELFRelocations; } void setRelaxELFRelocations(bool V) { RelaxELFRelocations = V; } bool hasMipsExpressions() const { return HasMipsExpressions; } + bool hasSw64SetDirective() const { return HasSw64SetDirective; } bool needsFunctionDescriptors() const { return NeedsFunctionDescriptors; } bool shouldUseMotorolaIntegers() const { return UseMotorolaIntegers; } }; diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h index 5bc5e04f79ff..5d788b02ae6d 100644 --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -314,6 +314,32 @@ public: VK_PPC_NOTOC, // symbol@notoc VK_PPC_PCREL_OPT, // .reloc expr, R_PPC64_PCREL_OPT, expr + VK_SW64_ELF_LITERAL, + VK_SW64_LITUSE_ADDR, + VK_SW64_LITUSE_BASE, + VK_SW64_LITUSE_BYTOFF, + VK_SW64_LITUSE_JSR, + VK_SW64_LITUSE_TLSGD, + VK_SW64_LITUSE_TLSLDM, + VK_SW64_LITUSE_JSRDIRECT, + VK_SW64_GPDISP, + VK_SW64_GPDISP_HI16, + VK_SW64_GPDISP_LO16, + VK_SW64_GPREL_HI16, + VK_SW64_GPREL_LO16, + VK_SW64_GPREL16, + VK_SW64_BRSGP, + VK_SW64_TLSGD, + VK_SW64_TLSLDM, + VK_SW64_GOTDTPREL16, + VK_SW64_DTPREL_HI16, + VK_SW64_DTPREL_LO16, + VK_SW64_DTPREL16, + VK_SW64_GOTTPREL16, + VK_SW64_TPREL_HI16, + VK_SW64_TPREL_LO16, + VK_SW64_TPREL16, + VK_COFF_IMGREL32, // symbol@imgrel (image-relative) VK_Hexagon_LO16, diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index f3016cc141b0..3c004b384e32 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -1254,6 +1254,8 @@ StringRef ELFObjectFile::getFileFormatName() const { return "elf64-ve"; case ELF::EM_LOONGARCH: return "elf64-loongarch"; + case ELF::EM_SW64: + return "elf64-sw_64"; default: return "elf64-unknown"; } @@ -1352,6 +1354,9 @@ template Triple::ArchType ELFObjectFile::getArch() const { case ELF::EM_XTENSA: return Triple::xtensa; + case ELF::EM_SW64: + return Triple::sw_64; + default: return Triple::UnknownArch; } diff --git a/llvm/include/llvm/Support/Sw64ABIFlags.h b/llvm/include/llvm/Support/Sw64ABIFlags.h new file mode 100644 index 000000000000..44fc9dbf102d --- /dev/null +++ b/llvm/include/llvm/Support/Sw64ABIFlags.h @@ -0,0 +1,39 @@ +//===--- Sw64ABIFlags.h - SW64 ABI flags ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the constants for the ABI flags structure contained +// in the .Sw64.abiflags section. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_Sw64ABIFLAGS_H +#define LLVM_SUPPORT_Sw64ABIFLAGS_H + +namespace llvm { +namespace Sw64 { + +// Values for the xxx_size bytes of an ABI flags structure. +enum AFL_REG { + AFL_REG_NONE = 0x00, // No registers + AFL_REG_32 = 0x01, // 32-bit registers + AFL_REG_64 = 0x02, // 64-bit registers + AFL_REG_128 = 0x03 // 128-bit registers +}; + +// Values for the flags1 word of an ABI flags structure. +enum AFL_FLAGS1 { AFL_FLAGS1_ODDSPREG = 1 }; + +enum AFL_EXT { + AFL_EXT_NONE = 0, // None + AFL_EXT_OCTEON = 5 // Cavium Networks Octeon +}; +} // namespace Sw64 +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/Sw64TargetParser.def b/llvm/include/llvm/Support/Sw64TargetParser.def new file mode 100644 index 000000000000..cb598dc25c23 --- /dev/null +++ b/llvm/include/llvm/Support/Sw64TargetParser.def @@ -0,0 +1,28 @@ +//===- Sw64TargetParser.def - Sw64 target parsing defines ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides defines to build up the Sw64 target parser's logic. +// +//===----------------------------------------------------------------------===// + +#ifndef PROC_ALIAS +#define PROC_ALIAS(NAME, SW64) +#endif + +#undef PROC_ALIAS + +#ifndef SW64_CPU +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) +#endif + +SW64_CPU(INVALID, {"invalid"}, FK_INVALID, {""}) +SW64_CPU(SW6B, {"sw6b"}, FK_64BIT, {"core3b"}) +SW64_CPU(SW4D, {"sw4d"}, FK_64BIT, {"core3b"}) +SW64_CPU(SW8A, {"sw8a"}, FK_64BIT, {"core4"}) + +#undef SW64_CPU diff --git a/llvm/include/llvm/Support/Sw64TargetParser.h b/llvm/include/llvm/Support/Sw64TargetParser.h new file mode 100644 index 000000000000..ceb0caff4a78 --- /dev/null +++ b/llvm/include/llvm/Support/Sw64TargetParser.h @@ -0,0 +1,53 @@ +//===-- Sw64TargetParser - Parser for Sw64 features -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise SW64 hardware features +// such as FPU/CPU/ARCH and extension names. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_SW64TARGETPARSER_H +#define LLVM_SUPPORT_SW64TARGETPARSER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include + +// FIXME:This should be made into class design,to avoid dupplication. 
+namespace llvm { +class StringRef; + +namespace Sw64 { + +enum CPUKind : unsigned { CK_INVALID = 0, CK_SW6B, CK_SW4D, CK_SW8A }; + +enum FeatureKind : unsigned { + FK_INVALID = 0, + FK_NONE = 1, + FK_STDEXTM = 1 << 2, + FK_STDEXTA = 1 << 3, + FK_STDEXTF = 1 << 4, + FK_STDEXTD = 1 << 5, + FK_STDEXTC = 1 << 6, + FK_64BIT = 1 << 7, +}; + +bool checkCPUKind(CPUKind Kind, bool IsSw64); +bool checkTuneCPUKind(CPUKind Kind, bool IsSw64); +CPUKind parseARCHKind(StringRef CPU); +CPUKind parseTuneCPUKind(StringRef CPU, bool IsSw64); +StringRef getMcpuFromMArch(StringRef CPU); +void fillValidCPUArchList(SmallVectorImpl &Values, bool IsSw64); +void fillValidTuneCPUArchList(SmallVectorImpl &Values, bool IsSw64); +StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsSw64); +CPUKind parseCPUArch(StringRef CPU); + +} // namespace Sw64 +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/TargetParser/Host.h b/llvm/include/llvm/TargetParser/Host.h index af72045a8fe6..8da1991e3035 100644 --- a/llvm/include/llvm/TargetParser/Host.h +++ b/llvm/include/llvm/TargetParser/Host.h @@ -67,6 +67,7 @@ namespace sys { StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForSPARC(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForBPF(); + StringRef getHostCPUNameForSW64(StringRef ProcCpuinfoContent); /// Helper functions to extract CPU details from CPUID on x86. namespace x86 { diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index e0ac050b1c7b..91d4190836fb 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -17,6 +17,7 @@ #undef NetBSD #undef mips #undef sparc +#undef sw_64 namespace llvm { @@ -106,7 +107,8 @@ public: renderscript32, // 32-bit RenderScript renderscript64, // 64-bit RenderScript ve, // NEC SX-Aurora Vector Engine - LastArchType = ve + sw_64, // sw64:basic Arch for SW + LastArchType = sw_64 }; enum SubArchType { NoSubArch, @@ -153,6 +155,11 @@ public: MipsSubArch_r6, + Sw64SubArch_4d, + Sw64SubArch_6a, + Sw64SubArch_6b, + Sw64SubArch_8a, + PPCSubArch_spe, // SPIR-V sub-arch corresponds to its version. @@ -890,6 +897,21 @@ public: return isMIPS32() || isMIPS64(); } + /// Tests whether the target is SW64 64-bit (little endian). + bool isSw64() const { return getArch() == Triple::sw_64; } + + bool isSw6a() const { return getSubArch() == Triple::Sw64SubArch_6a; } + + bool isSw6b() const { return getSubArch() == Triple::Sw64SubArch_6b; } + + bool isSw4d() const { return getSubArch() == Triple::Sw64SubArch_4d; } + + bool isSw8a() const { return getSubArch() == Triple::Sw64SubArch_8a; } + + bool isSW() const { + return isSw64() || isSw6a() || isSw6b() || isSw4d() || isSw8a(); + } + /// Tests whether the target is PowerPC (32- or 64-bit LE or BE). 
bool isPPC() const { return getArch() == Triple::ppc || getArch() == Triple::ppc64 || @@ -1140,5 +1162,4 @@ public: } // End llvm namespace - #endif diff --git a/llvm/include/module.modulemap b/llvm/include/module.modulemap index 4c2ba437edb9..22256bb2cdcb 100644 --- a/llvm/include/module.modulemap +++ b/llvm/include/module.modulemap @@ -100,6 +100,7 @@ module LLVM_BinaryFormat { textual header "llvm/BinaryFormat/ELFRelocs/PowerPC.def" textual header "llvm/BinaryFormat/ELFRelocs/RISCV.def" textual header "llvm/BinaryFormat/ELFRelocs/Sparc.def" + textual header "llvm/BinaryFormat/ELFRelocs/Sw64.def" textual header "llvm/BinaryFormat/ELFRelocs/SystemZ.def" textual header "llvm/BinaryFormat/ELFRelocs/VE.def" textual header "llvm/BinaryFormat/ELFRelocs/x86_64.def" diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a84d35a6ea4e..5a2fca731ba7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -408,6 +408,87 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); NewLHS = Call.first; NewRHS = DAG.getConstant(0, dl, RetVT); + if (Triple(this->getTargetMachine().getTargetTriple()).getArch() == + Triple::sw_64) { + + ShouldInvertCC = false; + switch (CCCode) { + + case llvm::ISD::SETOGT: + case llvm::ISD::SETUGT: + case llvm::ISD::SETGT: + // from: + // ldi $1,0($31) + // cmplt $1,$0,$0 + // to: + // + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; + NewRHS = DAG.getConstant(0, dl, RetVT); + break; + case llvm::ISD::SETOGE: + case llvm::ISD::SETUGE: + case llvm::ISD::SETGE: + // from: + // ldi $1,-1($31) + // cmplt $1,$0,$0 + // to: + // ldi $1 0($31) + // complt $1,$0,$0 + + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; + NewRHS = DAG.getConstant(1, dl, RetVT); + break; + case llvm::ISD::SETOLT: + case llvm::ISD::SETULT: + case llvm::ISD::SETLT: + + // from: + // cmplt $0,0,$0 + // to: + // cmplt $31,$0,$0 + + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewRHS = Call.first; + NewLHS = DAG.getConstant(0, dl, RetVT); + break; + case llvm::ISD::SETOLE: + case llvm::ISD::SETULE: + case llvm::ISD::SETLE: + // from: + // cmplt $0,-1,$0 + // to: + // cmplt $31,$0,$0 + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewRHS = Call.first; + NewLHS = DAG.getConstant(1, dl, RetVT); + break; + case llvm::ISD::SETUEQ: + case llvm::ISD::SETOEQ: + case llvm::ISD::SETEQ: + // from: + // cmplt $0,0,$0 + // to: + // cmplt $0,-1,$0 + // + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; + NewRHS = DAG.getConstant(1, dl, RetVT); + break; + case llvm::ISD::SETONE: + ShouldInvertCC = true; + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; + NewRHS = DAG.getConstant(1, dl, RetVT); + break; + case llvm::ISD::SETO: + ShouldInvertCC = true; + LLVM_FALLTHROUGH; + default: + break; + } + } CCCode = getCmpLibcallCC(LC1); if (ShouldInvertCC) { diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4ffffd85ee53..0980e8238365 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -290,6 +290,13 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, TTypeEncoding = 
dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; break; + case Triple::sw_64: + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + break; default: break; } diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp index 833be826f8ae..8141c8ce74af 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -262,6 +262,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) { case Triple::riscv64: return CreateWithABI(EPC); + case Triple::sw_64: + return CreateWithABI(EPC); + case Triple::x86_64: if (TT.getOS() == Triple::OSType::Win32) return CreateWithABI(EPC); diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index a0d81cdf2086..d8520bb9680b 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -158,6 +158,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, return CCMgrT::Create(ES, ErrorHandlerAddress); } + case Triple::sw_64: { + typedef orc::LocalJITCompileCallbackManager CCMgrT; + return CCMgrT::Create(ES, ErrorHandlerAddress); + } + case Triple::x86_64: { if (T.getOS() == Triple::OSType::Win32) { typedef orc::LocalJITCompileCallbackManager CCMgrT; @@ -224,6 +229,11 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) { orc::LocalIndirectStubsManager>(); }; + case Triple::sw_64: + return []() { + return std::make_unique>(); + }; + case Triple::x86_64: if (T.getOS() == Triple::OSType::Win32) { return [](){ diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 7c7c2f000368..da3a19b25501 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -913,6 +913,10 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) { S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le)) Layer->setAutoClaimResponsibilityForObjectSymbols(true); + if (S.JTMB->getTargetTriple().isOSBinFormatELF() && + S.JTMB->getTargetTriple().getArch() == Triple::ArchType::sw_64) + Layer->setAutoClaimResponsibilityForObjectSymbols(true); + // FIXME: Explicit conversion to std::unique_ptr added to silence // errors from some GCC / libstdc++ bots. Remove this conversion (i.e. // just return ObjLinkingLayer) once those bots are upgraded. 
diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index d95a642934f1..0e5dc629ddd8 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -139,6 +139,8 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, case Triple::riscv64: return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); + case Triple::sw_64: + return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); case Triple::x86_64: if (T.getOS() == Triple::OSType::Win32) diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp index 6d568199378a..07c1d14daabd 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -915,6 +915,268 @@ void OrcMips64::writeIndirectStubsBlock(char *StubsBlockWorkingMem, } } +void OrcSw64::writeResolverCode(char *ResolverWorkingMem, + ExecutorAddr ResolverTargetAddress, + ExecutorAddr ReentryFnAddr, + ExecutorAddr ReentryCtxAddr) { + const uint32_t ResolverCode[] = { + // resolver_entry: + 0xfbdefe38, // ldi sp,-456(sp) + 0xac1e0000, // stl v0,0(sp) + 0xae1e0008, // stl a0,8(sp) + 0xae3e0010, // stl a1,16(sp) + 0xae5e0018, // stl a2,24(sp) + 0xae7e0020, // stl a3,32(sp) + 0xae9e0028, // stl a4,40(sp) + 0xaebe0030, // stl a5,48(sp) + 0xad3e0038, // stl s0,56(sp) + 0xad5e0040, // stl s1,64(sp) + 0xad7e0048, // stl s2,72(sp) + 0xad9e0050, // stl s3,80(sp) + 0xadbe0058, // stl s4,88(sp) + 0xadde0060, // stl s5,96(sp) + 0xac3e0068, // stl t0,104(sp) + 0xac5e0070, // stl t1,112(sp) + 0xac7e0078, // stl t2,120(sp) + 0xac9e0080, // stl t3,128(sp) + 0xacbe0088, // stl t4,136(sp) + 0xacde0090, // stl t5,144(sp) + 0xacfe0098, // stl t6,152(sp) + 0xad1e00a0, // stl t7,160(sp) + 0xaede00a8, // stl t8,168(sp) + 0xaefe00b0, // stl t9,176(sp) + 0xaf1e00b8, // stl t10,184(sp) + 0xaf3e00c0, // stl t11,192(sp) + 0xaf7e00c8, // stl t12,200(sp) + 0xadfe00d0, // stl fp,208(sp) + 0xaf5e00d8, // stl ra,216(sp) + + 0xbc5e00e0, // fstd $f2,224(sp) + 0xbc7e00e8, // fstd $f3,232(sp) + 0xbc9e00f0, // fstd $f4,240(sp) + 0xbcbe00f8, // fstd $f5,248(sp) + 0xbcde0100, // fstd $f6,256(sp) + 0xbcfe0108, // fstd $f7,264(sp) + 0xbd1e0110, // fstd $f8,272(sp) + 0xbd3e0118, // fstd $f9,280(sp) + 0xbd5e0120, // fstd $f10,288(sp) + 0xbd7e0128, // fstd $f11,296(sp) + 0xbd9e0130, // fstd $f12,304(sp) + 0xbdbe0138, // fstd $f13,312(sp) + 0xbdde0140, // fstd $f14,320(sp) + 0xbdfe0148, // fstd $f15,328(sp) + 0xbe1e0150, // fstd $f16,336(sp) + 0xbe3e0158, // fstd $f17,344(sp) + 0xbe5e0160, // fstd $f18,352(sp) + 0xbe7e0168, // fstd $f19,360(sp) + 0xbe9e0170, // fstd $f20,368(sp) + 0xbebe0178, // fstd $f21,376(sp) + 0xbede0180, // fstd $f22,384(sp) + 0xbefe0188, // fstd $f23,392(sp) + 0xbf1e0190, // fstd $f24,400(sp) + 0xbf3e0198, // fstd $f25,408(sp) + 0xbf5e01a0, // fstd $f26,416(sp) + 0xbf7e01a8, // fstd $f27,424(sp) + 0xbf9e01b0, // fstd $f28,432(sp) + 0xbfbe01b8, // fstd $f29,440(sp) + 0xbfde01c0, // fstd $f30,448(sp) + + // JIT re-entry ctx addr. 
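+ // The six zero words below are address placeholders; writeResolverCode
+ // patches them with ldih/ldi/sll sequences that materialize the JIT
+ // re-entry context address in $16, sixteen bits at a time.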
+ 0x00000000, // ldih $16,ctxhighest($31) + 0x00000000, // ldi $16,ctxhigher($16) + 0x00000000, // sll $16,16,$16 + 0x00000000, // ldi $16,ctxhi($16) + 0x00000000, // sll $16,16,$16 + 0x00000000, // ldi $16,ctxlo($16) + 0x435a0751, // or ra,ra,a1 + 0xfa31ffe0, // ldi a1,-32(a1) + // JIT re-entry fn addr: + 0x00000000, // ldih $27,reentry($31) + 0x00000000, // ldi $27,reentry($27) + 0x00000000, // sll $27,16,$27 + 0x00000000, // ldi $27,reentryhi($27) + 0x00000000, // sll $27,16,$27 + 0x00000000, // ldi $27,reentrylo($27) + 0x075b0000, // call ra,(t12),6c + 0x43ff075f, // nop + + 0x9fde01c0, // fldd $f30,448(sp) + 0x9fbe01b8, // fldd $f29,440(sp) + 0x9f9e01b0, // fldd $f28,432(sp) + 0x9f7e01a8, // fldd $f27,424(sp) + 0x9f5e01a0, // fldd $f26,416(sp) + 0x9f3e0198, // fldd $f25,408(sp) + 0x9f1e0190, // fldd $f24,400(sp) + 0x9efe0188, // fldd $f23,392(sp) + 0x9ede0180, // fldd $f22,384(sp) + 0x9ebe0178, // fldd $f21,376(sp) + 0x9e9e0170, // fldd $f20,368(sp) + 0x9e7e0168, // fldd $f19,360(sp) + 0x9e5e0160, // fldd $f18,352(sp) + 0x9e3e0158, // fldd $f17,344(sp) + 0x9e1e0150, // fldd $f16,336(sp) + 0x9dfe0148, // fldd $f15,328(sp) + 0x9dde0140, // fldd $f14,320(sp) + 0x9dbe0138, // fldd $f13,312(sp) + 0x9d9e0130, // fldd $f12,304(sp) + 0x9d7e0128, // fldd $f11,296(sp) + 0x9d5e0120, // fldd $f10,288(sp) + 0x9d3e0118, // fldd $f9,280(sp) + 0x9d1e0110, // fldd $f8,272(sp) + 0x9cfe0108, // fldd $f7,264(sp) + 0x9cde0100, // fldd $f6,256(sp) + 0x9cbe00f8, // fldd $f5,248(sp) + 0x9c9e00f0, // fldd $f4,240(sp) + 0x9c7e00e8, // fldd $f3,232(sp) + 0x9c5e00e0, // fldd $f2,224(sp) + + 0x8f5e00d8, // ldl ra,216(sp) + 0x8dfe00d0, // ldl fp,208(sp) + 0x8f7e00c8, // ldl t12,200(sp) + 0x8f3e00c0, // ldl t11,192(sp) + 0x8f1e00b8, // ldl t10,184(sp) + 0x8efe00b0, // ldl t9,176(sp) + 0x8ede00a8, // ldl t8,168(sp) + 0x8d1e00a0, // ldl t7,160(sp) + 0x8cfe0098, // ldl t6,152(sp) + 0x8cde0090, // ldl t5,144(sp) + 0x8cbe0088, // ldl t4,136(sp) + 0x8c9e0080, // ldl t3,128(sp) + 0x8c7e0078, // ldl t2,120(sp) + 0x8c5e0070, // ldl t1,112(sp) + 0x8c3e0068, // ldl t0,104(sp) + 0x8dde0060, // ldl s5,96(sp) + 0x8dbe0058, // ldl s4,88(sp) + 0x8d9e0050, // ldl s3,80(sp) + 0x8d7e0048, // ldl s2,72(sp) + 0x8d5e0040, // ldl s1,64(sp) + 0x8d3e0038, // ldl s0,56(sp) + 0x8ebe0030, // ldl a5,48(sp) + 0x8e9e0028, // ldl a4,40(sp) + 0x8e7e0020, // ldl a3,32(sp) + 0x8e5e0018, // ldl a2,24(sp) + 0x8e3e0010, // ldl a1,16(sp) + 0x8e1e0008, // ldl a0,8(sp) + 0xfbde01c8, // ldi sp,456(sp) + + 0x4339075a, // or t11,t11,ra + 0x4000075b, // or v0,v0,t12 + 0x0ffb0000, // jmp zero,(t12),c4 + }; + const unsigned ReentryFnAddrOffset = 0x108; // JIT re-entry fn addr lui + const unsigned ReentryCtxAddrOffset = 0xe8; // JIT re-entry ctx addr lui + + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + + uint32_t ReentryCtxLDIh = + 0xfe1f0000 | (((ReentryCtxAddr.getValue() >> 48) + + ((ReentryCtxAddr.getValue() >> 47) & 1)) & + 0xFFFF); + uint32_t ReentryCtxLDI = + 0xfa100000 | (((ReentryCtxAddr.getValue() >> 32) + + ((ReentryCtxAddr.getValue() >> 31) & 1)) & + 0xFFFF); + uint32_t ReentryCtxSLL = 0x4a020910; + uint32_t ReentryCtxLDI2 = + 0xfa100000 | (((ReentryCtxAddr.getValue() >> 16) + + ((ReentryCtxAddr.getValue() >> 15) & 1)) & + 0xFFFF); + uint32_t ReentryCtxSLL2 = 0x4a020910; + uint32_t ReentryCtxLDI3 = 0xfa100000 | (ReentryCtxAddr.getValue() & 0xFFFF); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLDIh, + sizeof(ReentryCtxLDIh)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxLDI, + 
sizeof(ReentryCtxLDI)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 8, &ReentryCtxSLL, + sizeof(ReentryCtxSLL)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 12, &ReentryCtxLDI2, + sizeof(ReentryCtxLDI2)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 16, &ReentryCtxSLL2, + sizeof(ReentryCtxSLL2)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 20, &ReentryCtxLDI3, + sizeof(ReentryCtxLDI3)); + + uint32_t ReentryFnLDIh = + 0xff7f0000 | (((ReentryFnAddr.getValue() >> 48) + + ((ReentryFnAddr.getValue() >> 47) & 1)) & + 0xFFFF); + uint32_t ReentryFnLDI = + 0xfb7b0000 | (((ReentryFnAddr.getValue() >> 32) + + ((ReentryFnAddr.getValue() >> 31) & 1)) & + 0xFFFF); + uint32_t ReentryFnSLL = 0x4b62091b; + uint32_t ReentryFnLDI2 = + 0xfb7b0000 | (((ReentryFnAddr.getValue() >> 16) + + ((ReentryFnAddr.getValue() >> 15) & 1)) & + 0xFFFF); + uint32_t ReentryFnSLL2 = 0x4b62091b; + uint32_t ReentryFnLDI3 = 0xfb7b0000 | (ReentryFnAddr.getValue() & 0xFFFF); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLDIh, + sizeof(ReentryFnLDIh)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnLDI, + sizeof(ReentryFnLDI)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 8, &ReentryFnSLL, + sizeof(ReentryFnSLL)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 12, &ReentryFnLDI2, + sizeof(ReentryFnLDI2)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 16, &ReentryFnSLL2, + sizeof(ReentryFnSLL2)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 20, &ReentryFnLDI3, + sizeof(ReentryFnLDI3)); +} + +void OrcSw64::writeTrampolines(char *TrampolineBlockWorkingMem, + ExecutorAddr TrampolineBlockTargetAddress, + ExecutorAddr ResolverAddr, + unsigned NumTrampolines) { + + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); + uint64_t HighestAddr = + (ResolverAddr.getValue() >> 48) + ((ResolverAddr.getValue() >> 47) & 1); + uint64_t HigherAddr = + (ResolverAddr.getValue() >> 32) + ((ResolverAddr.getValue() >> 31) & 1); + uint64_t HiAddr = + (ResolverAddr.getValue() >> 16) + ((ResolverAddr.getValue() >> 15) & 1); + + for (unsigned I = 0; I < NumTrampolines; ++I) { + Trampolines[10 * I + 0] = 0x435a0759; // or ra,ra,t11 + Trampolines[10 * I + 1] = 0xff7f0000 | (HighestAddr & 0xFFFF); + Trampolines[10 * I + 2] = 0xfb7b0000 | (HigherAddr & 0xFFFF); + Trampolines[10 * I + 3] = 0x4b62091b; // sll + Trampolines[10 * I + 4] = 0xfb7b0000 | (HiAddr & 0xFFFF); + Trampolines[10 * I + 5] = 0x4b62091b; // sll2 + Trampolines[10 * I + 6] = 0xfb7b0000 | (ResolverAddr.getValue() & 0xFFFF); + Trampolines[10 * I + 7] = 0x075b0000; // call + Trampolines[10 * I + 8] = 0x43ff075f; // nop + Trampolines[10 * I + 9] = 0x43ff075f; // nop + } +} + +void OrcSw64::writeIndirectStubsBlock(char *StubsBlockWorkingMem, + ExecutorAddr StubsBlockTargetAddress, + ExecutorAddr PointersBlockTargetAddress, + unsigned NumStubs) { + + // Populate the stubs page stubs and mark it executable. 
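+ // Each stub is eight 32-bit words: an ldih/ldi/sll/ldi/sll sequence builds
+ // the upper bits of the matching pointer-table entry's address in $27, an
+ // ldl (with the low 16 bits as its displacement) loads the target through
+ // it, a jmp transfers control, and a trailing nop pads the stub.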
+ uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrAddr = PointersBlockTargetAddress.getValue(); + + for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { + uint64_t HighestAddr = (PtrAddr >> 48) + ((PtrAddr >> 47) & 1); + uint64_t HigherAddr = (PtrAddr >> 32) + ((PtrAddr >> 31) & 1); + uint64_t HiAddr = (PtrAddr >> 16) + ((PtrAddr >> 15) & 1); + Stub[8 * I + 0] = 0xff7f0000 | (HighestAddr & 0xFFFF); // ldih + Stub[8 * I + 1] = 0xfb7b0000 | (HigherAddr & 0xFFFF); // ldi + Stub[8 * I + 2] = 0x4b62091b; // sll + Stub[8 * I + 3] = 0xfb7b0000 | (HiAddr & 0xFFFF); // ldi + Stub[8 * I + 4] = 0x4b62091b; // sll2 + Stub[8 * I + 5] = 0x8f7b0000 | (PtrAddr & 0xFFFF); // ldl + Stub[8 * I + 6] = 0x0ffb0000; // jmp $31,($27),0 + Stub[8 * I + 7] = 0x43ff075f; // nop + } +} + void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem, ExecutorAddr ResolverTargetAddress, ExecutorAddr ReentryFnAddr, diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt index 1278e2f43c3b..79c1fa6a4a04 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMRuntimeDyld RuntimeDyldELF.cpp RuntimeDyldMachO.cpp Targets/RuntimeDyldELFMips.cpp + Targets/RuntimeDyldELFSw64.cpp DEPENDS intrinsics_gen diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index d439b1b4ebfb..66ed10693b9e 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -13,6 +13,7 @@ #include "RuntimeDyldELF.h" #include "RuntimeDyldCheckerImpl.h" #include "Targets/RuntimeDyldELFMips.h" +#include "Targets/RuntimeDyldELFSw64.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" @@ -241,6 +242,8 @@ llvm::RuntimeDyldELF::create(Triple::ArchType Arch, case Triple::mips64: case Triple::mips64el: return std::make_unique(MemMgr, Resolver); + case Triple::sw_64: + return make_unique(MemMgr, Resolver); } } @@ -1878,6 +1881,42 @@ RuntimeDyldELF::processRelocationRef( } else { processSimpleRelocation(SectionID, Offset, RelType, Value); } + } else if (Arch == Triple::sw_64) { + uint32_t r_type = RelType & 0xff; + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); + LLVM_DEBUG(dbgs() << "Resolve Sw64 reloc" << TargetName << "\n"); + if (r_type == ELF::R_SW_64_GPDISP) { + TargetName = "gphi"; + StringMap::iterator i = GOTSymbolOffsets.find(TargetName); + if (i != GOTSymbolOffsets.end()) + RE.SymOffset = i->second; + else { + RE.SymOffset = allocateGOTEntries(1); + GOTSymbolOffsets[TargetName] = RE.SymOffset; + } + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + } else if (RelType == ELF::R_SW_64_BRADDR) { + // This is an Sw64 branch relocation, need to use a stub function. 
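+ // Stub emission for branch relocations is not implemented yet, so abort
+ // rather than silently mis-resolve the branch.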
+ LLVM_DEBUG(dbgs() << "\t\tThis is a Sw64 branch relocation."); + llvm_unreachable(" Sw64 branch relocation not yet supported."); + } else if (r_type == ELF::R_SW_64_LITERAL) { + StringMap::iterator a = GOTSymbolOffsets.find(TargetName); + if (a != GOTSymbolOffsets.end()) + RE.SymOffset = a->second; + else { + RE.SymOffset = allocateGOTEntries(1); + GOTSymbolOffsets[TargetName] = RE.SymOffset; + } + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + } else { + processSimpleRelocation(SectionID, Offset, RelType, Value); + } } else { if (Arch == Triple::x86) { Value.Addend += support::ulittle32_t::ref(computePlaceholderAddress(SectionID, Offset)); @@ -2221,6 +2260,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { case Triple::aarch64_be: case Triple::ppc64: case Triple::ppc64le: + case Triple::sw_64: case Triple::systemz: Result = sizeof(uint64_t); break; @@ -2390,6 +2430,25 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, } GOTSymbolOffsets.clear(); } + if (Arch == Triple::sw_64) { + // To correctly resolve Sw64 GOT relocations, we need a mapping from + // object's sections to GOTs. + for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); + SI != SE; ++SI) { + if (SI->relocation_begin() != SI->relocation_end()) { + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + return make_error( + toString(RelSecOrErr.takeError())); + + section_iterator RelocatedSection = *RelSecOrErr; + ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); + assert(i != SectionMap.end()); + SectionToGOTMap[i->second] = GOTSectionID; + } + } + GOTSymbolOffsets.clear(); + } } // Look for and record the EH frame section. diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index dfdd98cb3a34..13fa4e6ef5ac 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -15,6 +15,8 @@ #include "RuntimeDyldImpl.h" #include "llvm/ADT/DenseMap.h" +#include +using namespace std; using namespace llvm; @@ -60,6 +62,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); + void resolveSW64Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int32_t Addend); + unsigned getMaxStubSize() const override { if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) return 20; // movz; movk; movk; movk; br @@ -75,6 +80,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { return 6; // 2-byte jmp instruction + 32-bit relative address else if (Arch == Triple::systemz) return 16; + else if (Arch == Triple::sw_64) + return 16; else return 0; } diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp new file mode 100644 index 000000000000..81a819abc74f --- /dev/null +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp @@ -0,0 +1,217 @@ +//===-- RuntimeDyldELFSw64.cpp ---- ELF/Sw64 specific code. -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "RuntimeDyldELFSw64.h" +#include "llvm/BinaryFormat/ELF.h" + +#define DEBUG_TYPE "dyld" + +void RuntimeDyldELFSw64::resolveRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + + resolveSw64Relocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, + RE.SymOffset, RE.SectionID); +} + +uint64_t RuntimeDyldELFSw64::evaluateRelocation(const RelocationEntry &RE, + uint64_t Value, + uint64_t Addend) { + const SectionEntry &Section = Sections[RE.SectionID]; + Value = evaluateSw64Relocation(Section, RE.Offset, Value, RE.RelType, Addend, + RE.SymOffset, RE.SectionID); + return Value; +} + +void RuntimeDyldELFSw64::applyRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + applySw64Relocation(Section.getAddressWithOffset(RE.Offset), Value, + RE.RelType); + return; +} + +int64_t RuntimeDyldELFSw64::evaluateSw64Relocation( + const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, + int64_t Addend, uint64_t SymOffset, SID SectionID) { + + LLVM_DEBUG(dbgs() << "evaluateSw64Relocation, LocalAddress: 0x" + << format("%llx", Section.getAddressWithOffset(Offset)) + << " GOTAddr: 0x" + << format("%llx", + getSectionLoadAddress(SectionToGOTMap[SectionID])) + << " FinalAddress: 0x" + << format("%llx", Section.getLoadAddressWithOffset(Offset)) + << " Value: 0x" << format("%llx", Value) << " Type: 0x" + << format("%x", Type) << " Addend: 0x" + << format("%llx", Addend) + << " Offset: " << format("%llx", Offset) + << " SID: " << format("%d", SectionID) + << " SymOffset: " << format("%x", SymOffset) << "\n"); + + switch (Type) { + default: + llvm_unreachable("Not implemented relocation type!"); + break; + case ELF::R_SW_64_GPDISP: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + uint32_t *LocalAddress = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + + uint8_t *LocalGOTAddr = + getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset; + uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, getGOTEntrySize()); + + LLVM_DEBUG(dbgs() << "Debug gpdisp: " + << " GOTAddr: 0x" << format("%llx", GOTAddr) + << " GOTEntry: 0x" << format("%llx", GOTEntry) + << " LocalGOTAddr: 0x" << format("%llx", LocalGOTAddr) + << " LocalAddress: 0x" << format("%llx", LocalAddress) + << "\n"); + if (GOTEntry) + assert(GOTEntry == Value && "GOT entry has two different addresses."); + else + writeBytesUnaligned(Value, LocalGOTAddr, getGOTEntrySize()); + + return (int64_t)GOTAddr + 0x8000 - (int64_t)LocalAddress; + } + case ELF::R_SW_64_LITERAL: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + uint32_t *LocalAddress = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + + uint8_t *LocalGOTAddr = + getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset; + uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, getGOTEntrySize()); + + LLVM_DEBUG(dbgs() << "Debug literal: " + << " GOTAddr: 0x" << format("%llx", GOTAddr) + << " GOTEntry: 0x" << format("%llx", GOTEntry) + << " LocalGOTAddr: 0x" << format("%llx", LocalGOTAddr) + << " LocalAddress: 0x" << format("%llx", LocalAddress) + << "\n"); + + Value += Addend; + if (GOTEntry) + assert(GOTEntry == Value && "GOT entry has two different addresses."); + else + writeBytesUnaligned(Value, LocalGOTAddr, getGOTEntrySize()); + + if (SymOffset > 65536) + report_fatal_error(".got subsegment 
exceeds 64K (literal)!!\n"); + + if ((SymOffset) < 32768) + return (int64_t)(SymOffset - 0x8000); + else + return (int64_t)(0x8000 - SymOffset); + } + case ELF::R_SW_64_GPRELHIGH: { + // Get the higher 16-bits. + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + uint64_t Disp = Value + Addend - (GOTAddr + 0x8000); + if (Disp & 0x8000) + return ((Disp + 0x8000) >> 16) & 0xffff; + else + return (Disp >> 16) & 0xffff; + } + case ELF::R_SW_64_GPRELLOW: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + + return (Value + Addend - (GOTAddr + 0x8000)) & 0xffff; + } + case ELF::R_SW_64_REFQUAD: { + return Value + Addend; + } + case ELF::R_SW_64_SREL32: { + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); + return Value + Addend - FinalAddress; + } + case ELF::R_SW_64_GPREL32: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + return Value + Addend - (GOTAddr + 0x7ff0); + } + case ELF::R_SW_64_TPRELHI: + case ELF::R_SW_64_TPRELLO: + report_fatal_error("Current Sw64 JIT does not support TPREL relocs"); + break; + case ELF::R_SW_64_LITERAL_GOT: + case ELF::R_SW_64_HINT: + case ELF::R_SW_64_LITUSE: + return 0; + } + return 0; +} + +void RuntimeDyldELFSw64::applySw64Relocation(uint8_t *TargetPtr, int64_t Value, + uint32_t Type) { + uint32_t Insn = readBytesUnaligned(TargetPtr, 4); + int64_t Disp_hi, Disp_lo; + + switch (Type) { + default: + llvm_unreachable("Unknown relocation type!"); + break; + case ELF::R_SW_64_GPDISP: { + uint32_t Insn1 = readBytesUnaligned(TargetPtr + 4, 4); + if ((Value > 2147483647LL) || (Value < -2147483648LL)) { + llvm::dbgs() << "gpdisp Value=" << Value << "\n"; + report_fatal_error(".got subsegment exceeds 2GB (gpdisp)!!\n"); + } + + Disp_hi = (Value + 0x8000) >> 16; + Disp_lo = Value & 0xffff; + + Insn = (Insn & 0xffff0000) | (Disp_hi & 0x0000ffff); + Insn1 = (Insn1 & 0xffff0000) | (Disp_lo & 0x0000ffff); + + writeBytesUnaligned(Insn, TargetPtr, 4); + writeBytesUnaligned(Insn1, TargetPtr + 4, 4); + break; + } + case ELF::R_SW_64_LITERAL: + Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff); + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_SW_64_LITERAL_GOT: + Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff); + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_SW_64_GPRELHIGH: + case ELF::R_SW_64_GPRELLOW: + Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff); + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_SW_64_REFQUAD: + writeBytesUnaligned(Value, TargetPtr, 8); + break; + case ELF::R_SW_64_SREL32: + writeBytesUnaligned(Value & 0xffffffff, TargetPtr, 4); + break; + case ELF::R_SW_64_GPREL32: + writeBytesUnaligned(Value & 0xffffffff, TargetPtr, 4); + break; + } +} + +void RuntimeDyldELFSw64::resolveSw64Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend, + uint64_t SymOffset, + SID SectionID) { + uint32_t r_type = Type & 0xff; + + // RelType is used to keep information for which relocation type we are + // applying relocation. 
+ uint32_t RelType = r_type; + int64_t CalculatedValue = evaluateSw64Relocation( + Section, Offset, Value, RelType, Addend, SymOffset, SectionID); + + applySw64Relocation(Section.getAddressWithOffset(Offset), CalculatedValue, + RelType); +} diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h new file mode 100644 index 000000000000..c333dc4bdf85 --- /dev/null +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h @@ -0,0 +1,61 @@ +//===-- RuntimeDyldELFSw64.h ---- ELF/Sw64 specific code. -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFSw64_H +#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFSw64_H + +#include "../RuntimeDyldELF.h" +#include + +#define DEBUG_TYPE "dyld" + +namespace llvm { + +class RuntimeDyldELFSw64 : public RuntimeDyldELF { +public: + typedef uint64_t TargetPtrT; + + RuntimeDyldELFSw64(RuntimeDyld::MemoryManager &MM, + JITSymbolResolver &Resolver) + : RuntimeDyldELF(MM, Resolver) {} + + void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override; + +protected: + void resolveSw64Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend, + uint64_t SymOffset, SID SectionID); + + uint64_t GOTOffset = 0; + uint64_t GPOffset_Modify = 0; + +private: + /// A object file specific relocation resolver + /// \param RE The relocation to be resolved + /// \param Value Target symbol address to apply the relocation action + uint64_t evaluateRelocation(const RelocationEntry &RE, uint64_t Value, + uint64_t Addend); + + /// A object file specific relocation resolver + /// \param RE The relocation to be resolved + /// \param Value Target symbol address to apply the relocation action + void applyRelocation(const RelocationEntry &RE, uint64_t Value); + + int64_t evaluateSw64Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend, + uint64_t SymOffset, SID SectionID); + + void applySw64Relocation(uint8_t *TargetPtr, int64_t CalculatedValue, + uint32_t Type); +}; +} // namespace llvm + +#undef DEBUG_TYPE + +#endif diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index ec2620efac38..eec4fb8054e4 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsSw64.h" #include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 6a6befdd3054..0c45a7d4ffff 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -1318,6 +1318,8 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, // in a relocation with a null section which is the desired result. case MCSymbolRefExpr::VK_PPC_TOCBASE: return false; + case MCSymbolRefExpr::VK_SW64_GPDISP: + return false; // These VariantKind cause the relocation to refer to something other than // the symbol itself, like a linker generated table. 
Since the address of @@ -1501,6 +1503,21 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, SecA ? cast(SecA->getBeginSymbol()) : nullptr; if (SectionSymbol) SectionSymbol->setUsedInReloc(); + if (TargetObjectWriter->getEMachine() == ELF::EM_SW64) { + const MCFixupKindInfo &FKI = + Asm.getBackend().getFixupKindInfo((MCFixupKind)Fixup.getKind()); + if (strcmp(FKI.Name, "fixup_SW64_GPDISP_HI16") == 0) { + + Addend = 4; + const auto *RenamedSymA = + cast(Asm.getContext().getOrCreateSymbol(".text")); + + RenamedSymA->setUsedInReloc(); + ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, C); + Relocations[&FixupSection].push_back(Rec); + return; + } + } ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, C); Relocations[&FixupSection].push_back(Rec); return; @@ -1511,6 +1528,22 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, if (const MCSymbolELF *R = Renames.lookup(SymA)) RenamedSymA = R; + if (TargetObjectWriter->getEMachine() == ELF::EM_SW64) { + const MCFixupKindInfo &FKI = + Asm.getBackend().getFixupKindInfo((MCFixupKind)Fixup.getKind()); + if (strcmp(FKI.Name, "fixup_SW64_GPDISP_HI16") == 0) { + Addend = 4; + SymA = nullptr; + for (auto it = Asm.symbol_begin(), ie = Asm.symbol_end(); it != ie; + ++it) { + if (it->isInSection() && &(it->getSection()) == Fragment->getParent()) { + RenamedSymA = cast(&*it); + break; + } + } + } + } + if (ViaWeakRef) RenamedSymA->setIsWeakrefUsedInReloc(); else diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 06de70ad2f39..09d41f78889e 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -681,9 +681,14 @@ void MCAsmStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) { if (E->inlineAssignedExpr()) EmitSet = false; if (EmitSet) { - OS << ".set "; - Symbol->print(OS, MAI); - OS << ", "; + if (MAI->hasSw64SetDirective()) { + Symbol->print(OS, MAI); + OS << " = "; + } else { + OS << ".set "; + Symbol->print(OS, MAI); + OS << ", "; + } Value->print(OS, MAI); EmitEOL(); diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index 653ff4e9435a..abdd002f9d84 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -472,6 +472,16 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HI: case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA: case MCSymbolRefExpr::VK_PPC_TLSLD: + case MCSymbolRefExpr::VK_SW64_TLSGD: + case MCSymbolRefExpr::VK_SW64_TLSLDM: + case MCSymbolRefExpr::VK_SW64_GOTDTPREL16: + case MCSymbolRefExpr::VK_SW64_DTPREL_HI16: + case MCSymbolRefExpr::VK_SW64_DTPREL_LO16: + case MCSymbolRefExpr::VK_SW64_DTPREL16: + case MCSymbolRefExpr::VK_SW64_GOTTPREL16: + case MCSymbolRefExpr::VK_SW64_TPREL_HI16: + case MCSymbolRefExpr::VK_SW64_TPREL_LO16: + case MCSymbolRefExpr::VK_SW64_TPREL16: break; } getAssembler().registerSymbol(symRef.getSymbol()); diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index c9ff1865cf91..4864468da236 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -347,6 +347,56 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_PPC_LOCAL: return "local"; case VK_PPC_NOTOC: return "notoc"; case VK_PPC_PCREL_OPT: return "<>"; + case VK_SW64_ELF_LITERAL: + return "ELF_LITERAL"; + case VK_SW64_LITUSE_ADDR: + return "LITUSE_ADDR"; + case VK_SW64_LITUSE_BASE: + return "LITUSE_BASE"; + case VK_SW64_LITUSE_BYTOFF: + return "LITUSE_BYTOFF"; + case VK_SW64_LITUSE_JSR: + return "LITUSE_JSR"; 
+ case VK_SW64_LITUSE_TLSGD: + return "LITUSE_TLSGD"; + case VK_SW64_LITUSE_TLSLDM: + return "LITUSE_TLSLDM"; + case VK_SW64_LITUSE_JSRDIRECT: + return "LITUSE_JSRDIRECT"; + case VK_SW64_GPDISP: + return "GPDISP"; + case VK_SW64_GPDISP_HI16: + return "GPDISP_HI16"; + case VK_SW64_GPDISP_LO16: + return "GPDISP_LO16"; + case VK_SW64_GPREL_HI16: + return "GPREL_HI16"; + case VK_SW64_GPREL_LO16: + return "GPREL_LO16"; + case VK_SW64_GPREL16: + return "GPREL16"; + case VK_SW64_BRSGP: + return "BRSGP"; + case VK_SW64_TLSGD: + return "TLSGD"; + case VK_SW64_TLSLDM: + return "TLSLDM"; + case VK_SW64_GOTDTPREL16: + return "GOTDTPREL16"; + case VK_SW64_DTPREL_HI16: + return "DTPREL_HI16"; + case VK_SW64_DTPREL_LO16: + return "DTPREL_LO16"; + case VK_SW64_DTPREL16: + return "DTPREL16"; + case VK_SW64_GOTTPREL16: + return "GOTTPREL16"; + case VK_SW64_TPREL_HI16: + return "TPREL_HI16"; + case VK_SW64_TPREL_LO16: + return "TPREL_LO16"; + case VK_SW64_TPREL16: + return "TPREL16"; case VK_COFF_IMGREL32: return "IMGREL"; case VK_Hexagon_LO16: return "LO16"; case VK_Hexagon_HI16: return "HI16"; diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index 0b5109e41e71..745ab757839c 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -365,6 +365,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { case Triple::xtensa: FDECFIEncoding = dwarf::DW_EH_PE_sdata4; break; + case Triple::sw_64: + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + break; default: FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; break; diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp index 666252ffcb74..768fa2cb6bb1 100644 --- a/llvm/lib/MC/MCSectionELF.cpp +++ b/llvm/lib/MC/MCSectionELF.cpp @@ -153,6 +153,10 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, // Print hex value of the flag while we do not have // any standard symbolic representation of the flag. OS << "0x7000001e"; + else if (Type == ELF::SHT_SW64_DWARF) + // Print hex value of the flag while we do not have + // any standard symbolic representation of the flag. 
+ OS << "0x7000001e"; else if (Type == ELF::SHT_LLVM_ODRTAB) OS << "llvm_odrtab"; else if (Type == ELF::SHT_LLVM_LINKER_OPTIONS) diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 0d1862e57371..62c4d41beeb1 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -181,6 +181,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, break; } break; + case ELF::EM_SW64: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Sw64.def" + default: + break; + } + break; default: break; } @@ -233,6 +240,8 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) { break; case ELF::EM_LOONGARCH: return ELF::R_LARCH_RELATIVE; + case ELF::EM_SW64: + break; default: break; } @@ -276,6 +285,14 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC); STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_STATIC); } + case ELF::EM_SW64: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_SW64_REGINFO); + STRINGIFY_ENUM_CASE(ELF, SHT_SW64_OPTIONS); + STRINGIFY_ENUM_CASE(ELF, SHT_SW64_ABIFLAGS); + STRINGIFY_ENUM_CASE(ELF, SHT_SW64_DWARF); + } + break; default: break; } @@ -498,6 +515,13 @@ std::string ELFFile::getDynamicTagAsString(unsigned Arch, } break; + case ELF::EM_SW64: + switch (Type) { +#define SW64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef SW64_DYNAMIC_TAG + } + case ELF::EM_PPC64: switch (Type) { #define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 0e5036d7dfcc..2f97afd147c8 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -428,6 +428,31 @@ static uint64_t resolveSparc32(uint64_t Type, uint64_t Offset, uint64_t S, return LocData; } +static bool supportsSw64(uint64_t Type) { + switch (Type) { + case ELF::R_SW_64_REFLONG: + case ELF::R_SW_64_REFQUAD: + case ELF::R_SW_64_SREL32: + return true; + default: + return false; + } +} + +static uint64_t resolveSw64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_SW_64_REFLONG: + case ELF::R_SW_64_REFQUAD: + return S + Addend; + case ELF::R_SW_64_SREL32: + return (S + Addend) & 0xFFFFFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } + return 0; +} + static bool supportsHexagon(uint64_t Type) { return Type == ELF::R_HEX_32; } @@ -807,6 +832,8 @@ getRelocationResolver(const ObjectFile &Obj) { return {supportsAmdgpu, resolveAmdgpu}; case Triple::riscv64: return {supportsRISCV, resolveRISCV}; + case Triple::sw_64: + return {supportsSw64, resolveSw64}; default: if (isAMDGPU(Obj)) return {supportsAmdgpu, resolveAmdgpu}; diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 87fe7bebf688..7be623c3608d 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -220,6 +220,7 @@ add_llvm_component_library(LLVMSupport StringRef.cpp SuffixTreeNode.cpp SuffixTree.cpp + Sw64TargetParser.cpp SystemUtils.cpp TarWriter.cpp ThreadPool.cpp diff --git a/llvm/lib/Support/Sw64TargetParser.cpp b/llvm/lib/Support/Sw64TargetParser.cpp new file mode 100644 index 000000000000..f31238c8f4b9 --- /dev/null +++ b/llvm/lib/Support/Sw64TargetParser.cpp @@ -0,0 +1,96 @@ +//===-- Sw64TargetParser - Parser for Sw64 features -------*- C++ -*-===// +// +// Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise Sw64 hardware features +// such as FPU/CPU/ARCH and extension names. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Sw64TargetParser.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/TargetParser/Triple.h" +#include + +namespace llvm { +namespace Sw64 { + +struct CPUInfo { + StringLiteral Name; + CPUKind Kind; + unsigned Features; + StringLiteral DefaultMarch; + bool is64Bit() const { return (Features & FK_64BIT); } +}; + +constexpr CPUInfo Sw64CPUInfo[] = { +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) \ + {NAME, CK_##ENUM, FEATURES, DEFAULT_MARCH}, +#include "llvm/Support/Sw64TargetParser.def" +}; + +bool checkTuneCPUKind(CPUKind Kind, bool IsSw64) { + if (Kind == CK_INVALID) + return false; + return Sw64CPUInfo[static_cast(Kind)].is64Bit() == IsSw64; +} + +CPUKind parseARCHKind(StringRef CPU) { + return llvm::StringSwitch(CPU) +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) \ + .Case(DEFAULT_MARCH, CK_##ENUM) +#include "llvm/Support/Sw64TargetParser.def" + .Default(CK_INVALID); +} + +StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsSw64) { + return llvm::StringSwitch(TuneCPU) +#define PROC_ALIAS(NAME, Sw64) .Case(NAME, StringRef(Sw64)) +#include "llvm/Support/Sw64TargetParser.def" + .Default(TuneCPU); +} + +CPUKind parseTuneCPUKind(StringRef TuneCPU, bool IsSw64) { + TuneCPU = resolveTuneCPUAlias(TuneCPU, IsSw64); + + return llvm::StringSwitch(TuneCPU) +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM) +#include "llvm/Support/Sw64TargetParser.def" + .Default(CK_INVALID); +} + +StringRef getMcpuFromMArch(StringRef CPU) { + CPUKind Kind = parseARCHKind(CPU); + return Sw64CPUInfo[static_cast(Kind)].Name; +} + +void fillValidCPUArchList(SmallVectorImpl &Values, bool IsSw64) { + for (const auto &C : Sw64CPUInfo) { + if (C.Kind != CK_INVALID && IsSw64 == C.is64Bit()) + Values.emplace_back(C.Name); + } +} + +void fillValidTuneCPUArchList(SmallVectorImpl &Values, bool IsSw64) { + for (const auto &C : Sw64CPUInfo) { + if (C.Kind != CK_INVALID && IsSw64 == C.is64Bit()) + Values.emplace_back(C.Name); + } + +#define PROC_ALIAS(NAME, Sw64) Values.emplace_back(StringRef(NAME)); +#include "llvm/Support/Sw64TargetParser.def" +} + +CPUKind parseCPUArch(StringRef CPU) { + return llvm::StringSwitch(CPU) +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM) +#include "llvm/Support/Sw64TargetParser.def" + .Default(CK_INVALID); +} + +} // namespace Sw64 +} // namespace llvm diff --git a/llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt b/llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt new file mode 100644 index 000000000000..90d61cd90208 --- /dev/null +++ b/llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_component_library(LLVMSw64AsmParser + Sw64AsmParser.cpp + + LINK_COMPONENTS + MC + MCParser + Sw64Desc + Sw64Info + Support + + ADD_TO_COMPONENT + Sw64 + ) diff --git a/llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp b/llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp new file mode 100644 index 000000000000..e3ce6f0a61c0 --- /dev/null +++ b/llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp @@ -0,0 +1,2005 @@ +//===-- 
Sw64AsmParser.cpp - Parse Sw64 assembly to MCInst instructions ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64ABIFlagsSection.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "Sw64TargetStreamer.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/SubtargetFeature.h" +#include "llvm/TargetParser/Triple.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-asm-parser" + +static const StringRef RelocTable[] = { + "literal", "lituse_addr", "lituse_jsr", "gpdisp", + "gprelhigh", "gprellow", "gprel", "tlsgd", + "tlsldm", "gotdtprel", "dtprelhi", "dtprello", + "gottprel", "tprelhi", "tprello", "tprel"}; + +namespace llvm { + +class MCInstrInfo; + +} // end namespace llvm + +namespace { + +class Sw64AssemblerOptions { +public: + Sw64AssemblerOptions(const FeatureBitset &Features_) : Features(Features_) {} + + Sw64AssemblerOptions(const Sw64AssemblerOptions *Opts) { + ATReg = Opts->getATRegIndex(); + Reorder = Opts->isReorder(); + Macro = Opts->isMacro(); + Features = Opts->getFeatures(); + } + + unsigned getATRegIndex() const { return ATReg; } + bool setATRegIndex(unsigned Reg) { + if (Reg > 31) + return false; + + ATReg = Reg; + return true; + } + + bool isReorder() const { return Reorder; } + void setReorder() { Reorder = true; } + void setNoReorder() { Reorder = false; } + + bool isMacro() const { return Macro; } + void setMacro() { Macro = true; } + void setNoMacro() { Macro = false; } + + const FeatureBitset &getFeatures() const { return Features; } + void setFeatures(const FeatureBitset &Features_) { Features = Features_; } + + // Set of features that are either architecture features or referenced + // by them (e.g.: FeatureNaN2008 implied by FeatureSw6432r6). + // The full table can be found in Sw64GenSubtargetInfo.inc (Sw64FeatureKV[]). + // The reason we need this mask is explained in the selectArch function. + // FIXME: Ideally we would like TableGen to generate this information. 
+ static const FeatureBitset AllArchRelatedMask; + +private: + unsigned ATReg = 1; + bool Reorder = true; + bool Macro = true; + FeatureBitset Features; +}; + +} // end anonymous namespace + +const FeatureBitset Sw64AssemblerOptions::AllArchRelatedMask = { + Sw64::FeatureCIX, Sw64::Featurecore3b, Sw64::Featurecore4, + Sw64::FeatureRelax, Sw64::FeatureEv}; + +namespace { + +class Sw64AsmParser : public MCTargetAsmParser { + Sw64TargetStreamer &getTargetStreamer() { + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast(TS); + } + + Sw64ABIInfo ABI; + SmallVector, 2> AssemblerOptions; + MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a + // nullptr, which indicates that no function is currently + // selected. This usually happens after an '.end func' + // directive. + bool IsLittleEndian; + bool IsPicEnabled; + bool IsCpRestoreSet; + int CpRestoreOffset; + unsigned CpSaveLocation; + // If true, then CpSaveLocation is a register, otherwise it's an offset. + bool CpSaveLocationIsRegister; + + // Map of register aliases created via the .set directive. + StringMap RegisterSets; + +#define GET_ASSEMBLER_HEADER +#include "Sw64GenAsmMatcher.inc" + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + + // Parse a register as used in CFI directives + bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) override; + + OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) override; + + bool parseParenSuffix(StringRef Name, OperandVector &Operands); + + bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID); + + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + + bool ParseDirective(AsmToken DirectiveID) override; + + OperandMatchResultTy + matchAnyRegisterNameWithoutDollar(OperandVector &Operands, + StringRef Identifier, SMLoc S); + OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, + const AsmToken &Token, + SMLoc S); + OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, + SMLoc S); + OperandMatchResultTy parseAnyRegister(OperandVector &Operands); + OperandMatchResultTy parseMemOperand(OperandVector &Operands); + OperandMatchResultTy parseMemOperands(OperandVector &Operands); + OperandMatchResultTy parseJmpImm(OperandVector &Operands); + + bool searchSymbolAlias(OperandVector &Operands); + + bool parseOperand(OperandVector &, StringRef Mnemonic); + + void ParsingFixupOperands(std::pair reloc); + + enum MacroExpanderResultTy { + MER_NotAMacro, + MER_Success, + MER_Fail, + }; + + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; + + bool loadAndAddSymbolAddress(const MCExpr *SymExpr, unsigned DstReg, + unsigned SrcReg, bool Is32BitSym, SMLoc IDLoc, + MCStreamer &Out, const MCSubtargetInfo *STI); + + void expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad); + + bool reportParseError(Twine ErrorMsg); + + bool parseMemOffset(const MCExpr *&Res, bool isParenExpr); + + bool isEvaluated(const MCExpr *Expr); + bool parseSetArchDirective(); + bool parseDirectiveSet(); + + bool parseSetAtDirective(); + bool parseSetNoAtDirective(); + bool parseSetMacroDirective(); + bool parseSetNoMacroDirective(); + bool parseSetReorderDirective(); + bool parseSetNoReorderDirective(); + + bool 
parseSetAssignment(); + + bool parseFpABIValue(Sw64ABIFlagsSection::FpABIKind &FpABI, + StringRef Directive); + + int matchCPURegisterName(StringRef Symbol); + + int matchFPURegisterName(StringRef Name); + + bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + + // Helper function that checks if the value of a vector index is within the + // boundaries of accepted values for each RegisterKind + // Example: INSERT.B $w0[n], $1 => 16 > n >= 0 + bool validateMSAIndex(int Val, int RegKind); + + // Selects a new architecture by updating the FeatureBits with the necessary + // info including implied dependencies. + // Internally, it clears all the feature bits related to *any* architecture + // and selects the new one using the ToggleFeature functionality of the + // MCSubtargetInfo object that handles implied dependencies. The reason we + // clear all the arch related bits manually is because ToggleFeature only + // clears the features that imply the feature being cleared and not the + // features implied by the feature being cleared. This is easier to see + // with an example: + // -------------------------------------------------- + // | Feature | Implies | + // | -------------------------------------------------| + // | FeatureCIX | | + // | FeatureEV | | + // | FeatureSw6a | | + // | FeatureSw6b | | + // | ... | | + // -------------------------------------------------- + // + // Setting Sw643 is equivalent to set: (FeatureSw643 | FeatureSw642 | + // FeatureSw64GP64 | FeatureSw641) + // Clearing Sw643 is equivalent to clear (FeatureSw643 | FeatureSw644). + void selectArch(StringRef ArchFeature) { + MCSubtargetInfo &STI = copySTI(); + FeatureBitset FeatureBits = STI.getFeatureBits(); + FeatureBits &= ~Sw64AssemblerOptions::AllArchRelatedMask; + STI.setFeatureBits(FeatureBits); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(ArchFeature))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); + } + + void setFeatureBits(uint64_t Feature, StringRef FeatureString) { + if (!(getSTI().getFeatureBits()[Feature])) { + MCSubtargetInfo &STI = copySTI(); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); + } + } + + void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { + if (getSTI().getFeatureBits()[Feature]) { + MCSubtargetInfo &STI = copySTI(); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); + } + } + + void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { + setFeatureBits(Feature, FeatureString); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); + } + + void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { + clearFeatureBits(Feature, FeatureString); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); + } + +public: + MCFixupKind FixupKind; + + enum Sw64MatchResultTy { + Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY, + Match_RequiresDifferentOperands, + Match_RequiresNoZeroRegister, + Match_RequiresSameSrcAndDst, + Match_NoFCCRegisterForCurrentISA, + Match_NonZeroOperandForSync, + Match_NonZeroOperandForMTCX, + Match_RequiresPosSizeRange0_32, + Match_RequiresPosSizeRange33_64, + Match_RequiresPosSizeUImm6, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "Sw64GenAsmMatcher.inc" +#undef GET_OPERAND_DIAGNOSTIC_TYPES + }; + + 
Sw64AsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, sti, MII), + ABI(Sw64ABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), + sti.getCPU(), Options)) { + FixupKind = llvm::FirstTargetFixupKind; + + MCAsmParserExtension::Initialize(parser); + parser.addAliasForDirective(".asciiz", ".asciz"); + parser.addAliasForDirective(".hword", ".2byte"); + parser.addAliasForDirective(".word", ".4byte"); + parser.addAliasForDirective(".dword", ".8byte"); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + + // Remember the initial assembler options. The user can not modify these. + AssemblerOptions.push_back( + std::make_unique(getSTI().getFeatureBits())); + + // Create an assembler options environment for the user to modify. + AssemblerOptions.push_back( + std::make_unique(getSTI().getFeatureBits())); + + CurrentFn = nullptr; + + IsPicEnabled = getContext().getObjectFileInfo()->isPositionIndependent(); + + IsCpRestoreSet = false; + CpRestoreOffset = -1; + } + + const Sw64ABIInfo &getABI() const { return ABI; } + + const MCExpr *createTargetUnaryExpr(const MCExpr *E, + AsmToken::TokenKind OperatorToken, + MCContext &Ctx) override { + switch (OperatorToken) { + default: + return nullptr; + case AsmToken::PercentGp_Rel: + return Sw64MCExpr::create(Sw64MCExpr::MEK_ELF_LITERAL, E, Ctx); + case AsmToken::PercentDtprel_Hi: + return Sw64MCExpr::create(Sw64MCExpr::MEK_GPREL_HI16, E, Ctx); + case AsmToken::PercentDtprel_Lo: + return Sw64MCExpr::create(Sw64MCExpr::MEK_GPREL_LO16, E, Ctx); + case AsmToken::PercentGot_Hi: + return Sw64MCExpr::create(Sw64MCExpr::MEK_GPDISP_HI16, E, Ctx); + case AsmToken::PercentGot_Lo: + return Sw64MCExpr::create(Sw64MCExpr::MEK_GPDISP_LO16, E, Ctx); + + case AsmToken::PercentTprel_Hi: + return Sw64MCExpr::create(Sw64MCExpr::MEK_TPREL_HI16, E, Ctx); + case AsmToken::PercentTprel_Lo: + return Sw64MCExpr::create(Sw64MCExpr::MEK_TPREL_LO16, E, Ctx); + } + } +}; + +// Sw64Operand - Instances of this class represent a parsed Sw64 machine +// instruction. +class Sw64Operand : public MCParsedAsmOperand { +public: + // Broad categories of register classes + // The exact class is finalized by the render method. + enum RegKind { + RegKind_GPR = 1, // Sw64 GPR Register + RegKind_FPR = 2, // Sw64 FPR Register + RegKind_TC = 4, // Sw64 Time counter + RegKind_CSR = 8, // Sw64 Control & Status Register + RegKind_FPCR = 16, // Sw64 Floating-point Control Register + // Potentially any (e.g. $1) + RegKind_Numeric = + RegKind_GPR | RegKind_FPR | RegKind_TC | RegKind_CSR | RegKind_FPCR + }; + +private: + enum KindTy { + k_Immediate, // An immediate (possibly involving symbol references) + k_Memory, // Base + Offset Memory Address + k_Register, // A RegKind. + k_RegisterIndex, // A register index in one or more RegKind. 
+ k_Token // A simple token + } Kind; + +public: + Sw64Operand(KindTy K, Sw64AsmParser &Parser) + : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {} + + ~Sw64Operand() override { + switch (Kind) { + case k_Immediate: + break; + case k_Memory: + delete Mem.Base; + break; + case k_Register: + case k_RegisterIndex: + case k_Token: + break; + } + } + +private: + // For diagnostics, and checking the assembler temporary + Sw64AsmParser &AsmParser; + + struct Token { + const char *Data; + unsigned Length; + }; + + struct RegIdxOp { + unsigned Index; // Index into the register class + RegKind Kind; // Bitfield of the kinds it could possibly be + struct Token Tok; // The input token this operand originated from. + const MCRegisterInfo *RegInfo; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + Sw64Operand *Base; + const MCExpr *Off; + }; + + struct RegListOp { + SmallVector *List; + }; + + union { + struct Token Tok; + struct RegIdxOp RegIdx; + struct ImmOp Imm; + struct MemOp Mem; + struct RegListOp RegList; + }; + + SMLoc StartLoc, EndLoc; + + // Internal constructor for register kinds + static std::unique_ptr CreateReg(unsigned Index, StringRef Str, + RegKind RegKind, + const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, + Sw64AsmParser &Parser) { + auto Op = std::make_unique(k_Register, Parser); + Op->RegIdx.Index = Index; + Op->RegIdx.RegInfo = RegInfo; + Op->RegIdx.Kind = RegKind; + Op->RegIdx.Tok.Data = Str.data(); + Op->RegIdx.Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + +public: + // Coerce the register to GPR64 and return the real register for the current + // target. + unsigned getGPRReg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); + return RegIdx.Index; + } + + bool isV256AsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_FPR && + RegIdx.Index <= Sw64::F31 && RegIdx.Index >= Sw64::F0; + } + + void addMemOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createReg(getMemBase()->getGPR64Reg())); + + const MCExpr *Expr = getMemOff(); + addExpr(Inst, Expr); + } + +private: + // Coerce the register to FPR64 and return the real register for the current + // target. + unsigned getFPR64Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_FPR) && "Invalid access!"); + return RegIdx.Index; + } + +public: + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediate when possible. Null MCExpr = 0. + if (!Expr) + Inst.addOperand(MCOperand::createImm(0)); + else if (const MCConstantExpr *CE = dyn_cast(Expr)) + Inst.addOperand(MCOperand::createImm(CE->getValue())); + else + Inst.addOperand(MCOperand::createExpr(Expr)); + } + + void addRegOperands(MCInst &Inst, unsigned N) const { + if (RegIdx.Index > 32) + Inst.addOperand(MCOperand::createReg(getGPRReg())); + else + Inst.addOperand(MCOperand::createReg(getFPR64Reg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCExpr *Expr = getImm(); + addExpr(Inst, Expr); + } + + bool isReg() const override { + // As a special case until we sort out the definition of div/divu, accept + // $0/$zero here so that MCK_ZERO works correctly. 
+ return isGPRAsmReg() || isFPRAsmReg(); + } + + bool isRegIdx() const { return Kind == k_Register; } // Operand.Kind + bool isImm() const override { return Kind == k_Immediate; } + + bool isConstantImm() const { + int64_t Res; + return isImm() && getImm()->evaluateAsAbsolute(Res); + } + + bool isToken() const override { + // Note: It's not possible to pretend that other operand kinds are tokens. + // The matcher emitter checks tokens first. + return Kind == k_Token; + } + + bool isMem() const override { return Kind == k_Memory; } + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const override { + // As a special case until we sort out the definition of div/divu, accept + // $0/$zero here so that MCK_ZERO works correctly. + if (Kind == k_Register && RegIdx.Kind & RegKind_GPR) + return getGPRReg(); // FIXME: GPR64 too + + if (Kind == k_Register && RegIdx.Kind & RegKind_FPR) + return getFPR64Reg(); // FIXME: GPR64 too + + llvm_unreachable("Invalid access!"); + return 0; + } + + const MCExpr *getImm() const { + assert((Kind == k_Immediate) && "Invalid access!"); + return Imm.Val; + } + + int64_t getConstantImm() const { + const MCExpr *Val = getImm(); + int64_t Value = 0; + (void)Val->evaluateAsAbsolute(Value); + return Value; + } + + Sw64Operand *getMemBase() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Base; + } + + const MCExpr *getMemOff() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Off; + } + + int64_t getConstantMemOff() const { + return static_cast(getMemOff())->getValue(); + } + + static std::unique_ptr CreateToken(StringRef Str, SMLoc S, + Sw64AsmParser &Parser) { + auto Op = std::make_unique(k_Token, Parser); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + // Create a numeric register (e.g. $1). The exact register remains + // unresolved until an instruction successfully matches + static std::unique_ptr + createNumericReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, Sw64AsmParser &Parser) { + LLVM_DEBUG(dbgs() << "createNumericReg(" << Index + 65 << ", ...)\n"); + return CreateReg(Index + 65, Str, RegKind_Numeric, RegInfo, S, E, Parser); + } + + // Create a register that is definitely a GPR. + // This is typically only used for named registers such as $gp. + static std::unique_ptr + createGPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, Sw64AsmParser &Parser) { + return CreateReg(Index, Str, RegKind_GPR, RegInfo, S, E, Parser); + } + + // Create a register that is definitely a FPR. + // This is typically only used for named registers such as $f0. 
+ static std::unique_ptr + createFPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, Sw64AsmParser &Parser) { + return CreateReg(Index, Str, RegKind_FPR, RegInfo, S, E, Parser); + } + + static std::unique_ptr + CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, Sw64AsmParser &Parser) { + auto Op = std::make_unique(k_Immediate, Parser); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr + CreateMem(std::unique_ptr Base, const MCExpr *Off, SMLoc S, + SMLoc E, Sw64AsmParser &Parser) { + auto Op = std::make_unique(k_Memory, Parser); + Op->Mem.Base = Base.release(); + Op->Mem.Off = Off; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + bool isGPRAsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_GPR && + RegIdx.Index <= Sw64::R31 && RegIdx.Index >= Sw64::R0; + } + + bool isFPRAsmReg() const { + // AFPR64 is $0-$15 but we handle this in getAFGR64() + return isRegIdx() && RegIdx.Kind & RegKind_FPR && + RegIdx.Index <= Sw64::F31 && RegIdx.Index >= Sw64::F0; + // return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 64 && + // RegIdx.Index >= 33; + } + + // Coerce the register to GPR64 and return the real register for the current + // target. + unsigned getGPR64Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); + return RegIdx.Index; + } + + unsigned getFGR64Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_FPR) && "Invalid access!"); + return RegIdx.Index; + } + + void addF4RCAsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR64Reg())); + } + + void addF8RCAsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR64Reg())); + } + + bool isFGRAsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_FPR && RegIdx.Index <= 32; + } + + // getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const override { return StartLoc; } + // getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const override { return EndLoc; } + + void print(raw_ostream &OS) const override { + switch (Kind) { + case k_Immediate: + OS << "Imm<"; + OS << *Imm.Val; + OS << ">"; + break; + case k_Memory: + OS << "Mem<"; + Mem.Base->print(OS); + OS << ", "; + OS << *Mem.Off; + OS << ">"; + break; + case k_Register: + OS << "Reg<" << RegIdx.Kind << ", " + << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">"; + break; + case k_RegisterIndex: + OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ", " + << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">"; + break; + case k_Token: + OS << getToken(); + break; + } + } + + bool isValidForTie(const Sw64Operand &Other) const { + if (Kind != Other.Kind) + return false; + + switch (Kind) { + default: + llvm_unreachable("Unexpected kind"); + return false; + case k_RegisterIndex: { + StringRef Token(RegIdx.Tok.Data, RegIdx.Tok.Length); + StringRef OtherToken(Other.RegIdx.Tok.Data, Other.RegIdx.Tok.Length); + return Token == OtherToken; + } + } + } + + template bool isScaledSImm() const { + if (isConstantImm() && + isShiftedInt(getConstantImm())) + return true; + // Operand can also be a symbol or symbol plus + // offset in case of relocations. 
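+    // For relocatable expressions, apply the same shifted-range check to the
+    // constant addend of the relocation (computed below via evaluateAsRelocatable).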
+ if (Kind != k_Immediate) + return false; + MCValue Res; + bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr); + return Success && isShiftedInt(Res.getConstant()); + } + + template + void addConstantSImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + int64_t Imm = getConstantImm() - Offset; + Imm = SignExtend64(Imm); + Imm += Offset; + Imm += AdjustOffset; + Inst.addOperand(MCOperand::createImm(Imm)); + } + + template + void addConstantUImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + uint64_t Imm = getConstantImm() - Offset; + Imm &= (1ULL << Bits) - 1; + Imm += Offset; + Imm += AdjustOffset; + Inst.addOperand(MCOperand::createImm(Imm)); + } + + template bool isConstantUImmRange() const { + return isConstantImm() && getConstantImm() >= Bottom && + getConstantImm() <= Top; + } + + template bool isScaledUImm() const { + return isConstantImm() && + isShiftedUInt(getConstantImm()); + } + + template bool isConstantSImm() const { + return isConstantImm() && isInt(getConstantImm() - Offset); + } + + template bool isConstantUImm() const { + return isConstantImm() && isUInt(getConstantImm() - Offset); + } + + // Coerce the register to SIMD and return the real register for the current + // target. + unsigned getV256Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_FPR) && "Invalid access!"); + // It doesn't matter which of the MSA128[BHWD] classes we use. They are all + // identical + unsigned ClassID = Sw64::V256LRegClassID; + // RegIdx.Index should be sub 1, or it will be error. such as: $f1 -> $f2 + return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index - 1); + } + + void addV256AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getV256Reg())); + } + + bool isConstantMemOff() const { + return isMem() && isa(getMemOff()); + } + + // Allow relocation operators. + // FIXME: This predicate and others need to look through binary expressions + // and determine whether a Value is a constant or not. + template + bool isMemWithSimmOffset() const { + if (!isMem()) + return false; + if (!getMemBase()->isGPRAsmReg()) + return false; + if (isa(getMemOff()) || + (isConstantMemOff() && + isShiftedInt(getConstantMemOff()))) + return true; + MCValue Res; + bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); + return IsReloc && isShiftedInt(Res.getConstant()); + } + + template bool isSImm() const { + return isConstantImm() ? isInt(getConstantImm()) : isImm(); + } + + template bool isUImm() const { + return isConstantImm() ? isUInt(getConstantImm()) : isImm(); + } + + template bool isAnyImm() const { + return isConstantImm() ? (isInt(getConstantImm()) || + isUInt(getConstantImm())) + : isImm(); + } + +}; // class Sw64Operand + +} // end anonymous namespace + +namespace llvm {} // end namespace llvm + +bool Sw64AsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); + Inst.setLoc(IDLoc); + + if (MCID.mayLoad() || MCID.mayStore()) { + // Check the offset of memory operand, if it is a symbol + // reference or immediate we may have to expand instructions. 
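+    // Operand 1 holds the displacement. Immediates outside the signed 16-bit
+    // range and unresolved symbolic references are rewritten via expandMemInst.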
+ const MCOperandInfo &OpInfo = MCID.operands()[1]; + if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || + (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { + MCOperand &Op = Inst.getOperand(1); + if (Op.isImm()) { + const unsigned Opcode = Inst.getOpcode(); + switch (Opcode) { + default: + break; + } + + int64_t MemOffset = Op.getImm(); + if (MemOffset < -32768 || MemOffset > 32767) { + // Offset can't exceed 16bit value. + expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + return getParser().hasPendingError(); + } + } else if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *SR = + static_cast(Expr); + if (SR->getKind() == MCSymbolRefExpr::VK_None) { + // Expand symbol. + expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + return getParser().hasPendingError(); + } + } else if (!isEvaluated(Expr)) { + expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + return getParser().hasPendingError(); + } + } + } + } // if load/store + static int lockReg = -1; + if (Inst.getOpcode() == Sw64::STQ_C || Inst.getOpcode() == Sw64::STL_C) { + lockReg = Inst.getOperand(0).getReg(); + } + + if (Inst.getOpcode() == Sw64::RD_F) { + if (lockReg != Inst.getOperand(0).getReg() && lockReg != -1) { + Error(IDLoc, "lstX and rd_f must use the same reg!"); + lockReg = -1; + return false; + } + } + + Out.emitInstruction(Inst, *STI); + return true; +} + +// Can the value be represented by a unsigned N-bit value and a shift left? +template static bool isShiftedUIntAtAnyPosition(uint64_t x) { + return x && isUInt(x >> llvm::countr_zero(x)); +} + +OperandMatchResultTy Sw64AsmParser::parseJmpImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseJumpTarget\n"); + + SMLoc S = getLexer().getLoc(); + + // Registers are a valid target and have priority over symbols. + OperandMatchResultTy ResTy = parseAnyRegister(Operands); + if (ResTy != MatchOperand_NoMatch) + return ResTy; + + // Integers and expressions are acceptable + const MCExpr *Expr = nullptr; + if (Parser.parseExpression(Expr)) { + // We have no way of knowing if a symbol was consumed so we must ParseFail + return MatchOperand_ParseFail; + } + Operands.push_back( + Sw64Operand::CreateImm(Expr, S, getLexer().getLoc(), *this)); + return MatchOperand_Success; +} + +OperandMatchResultTy Sw64AsmParser::parseMemOperands(OperandVector &Operands) { + LLVM_DEBUG(dbgs() << "Parsing Memory Operand for store/load\n"); + SMLoc S = getParser().getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); + + const AsmToken &Tok = getParser().getTok(); + switch (Tok.getKind()) { + default: + return MatchOperand_NoMatch; + case AsmToken::EndOfStatement: + // Zero register assumed, add a memory operand with ZERO as its base. + // "Base" will be managed by k_Memory. + auto Base = Sw64Operand::createGPRReg( + 0, "0", getContext().getRegisterInfo(), S, E, *this); + Operands.push_back( + Sw64Operand::CreateMem(std::move(Base), nullptr, S, E, *this)); + return MatchOperand_Success; + } + + return MatchOperand_NoMatch; +} + +void Sw64AsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad) { + // ldl $0,a($gp) Op0 Op1 Op2 + // > + + const MCSymbolRefExpr *SR; + MCInst TempInst; + unsigned ImmOffset, HiOffset, LoOffset; + const MCExpr *ExprOffset; + + // 1st operand is either the source or destination register. 
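+  // Expected operand layout: Op0 = data register, Op1 = offset (immediate or
+  // symbol reference), Op2 = base register.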
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + unsigned RegOpNum = Inst.getOperand(0).getReg(); + + // 3nd operand is the base register. + assert(Inst.getOperand(2).isReg() && "expected register operand kind"); + unsigned BaseRegNum = Inst.getOperand(2).getReg(); + const MCOperand &OffsetOp = Inst.getOperand(1); + + // 2rd operand is either an immediate or expression. + if (OffsetOp.isImm()) { + assert(Inst.getOperand(1).isImm() && "expected immediate operand kind"); + ImmOffset = Inst.getOperand(2).getImm(); + LoOffset = ImmOffset & 0x0000ffff; + HiOffset = (ImmOffset & 0xffff0000) >> 16; + // If msb of LoOffset is 1(negative number) we must increment HiOffset. + if (LoOffset & 0x8000) + HiOffset++; + } else + ExprOffset = Inst.getOperand(1).getExpr(); + // All instructions will have the same location. + TempInst.setLoc(IDLoc); + TempInst.setOpcode(Inst.getOpcode()); + TempInst.addOperand(MCOperand::createReg(RegOpNum)); + if (OffsetOp.isImm()) + TempInst.addOperand(MCOperand::createImm(ImmOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + SR = static_cast(ExprOffset); + + TempInst.addOperand(MCOperand::createExpr(SR)); + } else { + llvm_unreachable("Memory offset is not SymbolRef!"); + } + } + TempInst.addOperand(MCOperand::createReg(BaseRegNum)); + Out.emitInstruction(TempInst, *STI); + // Prepare TempInst for next instruction. + TempInst.clear(); +} + +// Expand a integer division macro. +// +// Notably we don't have to emit a warning when encountering $rt as the $zero +// register, or 0 as an immediate. processInstruction() has already done that. +// +// The destination register can only be $zero when expanding (S)DivIMacro or +// D(S)DivMacro. + +bool Sw64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + + switch (MatchResult) { + case Match_Success: + if (processInstruction(Inst, IDLoc, Out, STI)) + return true; + return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidTiedOperand: + Error(IDLoc, "operand must match destination register"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = Operands[ErrorInfo]->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + } + llvm_unreachable("Implement any new match types added!"); +} + +int Sw64AsmParser::matchCPURegisterName(StringRef Name) { + int CC; + CC = StringSwitch(Name) + .Cases("v0", "r0", Sw64::R0) + .Cases("t0", "r1", Sw64::R1) + .Cases("t1", "r2", Sw64::R2) + .Cases("t2", "r3", Sw64::R3) + .Cases("t3", "r4", Sw64::R4) + .Cases("t4", "r5", Sw64::R5) + .Cases("t5", "r6", Sw64::R6) + .Cases("t6", "r7", Sw64::R7) + .Cases("t7", "r8", Sw64::R8) + .Cases("s0", "r9", Sw64::R9) + .Cases("s1", "r10", Sw64::R10) + .Cases("s2", "r11", Sw64::R11) + .Cases("s3", "r12", Sw64::R12) + .Cases("s4", "r13", Sw64::R13) + .Cases("s5", "r14", Sw64::R14) + .Cases("fp", "r15", Sw64::R15) + .Cases("a0", "r16", Sw64::R16) + .Cases("a1", "r17", Sw64::R17) + .Cases("a2", "r18", 
Sw64::R18) + .Cases("a3", "r19", Sw64::R19) + .Cases("a4", "r20", Sw64::R20) + .Cases("a5", "r21", Sw64::R21) + .Cases("t8", "r22", Sw64::R22) + .Cases("t9", "r23", Sw64::R23) + .Cases("t10", "r24", Sw64::R24) + .Cases("t11", "r25", Sw64::R25) + .Cases("ra", "r26", Sw64::R26) + .Cases("pv", "r27", Sw64::R27) + .Cases("at", "r28", Sw64::R28) + .Cases("gp", "r29", Sw64::R29) + .Cases("sp", "r30", Sw64::R30) + .Cases("zero", "r31", Sw64::R31) + .Default(-1); + + return CC; +} + +int Sw64AsmParser::matchFPURegisterName(StringRef Name) { + if (Name[0] == 'f') { + StringRef NumString = Name.substr(1); + unsigned IntVal; + if (NumString.getAsInteger(10, IntVal)) + return -1; // This is not an integer. + if (IntVal > 31) // Maximum index for fpu register. + return -1; + return IntVal + 1; + } + return -1; +} + +bool Sw64AsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseOperand\n"); + + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + + if (parseMemOperands(Operands) == MatchOperand_Success) + return false; + + LLVM_DEBUG(dbgs() << ".. Generic Parser\n"); + + switch (getLexer().getKind()) { + case AsmToken::Dollar: { + // Parse the register. + SMLoc S = Parser.getTok().getLoc(); + + // Almost all registers have been parsed by custom parsers. There is only + // one exception to this. $zero (and it's alias $0) will reach this point + // for div, divu, and similar instructions because it is not an operand + // to the instruction definition but an explicit register. Special case + // this situation for now. + if (parseAnyRegister(Operands) != MatchOperand_NoMatch) + return false; + + // Maybe it is a symbol reference. + StringRef Identifier; + if (Parser.parseIdentifier(Identifier)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); + + // Otherwise create a symbol reference. + const MCExpr *Res = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + + Operands.push_back(Sw64Operand::CreateImm(Res, S, E, *this)); + return false; + } + // parse jmp & ret: ($GPRC) + case AsmToken::LParen: { + return parseParenSuffix(Mnemonic, Operands); + } + case AsmToken::Minus: + case AsmToken::Plus: + case AsmToken::String: + case AsmToken::Integer: { + LLVM_DEBUG(dbgs() << ".. generic integer expression\n"); + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); // Start location of the operand. 
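+      // Parse the expression first, then scan the raw source text for a
+      // trailing '!reloc' annotation (e.g. '!gpdisp') and wrap the expression
+      // in the matching target operator below.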
+ if (getParser().parseExpression(IdVal)) + return true; + + std::string Reloc; + const MCExpr *Expr; + const char *Mnem = Mnemonic.data(); + AsmToken::TokenKind FirstTokenKind; + MCContext &Ctx = getStreamer().getContext(); + std::string Stxt = S.getPointer(); + size_t a = Stxt.find_first_of('!'); + size_t c = Stxt.find_first_of('\n'); + + if (a != 0 && a < c) { + std::string Reloc1 = Stxt.substr(a + 1, c - a - 1); + size_t b = Reloc1.find_last_of('!'); + + Reloc = Reloc1.substr(0, b); + + if (Reloc == "gpdisp") { + if (strcmp(Mnem, "ldih") == 0) + FirstTokenKind = AsmToken::TokenKind::PercentGot_Hi; + else if (strcmp(Mnem, "ldi") == 0) + FirstTokenKind = AsmToken::TokenKind::PercentGot_Lo; + + Expr = createTargetUnaryExpr(IdVal, FirstTokenKind, Ctx); + } + SMLoc E = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(Sw64Operand::CreateImm(Expr, S, E, *this)); + return false; + } + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(Sw64Operand::CreateImm(IdVal, S, E, *this)); + return false; + } + default: { + LLVM_DEBUG(dbgs() << ".. generic expr expression\n"); + + const MCExpr *Expr; + SMLoc S = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return true; + + std::string Reloc; + AsmToken::TokenKind FirstTokenKind; + MCContext &Ctx = getStreamer().getContext(); + std::string Stxt = S.getPointer(); + size_t a = Stxt.find_first_of('!'); + size_t b = Stxt.find_first_of('\n'); + Reloc = Stxt.substr(a + 1, b - a - 1); + + if (a < b) { + if (Reloc == "literal") + FirstTokenKind = AsmToken::TokenKind::PercentGp_Rel; + else if (Reloc == "gprelhigh") + FirstTokenKind = AsmToken::TokenKind::PercentDtprel_Hi; + else if (Reloc == "gprellow") + FirstTokenKind = AsmToken::TokenKind::PercentDtprel_Lo; + else if (Reloc == "tprelhi") + FirstTokenKind = AsmToken::TokenKind::PercentTprel_Hi; + else if (Reloc == "tprello") + FirstTokenKind = AsmToken::TokenKind::PercentTprel_Lo; + + Expr = createTargetUnaryExpr(Expr, FirstTokenKind, Ctx); + } + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(Sw64Operand::CreateImm(Expr, S, E, *this)); + return false; + } + } + return true; +} + +bool Sw64AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success; +} + +OperandMatchResultTy Sw64AsmParser::tryParseRegister(MCRegister &RegNo, + SMLoc &StartLoc, + SMLoc &EndLoc) { + SmallVector, 1> Operands; + OperandMatchResultTy ResTy = parseAnyRegister(Operands); + if (ResTy == MatchOperand_Success) { + assert(Operands.size() == 1); + Sw64Operand &Operand = static_cast(*Operands.front()); + StartLoc = Operand.getStartLoc(); + EndLoc = Operand.getEndLoc(); + + // AFAIK, we only support numeric registers and named GPR's in CFI + // directives. + // Don't worry about eating tokens before failing. Using an unrecognised + // register is a parse error. + if (Operand.isGPRAsmReg()) { + // Resolve to GPR32 or GPR64 appropriately. + RegNo = Operand.getGPRReg(); + } + + return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch + : MatchOperand_Success; + } + + assert(Operands.size() == 0); + return (RegNo == (unsigned)-1) ? 
MatchOperand_NoMatch : MatchOperand_Success; +} + +bool Sw64AsmParser::isEvaluated(const MCExpr *Expr) { + switch (Expr->getKind()) { + case MCExpr::Constant: + return true; + case MCExpr::SymbolRef: + return (cast(Expr)->getKind() != MCSymbolRefExpr::VK_None); + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Expr); + if (!isEvaluated(BE->getLHS())) + return false; + return isEvaluated(BE->getRHS()); + } + case MCExpr::Unary: + return isEvaluated(cast(Expr)->getSubExpr()); + case MCExpr::Target: + return true; + } + return false; +} + +bool Sw64AsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) { + SMLoc S; + + if (isParenExpr) + return getParser().parseParenExprOfDepth(0, Res, S); + return getParser().parseExpression(Res); +} + +OperandMatchResultTy Sw64AsmParser::parseMemOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseMemOperand\n"); + const MCExpr *IdVal = nullptr; + SMLoc S; + bool isParenExpr = false; + OperandMatchResultTy Res = MatchOperand_NoMatch; + // First operand is the offset. + S = Parser.getTok().getLoc(); + + if (getLexer().getKind() == AsmToken::LParen) { + Parser.Lex(); + isParenExpr = true; + } + + if (getLexer().getKind() != AsmToken::Dollar) { + if (parseMemOffset(IdVal, isParenExpr)) + return MatchOperand_ParseFail; + + const AsmToken &Tok = Parser.getTok(); // Get the next token. + if (Tok.isNot(AsmToken::LParen)) { + Sw64Operand &Mnemonic = static_cast(*Operands[0]); + if (Mnemonic.getToken() == "la" || Mnemonic.getToken() == "dla") { + SMLoc E = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(Sw64Operand::CreateImm(IdVal, S, E, *this)); + return MatchOperand_Success; + } + if (Tok.is(AsmToken::EndOfStatement)) { + SMLoc E = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + // Zero register assumed, add a memory operand with ZERO as its base. + // "Base" will be managed by k_Memory. + auto Base = Sw64Operand::createGPRReg( + 0, "0", getContext().getRegisterInfo(), S, E, *this); + Operands.push_back( + Sw64Operand::CreateMem(std::move(Base), IdVal, S, E, *this)); + return MatchOperand_Success; + } + + MCBinaryExpr::Opcode Opcode; + // GAS and LLVM treat comparison operators different. GAS will generate -1 + // or 0, while LLVM will generate 0 or 1. Since a comparsion operator is + // highly unlikely to be found in a memory offset expression, we don't + // handle them. 
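+      // Fold a trailing arithmetic or logical operator (e.g. 'sym+8' or
+      // 'sym<<2') into the offset expression before the base register is parsed.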
+ switch (Tok.getKind()) { + case AsmToken::Plus: + Opcode = MCBinaryExpr::Add; + Parser.Lex(); + break; + case AsmToken::Minus: + Opcode = MCBinaryExpr::Sub; + Parser.Lex(); + break; + case AsmToken::Star: + Opcode = MCBinaryExpr::Mul; + Parser.Lex(); + break; + case AsmToken::Pipe: + Opcode = MCBinaryExpr::Or; + Parser.Lex(); + break; + case AsmToken::Amp: + Opcode = MCBinaryExpr::And; + Parser.Lex(); + break; + case AsmToken::LessLess: + Opcode = MCBinaryExpr::Shl; + Parser.Lex(); + break; + case AsmToken::GreaterGreater: + Opcode = MCBinaryExpr::LShr; + Parser.Lex(); + break; + case AsmToken::Caret: + Opcode = MCBinaryExpr::Xor; + Parser.Lex(); + break; + case AsmToken::Slash: + Opcode = MCBinaryExpr::Div; + Parser.Lex(); + break; + case AsmToken::Percent: + Opcode = MCBinaryExpr::Mod; + Parser.Lex(); + break; + default: + Error(Parser.getTok().getLoc(), "'(' or expression expected"); + return MatchOperand_ParseFail; + } + const MCExpr *NextExpr; + if (getParser().parseExpression(NextExpr)) + return MatchOperand_ParseFail; + IdVal = MCBinaryExpr::create(Opcode, IdVal, NextExpr, getContext()); + } + + Parser.Lex(); // Eat the '(' token. + } + + Res = parseAnyRegister(Operands); + if (Res != MatchOperand_Success) + return Res; + + if (Parser.getTok().isNot(AsmToken::RParen)) { + Error(Parser.getTok().getLoc(), "')' expected"); + return MatchOperand_ParseFail; + } + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Parser.Lex(); // Eat the ')' token. + + if (!IdVal) + IdVal = MCConstantExpr::create(0, getContext()); + + // Replace the register operand with the memory operand. + std::unique_ptr op( + static_cast(Operands.back().release())); + // Remove the register from the operands. + // "op" will be managed by k_Memory. + Operands.pop_back(); + + // Add the memory operand. + if (const MCBinaryExpr *BE = dyn_cast(IdVal)) { + int64_t Imm; + if (IdVal->evaluateAsAbsolute(Imm)) + IdVal = MCConstantExpr::create(Imm, getContext()); + else if (BE->getLHS()->getKind() != MCExpr::SymbolRef) + IdVal = MCBinaryExpr::create(BE->getOpcode(), BE->getRHS(), BE->getLHS(), + getContext()); + } + + Operands.push_back(Sw64Operand::CreateMem(std::move(op), IdVal, S, E, *this)); + return MatchOperand_Success; +} + +bool Sw64AsmParser::searchSymbolAlias(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCSymbol *Sym = getContext().lookupSymbol(Parser.getTok().getIdentifier()); + if (!Sym) + return false; + + SMLoc S = Parser.getTok().getLoc(); + if (Sym->isVariable()) { + const MCExpr *Expr = Sym->getVariableValue(); + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast(Expr); + StringRef DefSymbol = Ref->getSymbol().getName(); + if (DefSymbol.startswith("$")) { + OperandMatchResultTy ResTy = + matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); + return true; + } + if (ResTy == MatchOperand_ParseFail) + llvm_unreachable("Should never ParseFail"); + } + } + } else if (Sym->isUnset()) { + // If symbol is unset, it might be created in the `parseSetAssignment` + // routine as an alias for a numeric register name. + // Lookup in the aliases list. 
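+    // For example, after '.set r1,$1' the name 'r1' resolves here to the
+    // token recorded for register $1.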
+ auto Entry = RegisterSets.find(Sym->getName()); + if (Entry != RegisterSets.end()) { + OperandMatchResultTy ResTy = + matchAnyRegisterWithoutDollar(Operands, Entry->getValue(), S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); + return true; + } + } + } + + return false; +} + +OperandMatchResultTy Sw64AsmParser::matchAnyRegisterNameWithoutDollar( + OperandVector &Operands, StringRef Identifier, SMLoc S) { + int Index = matchCPURegisterName(Identifier); + if (Index != -1) { + Operands.push_back(Sw64Operand::createGPRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + Index = matchFPURegisterName(Identifier); + if (Index != -1) { + Operands.push_back(Sw64Operand::createFPRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +OperandMatchResultTy +Sw64AsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, + const AsmToken &Token, SMLoc S) { + if (Token.is(AsmToken::Identifier)) { + LLVM_DEBUG(dbgs() << ".. identifier\n"); + StringRef Identifier = Token.getIdentifier(); + OperandMatchResultTy ResTy = + matchAnyRegisterNameWithoutDollar(Operands, Identifier, S); + return ResTy; + } else if (Token.is(AsmToken::Integer)) { + LLVM_DEBUG(dbgs() << ".. integer\n"); + int64_t RegNum = Token.getIntVal(); + Operands.push_back(Sw64Operand::createNumericReg( + RegNum, Token.getString(), getContext().getRegisterInfo(), S, + Token.getLoc(), *this)); + return MatchOperand_Success; + } + + LLVM_DEBUG(dbgs() << Token.getKind() << "\n"); + + return MatchOperand_NoMatch; +} + +OperandMatchResultTy +Sw64AsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) { + auto Token = getLexer().peekTok(false); + return matchAnyRegisterWithoutDollar(Operands, Token, S); +} + +OperandMatchResultTy Sw64AsmParser::parseAnyRegister(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseAnyRegister\n"); + + auto Token = Parser.getTok(); + + SMLoc S = Token.getLoc(); + + if (Token.isNot(AsmToken::Dollar)) { + LLVM_DEBUG(dbgs() << ".. !$ -> try sym aliasing\n"); + if (Token.is(AsmToken::Identifier)) { + if (searchSymbolAlias(Operands)) + return MatchOperand_Success; + } + LLVM_DEBUG(dbgs() << ".. !symalias -> NoMatch\n"); + return MatchOperand_NoMatch; + } + LLVM_DEBUG(dbgs() << ".. 
$\n"); + + OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); // $ + Parser.Lex(); // identifier + } + return ResTy; +} + +bool Sw64AsmParser::parseParenSuffix(StringRef Name, OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + if (getLexer().is(AsmToken::LParen)) { + Operands.push_back( + Sw64Operand::CreateToken("(", getLexer().getLoc(), *this)); + Parser.Lex(); + if (Name == "ret") { + Operands.push_back( + Sw64Operand::CreateToken("$26)", getLexer().getLoc(), *this)); + Parser.Lex(); // eat "$" + Parser.Lex(); // eat "26" + Parser.Lex(); // eat ")" + } else { + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + if (Parser.getTok().isNot(AsmToken::RParen)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token, expected ')'"); + } + Operands.push_back( + Sw64Operand::CreateToken(")", getLexer().getLoc(), *this)); + Parser.Lex(); + } + } + return false; +} + +bool Sw64AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "ParseInstruction\n"); + + std::pair RelocOperands; + // We have reached first instruction, module directive are now forbidden. + // getTargetStreamer().forbidModuleDirective(); + + // Check if we have valid mnemonic + if (!mnemonicIsValid(Name, 0)) { + return Error(NameLoc, "unknown instruction"); + } + // First operand in MCInst is instruction mnemonic. + Operands.push_back(Sw64Operand::CreateToken(Name, NameLoc, *this)); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + // Parse and remember the operand. + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + // Parse parenthesis suffixes before we iterate + if (getLexer().is(AsmToken::LParen) && parseParenSuffix(Name, Operands)) + return true; + } + } + while (Parser.getTok().is(AsmToken::Exclaim)) { + if (false) { + LLVM_DEBUG(dbgs() << ".. Skip Parse " << Name << " Relocation Symbol\n"); + Parser.Lex(); // Eat ! + Parser.Lex(); // Eat reloction symbol. + } else { + LLVM_DEBUG(dbgs() << ".. Parse \"!"); + Parser.Lex(); // Eat ! + + if (Parser.getTok().is(AsmToken::Identifier)) { + // Parse Relocation Symbol ,Add Rel Kind Here ! + StringRef Identifier = Parser.getTok().getIdentifier(); + LLVM_DEBUG(dbgs() << Identifier << "\"\n"); + RelocOperands.first = Identifier; + } + if (Parser.getTok().is(AsmToken::Integer)) { + int64_t RelNum = Parser.getTok().getIntVal(); + LLVM_DEBUG(dbgs() << RelNum << "\"\n"); + RelocOperands.second = RelNum; + } + ParsingFixupOperands(RelocOperands); + Parser.Lex(); // Eat reloction symbol. + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +// FIXME: Given that these have the same name, these should both be +// consistent on affecting the Parser. 
+bool Sw64AsmParser::reportParseError(Twine ErrorMsg) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, ErrorMsg); +} + +bool Sw64AsmParser::parseSetNoAtDirective() { + MCAsmParser &Parser = getParser(); + // Line should look like: ".set noat". + + // Set the $at register to $0. + AssemblerOptions.back()->setATRegIndex(0); + + Parser.Lex(); // Eat "noat". + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetAtDirective() { + // Line can be: ".set at", which sets $at to $1 + // or ".set at=$reg", which sets $at to $reg. + MCAsmParser &Parser = getParser(); + Parser.Lex(); // Eat "at". + + if (getLexer().is(AsmToken::EndOfStatement)) { + // No register was specified, so we set $at to $1. + AssemblerOptions.back()->setATRegIndex(1); + + Parser.Lex(); // Consume the EndOfStatement. + return false; + } + + if (getLexer().isNot(AsmToken::Equal)) { + reportParseError("unexpected token, expected equals sign"); + return false; + } + Parser.Lex(); // Eat "=". + + if (getLexer().isNot(AsmToken::Dollar)) { + if (getLexer().is(AsmToken::EndOfStatement)) { + reportParseError("no register specified"); + return false; + } else { + reportParseError("unexpected token, expected dollar sign '$'"); + return false; + } + } + Parser.Lex(); // Eat "$". + + // Find out what "reg" is. + unsigned AtRegNo; + const AsmToken &Reg = Parser.getTok(); + if (Reg.is(AsmToken::Identifier)) { + AtRegNo = matchCPURegisterName(Reg.getIdentifier()); + } else if (Reg.is(AsmToken::Integer)) { + AtRegNo = Reg.getIntVal(); + } else { + reportParseError("unexpected token, expected identifier or integer"); + return false; + } + + // Check if $reg is a valid register. If it is, set $at to $reg. + if (!AssemblerOptions.back()->setATRegIndex(AtRegNo)) { + reportParseError("invalid register"); + return false; + } + Parser.Lex(); // Eat "reg". + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetReorderDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + AssemblerOptions.back()->setReorder(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetNoReorderDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + AssemblerOptions.back()->setNoReorder(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetMacroDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + // If this is not the end of the statement, report an error. 
+ if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + AssemblerOptions.back()->setMacro(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetNoMacroDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + if (AssemblerOptions.back()->isReorder()) { + reportParseError("`noreorder' must be set before `nomacro'"); + return false; + } + AssemblerOptions.back()->setNoMacro(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetAssignment() { + StringRef Name; + const MCExpr *Value; + MCAsmParser &Parser = getParser(); + + if (Parser.parseIdentifier(Name)) + return reportParseError("expected identifier after .set"); + + if (getLexer().isNot(AsmToken::Comma)) + return reportParseError("unexpected token, expected comma"); + Lex(); // Eat comma + + if (getLexer().is(AsmToken::Dollar) && + getLexer().peekTok().is(AsmToken::Integer)) { + // Parse assignment of a numeric register: + // .set r1,$1 + Parser.Lex(); // Eat $. + RegisterSets[Name] = Parser.getTok(); + Parser.Lex(); // Eat identifier. + getContext().getOrCreateSymbol(Name); + } else if (!Parser.parseExpression(Value)) { + // Parse assignment of an expression including + // symbolic registers: + // .set $tmp, $BB0-$BB1 + // .set r2, $f2 + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + Sym->setVariableValue(Value); + } else { + return reportParseError("expected valid expression after comma"); + } + + return false; +} + +bool Sw64AsmParser::parseSetArchDirective() { + MCAsmParser &Parser = getParser(); + + StringRef Arch; + if (Parser.parseIdentifier(Arch)) + return reportParseError("expected arch identifier"); + + StringRef ArchFeatureName = StringSwitch(Arch) + .Case("sw_64", "sw_64") + .Case("core3b", "core3b") + .Case("core4", "core4") + .Default(""); + + if (ArchFeatureName.empty()) + return reportParseError("unsupported architecture"); + + selectArch(ArchFeatureName); + return false; +} + +bool Sw64AsmParser::parseDirectiveSet() { + const AsmToken &Tok = getParser().getTok(); + StringRef IdVal = Tok.getString(); + + if (IdVal == "noat") + return parseSetNoAtDirective(); + if (IdVal == "at") + return parseSetAtDirective(); + if (IdVal == "arch") + return parseSetArchDirective(); + + if (Tok.getString() == "reorder") { + return parseSetReorderDirective(); + } + if (Tok.getString() == "noreorder") { + return parseSetNoReorderDirective(); + } + if (Tok.getString() == "macro") { + return parseSetMacroDirective(); + } + if (Tok.getString() == "nomacro") { + return parseSetNoMacroDirective(); + } + // TODO: temp write + if (Tok.getString() == "volatile") { + return parseSetNoMacroDirective(); + } + // It is just an identifier, look for an assignment. + return parseSetAssignment(); +} + +bool Sw64AsmParser::ParseDirective(AsmToken DirectiveID) { + // This returns false if this function recognizes the directive + // regardless of whether it is successfully handles or reports an + // error. Otherwise it returns true to give the generic parser a + // chance at recognizing it. + + MCAsmParser &Parser = getParser(); + StringRef IDVal = DirectiveID.getString(); + + if (IDVal == ".ent") { + // Ignore this directive for now. 
+ Parser.Lex(); + return false; + } + + if (IDVal == ".end") { + // Ignore this directive for now. + Parser.Lex(); + return false; + } + + if (IDVal == ".frame") { + // Ignore this directive for now. + Parser.eatToEndOfStatement(); + return false; + } + + if (IDVal == ".set") { + parseDirectiveSet(); + return false; + } + + if (IDVal == ".mask" || IDVal == ".fmask") { + // Ignore this directive for now. + Parser.eatToEndOfStatement(); + return false; + } + if (IDVal == ".arch") { + // Ignore this directive for now. + parseSetArchDirective(); + Parser.eatToEndOfStatement(); + return false; + } + if (IDVal == ".word") { + // Ignore this directive for now. + Parser.eatToEndOfStatement(); + } + return true; +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64AsmParser() { + RegisterMCAsmParser X(getTheSw64Target()); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "Sw64GenAsmMatcher.inc" + +bool Sw64AsmParser::mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) { + // Find the appropriate table for this asm variant. + const MatchEntry *Start, *End; + switch (VariantID) { + default: + llvm_unreachable("invalid variant!"); + case 0: + Start = std::begin(MatchTable0); + End = std::end(MatchTable0); + break; + } + // Search the table. + auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode()); + return MnemonicRange.first != MnemonicRange.second; +} + +unsigned Sw64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + Sw64Operand &Op = static_cast(AsmOp); + int64_t ExpectedVal; + + switch (Kind) { + default: + return Match_InvalidOperand; + } + + if (!Op.isReg()) + return Match_InvalidOperand; + + if (Op.getReg() == ExpectedVal) + return Match_Success; + return Match_InvalidOperand; +} + +void Sw64AsmParser::ParsingFixupOperands(std::pair reloc) { + for (auto i : RelocTable) { + if (reloc.first.startswith(i)) + FixupKind = + StringSwitch(i) + .Case("literal", (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL) + .Case("literal_got", + (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL_GOT) + .Case("lituse_addr", (MCFixupKind)Sw64::fixup_SW64_LITUSE) + .Case("lituse_jsr", (MCFixupKind)Sw64::fixup_SW64_HINT) + .Case("gpdisp", (MCFixupKind)Sw64::fixup_SW64_GPDISP) + .Case("gprelhigh", (MCFixupKind)Sw64::fixup_SW64_GPDISP_HI16) + .Case("gprellow", (MCFixupKind)Sw64::fixup_SW64_GPDISP_LO16) + .Case("gprel", (MCFixupKind)Sw64::fixup_SW64_GPREL16) + .Case("tlsgd", (MCFixupKind)Sw64::fixup_SW64_TLSGD) + .Case("tlsldm", (MCFixupKind)Sw64::fixup_SW64_TLSLDM) + .Case("gotdtprel", (MCFixupKind)Sw64::fixup_SW64_GOTDTPREL16) + .Case("dtprelhi", (MCFixupKind)Sw64::fixup_SW64_DTPREL_HI16) + .Case("dtprello", (MCFixupKind)Sw64::fixup_SW64_DTPREL_LO16) + .Case("gottprel", (MCFixupKind)Sw64::fixup_SW64_GOTTPREL16) + .Case("tprelhi", (MCFixupKind)Sw64::fixup_SW64_TPREL_HI16) + .Case("tprello", (MCFixupKind)Sw64::fixup_SW64_TPREL_LO16) + .Case("tprel", (MCFixupKind)Sw64::fixup_SW64_TPREL16) + .Default(llvm::FirstTargetFixupKind); + } +} diff --git a/llvm/lib/Target/Sw64/CMakeLists.txt b/llvm/lib/Target/Sw64/CMakeLists.txt new file mode 100644 index 000000000000..11598fbb8104 --- /dev/null +++ b/llvm/lib/Target/Sw64/CMakeLists.txt @@ -0,0 +1,64 @@ +add_llvm_component_group(Sw64) + +set(LLVM_TARGET_DEFINITIONS Sw64.td) + +tablegen(LLVM Sw64GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM Sw64GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM Sw64GenCallingConv.inc -gen-callingconv) +tablegen(LLVM Sw64GenDAGISel.inc -gen-dag-isel) 
+tablegen(LLVM Sw64GenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM Sw64GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM Sw64GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM Sw64GenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM Sw64GenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM Sw64GenMCPseudoLowering.inc -gen-pseudo-lowering) + +add_public_tablegen_target(Sw64CommonTableGen) + +add_llvm_target(Sw64CodeGen + Sw64AsmPrinter.cpp + Sw64FrameLowering.cpp + Sw64LLRP.cpp + Sw64BranchSelector.cpp + Sw64InstrInfo.cpp + Sw64ISelDAGToDAG.cpp + Sw64ISelLowering.cpp + Sw64MCInstLower.cpp + Sw64MachineFunctionInfo.cpp + Sw64MacroFusion.cpp + Sw64RegisterInfo.cpp + Sw64Subtarget.cpp + Sw64TargetMachine.cpp + Sw64TargetObjectFile.cpp + Sw64SelectionDAGInfo.cpp + Sw64ExpandPseudo.cpp + Sw64ExpandPseudo2.cpp + Sw64PreLegalizerCombiner.cpp + Sw64CombineLS.cpp + Sw64IEEEConstraint.cpp + Sw64TargetTransformInfo.cpp + + LINK_COMPONENTS + Analysis + AsmPrinter + CodeGen + Core + MC + SelectionDAG + Support + Target + TransformUtils + Sw64AsmPrinter + Sw64Desc + Sw64Info + GlobalISel + + ADD_TO_COMPONENT + Sw64 + ) + +add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) +add_subdirectory(Disassembler) +add_subdirectory(TargetInfo) +add_subdirectory(AsmParser) diff --git a/llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt b/llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt new file mode 100644 index 000000000000..123e27b07e2b --- /dev/null +++ b/llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_component_library(LLVMSw64Disassembler + Sw64Disassembler.cpp + + LINK_COMPONENTS + MCDisassembler + Sw64Info + Support + + ADD_TO_COMPONENT + Sw64 + ) diff --git a/llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp b/llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp new file mode 100644 index 000000000000..9141e7172323 --- /dev/null +++ b/llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp @@ -0,0 +1,390 @@ +//===-- Sw64Disassembler.cpp - Disassembler for Sw64 --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64Disassembler class. 
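+// It decodes Sw64 machine code bytes into MCInst objects for consumers such
+// as llvm-objdump.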
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/Sw64MCTargetDesc.h"
+#include "TargetInfo/Sw64TargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "Sw64-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class Sw64Disassembler : public MCDisassembler {
+
+public:
+  Sw64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+      : MCDisassembler(STI, Ctx) {}
+  ~Sw64Disassembler() {}
+
+  DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+                              ArrayRef<uint8_t> Bytes, uint64_t Address,
+                              raw_ostream &CStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createSw64Disassembler(const Target &T,
+                                              const MCSubtargetInfo &STI,
+                                              MCContext &Ctx) {
+  return new Sw64Disassembler(STI, Ctx);
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64Disassembler() {
+  // Register the disassembler for each target.
+  TargetRegistry::RegisterMCDisassembler(getTheSw64Target(),
+                                         createSw64Disassembler);
+}
+
+static const unsigned GPRDecoderTable[] = {
+    Sw64::R0,  Sw64::R1,  Sw64::R2,  Sw64::R3,  Sw64::R4,  Sw64::R5,  Sw64::R6,
+    Sw64::R7,  Sw64::R8,  Sw64::R9,  Sw64::R10, Sw64::R11, Sw64::R12, Sw64::R13,
+    Sw64::R14, Sw64::R15, Sw64::R16, Sw64::R17, Sw64::R18, Sw64::R19, Sw64::R20,
+    Sw64::R21, Sw64::R22, Sw64::R23, Sw64::R24, Sw64::R25, Sw64::R26, Sw64::R27,
+    Sw64::R28, Sw64::R29, Sw64::R30, Sw64::R31};
+
+// This instruction does not have a working decoder, and needs to be
+// fixed. This "fixme" function was introduced to keep the backend compiling
+// while making changes to tablegen code.
+static DecodeStatus DecodeFIXMEInstruction(MCInst &Inst, uint64_t RegNo,
+                                           uint64_t Address,
+                                           const MCDisassembler *Decoder) {
+  return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+                                            uint64_t Address,
+                                            const MCDisassembler *Decoder) {
+  if (RegNo >= std::size(GPRDecoderTable))
+    return MCDisassembler::Fail;
+
+  // We must define our own mapping from RegNo to register identifier.
+  // Accessing index RegNo in the register class will work in the case that
+  // registers were added in ascending order, but not in general.
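+  // GPRDecoderTable maps the 5-bit register encoding directly to the
+  // Sw64::R0..R31 enum values, so the index equals the encoding.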
+ unsigned Reg = GPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static const unsigned FPRDecoderTable[] = { + Sw64::F0, Sw64::F1, Sw64::F2, Sw64::F3, Sw64::F4, Sw64::F5, Sw64::F6, + Sw64::F7, Sw64::F8, Sw64::F9, Sw64::F10, Sw64::F11, Sw64::F12, Sw64::F13, + Sw64::F14, Sw64::F15, Sw64::F16, Sw64::F17, Sw64::F18, Sw64::F19, Sw64::F20, + Sw64::F21, Sw64::F22, Sw64::F23, Sw64::F24, Sw64::F25, Sw64::F26, Sw64::F27, + Sw64::F28, Sw64::F29, Sw64::F30, Sw64::F31}; + +static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeV256LRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPRC_loRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::createImm(Imm)); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, + const void *Decoder) { + if (Imm == 0) + return MCDisassembler::Fail; + return decodeUImmOperand(Inst, Imm, Address, Decoder); +} + +template +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + // Sign-extend the number in the bottom N bits of Imm + Inst.addOperand(MCOperand::createImm(SignExtend64(Imm))); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, + const void *Decoder) { + if (Imm == 0) + return MCDisassembler::Fail; + return decodeSImmOperand(Inst, Imm, Address, Decoder); +} + +static DecodeStatus decodeFloatCopyInstruction(uint32_t func, MCInst &MI, + uint32_t Insn, uint64_t Address, + const void *Decoder) { + switch (func) { + default: + return MCDisassembler::Fail; + case 0x30: + MI.setOpcode(Sw64::CPYSS); + break; + case 0x31: + MI.setOpcode(Sw64::CPYSNS); + break; + case 0x32: + MI.setOpcode(Sw64::CPYSES); + break; + } + uint32_t RegOp1 = Insn << 6 >> 27; // Inst {25-21} Reg operand 1 + uint32_t RegOp2 = Insn << 11 >> 27; // Inst [20-16] 
Reg operand 2 + uint32_t RegOp3 = Insn & 0x1F; // Inst [4-0 ] Reg operand 3 + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp3])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp1])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp2])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeFloatInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t func = (Insn & 0x1FE0) >> 5; + switch ((func & 0xF0) >> 4) { + default: + return MCDisassembler::Fail; + case 0x3: + return decodeFloatCopyInstruction(func, MI, Insn, Address, Decoder); + } +} + +static DecodeStatus decodeFloatSelectInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t func = (Insn & 0xFC00) >> 10; + switch (func) { + default: + return MCDisassembler::Fail; + case 0x10: + MI.setOpcode(Sw64::FSELEQS); + break; + case 0x11: + MI.setOpcode(Sw64::FSELNES); + break; + case 0x12: + MI.setOpcode(Sw64::FSELLTS); + break; + case 0x13: + MI.setOpcode(Sw64::FSELLES); + break; + case 0x14: + MI.setOpcode(Sw64::FSELGTS); + break; + case 0x15: + MI.setOpcode(Sw64::FSELGES); + break; + } + uint32_t RegOp1 = Insn << 6 >> 27; // Inst {25-21} Reg operand 1 + uint32_t RegOp2 = Insn << 11 >> 27; // Inst [20-16] Reg operand 2 + uint32_t RegOp3 = (Insn & 0x3E0) >> 5; // Inst [4-0 ] Reg operand 3 + uint32_t RegOp4 = Insn & 0x1F; // Inst [4-0 ] Reg operand 3 + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp4])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp3])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp2])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp1])); + return MCDisassembler::Success; +} + +static DecodeStatus decodePostLSInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t func = (Insn & 0xFC00) >> 12; + bool isFloat = false; + bool isStore = false; + switch (func) { + default: + return MCDisassembler::Fail; + case 0x0: + MI.setOpcode(Sw64::LDBU_A); + break; + case 0x1: + MI.setOpcode(Sw64::LDHU_A); + break; + case 0x2: + MI.setOpcode(Sw64::LDW_A); + break; + case 0x3: + MI.setOpcode(Sw64::LDL_A); + break; + case 0x4: + MI.setOpcode(Sw64::LDS_A); + isFloat = true; + break; + case 0x5: + MI.setOpcode(Sw64::LDD_A); + isFloat = true; + break; + case 0x6: + MI.setOpcode(Sw64::STB_A); + break; + case 0x7: + MI.setOpcode(Sw64::STH_A); + break; + case 0x8: + MI.setOpcode(Sw64::STW_A); + break; + case 0x9: + MI.setOpcode(Sw64::STL_A); + break; + case 0xA: + MI.setOpcode(Sw64::STS_A); + isFloat = true; + isStore = true; + break; + case 0xB: + MI.setOpcode(Sw64::STD_A); + isFloat = true; + isStore = true; + break; + } + uint32_t RegOp1 = Insn << 6 >> 27; // Inst {25-21} Reg operand 1 + uint32_t RegOp2 = Insn << 11 >> 27; // Inst [20-16] Reg operand 2 + unsigned RegOp3 = Insn & 0xFFF; // Inst [11-0 ] Reg operand 3 + uint32_t RegOp4 = Insn << 11 >> 27; + MI.addOperand((isFloat && !isStore) + ? MCOperand::createReg(FPRDecoderTable[RegOp1]) + : MCOperand::createReg(GPRDecoderTable[RegOp1])); + MI.addOperand((isFloat && isStore) + ? 
MCOperand::createReg(FPRDecoderTable[RegOp4]) + : MCOperand::createReg(GPRDecoderTable[RegOp4])); + MI.addOperand(MCOperand::createReg(GPRDecoderTable[RegOp2])); + MI.addOperand(MCOperand::createImm(RegOp3)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBarrierInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t func = Insn & 0xFFFF; + switch (func) { + default: + return MCDisassembler::Fail; + case 0x00: + MI.setOpcode(Sw64::MB); + break; + case 0x01: + MI.setOpcode(Sw64::IMEMB); + break; + case 0x02: + MI.setOpcode(Sw64::WMEMB); + break; + } + return MCDisassembler::Success; +} + +static DecodeStatus decodeConlictInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t Opcode = Insn >> 26; + switch (Opcode) { + default: + return MCDisassembler::Fail; + case 0x06: + return decodeBarrierInstruction(MI, Insn, Address, Decoder); + case 0x18: + return decodeFloatInstruction(MI, Insn, Address, Decoder); + case 0x19: + return decodeFloatSelectInstruction(MI, Insn, Address, Decoder); + case 0x1E: + return decodePostLSInstruction(MI, Insn, Address, Decoder); + } +} + +#include "Sw64GenDisassemblerTables.inc" + +DecodeStatus Sw64Disassembler::getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const { + // TODO: This will need modification when supporting instruction set + // extensions with instructions > 32-bits (up to 176 bits wide). + uint32_t Insn; + DecodeStatus Result; + + if (Bytes.size() < 4) { + Size = 0; + return MCDisassembler::Fail; + } + Insn = support::endian::read32le(Bytes.data()); + LLVM_DEBUG(dbgs() << "Trying Decode Conflict Instruction :\n"); + Result = decodeConlictInstruction(Instr, Insn, Address, this); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + LLVM_DEBUG(dbgs() << "Trying Sw64 table :\n"); + Result = decodeInstruction(DecoderTable32, Instr, Insn, Address, this, STI); + Size = 4; + + return Result; +} diff --git a/llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt b/llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt new file mode 100644 index 000000000000..b07b33f37e09 --- /dev/null +++ b/llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_component_library(LLVMSw64AsmPrinter + Sw64InstPrinter.cpp + + LINK_COMPONENTS + MC + Support + + ADD_TO_COMPONENT + Sw64 + ) diff --git a/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp new file mode 100644 index 000000000000..74ae067f9f9e --- /dev/null +++ b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp @@ -0,0 +1,148 @@ +//===-- Sw64InstPrinter.cpp - Convert Sw64 MCInst to assembly syntax ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an Sw64 MCInst to a .s file. 
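getInstruction above fetches four little-endian bytes, first tries the hand-written decodeConlictInstruction path (opcodes whose encodings collide in the generated tables), and only then falls back to the TableGen'd DecoderTable32. A minimal stand-alone sketch of the byte fetch and bit-field extraction those helpers perform; readLE32 and splitFields are hypothetical names, and the field positions simply mirror the in-line comments above (opcode in bits 31-26, register operands in bits 25-21 and 20-16):

#include <cstdint>

// Assemble a 32-bit instruction word from four little-endian bytes,
// equivalent to the support::endian::read32le call in getInstruction.
static inline uint32_t readLE32(const uint8_t *Bytes) {
  return uint32_t(Bytes[0]) | uint32_t(Bytes[1]) << 8 |
         uint32_t(Bytes[2]) << 16 | uint32_t(Bytes[3]) << 24;
}

struct Sw64Fields {
  unsigned Opcode; // bits 31-26, switched on in decodeConlictInstruction
  unsigned Ra;     // bits 25-21, same value as (Insn << 6) >> 27 above
  unsigned Rb;     // bits 20-16, same value as (Insn << 11) >> 27 above
};

static inline Sw64Fields splitFields(uint32_t Insn) {
  return {Insn >> 26, (Insn >> 21) & 0x1F, (Insn >> 16) & 0x1F};
}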
+// +//===----------------------------------------------------------------------===// + +#include "Sw64InstPrinter.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#include "Sw64GenAsmWriter.inc" + +void Sw64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { + OS << StringRef(getRegisterName(Reg)).lower(); +} + +void Sw64InstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, const MCSubtargetInfo &STI, + raw_ostream &OS) { + printInstruction(MI, Address, OS); + if (!Annot.empty()) { + OS << "\t" << Annot; + } else + printAnnotation(OS, Annot); +} + +void Sw64InstPrinter::printInlineJT(const MCInst *MI, int opNum, + raw_ostream &O) { + report_fatal_error("can't handle InlineJT"); +} + +void Sw64InstPrinter::printInlineJT32(const MCInst *MI, int opNum, + raw_ostream &O) { + report_fatal_error("can't handle InlineJT32"); +} + +void Sw64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + printRegName(O, Op.getReg()); + return; + } + + if (Op.isImm()) { + if (Op.getImm() > 65535) { + O << formatHex(Op.getImm()); + return; + } + O << Op.getImm(); + return; + } + + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI, true); +} + +void Sw64InstPrinter::printMemoryArg(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + + if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::Target) { + const Sw64MCExpr *Sw64Expr = cast(Expr); + + switch (Sw64Expr->getKind()) { + default: + break; + case Sw64MCExpr::MEK_GPDISP_HI16: + case Sw64MCExpr::MEK_GPDISP_LO16: + case Sw64MCExpr::MEK_GPDISP: + O << "0"; + return; + } + } + } + printOperand(MI, OpNo, O); +} + +void Sw64InstPrinter::printMemOperand(const MCInst *MI, int opNum, + raw_ostream &O) { + // Load/Store memory operands -- imm($reg) + + if (MI->getOperand(opNum).isImm() && MI->getOperand(opNum + 1).isReg()) { + printOperand(MI, opNum, O); + O << "("; + printOperand(MI, opNum + 1, O); + O << ")"; + } else { + printOperand(MI, opNum + 1, O); + O << "("; + printOperand(MI, opNum, O); + O << ")"; + } +} + +template +void Sw64InstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) { + const MCOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) { + uint64_t Imm = MO.getImm(); + Imm -= Offset; + Imm &= (1 << Bits) - 1; + Imm += Offset; + if (MI->getOpcode() == Sw64::VLOGZZ) + O << format("%x", Imm); + else + O << formatImm(Imm); + return; + } + + printOperand(MI, opNum, O); +} + +// Only for Instruction VLOG +void Sw64InstPrinter::printHexImm(const MCInst *MI, int opNum, raw_ostream &O) { + const MCOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) { + uint64_t Imm = MO.getImm(); + O << format("%x", ((Imm >> 4) & 0xf)) << format("%x", (Imm & 0xf)); + return; + } + + printOperand(MI, opNum, O); +} diff --git a/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h new file mode 100644 index 000000000000..8d721ac01ac3 --- /dev/null +++ 
b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h @@ -0,0 +1,57 @@ +//== Sw64InstPrinter.h - Convert Sw64 MCInst to assembly syntax -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the Sw64InstPrinter class, +// which is used to print Sw64 MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_INSTPRINTER_SW64INSTPRINTER_H +#define LLVM_LIB_TARGET_SW64_INSTPRINTER_SW64INSTPRINTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class Sw64InstPrinter : public MCInstPrinter { +public: + Sw64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + // Autogenerated by tblgen. + std::pair getMnemonic(const MCInst *MI) override; + void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); + static const char *getRegisterName(MCRegister Reg); + + void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &OS) override; + void printMemoryArg(const MCInst *MI, unsigned OpNo, raw_ostream &O); + +private: + void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O); + void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, + raw_ostream &O) { + printOperand(MI, OpNum, O); + } + void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); + + void printHexImm(const MCInst *MI, int opNum, raw_ostream &O); + + template + void printUImm(const MCInst *MI, int opNum, raw_ostream &O); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_INSTPRINTER_SW64INSTPRINTER_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..69169bf24b07 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,22 @@ +add_llvm_component_library(LLVMSw64Desc + Sw64ABIInfo.cpp + Sw64ABIFlagsSection.cpp + Sw64AsmBackend.cpp + Sw64ELFObjectWriter.cpp + Sw64ELFStreamer.cpp + Sw64MCAsmInfo.cpp + Sw64MCCodeEmitter.cpp + Sw64MCExpr.cpp + Sw64MCTargetDesc.cpp + Sw64OptionRecord.cpp + Sw64TargetStreamer.cpp + + LINK_COMPONENTS + MC + Sw64Info + Sw64AsmPrinter + Support + + ADD_TO_COMPONENT + Sw64 +) diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp new file mode 100644 index 000000000000..0f714e724bcd --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp @@ -0,0 +1,31 @@ +//===- Sw64ABIFlagsSection.cpp - Sw64 ELF ABI Flags Section ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
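printUImm in Sw64InstPrinter.cpp above prints an immediate by reducing it modulo 2^Bits relative to Offset and shifting it back into range. A stand-alone model of that bias-and-mask, with a hypothetical name and a couple of worked values:

#include <cstdint>

// Mirror of the arithmetic in printUImm<Bits, Offset>: subtract the bias,
// keep the low Bits bits, then add the bias back.
static uint64_t wrapUImm(uint64_t Imm, unsigned Bits, uint64_t Offset) {
  return ((Imm - Offset) & ((1ULL << Bits) - 1)) + Offset;
}
// wrapUImm(300, 8, 0) == 44   (300 mod 256)
// wrapUImm(5, 3, 1)   == 5    (already within [1, 1 + 8))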
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64ABIFlagsSection.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Sw64ABIFlags.h" + +using namespace llvm; + +uint8_t Sw64ABIFlagsSection::getFpABIValue() { + llvm_unreachable("unexpected fp abi value"); +} + +StringRef Sw64ABIFlagsSection::getFpABIString(FpABIKind Value) { + llvm_unreachable("unsupported fp abi value"); +} +namespace llvm { + +MCStreamer &operator<<(MCStreamer &OS, Sw64ABIFlagsSection &ABIFlagsSection) { + return OS; +} + +} // end namespace llvm diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h new file mode 100644 index 000000000000..058c47f58d44 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h @@ -0,0 +1,127 @@ +//===- Sw64ABIFlagsSection.h - Sw64 ELF ABI Flags Section -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIFLAGSSECTION_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIFLAGSSECTION_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Sw64ABIFlags.h" +#include + +namespace llvm { + +class MCStreamer; + +struct Sw64ABIFlagsSection { + // Internal representation of the fp_abi related values used in .module. + enum class FpABIKind { ANY, XX, S32, S64, SOFT }; + + // Version of flags structure. + uint16_t Version = 0; + // The level of the ISA: 1-5, 32, 64. + uint8_t ISALevel = 0; + // The revision of ISA: 0 for SW64 V and below, 1-n otherwise. + uint8_t ISARevision = 0; + // The size of general purpose registers. + Sw64::AFL_REG GPRSize = Sw64::AFL_REG_NONE; + // The size of co-processor 1 registers. + Sw64::AFL_REG CPR1Size = Sw64::AFL_REG_NONE; + // The size of co-processor 2 registers. + Sw64::AFL_REG CPR2Size = Sw64::AFL_REG_NONE; + // Processor-specific extension. + Sw64::AFL_EXT ISAExtension = Sw64::AFL_EXT_NONE; + // Mask of ASEs used. + uint32_t ASESet = 0; + + bool OddSPReg = false; + +protected: + // The floating-point ABI. + FpABIKind FpABI = FpABIKind::ANY; + +public: + Sw64ABIFlagsSection() = default; + + uint16_t getVersionValue() { return (uint16_t)Version; } + uint8_t getISALevelValue() { return (uint8_t)ISALevel; } + uint8_t getISARevisionValue() { return (uint8_t)ISARevision; } + uint8_t getGPRSizeValue() { return (uint8_t)GPRSize; } + uint8_t getCPR1SizeValue(); + uint8_t getCPR2SizeValue() { return (uint8_t)CPR2Size; } + uint8_t getFpABIValue(); + uint32_t getISAExtensionValue() { return (uint32_t)ISAExtension; } + uint32_t getASESetValue() { return (uint32_t)ASESet; } + + uint32_t getFlags1Value() { + uint32_t Value = 0; + + if (OddSPReg) + Value |= (uint32_t)Sw64::AFL_FLAGS1_ODDSPREG; + + return Value; + } + + uint32_t getFlags2Value() { return 0; } + + FpABIKind getFpABI() { return FpABI; } + void setFpABI(FpABIKind Value) { + FpABI = Value; + } + + StringRef getFpABIString(FpABIKind Value); + + template + void setGPRSizeFromPredicates(const PredicateLibrary &P) { + GPRSize = P.isGP64bit() ? 
Sw64::AFL_REG_64 : Sw64::AFL_REG_32; + } + + template + void setCPR1SizeFromPredicates(const PredicateLibrary &P) { + if (P.useSoftFloat()) + CPR1Size = Sw64::AFL_REG_NONE; + else if (P.hasMSA()) + CPR1Size = Sw64::AFL_REG_128; + else + CPR1Size = P.isFP64bit() ? Sw64::AFL_REG_64 : Sw64::AFL_REG_32; + } + + template + void setISAExtensionFromPredicates(const PredicateLibrary &P) { + if (P.hasCnSw64()) + ISAExtension = Sw64::AFL_EXT_OCTEON; + else + ISAExtension = Sw64::AFL_EXT_NONE; + } + + template + void setFpAbiFromPredicates(const PredicateLibrary &P) { + FpABI = FpABIKind::ANY; + if (P.useSoftFloat()) + FpABI = FpABIKind::SOFT; + + if (P.isABI_S64()) + FpABI = FpABIKind::S64; + } + + template + void setAllFromPredicates(const PredicateLibrary &P) { + setGPRSizeFromPredicates(P); + setCPR1SizeFromPredicates(P); + setISAExtensionFromPredicates(P); + setFpAbiFromPredicates(P); + OddSPReg = P.useOddSPReg(); + } +}; + +MCStreamer &operator<<(MCStreamer &OS, Sw64ABIFlagsSection &ABIFlagsSection); + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIFLAGSSECTION_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp new file mode 100644 index 000000000000..ea5b1f585883 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp @@ -0,0 +1,29 @@ +//===---- Sw64ABIInfo.cpp - Information about SW64 ABI's ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64ABIInfo.h" +#include "Sw64RegisterInfo.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCTargetOptions.h" + +using namespace llvm; +Sw64ABIInfo Sw64ABIInfo::computeTargetABI(const Triple &TT, StringRef CPU, + const MCTargetOptions &Options) { + if (Options.getABIName().startswith("n64")) + return Sw64ABIInfo::S64(); + + assert(Options.getABIName().empty() && "Unknown ABI option for SW64"); + + if (TT.isSw64()) + return Sw64ABIInfo::S64(); + else + assert(!TT.isSw64() && "sw_64 ABI is not appoint 64 bit."); + return Sw64ABIInfo::S64(); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h new file mode 100644 index 000000000000..ae758ca8d6f3 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h @@ -0,0 +1,77 @@ +//===---- Sw64ABIInfo.h - Information about SW64 ABI's --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIINFO_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIINFO_H + +#include "llvm/IR/CallingConv.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/TargetParser/Triple.h" + +namespace llvm { + +template class ArrayRef; +class MCTargetOptions; +class StringRef; +class TargetRegisterClass; + +class Sw64ABIInfo { +public: + enum class ABI { Unknown, S64 }; + +protected: + ABI ThisABI; + +public: + Sw64ABIInfo(ABI ThisABI) : ThisABI(ThisABI) {} + + static Sw64ABIInfo Unknown() { return Sw64ABIInfo(ABI::Unknown); } + static Sw64ABIInfo S64() { return Sw64ABIInfo(ABI::S64); } + static Sw64ABIInfo computeTargetABI(const Triple &TT, StringRef CPU, + const MCTargetOptions &Options); + + bool IsKnown() const { return ThisABI != ABI::Unknown; } + bool IsS64() const { return ThisABI == ABI::S64; } + ABI GetEnumValue() const { return ThisABI; } + + /// The registers to use for byval arguments. + ArrayRef GetByValArgRegs() const; + + /// The registers to use for the variable argument list. + ArrayRef GetVarArgRegs() const; + + /// Obtain the size of the area allocated by the callee for arguments. + /// CallingConv::FastCall affects the value for S32. + unsigned GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const; + + /// Ordering of ABI's + /// Sw64GenSubtargetInfo.inc will use this to resolve conflicts when given + /// multiple ABI options. + bool operator<(const Sw64ABIInfo Other) const { + return ThisABI < Other.GetEnumValue(); + } + + unsigned GetStackPtr() const; + unsigned GetFramePtr() const; + unsigned GetBasePtr() const; + unsigned GetGlobalPtr() const; + unsigned GetNullPtr() const; + unsigned GetZeroReg() const; + unsigned GetPtrAdduOp() const; + unsigned GetPtrAddiuOp() const; + unsigned GetPtrSubuOp() const; + unsigned GetPtrAndOp() const; + unsigned GetGPRMoveOp() const; + inline bool ArePtrs64bit() const { return IsS64(); } + inline bool AreGprs64bit() const { return IsS64(); } + + unsigned GetEhDataReg(unsigned I) const; +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp new file mode 100644 index 000000000000..c1cf8243b30d --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp @@ -0,0 +1,317 @@ +//===-- Sw64AsmBackend.cpp - Sw64 Asm Backend ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64AsmBackend class. 
+// +//===----------------------------------------------------------------------===// +// + +#include "MCTargetDesc/Sw64AsmBackend.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "MCTargetDesc/Sw64FixupKinds.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// Prepare value for the target space for it +static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + MCContext &Ctx) { + + unsigned Kind = Fixup.getKind(); + switch (Kind) { + default: + return 0; + case Sw64::fixup_SW64_32: + case Sw64::fixup_SW64_64: + case FK_Data_4: + case FK_Data_8: + case Sw64::fixup_SW64_GPREL32: + case Sw64::fixup_SW64_LITUSE: + case Sw64::fixup_SW64_GPREL_HI16: + case Sw64::fixup_SW64_GPREL_LO16: + case Sw64::fixup_SW64_GPREL16: + case Sw64::fixup_SW64_TLSGD: + case Sw64::fixup_SW64_TLSLDM: + case Sw64::fixup_SW64_DTPMOD64: + case Sw64::fixup_SW64_GOTDTPREL16: + case Sw64::fixup_SW64_DTPREL64: + case Sw64::fixup_SW64_DTPREL_HI16: + case Sw64::fixup_SW64_DTPREL_LO16: + case Sw64::fixup_SW64_DTPREL16: + case Sw64::fixup_SW64_GOTTPREL16: + case Sw64::fixup_SW64_TPREL64: + case Sw64::fixup_SW64_TPREL_HI16: + case Sw64::fixup_SW64_TPREL_LO16: + case Sw64::fixup_SW64_TPREL16: + break; + case Sw64::fixup_SW64_23_PCREL_S2: + // So far we are only using this type for branches. + // For branches we start 1 instruction after the branch + // so the displacement will be one instruction size less. + Value -= 4; + // The displacement is then divided by 4 to give us an 18 bit + // address range. + Value >>= 2; + break; + case Sw64::fixup_SW64_BRSGP: + // So far we are only using this type for jumps. + // The displacement is then divided by 4 to give us an 28 bit + // address range. + Value >>= 2; + break; + case Sw64::fixup_SW64_ELF_LITERAL: + Value &= 0xffff; + break; + case Sw64::fixup_SW64_ELF_LITERAL_GOT: + Value = ((Value + 0x8000) >> 16) & 0xffff; + break; + } + return Value; +} + +std::unique_ptr +Sw64AsmBackend::createObjectTargetWriter() const { + return createSw64ELFObjectWriter(TheTriple, IsS32); +} + +/// ApplyFixup - Apply the Value for given Fixup into the provided +/// data fragment, at the offset specified by the fixup and following the +/// fixup kind as appropriate. +void Sw64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved, + const MCSubtargetInfo *STI) const { + MCFixupKind Kind = Fixup.getKind(); + MCContext &Ctx = Asm.getContext(); + Value = adjustFixupValue(Fixup, Value, Ctx); + + if (!Value) + return; // Doesn't change encoding. 
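As a worked example of the branch handling in adjustFixupValue above: for fixup_SW64_23_PCREL_S2 the backend subtracts one instruction (the displacement is measured from the slot after the branch) and then drops the two low bits, so a target 0x40 bytes past the fixup is encoded as (0x40 - 4) >> 2 = 0xF word slots. A small sketch of just that step, under those assumptions:

#include <cstdint>

// Hypothetical stand-alone mirror of the fixup_SW64_23_PCREL_S2 adjustment.
static uint64_t adjustBranch21(uint64_t ByteDisplacement) {
  // Skip the implicit next instruction, then count 4-byte words.
  return (ByteDisplacement - 4) >> 2;
}
// adjustBranch21(0x40) == 0xF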
+ + // Where do we start in the object + unsigned Offset = Fixup.getOffset(); + // Number of bytes we need to fixup + unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8; + // Used to point to big endian bytes + unsigned FullSize; + + switch ((unsigned)Kind) { + case Sw64::fixup_SW64_32: + FullSize = 4; + break; + case Sw64::fixup_SW64_64: + FullSize = 8; + break; + default: + FullSize = 4; + break; + } + + // Grab current value, if any, from bits. + uint64_t CurVal = 0; + + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = Endian == support::little ? i : (FullSize - 1 - i); + CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i * 8); + } + + uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); + CurVal |= Value & Mask; + + // Write out the fixed up bytes back to the code/data bits. + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = Endian == support::little ? i : (FullSize - 1 - i); + Data[Offset + Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff); + } +} + +std::optional Sw64AsmBackend::getFixupKind(StringRef Name) const { + return StringSwitch>(Name) + .Case("R_SW_64_REFLONG", (MCFixupKind)Sw64::fixup_SW64_32) + .Case("R_SW_64_REFQUAD", (MCFixupKind)Sw64::fixup_SW64_64) + .Case("R_SW_64_REFQUAD", (MCFixupKind)Sw64::fixup_SW64_CTOR) + .Case("R_SW_64_GPREL32", (MCFixupKind)Sw64::fixup_SW64_GPREL32) + .Case("R_SW_64_LITERAL", (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL) + .Case("R_SW_64_LITUSE", (MCFixupKind)Sw64::fixup_SW64_LITUSE) + .Case("R_SW_64_GPDISP", (MCFixupKind)Sw64::fixup_SW64_GPDISP) + .Case("R_SW_64_BRADDR", (MCFixupKind)Sw64::fixup_SW64_23_PCREL_S2) + .Case("R_SW_64_HINT", (MCFixupKind)Sw64::fixup_SW64_HINT) + .Case("R_SW_64_SREL16", (MCFixupKind)Sw64::fixup_SW64_16_PCREL) + .Case("R_SW_64_SREL32", (MCFixupKind)Sw64::fixup_SW64_32_PCREL) + .Case("R_SW_64_SREL64", (MCFixupKind)Sw64::fixup_SW64_64_PCREL) + .Case("R_SW_64_GPRELHIGH", (MCFixupKind)Sw64::fixup_SW64_GPREL_HI16) + .Case("R_SW_64_GPRELLOW", (MCFixupKind)Sw64::fixup_SW64_GPREL_LO16) + .Case("R_SW_64_GPREL16", (MCFixupKind)Sw64::fixup_SW64_GPREL16) + .Case("R_SW_64_BRSGP", (MCFixupKind)Sw64::fixup_SW64_BRSGP) + .Case("R_SW_64_TLSGD", (MCFixupKind)Sw64::fixup_SW64_TLSGD) + .Case("R_SW_64_TLSLDM", (MCFixupKind)Sw64::fixup_SW64_TLSLDM) + .Case("R_SW_64_DTPMOD64", (MCFixupKind)Sw64::fixup_SW64_DTPMOD64) + .Case("R_SW_64_GOTDTPREL", (MCFixupKind)Sw64::fixup_SW64_GOTDTPREL16) + .Case("R_SW_64_DTPREL64", (MCFixupKind)Sw64::fixup_SW64_DTPREL64) + .Case("R_SW_64_DTPRELHI", (MCFixupKind)Sw64::fixup_SW64_DTPREL_HI16) + .Case("R_SW_64_DTPRELLO", (MCFixupKind)Sw64::fixup_SW64_DTPREL_LO16) + .Case("R_SW_64_DTPREL16", (MCFixupKind)Sw64::fixup_SW64_DTPREL16) + .Case("R_SW_64_GOTTPREL", (MCFixupKind)Sw64::fixup_SW64_GOTTPREL16) + .Case("R_SW_64_TPREL64", (MCFixupKind)Sw64::fixup_SW64_TPREL64) + .Case("R_SW_64_TPRELHI", (MCFixupKind)Sw64::fixup_SW64_TPREL_HI16) + .Case("R_SW_64_TPRELLO", (MCFixupKind)Sw64::fixup_SW64_TPREL_LO16) + .Case("R_SW_64_TPREL16", (MCFixupKind)Sw64::fixup_SW64_TPREL16) + .Case("R_SW_64_LITERAL_GOT", + (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL_GOT) + .Default(MCAsmBackend::getFixupKind(Name)); +} + +const MCFixupKindInfo & +Sw64AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo LittleEndianInfos[] = { + // This table *must* be in same the order of fixup_* kinds in + // Sw64FixupKinds.h. 
+ // name offset bits flags + {"fixup_SW64_NONE", 0, 0, 0}, + {"fixup_SW64_32", 0, 32, 0}, + {"fixup_SW64_64", 0, 64, 0}, + {"fixup_SW64_CTOR", 0, 64, 0}, + {"fixup_SW64_GPREL32", 0, 32, 0}, + {"fixup_SW64_ELF_LITERAL", 0, 16, 0}, + {"fixup_SW64_LITUSE", 0, 32, 0}, + {"fixup_SW64_GPDISP", 0, 16, 0}, + {"fixup_SW64_GPDISP_HI16", 0, 16, 0}, + {"fixup_SW64_GPDISP_LO16", 0, 16, 0}, + {"fixup_SW64_23_PCREL_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_SW64_HINT", 0, 14, 0}, + {"fixup_SW64_16_PCREL", 0, 16, 0}, + {"fixup_SW64_32_PCREL", 0, 32, 0}, + {"fixup_SW64_64_PCREL", 0, 64, 0}, + {"fixup_SW64_GPREL_HI16", 0, 16, 0}, + {"fixup_SW64_GPREL_LO16", 0, 16, 0}, + {"fixup_SW64_GPREL16", 0, 16, 0}, + {"fixup_SW64_BRSGP", 0, 21, 0}, + {"fixup_SW64_TLSGD", 0, 16, 0}, + {"fixup_SW64_TLSLDM", 0, 16, 0}, + {"fixup_SW64_DTPMOD64", 0, 64, 0}, + {"fixup_SW64_GOTDTPREL16", 0, 16, 0}, + {"fixup_SW64_DTPREL64", 0, 64, 0}, + {"fixup_SW64_DTPREL_HI16", 0, 16, 0}, + {"fixup_SW64_DTPREL_LO16", 0, 16, 0}, + {"fixup_SW64_DTPREL16", 0, 16, 0}, + {"fixup_SW64_GOTTPREL16", 0, 16, 0}, + {"fixup_SW64_TPREL64", 0, 64, 0}, + {"fixup_SW64_TPREL_HI16", 0, 16, 0}, + {"fixup_SW64_TPREL_LO16", 0, 16, 0}, + {"fixup_SW64_TPREL16", 0, 16, 0}, + {"fixup_SW64_ELF_LITERAL_GOT", 0, 16, 0}, + {"fixup_SW64_LITERAL_BASE", 0, 16, 0}, + {"fixup_SW64_LITUSE_JSRDIRECT", 0, 16, 0}}; + + static_assert(std::size(LittleEndianInfos) == Sw64::NumTargetFixupKinds, + "Not all SW64 little endian fixup kinds added!"); + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + + if (Endian == support::little) + return LittleEndianInfos[Kind - FirstTargetFixupKind]; + else + llvm_unreachable("sw_64 is not appoint litter endian."); +} + +/// WriteNopData - Write an (optimal) nop sequence of Count bytes +/// to the given output. If the target cannot generate such a sequence, +/// it should return an error. +/// +/// \return - True on success. +bool Sw64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const { + // If the count is not 4-byte aligned, we must be writing data into the text + // section (otherwise we have unaligned instructions, and thus have far + // bigger problems), so just write zeros instead. + OS.write_zeros(Count % 4); + + // We are properly aligned, so write NOPs as requested. + Count /= 4; + for (uint64_t i = 0; i != Count; ++i) + support::endian::write(OS, 0x43ff075f, support::little); + return true; +} + +bool Sw64AsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { + const unsigned FixupKind = Fixup.getKind(); + switch (FixupKind) { + default: + return false; + // All these relocations require special processing + // at linking time. Delegate this work to a linker. 
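writeNopData above pads any misaligned tail with zero bytes and then fills the rest with the canonical Sw64 NOP word 0x43ff075f, one per remaining 4-byte slot. A minimal stand-alone sketch of that policy (hypothetical helper, little-endian byte order as in the backend):

#include <cstdint>
#include <vector>

static std::vector<uint8_t> makeNopPadding(uint64_t Count) {
  std::vector<uint8_t> Out(Count % 4, 0);     // unaligned remainder -> zeros
  for (uint64_t I = 0; I != Count / 4; ++I)   // aligned part -> NOP words
    for (unsigned B = 0; B != 4; ++B)
      Out.push_back(uint8_t(0x43ff075fu >> (8 * B)));
  return Out;
}
// makeNopPadding(10) yields 2 zero bytes followed by 2 NOP words (10 bytes total).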
+ case Sw64::fixup_SW64_32: + case Sw64::fixup_SW64_64: + case Sw64::fixup_SW64_CTOR: + case Sw64::fixup_SW64_GPREL32: + case Sw64::fixup_SW64_ELF_LITERAL: + case Sw64::fixup_SW64_LITUSE: + case Sw64::fixup_SW64_GPDISP: + case Sw64::fixup_SW64_GPDISP_HI16: + case Sw64::fixup_SW64_HINT: + case Sw64::fixup_SW64_16_PCREL: + case Sw64::fixup_SW64_32_PCREL: + case Sw64::fixup_SW64_64_PCREL: + case Sw64::fixup_SW64_GPREL_HI16: + case Sw64::fixup_SW64_GPREL_LO16: + case Sw64::fixup_SW64_GPREL16: + case Sw64::fixup_SW64_BRSGP: + case Sw64::fixup_SW64_TLSGD: + case Sw64::fixup_SW64_TLSLDM: + case Sw64::fixup_SW64_DTPMOD64: + case Sw64::fixup_SW64_GOTDTPREL16: + case Sw64::fixup_SW64_DTPREL64: + case Sw64::fixup_SW64_DTPREL_HI16: + case Sw64::fixup_SW64_DTPREL_LO16: + case Sw64::fixup_SW64_DTPREL16: + case Sw64::fixup_SW64_GOTTPREL16: + case Sw64::fixup_SW64_TPREL64: + case Sw64::fixup_SW64_TPREL_HI16: + case Sw64::fixup_SW64_TPREL_LO16: + case Sw64::fixup_SW64_TPREL16: + case Sw64::fixup_SW64_ELF_LITERAL_GOT: + return true; + case Sw64::fixup_SW64_23_PCREL_S2: + return false; + } +} + +MCAsmBackend *llvm::createSw64AsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options) { + Sw64ABIInfo ABI = Sw64ABIInfo::computeTargetABI(STI.getTargetTriple(), + STI.getCPU(), Options); + return new Sw64AsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), + ABI.IsS64()); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h new file mode 100644 index 000000000000..3f8bb0cf391b --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h @@ -0,0 +1,96 @@ +//===-- Sw64AsmBackend.h - Sw64 Asm Backend ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64AsmBackend class. +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ASMBACKEND_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ASMBACKEND_H + +#include "MCTargetDesc/Sw64FixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/TargetParser/Triple.h" + +namespace llvm { + +class MCAssembler; +struct MCFixupKindInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSymbolELF; +class Target; + +class Sw64AsmBackend : public MCAsmBackend { + Triple TheTriple; + bool IsS32; + +public: + Sw64AsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, + StringRef CPU, bool S32) + : MCAsmBackend(support::little), TheTriple(TT), IsS32(S32) {} + + std::unique_ptr + createObjectTargetWriter() const override; + + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved, + const MCSubtargetInfo *STI) const override; + + std::optional getFixupKind(StringRef Name) const override; + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + + unsigned getNumFixupKinds() const override { + return Sw64::NumTargetFixupKinds; + } + + /// @name Target Relaxation Interfaces + /// @{ + + /// MayNeedRelaxation - Check whether the given instruction may need + /// relaxation. + /// + /// \param Inst - The instruction to test. 
+ bool mayNeedRelaxation(const MCInst &Inst, + const MCSubtargetInfo &STI) const override { + return false; + } + + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + // FIXME. + llvm_unreachable("RelaxInstruction() unimplemented"); + return false; + } + + /// RelaxInstruction - Relax the instruction in the given fragment + /// to the next wider instruction. + /// + /// \param Inst - The instruction to relax, which may be the same + /// as the output. + /// \param [out] Res On return, the relaxed instruction. + + /// @} + + bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const override; + + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; + +}; // class Sw64AsmBackend + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h new file mode 100644 index 000000000000..822d043816c2 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h @@ -0,0 +1,146 @@ +//===-- Sw64BaseInfo.h - Top level definitions for SW64 MC ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the Sw64 target useful for the compiler back-end and the MC libraries. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64BASEINFO_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64BASEINFO_H + +#include "Sw64FixupKinds.h" +#include "Sw64MCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +/// Sw64II - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace Sw64II { +/// Target Operand Flag enum. +enum TOF { + //===------------------------------------------------------------------===// + // Sw64 Specific MachineOperand flags. + MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + MO_LITERAL, // LITERAL + MO_GPDISP, + MO_GPDISP_HI, + MO_GPDISP_LO, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. + MO_GOT_CALL, + + /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HI, + MO_ABS_LO, + MO_ABS_HILO, + MO_ABS_LI, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + MO_GPREL_HI, + MO_GPREL_LO, + + /// MO_TLSGD - Represents the offset into the global offset table at which + // the module ID and TSL block offset reside during execution (General + // Dynamic TLS). 
+ MO_TLSGD, + + /// MO_TLSLDM - Represents the offset into the global offset table at which + // the module ID and TSL block offset reside during execution (Local + // Dynamic TLS). + MO_TLSLDM, + MO_DTPREL_HI, + MO_DTPREL_LO, + + /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial + // Exec TLS). + MO_GOTTPREL, + + /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from + // the thread pointer (Local Exec TLS). + MO_TPREL_HI, + MO_TPREL_LO, + + // S32/64 Flags. + MO_GPOFF_HI, + MO_GPOFF_LO, + MO_GOT_DISP, + MO_GOT_PAGE, + MO_GOT_OFST, + + /// MO_HIGHER/HIGHEST - Represents the highest or higher half word of a + /// 64-bit symbol address. + MO_HIGHER, + MO_HIGHEST, + + /// MO_GOT_HI16/LO16, MO_CALL_HI16/LO16 - Relocations used for large GOTs. + MO_GOT_HI16, + MO_GOT_LO16, + MO_CALL_HI16, + MO_CALL_LO16, + + /// Helper operand used to generate R_SW64_JALR + MO_JALR, + + // LITERAL_GOT + MO_LITERAL_GOT, + + MO_HINT, + MO_LITERAL_BASE = 0x40, + MO_LITUSE = 0x80 // LITERAL +}; + +enum { + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for + // Sw64 instructions. + // + + // Pseudo - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// FrmR - This form is for instructions of the format R. + FrmR = 1, + /// FrmI - This form is for instructions of the format I. + FrmI = 2, + /// FrmJ - This form is for instructions of the format J. + FrmJ = 3, + /// FrmFR - This form is for instructions of the format FR. + FrmFR = 4, + /// FrmFI - This form is for instructions of the format FI. + FrmFI = 5, + /// FrmOther - This form is for instructions that have no specific format. + FrmOther = 6, + FormMask = 15, + /// IsCTI - Instruction is a Control Transfer Instruction. + IsCTI = 1 << 4, + /// HasForbiddenSlot - Instruction has a forbidden slot. + HasForbiddenSlot = 1 << 5, + /// IsPCRelativeLoad - A Load instruction with implicit source register + /// ($pc) with explicit offset and destination register + IsPCRelativeLoad = 1 << 6, + /// HasFCCRegOperand - Instruction uses an $fcc register. + HasFCCRegOperand = 1 << 7 +}; +} // namespace Sw64II +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp new file mode 100644 index 000000000000..3db5c0ab442c --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp @@ -0,0 +1,463 @@ +//===-- Sw64ELFObjectWriter.cpp - Sw64 ELF Writer -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64FixupKinds.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include + +#define DEBUG_TYPE "sw_64-elf-object-writer" + +using namespace llvm; + +namespace { + +// Holds additional information needed by the relocation ordering algorithm. +struct Sw64RelocationEntry { + const ELFRelocationEntry R; // < The relocation. + bool Matched = false; // < Is this relocation part of a match. + + Sw64RelocationEntry(const ELFRelocationEntry &R) : R(R) {} + + void print(raw_ostream &Out) const { + R.print(Out); + Out << ", Matched=" << Matched; + } +}; + +#ifndef NDEBUG +raw_ostream &operator<<(raw_ostream &OS, const Sw64RelocationEntry &RHS) { + RHS.print(OS); + return OS; +} +#endif + +class Sw64ELFObjectWriter : public MCELFObjectTargetWriter { +public: + Sw64ELFObjectWriter(uint8_t OSABI, bool HasRelocationAddend, bool Is64); + + ~Sw64ELFObjectWriter() override = default; + + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; + bool needsRelocateWithSymbol(const MCSymbol &Sym, + unsigned Type) const override; + void sortRelocs(const MCAssembler &Asm, + std::vector &Relocs) override; +}; + +// The possible results of the Predicate function used by find_best. +enum FindBestPredicateResult { + FindBest_NoMatch = 0, // < The current element is not a match. + FindBest_Match, // < The current element is a match but better ones are + // possible. + FindBest_PerfectMatch, // < The current element is an unbeatable match. +}; + +} // end anonymous namespace + +// Copy elements in the range [First, Last) to d1 when the predicate is true or +// d2 when the predicate is false. This is essentially both std::copy_if and +// std::remove_copy_if combined into a single pass. +template +static std::pair copy_if_else(InputIt First, InputIt Last, + OutputIt1 d1, OutputIt2 d2, + UnaryPredicate Predicate) { + for (InputIt I = First; I != Last; ++I) { + if (Predicate(*I)) { + *d1 = *I; + d1++; + } else { + *d2 = *I; + d2++; + } + } + + return std::make_pair(d1, d2); +} + +// Find the best match in the range [First, Last). +// +// An element matches when Predicate(X) returns FindBest_Match or +// FindBest_PerfectMatch. A value of FindBest_PerfectMatch also terminates +// the search. BetterThan(A, B) is a comparator that returns true when A is a +// better match than B. The return value is the position of the best match. +// +// This is similar to std::find_if but finds the best of multiple possible +// matches. 
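copy_if_else above is a single-pass combination of std::copy_if and std::remove_copy_if: elements for which the predicate returns true go to the first output iterator, the rest to the second. The relocation sorter further below uses it to peel the R_SW_64_DUMMY_LITUSE entries off into a separate list. A tiny self-contained usage example with plain ints (it relies on the copy_if_else template defined above):

#include <iterator>
#include <vector>

// Split a sequence into evens (predicate true -> d1) and odds (d2) in one pass.
static void splitEvensOdds(const std::vector<int> &In, std::vector<int> &Evens,
                           std::vector<int> &Odds) {
  copy_if_else(In.begin(), In.end(), std::back_inserter(Evens),
               std::back_inserter(Odds), [](int V) { return V % 2 == 0; });
}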
+template +static InputIt find_best(InputIt First, InputIt Last, + UnaryPredicate Predicate) { + InputIt Best = Last; + + for (InputIt I = First; I != Last; ++I) { + unsigned Matched = Predicate(*I); + if (Matched != FindBest_NoMatch) { + LLVM_DEBUG(dbgs() << std::distance(First, I) << " is a match ("; + I->print(dbgs()); dbgs() << ")\n"); + if (Best == Last) { + LLVM_DEBUG(dbgs() << ".. and it beats the last one\n"); + Best = I; + } + } + if (Matched == FindBest_PerfectMatch) { + LLVM_DEBUG(dbgs() << ".. and it is unbeatable\n"); + break; + } + } + + return Best; +} + +#ifndef NDEBUG +// Print all the relocations. +template +static void dumpRelocs(const char *Prefix, const Container &Relocs) { + for (const auto &R : Relocs) { + dbgs() << Prefix; + R.print(dbgs()); + dbgs() << "\n"; + } +} +#endif + +Sw64ELFObjectWriter::Sw64ELFObjectWriter(uint8_t OSABI, + bool HasRelocationAddend, bool Is64) + : MCELFObjectTargetWriter(Is64, OSABI, ELF::EM_SW64, HasRelocationAddend) {} + +unsigned Sw64ELFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + // Determine the type of the relocation. + unsigned Kind = (unsigned)Fixup.getKind(); + switch (Kind) { + case Sw64::fixup_SW64_NONE: + return ELF::R_SW_64_NONE; + case FK_Data_1: + case FK_Data_2: + Ctx.reportError(Fixup.getLoc(), + "SW64 does not support one byte relocations"); + return ELF::R_SW_64_NONE; + case FK_Data_4: + if (Fixup.getValue()->getKind() == MCExpr::Binary) + return ELF::R_SW_64_SREL32; // .cfi_startproc + else + return ELF::R_SW_64_REFLONG; // R_SW_64_32 + break; + case FK_Data_8: // .8byte ($.str) + if (IsPCRel) + return ELF::R_SW_64_SREL64; + else + return ELF::R_SW_64_REFQUAD; // R_SW_64_64 + break; + case Sw64::fixup_SW64_32: + return ELF::R_SW_64_REFLONG; + break; + case Sw64::fixup_SW64_64: + case Sw64::fixup_SW64_CTOR: + return ELF::R_SW_64_REFQUAD; + break; + case Sw64::fixup_SW64_GPREL32: + return ELF::R_SW_64_GPREL32; + break; + + case Sw64::fixup_SW64_ELF_LITERAL: + return ELF::R_SW_64_LITERAL; + break; + case Sw64::fixup_SW64_LITUSE: + return ELF::R_SW_64_LITUSE; + break; + case Sw64::fixup_SW64_LITERAL_BASE: + return ELF::R_SW_64_DUMMY_LITERAL; + break; + case Sw64::fixup_SW64_LITUSE_JSRDIRECT: + return ELF::R_SW_64_DUMMY_LITUSE; + break; + case Sw64::fixup_SW64_GPDISP: + return ELF::R_SW_64_GPDISP; + break; + case Sw64::fixup_SW64_GPDISP_HI16: + return ELF::R_SW_64_GPDISP; + break; + case Sw64::fixup_SW64_GPDISP_LO16: + return ELF::R_SW_64_GPDISP; + break; + case Sw64::fixup_SW64_23_PCREL_S2: + return ELF::R_SW_64_BRADDR; + break; + case Sw64::fixup_SW64_HINT: + return ELF::R_SW_64_HINT; + break; + case Sw64::fixup_SW64_16_PCREL: + return ELF::R_SW_64_SREL16; + break; + case Sw64::fixup_SW64_32_PCREL: + return ELF::R_SW_64_SREL32; + break; + case Sw64::fixup_SW64_64_PCREL: + return ELF::R_SW_64_SREL64; + break; + case Sw64::fixup_SW64_GPREL_HI16: + return ELF::R_SW_64_GPRELHIGH; + break; + case Sw64::fixup_SW64_GPREL_LO16: + return ELF::R_SW_64_GPRELLOW; + break; + case Sw64::fixup_SW64_GPREL16: + return ELF::R_SW_64_GPREL16; + break; + case Sw64::fixup_SW64_BRSGP: + return ELF::R_SW_64_BRSGP; + break; + case Sw64::fixup_SW64_TLSGD: + return ELF::R_SW_64_TLSGD; + break; + case Sw64::fixup_SW64_TLSLDM: + return ELF::R_SW_64_TLSLDM; + break; + case Sw64::fixup_SW64_DTPMOD64: + return ELF::R_SW_64_DTPMOD64; + break; + case Sw64::fixup_SW64_GOTDTPREL16: + return ELF::R_SW_64_GOTDTPREL; + break; + case Sw64::fixup_SW64_DTPREL64: + return 
ELF::R_SW_64_DTPREL64; + break; + case Sw64::fixup_SW64_DTPREL_HI16: + return ELF::R_SW_64_DTPRELHI; + break; + case Sw64::fixup_SW64_DTPREL_LO16: + return ELF::R_SW_64_DTPRELLO; + break; + case Sw64::fixup_SW64_DTPREL16: + return ELF::R_SW_64_DTPREL16; + break; + case Sw64::fixup_SW64_GOTTPREL16: + return ELF::R_SW_64_GOTTPREL; + break; + case Sw64::fixup_SW64_TPREL64: + return ELF::R_SW_64_TPREL64; + break; + case Sw64::fixup_SW64_TPREL_HI16: + return ELF::R_SW_64_TPRELHI; + break; + case Sw64::fixup_SW64_TPREL_LO16: + return ELF::R_SW_64_TPRELLO; + break; + case Sw64::fixup_SW64_TPREL16: + return ELF::R_SW_64_TPREL16; + break; + case Sw64::fixup_SW64_ELF_LITERAL_GOT: + return ELF::R_SW_64_LITERAL_GOT; + break; + } + llvm_unreachable("invalid fixup kind!"); +} + +// Determine whether a relocation (X) matches the one given in R. +// +// A relocation matches if: +// - It's type matches that of a corresponding low part. This is provided in +// MatchingType for efficiency. +// - It's based on the same symbol. +// - It's offset of greater or equal to that of the one given in R. +// It should be noted that this rule assumes the programmer does not use +// offsets that exceed the alignment of the symbol. The carry-bit will be +// incorrect if this is not true. +// +// A matching relocation is unbeatable if: +// - It is not already involved in a match. +// - It's offset is exactly that of the one given in R. +static FindBestPredicateResult isMatchingReloc(const Sw64RelocationEntry &X, + const ELFRelocationEntry &R, + unsigned MatchingType) { + if (X.R.Type == MatchingType && X.R.OriginalSymbol == R.OriginalSymbol) { + if (!X.Matched && X.R.OriginalAddend == R.OriginalAddend) + return FindBest_PerfectMatch; + } + return FindBest_NoMatch; +} + +// Rewrite Reloc Target And Type +static ELFRelocationEntry RewriteTypeReloc(const ELFRelocationEntry R, + const MCSymbolELF *RenamedSymA) { + ELFRelocationEntry Entry = R; + switch (R.Type) { + default: + break; + case ELF::R_SW_64_DUMMY_LITUSE: + Entry.Type = ELF::R_SW_64_LITUSE; + Entry.Symbol = RenamedSymA; + Entry.Addend = 0x3; + break; + case ELF::R_SW_64_DUMMY_LITERAL: + Entry.Type = ELF::R_SW_64_LITERAL; + break; + case ELF::R_SW_64_GPDISP: + Entry.Symbol = RenamedSymA; + Entry.Addend = 0x4; + break; + } + return Entry; +} + +void Sw64ELFObjectWriter::sortRelocs(const MCAssembler &Asm, + std::vector &Relocs) { + if (Relocs.size() < 2) + return; + + MCContext &Ctx = Asm.getContext(); + std::list Sorted; + std::list Remainder; + std::list Orig; + const auto *RenamedSymA = cast(Ctx.getOrCreateSymbol(".text")); + + LLVM_DEBUG(dumpRelocs("R: ", Relocs)); + + // Sort relocations by the address they are applied to. + llvm::sort(Relocs, + [](const ELFRelocationEntry &A, const ELFRelocationEntry &B) { + return A.Offset < B.Offset; + }); + + // copy all reloc entry into remainder, except lituse. + // all lituse will be insert literal->next later. + copy_if_else(Relocs.begin(), Relocs.end(), std::back_inserter(Remainder), + std::back_inserter(Sorted), [](const ELFRelocationEntry &Reloc) { + return Reloc.Type == ELF::R_SW_64_DUMMY_LITUSE; + }); + + // Separate the movable relocations (AHL relocations using the high bits) from + // the immobile relocations (everything else). This does not preserve high/low + // matches that already existed in the input. 
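The loop that follows takes each R_SW_64_DUMMY_LITUSE held back in Remainder, locates its partner with isMatchingReloc, and splices it in right behind that literal, so every LITUSE ends up immediately after the LITERAL it qualifies. A minimal model of the matching rule, using hypothetical simplified types:

// A LITUSE pairs with the first not-yet-matched DUMMY_LITERAL that names the
// same symbol and carries the same original addend (see isMatchingReloc above).
struct MiniReloc {
  unsigned Type;
  int Sym;
  long Addend;
  bool Matched = false;
};

static bool pairsWith(const MiniReloc &Literal, const MiniReloc &Use,
                      unsigned LiteralType) {
  return Literal.Type == LiteralType && !Literal.Matched &&
         Literal.Sym == Use.Sym && Literal.Addend == Use.Addend;
}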
+ for (auto &R : Remainder) { + LLVM_DEBUG(dbgs() << "Matching: " << R << "\n"); + + auto InsertionPoint = find_best( + Sorted.begin(), Sorted.end(), [&R](const Sw64RelocationEntry &X) { + return isMatchingReloc(X, R, ELF::R_SW_64_DUMMY_LITERAL); + }); + + if (InsertionPoint != Sorted.end()) { + // if lit_use and literal correctly matched, + // InsertPoint is the reloc entry next to the literal + InsertionPoint->Matched = true; + InsertionPoint = std::next(InsertionPoint, 1); + } + Sorted.insert(InsertionPoint, R)->Matched = true; + } + assert(Relocs.size() == Sorted.size() && "Some relocs were not consumed"); + + // Overwrite the original vector with the sorted elements. The caller expects + // them in reverse order. + unsigned CopyTo = 0; + for (const auto &R : reverse(Sorted)) { + ELFRelocationEntry Entry = RewriteTypeReloc(R.R, RenamedSymA); + Relocs[CopyTo++] = Entry; + } +} + +bool Sw64ELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, + unsigned Type) const { + if (!isUInt<8>(Type)) + return needsRelocateWithSymbol(Sym, Type & 0xff) || + needsRelocateWithSymbol(Sym, (Type >> 8) & 0xff) || + needsRelocateWithSymbol(Sym, (Type >> 16) & 0xff); + + switch (Type) { + default: + errs() << Type << "\n"; + llvm_unreachable("Unexpected relocation"); + return true; + + // This relocation doesn't affect the section data. + case ELF::R_SW_64_NONE: + return false; + // On REL ABI's (e.g. S32), these relocations form pairs. The pairing is done + // by the static linker by matching the symbol and offset. + // We only see one relocation at a time but it's still safe to relocate with + // the section so long as both relocations make the same decision. + // + // Some older linkers may require the symbol for particular cases. Such cases + // are not supported yet but can be added as required. 
+ case ELF::R_SW_64_REFLONG: + case ELF::R_SW_64_REFQUAD: + case ELF::R_SW_64_GPREL32: + case ELF::R_SW_64_LITERAL: + case ELF::R_SW_64_DUMMY_LITERAL: + case ELF::R_SW_64_DUMMY_LITUSE: + case ELF::R_SW_64_LITUSE: + case ELF::R_SW_64_BRADDR: + case ELF::R_SW_64_HINT: + case ELF::R_SW_64_SREL16: + case ELF::R_SW_64_SREL32: + case ELF::R_SW_64_SREL64: + case ELF::R_SW_64_GPRELHIGH: + case ELF::R_SW_64_GPRELLOW: + case ELF::R_SW_64_GPREL16: + case ELF::R_SW_64_COPY: + case ELF::R_SW_64_GLOB_DAT: + case ELF::R_SW_64_JMP_SLOT: + case ELF::R_SW_64_RELATIVE: + case ELF::R_SW_64_BRSGP: + case ELF::R_SW_64_TLSGD: + case ELF::R_SW_64_TLSLDM: + case ELF::R_SW_64_DTPMOD64: + case ELF::R_SW_64_GOTDTPREL: + case ELF::R_SW_64_DTPREL64: + case ELF::R_SW_64_DTPRELHI: + case ELF::R_SW_64_DTPRELLO: + case ELF::R_SW_64_DTPREL16: + case ELF::R_SW_64_GOTTPREL: + case ELF::R_SW_64_TPREL64: + case ELF::R_SW_64_TPRELHI: + case ELF::R_SW_64_TPRELLO: + case ELF::R_SW_64_TPREL16: + case ELF::R_SW_64_NUM: + case ELF::R_SW_64_LITERAL_GOT: + case ELF::R_SW_64_PC32: + case ELF::R_SW_64_EH: + return false; + + case ELF::R_SW_64_GPDISP: + return true; + } +} + +std::unique_ptr +llvm::createSw64ELFObjectWriter(const Triple &TT, bool IsS32) { + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); + bool IsS64 = true; + bool HasRelocationAddend = TT.isArch64Bit(); + return std::make_unique(OSABI, HasRelocationAddend, + IsS64); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp new file mode 100644 index 000000000000..2d5271da7c7d --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp @@ -0,0 +1,108 @@ +//===-------- Sw64ELFStreamer.cpp - ELF Object Output ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
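needsRelocateWithSymbol above also accepts composite values in which several 8-bit relocation types are packed into one word; in that case it answers true if any of the packed bytes would need the symbol. An illustrative stand-alone equivalent of that unpacking (hypothetical helper, assuming at most three packed types as in the code above):

// OR together the per-byte answers for a packed relocation-type word.
static bool anyByteNeedsSymbol(unsigned PackedType,
                               bool (*NeedsSym)(unsigned SingleType)) {
  return NeedsSym(PackedType & 0xff) || NeedsSym((PackedType >> 8) & 0xff) ||
         NeedsSym((PackedType >> 16) & 0xff);
}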
+// +//===----------------------------------------------------------------------===// + +#include "Sw64ELFStreamer.h" +#include "Sw64OptionRecord.h" +#include "Sw64TargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +Sw64ELFStreamer::Sw64ELFStreamer(MCContext &Context, + std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter) + : MCELFStreamer(Context, std::move(MAB), std::move(OW), + std::move(Emitter)) { + RegInfoRecord = new Sw64RegInfoRecord(this, Context); + Sw64OptionRecords.push_back( + std::unique_ptr(RegInfoRecord)); +} + +void Sw64ELFStreamer::emitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { + MCELFStreamer::emitInstruction(Inst, STI); + + MCContext &Context = getContext(); + const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo(); + + for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) { + const MCOperand &Op = Inst.getOperand(OpIndex); + + if (!Op.isReg()) + continue; + + unsigned Reg = Op.getReg(); + RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo); + } + + createPendingLabelRelocs(); +} + +void Sw64ELFStreamer::emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { + Frame.Begin = getContext().createTempSymbol(); + MCELFStreamer::emitLabel(Frame.Begin); +} + +MCSymbol *Sw64ELFStreamer::emitCFILabel() { + MCSymbol *Label = getContext().createTempSymbol("cfi", true); + MCELFStreamer::emitLabel(Label); + return Label; +} + +void Sw64ELFStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { + Frame.End = getContext().createTempSymbol(); + MCELFStreamer::emitLabel(Frame.End); +} + +void Sw64ELFStreamer::createPendingLabelRelocs() { Labels.clear(); } + +void Sw64ELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { + MCELFStreamer::emitLabel(Symbol); + Labels.push_back(Symbol); +} + +void Sw64ELFStreamer::switchSection(MCSection *Section, + const MCExpr *Subsection) { + MCELFStreamer::switchSection(Section, Subsection); + Labels.clear(); +} + +void Sw64ELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size, + SMLoc Loc) { + MCELFStreamer::emitValueImpl(Value, Size, Loc); + Labels.clear(); +} + +void Sw64ELFStreamer::emitIntValue(uint64_t Value, unsigned Size) { + MCELFStreamer::emitIntValue(Value, Size); + Labels.clear(); +} + +void Sw64ELFStreamer::EmitSw64OptionRecords() { + for (const auto &I : Sw64OptionRecords) + I->EmitSw64OptionRecord(); +} + +MCELFStreamer *llvm::createSw64ELFStreamer( + MCContext &Context, std::unique_ptr MAB, + std::unique_ptr OW, std::unique_ptr Emitter, + bool RelaxAll) { + return new Sw64ELFStreamer(Context, std::move(MAB), std::move(OW), + std::move(Emitter)); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h new file mode 100644 index 000000000000..73a1d382a4c6 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h @@ -0,0 +1,83 @@ +//===- Sw64ELFStreamer.h - ELF Object Output --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This is a custom MCELFStreamer which allows us to insert some hooks before +// emitting data into an actual object file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ELFSTREAMER_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ELFSTREAMER_H + +#include "Sw64OptionRecord.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCELFStreamer.h" +#include + +namespace llvm { + +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCSubtargetInfo; +struct MCDwarfFrameInfo; + +class Sw64ELFStreamer : public MCELFStreamer { + SmallVector, 8> Sw64OptionRecords; + Sw64RegInfoRecord *RegInfoRecord; + SmallVector Labels; + +public: + Sw64ELFStreamer(MCContext &Context, std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter); + + // Overriding this function allows us to add arbitrary behaviour before the + // Inst is actually emitted. For example, we can inspect the operands and + // gather sufficient information that allows us to reason about the register + // usage for the translation unit. + void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + + // Overriding this function allows us to record all labels that should be + // marked as microSW64. Based on this data marking is done in + // EmitInstruction. + void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; + + // Overriding this function allows us to dismiss all labels that are + // candidates for marking as microSW64 when .section directive is processed. + void switchSection(MCSection *Section, + const MCExpr *Subsection = nullptr) override; + + // Overriding these functions allows us to dismiss all labels that are + // candidates for marking as microSW64 when .word/.long/.4byte etc + // directives are emitted. + void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; + void emitIntValue(uint64_t Value, unsigned Size) override; + + // Overriding these functions allows us to avoid recording of these labels + // in EmitLabel and later marking them as microSW64. + void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; + void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; + MCSymbol *emitCFILabel() override; + + // Emits all the option records stored up until the point it's called. + void EmitSw64OptionRecords(); + + // Mark labels as microSW64, if necessary for the subtarget. + void createPendingLabelRelocs(); +}; + +MCELFStreamer *createSw64ELFStreamer(MCContext &Context, + std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter, + bool RelaxAll); +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ELFSTREAMER_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h new file mode 100644 index 000000000000..ae378ac175bf --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h @@ -0,0 +1,174 @@ +//===-- Sw64FixupKinds.h - Sw64 Specific Fixup Entries ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
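// Editorial sketch, not part of the patch: fixup_SW64_23_PCREL_S2 declared
// below is the 21-bit, word-scaled branch displacement that becomes
// R_SW_64_BRADDR. Assuming the usual Alpha-style convention that the
// displacement is measured from the instruction following the branch, the
// field value can be computed as follows (the helper name is illustrative).
#include <cassert>
#include <cstdint>
static uint32_t encodeBranchDisp21(uint64_t FixupAddr, uint64_t TargetAddr) {
  int64_t Disp = (int64_t)(TargetAddr - (FixupAddr + 4)) >> 2; // word-scaled
  assert(Disp >= -(1 << 20) && Disp < (1 << 20) && "branch target out of range");
  return (uint32_t)Disp & 0x1FFFFF;                            // low 21 bits
}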
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64FIXUPKINDS_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace Sw64 { +// Although most of the current fixup types reflect a unique relocation +// one can have multiple fixup types for a given relocation and thus need +// to be uniquely named. +// +// This table *must* be in the same order of +// MCFixupKindInfo Infos[Sw64::NumTargetFixupKinds] +// in Sw64AsmBackend.cpp. +// +enum Fixups { + // Branch fixups resulting in R_SW64_NONE. + fixup_SW64_NONE = FirstTargetFixupKind, + + // A 32 bit reference to a symbol. + // resulting in R_SW_64_REFLONG. + fixup_SW64_32, + + // A 64 bit reference to a symbol. + // resulting in - R_SW_64_REFQUAD. + fixup_SW64_64, + + // A 64 bit reference to a symbol. + // resulting in - R_SW_64_REFQUAD. + fixup_SW64_CTOR, + + // A 32 bit GP relative offset. This is just like REFLONG except + // that when the value is used the value of the gp register will be + // added in. + // resulting in - R_SW_64_GPREL32. + fixup_SW64_GPREL32, + + // Used for an instruction that refers to memory off the GP register + // resulting in - R_SW_64_LITERAL. + fixup_SW64_ELF_LITERAL, + // This reloc only appears immediately following an ELF_LITERAL reloc. + // It identifies a use of the literal. The symbol index is special: + // 1 means the literal address is in the base register of a memory + // format instruction; 2 means the literal address is in the byte + // offset register of a byte-manipulation instruction; 3 means the + // literal address is in the target register of a jsr instruction. + // This does not actually do any relocation. + // resulting in - R_SW_64_LITUSE. + fixup_SW64_LITUSE, + + // Load the gp register. This is always used for a ldih instruction + // which loads the upper 16 bits of the gp register. The symbol + // index of the GPDISP instruction is an offset in bytes to the lda + // instruction that loads the lower 16 bits. The value to use for + // the relocation is the difference between the GP value and the + // current location; the load will always be done against a register + // holding the current address. + // resulting in - R_SW_64_GPDISP. + fixup_SW64_GPDISP, + fixup_SW64_GPDISP_HI16, + fixup_SW64_GPDISP_LO16, + + // A 21 bit branch. + // resulting in - R_SW_64_BRADDR. + fixup_SW64_23_PCREL_S2, + // A hint for a jump to a register. + // resulting in - R_SW_64_HINT. + fixup_SW64_HINT, + + // 16 bit PC relative offset. + // resulting in - R_SW_64_SREL16. + fixup_SW64_16_PCREL, + + // 32 bit PC relative offset. + // resulting in - R_SW_64_SREL32. + fixup_SW64_32_PCREL, + + // 64 bit PC relative offset. + // resulting in - R_SW_64_SREL64. + fixup_SW64_64_PCREL, + + // The high 16 bits of the displacement from GP to the target + // resulting in - R_SW_64_GPRELHIGH. + fixup_SW64_GPREL_HI16, + + // The low 16 bits of the displacement from GP to the target + // resulting in - R_SW_64_GPRELLOW. + fixup_SW64_GPREL_LO16, + + // A 16-bit displacement from the GP to the target + // resulting in - R_SW_64_GPREL16. + fixup_SW64_GPREL16, + // A 21 bit branch that adjusts for gp loads + // resulting in - R_SW_64_BRSGP. + fixup_SW64_BRSGP, + + // Creates a tls_index for the symbol in the got. + // resulting in - R_SW_64_TLSGD. + fixup_SW64_TLSGD, + + // Creates a tls_index for the (current) module in the got. + // resulting in - R_SW_64_TLSLDM. 
+ fixup_SW64_TLSLDM, + + // A dynamic relocation for a DTP module entry. + // resulting in - R_SW_64_DTPMOD64. + fixup_SW64_DTPMOD64, + + // Creates a 64-bit offset in the got for the displacement from DTP to the + // target. + // resulting in - R_SW_64_GOTDTPREL. + fixup_SW64_GOTDTPREL16, + + // A dynamic relocation for a displacement from DTP to the target. + // resulting in - R_SW_64_DTPREL64. + fixup_SW64_DTPREL64, + + // The high 16 bits of the displacement from DTP to the target. + // resulting in - R_SW_64_DTPRELHI. + fixup_SW64_DTPREL_HI16, + // The low 16 bits of the displacement from DTP to the target. + // resulting in - R_SW_64_DTPRELLO. + fixup_SW64_DTPREL_LO16, + + // A 16-bit displacement from DTP to the target. + // resulting in - R_SW_64_DTPREL16 + fixup_SW64_DTPREL16, + + // Creates a 64-bit offset in the got for the displacement from TP to the + // target. + // resulting in - R_SW_64_GOTTPREL + fixup_SW64_GOTTPREL16, + + // A dynamic relocation for a displacement from TP to the target. + // resulting in - R_SW_64_TPREL64 + fixup_SW64_TPREL64, + + // The high 16 bits of the displacement from TP to the target. + // resulting in - R_SW_64_TPRELHI + fixup_SW64_TPREL_HI16, + + // The low 16 bits of the displacement from TP to the target. + // resulting in - R_SW_64_TPRELLO + fixup_SW64_TPREL_LO16, + + // A 16-bit displacement from TP to the target. + // resulting in - R_SW_64_TPREL16 + fixup_SW64_TPREL16, + + // Used for an instruction that refers to memory off the GP register + // together with literal, expand call range to 32 bits offset + // resulting in - R_SW_64_LITERAL_GOT + fixup_SW64_ELF_LITERAL_GOT, + + // TODO: for literal sorting reloc + fixup_SW64_LITERAL_BASE, + fixup_SW64_LITUSE_JSRDIRECT, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} // namespace Sw64 +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp new file mode 100644 index 000000000000..bdbd6d0bdf54 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp @@ -0,0 +1,42 @@ +//===-- Sw64MCAsmInfo.cpp - Sw64 asm properties -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the Sw64MCAsmInfo properties. 
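// Editorial sketch, not part of the patch: the *_HI16/*_LO16 fixup pairs
// declared above (GPDISP, GPREL, DTPREL, TPREL) split a displacement into two
// 16-bit halves where the low half is sign-extended when applied, so the high
// half must compensate. A standalone restatement, assuming the displacement
// fits in 32 bits; the helper name is illustrative.
#include <cstdint>
static void splitHiLo16(int32_t Disp, int16_t &Hi, int16_t &Lo) {
  Lo = (int16_t)Disp;                            // low 16 bits, sign-extended at use
  Hi = (int16_t)(((int64_t)Disp - Lo) >> 16);    // absorbs the borrow from a negative Lo
  // Invariant: (int32_t)Hi * 65536 + Lo == Disp
}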
+// +//===----------------------------------------------------------------------===// + +#include "Sw64MCAsmInfo.h" +#include "llvm/TargetParser/Triple.h" + +using namespace llvm; + +void Sw64MCAsmInfo::anchor() {} + +Sw64MCAsmInfo::Sw64MCAsmInfo(const Triple &TheTriple, + const MCTargetOptions &Options) { + IsLittleEndian = TheTriple.isLittleEndian(); + assert(IsLittleEndian == true && "sw_64 machine is litter endian!"); + + CodePointerSize = CalleeSaveStackSlotSize = 8; + + PrivateGlobalPrefix = ".L"; + AlignmentIsInBytes = false; + Data16bitsDirective = "\t.2byte\t"; + Data32bitsDirective = "\t.4byte\t"; + Data64bitsDirective = "\t.8byte\t"; + WeakRefDirective = "\t.weak\t"; + CommentString = "#"; + // For chang assemble directer ".set LA, LB" to "LA = LB" + HasSw64SetDirective = true; + UsesELFSectionDirectiveForBSS = true; + SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::DwarfCFI; + DwarfRegNumForCFI = true; + UseIntegratedAssembler = true; +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h new file mode 100644 index 000000000000..f7809419ecb8 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h @@ -0,0 +1,32 @@ +//===-- Sw64MCAsmInfo.h - Sw64 Asm Info ------------------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the Sw64MCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCASMINFO_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCASMINFO_H + +#include "llvm/MC/MCAsmInfoELF.h" + +namespace llvm { +class Triple; + +class Sw64MCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit Sw64MCAsmInfo(const Triple &TheTriple, + const MCTargetOptions &Options); +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp new file mode 100644 index 000000000000..2e56da22b398 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp @@ -0,0 +1,451 @@ +//===-- Sw64MCCodeEmitter.cpp - Convert Sw64 Code to Machine Code ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64MCCodeEmitter class. 
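// Editorial sketch, not part of the patch: EmitInstruction in the code emitter
// below walks the encoded word from the least significant byte upward, so a
// 4-byte Sw64 instruction is serialized little-endian. Shown standalone with
// an illustrative name.
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
static void emitLE32(uint32_t Word, llvm::raw_ostream &OS) {
  for (unsigned I = 0; I < 4; ++I)
    OS << (char)((Word >> (I * 8)) & 0xff); // byte 0 (LSB) first
}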
+// +//===----------------------------------------------------------------------===// + +#include "Sw64MCCodeEmitter.h" +#include "MCTargetDesc/Sw64FixupKinds.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +#define GET_INSTRMAP_INFO +#include "Sw64GenInstrInfo.inc" +#undef GET_INSTRMAP_INFO + +namespace llvm { + +MCCodeEmitter *createSw64MCCodeEmitterEB(const MCInstrInfo &MCII, + MCContext &Ctx) { + return new Sw64MCCodeEmitter(MCII, Ctx, false); +} + +MCCodeEmitter *createSw64MCCodeEmitterEL(const MCInstrInfo &MCII, + MCContext &Ctx) { + return new Sw64MCCodeEmitter(MCII, Ctx, true); +} + +} // end namespace llvm + +MCInst Sw64MCCodeEmitter::LowerCompactBranch(MCInst TmpInst) const { + // > + // ==> > + + MCInst TI; + unsigned int Size = TmpInst.getNumOperands(); + // for test op is or not a imm + // as "bsr $RA,disp" will be convert to " bsr disp" will be an error + TI.setOpcode(TmpInst.getOpcode()); + if (TmpInst.getOperand(0).isImm()) + for (unsigned int i = 0; i < Size; i++) { + if (i == 0) + continue; + TI.addOperand(TmpInst.getOperand(i)); + } + else { + return TmpInst; + } + + return TI; +} + +void Sw64MCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; +} + +void Sw64MCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size, + const MCSubtargetInfo &STI, + raw_ostream &OS) const { + // Output the instruction encoding in little endian byte order. + // Little-endian byte ordering: + // sw_64: 4 | 3 | 2 | 1 + for (unsigned i = 0; i < Size; ++i) { + unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8; + EmitByte((Val >> Shift) & 0xff, OS); + } +} + +/// encodeInstruction - Emit the instruction. +/// Size the instruction with Desc.getSize(). +void Sw64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + // Non-pseudo instructions that get changed for direct object + // only based on operand values. + // If this list of instructions get much longer we will move + // the check to a function call. Until then, this is more efficient. + MCInst TmpInst = MI; + + switch (MI.getOpcode()) { + // If shift amount is >= 32 it the inst needs to be lowered further + case Sw64::BEQ: + case Sw64::BGE: + case Sw64::BGT: + case Sw64::BLBC: + case Sw64::BLBS: + case Sw64::BLE: + case Sw64::BLT: + case Sw64::BNE: + case Sw64::BR: + case Sw64::BSR: + case Sw64::FBEQ: + case Sw64::FBGE: + case Sw64::FBGT: + case Sw64::FBLE: + case Sw64::FBLT: + case Sw64::FBNE: + break; + case Sw64::ALTENT: + return; + } + + uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + + const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); + + // Get byte count of instruction + unsigned Size = Desc.getSize(); + if (!Size) + llvm_unreachable("Desc.getSize() returns 0"); + + EmitInstruction(Binary, Size, STI, OS); +} + +/// getBranchTargetOpValue - Return binary encoding of the branch +/// target operand. 
If the machine operand requires relocation, +/// [(store F4RC:$RA, (Sw64_gprello tglobaladdr:$DISP, +/// GPRC:$RB))], s_ild_lo>; +/// record the relocation and return zero. +unsigned +Sw64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + + // If the destination is an immediate, divide by 4. + if (MO.isImm()) + return MO.getImm() >> 2; + + assert(MO.isExpr() && + "getBranchTargetOpValue expects only expressions or immediates"); + + const MCExpr *FixupExpression = MO.getExpr(); + + Fixups.push_back(MCFixup::create(0, FixupExpression, + MCFixupKind(Sw64::fixup_SW64_23_PCREL_S2))); + return 0; +} + +/// getJumpTargetOpValue - Return binary encoding of the jump +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned +Sw64MCCodeEmitter::getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) + return MO.getImm() >> 2; + + assert(MO.isExpr() && + "getJumpTargetOpValue expects only expressions or an immediate"); + + const MCExpr *FixupExpression = MO.getExpr(); + + Fixups.push_back(MCFixup::create(0, FixupExpression, + MCFixupKind(Sw64::fixup_SW64_23_PCREL_S2))); + return 0; +} + +static MCOperand createLituse(MCContext *Ctx) { + const MCSymbol *Sym = Ctx->getOrCreateSymbol(".text"); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, *Ctx); + + return MCOperand::createExpr( + Sw64MCExpr::create(Sw64MCExpr::MEK_LITUSE_JSR, Expr, *Ctx)); +} + +unsigned Sw64MCCodeEmitter::getExprOpValue(const MCExpr *Expr, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + MCExpr::ExprKind Kind = Expr->getKind(); + if (Kind == MCExpr::Constant) { + return cast(Expr)->getValue(); + } + + if (Kind == MCExpr::Binary) { + unsigned Res = + getExprOpValue(cast(Expr)->getLHS(), Fixups, STI); + Res += getExprOpValue(cast(Expr)->getRHS(), Fixups, STI); + return Res; + } + + if (Kind == MCExpr::Target) { + const Sw64MCExpr *Sw64Expr = cast(Expr); + + Sw64::Fixups FixupKind = Sw64::Fixups(0); + switch (Sw64Expr->getKind()) { + default: + llvm_unreachable("Unknown fixup kind!"); + break; + case Sw64MCExpr::MEK_LITUSE_BASE: + FixupKind = Sw64::fixup_SW64_LITERAL_BASE; + break; + case Sw64MCExpr::MEK_LITUSE_JSRDIRECT: + FixupKind = Sw64::fixup_SW64_LITUSE_JSRDIRECT; + Fixups.push_back( + MCFixup::create(0, Sw64Expr, MCFixupKind(Sw64::fixup_SW64_HINT))); + break; + case Sw64MCExpr::MEK_ELF_LITERAL: + FixupKind = Sw64::fixup_SW64_ELF_LITERAL; + break; + case Sw64MCExpr::MEK_LITUSE_ADDR: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_LITUSE_BYTOFF: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_LITUSE_JSR: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_LITUSE_TLSGD: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_LITUSE_TLSLDM: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_HINT: + FixupKind = Sw64::fixup_SW64_HINT; + break; + case Sw64MCExpr::MEK_GPDISP: + FixupKind = Sw64::fixup_SW64_GPDISP; + break; + case Sw64MCExpr::MEK_GPDISP_HI16: + FixupKind = Sw64::fixup_SW64_GPDISP_HI16; + break; + case Sw64MCExpr::MEK_GPDISP_LO16: + return 0; + case Sw64MCExpr::MEK_GPREL_HI16: + FixupKind = Sw64::fixup_SW64_GPREL_HI16; + break; + case 
Sw64MCExpr::MEK_GPREL_LO16: + FixupKind = Sw64::fixup_SW64_GPREL_LO16; + break; + case Sw64MCExpr::MEK_GPREL16: + FixupKind = Sw64::fixup_SW64_GPREL16; + break; + case Sw64MCExpr::MEK_BRSGP: + FixupKind = Sw64::fixup_SW64_BRSGP; + break; + case Sw64MCExpr::MEK_TLSGD: + FixupKind = Sw64::fixup_SW64_TLSGD; + break; + case Sw64MCExpr::MEK_TLSLDM: + FixupKind = Sw64::fixup_SW64_TLSLDM; + break; + case Sw64MCExpr::MEK_GOTDTPREL16: + FixupKind = Sw64::fixup_SW64_GOTDTPREL16; + break; + case Sw64MCExpr::MEK_DTPREL_HI16: + FixupKind = Sw64::fixup_SW64_DTPREL_HI16; + break; + case Sw64MCExpr::MEK_DTPREL_LO16: + FixupKind = Sw64::fixup_SW64_DTPREL_LO16; + break; + case Sw64MCExpr::MEK_DTPREL16: + FixupKind = Sw64::fixup_SW64_DTPREL16; + break; + case Sw64MCExpr::MEK_GOTTPREL16: + FixupKind = Sw64::fixup_SW64_GOTTPREL16; + break; + case Sw64MCExpr::MEK_TPREL_HI16: + FixupKind = Sw64::fixup_SW64_TPREL_HI16; + break; + case Sw64MCExpr::MEK_TPREL_LO16: + FixupKind = Sw64::fixup_SW64_TPREL_LO16; + break; + case Sw64MCExpr::MEK_TPREL16: + FixupKind = Sw64::fixup_SW64_TPREL16; + break; + case Sw64MCExpr::MEK_ELF_LITERAL_GOT: + FixupKind = Sw64::fixup_SW64_ELF_LITERAL_GOT; + break; + } // switch + + Fixups.push_back(MCFixup::create(0, Sw64Expr, MCFixupKind(FixupKind))); + return 0; + } + + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned +Sw64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); + return RegNo; + } else if (MO.isImm()) { + return static_cast(MO.getImm()); + } else if (MO.isDFPImm()) { + return static_cast(bit_cast(MO.getDFPImm())); + } + + // beq op1 op2 + // to + // beq opc op1 op2 + if (MCII.get(MI.getOpcode()).isBranch() && MI.getNumOperands() == 3) { + // for beq/bne/fbeq .... + return getBranchTargetOpValue(MI, 2, Fixups, STI); + } else if (MCII.get(MI.getOpcode()).isBranch() && MI.getNumOperands() == 2) { + // for br/bsr + return getJumpTargetOpValue(MI, 1, Fixups, STI); + } + + // MO must be an Expr. + assert(MO.isExpr()); + return getExprOpValue(MO.getExpr(), Fixups, STI); +} + +/// Return binary encoding of memory related operand. +/// If the offset operand requires relocation, record the relocation. +template +unsigned Sw64MCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + unsigned RegBits; // Base register is encoded in bits 20-16. + unsigned OffBits; // offset is encoded in bits 15-0. + + if (MI.getOperand(OpNo).isImm()) { // vload + RegBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI) << 16; + OffBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); + } else { // vstore + RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 16; + OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI); + } + + // Apply the scale factor if there is one. 
+ // OffBits >>= ShiftAmount; + + return (OffBits & 0xFFFF) | RegBits; +} + +// FIXME: should be called getMSBEncoding +unsigned +Sw64MCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + assert(MI.getOperand(OpNo - 1).isImm()); + assert(MI.getOperand(OpNo).isImm()); + unsigned Position = + getMachineOpValue(MI, MI.getOperand(OpNo - 1), Fixups, STI); + unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); + + return Position + Size - 1; +} + +unsigned Sw64MCCodeEmitter::getUImm4AndValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + assert(MI.getOperand(OpNo).isImm()); + const MCOperand &MO = MI.getOperand(OpNo); + unsigned Value = MO.getImm(); + switch (Value) { + case 128: + return 0x0; + case 1: + return 0x1; + case 2: + return 0x2; + case 3: + return 0x3; + case 4: + return 0x4; + case 7: + return 0x5; + case 8: + return 0x6; + case 15: + return 0x7; + case 16: + return 0x8; + case 31: + return 0x9; + case 32: + return 0xa; + case 63: + return 0xb; + case 64: + return 0xc; + case 255: + return 0xd; + case 32768: + return 0xe; + case 65535: + return 0xf; + } + llvm_unreachable("Unexpected value"); +} + +unsigned +Sw64MCCodeEmitter::getRegisterListOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + unsigned res = 0; + + // Register list operand is always first operand of instruction and it is + // placed before memory operand (register + imm). + + for (unsigned I = OpNo, E = MI.getNumOperands() - 2; I < E; ++I) { + unsigned Reg = MI.getOperand(I).getReg(); + unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); + if (RegNo != 31) + res++; + else + res |= 0x10; + } + return res; +} + +unsigned +Sw64MCCodeEmitter::getRegisterListOpValue16(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return (MI.getNumOperands() - 4); +} + +#include "Sw64GenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h new file mode 100644 index 000000000000..56539f35c2ee --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h @@ -0,0 +1,111 @@ +//===- Sw64MCCodeEmitter.h - Convert Sw64 Code to Machine Code --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64MCCodeEmitter class. 
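// Editorial sketch, not part of the patch: getMemEncoding in the code emitter
// above packs a memory operand as a 16-bit displacement in bits [15:0] and the
// base register number in bits [20:16]. A standalone restatement with an
// illustrative name and an explicit 5-bit register mask.
#include <cstdint>
static uint32_t packMemOperand(unsigned BaseReg, int64_t Offset) {
  return ((BaseReg & 0x1Fu) << 16) | (uint32_t)(Offset & 0xFFFF);
}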
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCCODEEMITTER_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCCODEEMITTER_H + +#include "llvm/MC/MCCodeEmitter.h" +#include + +namespace llvm { + +class MCContext; +class MCExpr; +class MCFixup; +class MCInst; +class MCInstrInfo; +class MCOperand; +class MCSubtargetInfo; +class raw_ostream; + +class Sw64MCCodeEmitter : public MCCodeEmitter { + const MCInstrInfo &MCII; + MCContext &Ctx; + bool IsLittleEndian; + +public: + Sw64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) + : MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} + Sw64MCCodeEmitter(const Sw64MCCodeEmitter &) = delete; + Sw64MCCodeEmitter &operator=(const Sw64MCCodeEmitter &) = delete; + ~Sw64MCCodeEmitter() override = default; + + void EmitByte(unsigned char C, raw_ostream &OS) const; + + void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI, + raw_ostream &OS) const; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + // getJumpTargetOpValue - Return binary encoding of the jump + // target operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + // getBranchTargetOpValue - Return binary encoding of the branch + // target operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + // getMachineOpValue - Return binary encoding of operand. If the machin + // operand requires relocation, record the relocation and return zero. 
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getMSAMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + template + unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getUImm4AndValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getRegisterListOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getRegisterListOpValue16(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + +private: + MCInst LowerCompactBranch(MCInst TempInst) const; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCCODEEMITTER_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp new file mode 100644 index 000000000000..a1f2c430646d --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp @@ -0,0 +1,176 @@ +//===-- Sw64MCExpr.cpp - Sw64 specific MC expression classes --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64MCExpr.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "sw64mcexpr" + +const Sw64MCExpr *Sw64MCExpr::create(Sw64MCExpr::Sw64ExprKind Kind, + const MCExpr *Expr, MCContext &Ctx) { + return new (Ctx) Sw64MCExpr(Kind, Expr); +} + +void Sw64MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + int64_t AbsVal; + // FIXME: the end "(" need match + if (Expr->evaluateAsAbsolute(AbsVal)) + OS << AbsVal; + else + Expr->print(OS, MAI, true); +} + +bool Sw64MCExpr::evaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout, + const MCFixup *Fixup) const { + if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup)) + return false; + + if (Res.getRefKind() != MCSymbolRefExpr::VK_None) + return false; + + // evaluateAsAbsolute() and evaluateAsValue() require that we evaluate the + // %hi/%lo/etc. here. Fixup is a null pointer when either of these is the + // caller. + if (Res.isAbsolute() && Fixup == nullptr) { + int64_t AbsVal = Res.getConstant(); + switch (Kind) { + case MEK_None: + llvm_unreachable("MEK_None is invalid"); + case MEK_DTPREL16: + // MEK_DTPREL is used for marking TLS DIEExpr only + // and contains a regular sub-expression. + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); + case MEK_ELF_LITERAL: /* !literal relocation. */ + case MEK_LITUSE_ADDR: /* !lituse_addr relocation. 
*/ + case MEK_LITUSE_BASE: /* !lituse_base relocation. */ + case MEK_LITUSE_BYTOFF: /* !lituse_bytoff relocation. */ + case MEK_LITUSE_JSR: /* !lituse_jsr relocation. */ + case MEK_LITUSE_TLSGD: /* !lituse_tlsgd relocation. */ + case MEK_LITUSE_TLSLDM: /* !lituse_tlsldm relocation. */ + case MEK_LITUSE_JSRDIRECT: /* !lituse_jsrdirect relocation. */ + case MEK_GPDISP: /* !gpdisp relocation. */ + case MEK_GPDISP_HI16: + case MEK_GPDISP_LO16: + case MEK_GPREL_HI16: /* !gprelhigh relocation. */ + case MEK_GPREL_LO16: /* !gprellow relocation. */ + case MEK_GPREL16: /* !gprel relocation. */ + case MEK_BRSGP: /* !samegp relocation. */ + case MEK_TLSGD: /* !tlsgd relocation. */ + case MEK_TLSLDM: /* !tlsldm relocation. */ + case MEK_GOTDTPREL16: /* !gotdtprel relocation. */ + case MEK_DTPREL_HI16: /* !dtprelhi relocation. */ + case MEK_DTPREL_LO16: /* !dtprello relocation. */ + case MEK_GOTTPREL16: /* !gottprel relocation. */ + case MEK_TPREL_HI16: /* !tprelhi relocation. */ + case MEK_TPREL_LO16: /* !tprello relocation. */ + case MEK_TPREL16: /* !tprel relocation. */ + case MEK_ELF_LITERAL_GOT: /* !literal_got relocation. */ + return false; + } + Res = MCValue::get(AbsVal); + return true; + } + // We want to defer it for relocatable expressions since the constant is + // applied to the whole symbol value. + // + // The value of getKind() that is given to MCValue is only intended to aid + // debugging when inspecting MCValue objects. It shouldn't be relied upon + // for decision making. + Res = + MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); + + return true; +} + +void Sw64MCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); + break; + case MCExpr::Constant: + break; + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + case MCExpr::SymbolRef: { + // We're known to be under a TLS fixup, so any symbol should be + // modified. There should be only one. + const MCSymbolRefExpr &SymRef = *cast(Expr); + cast(SymRef.getSymbol()).setType(ELF::STT_TLS); + break; + } + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); + break; + } +} + +// For lituse relocation, we don't need to change symbol type +// to tls. +void Sw64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + switch (getKind()) { + case MEK_None: + llvm_unreachable("MEK_None and MEK_Special are invalid"); + break; + case MEK_GPDISP: + case MEK_LITUSE_BASE: /* !lituse_base relocation. */ + case MEK_LITUSE_JSRDIRECT: /* !lituse_jsrdirect relocation. */ + case MEK_GPDISP_HI16: + case MEK_GPDISP_LO16: + case MEK_ELF_LITERAL: + case MEK_ELF_LITERAL_GOT: + case MEK_GPREL_HI16: + case MEK_GPREL_LO16: + case MEK_GPREL16: + case MEK_BRSGP: + // If we do have nested target-specific expressions, they will be in + // a consecutive chain. + if (const Sw64MCExpr *E = dyn_cast(getSubExpr())) + E->fixELFSymbolsInTLSFixups(Asm); + break; + case MEK_DTPREL16: + case MEK_LITUSE_ADDR: /* !lituse_addr relocation. */ + case MEK_LITUSE_BYTOFF: /* !lituse_bytoff relocation. */ + case MEK_LITUSE_JSR: /* !lituse_jsr relocation. */ + case MEK_LITUSE_TLSGD: /* !lituse_tlsgd relocation. 
*/ + case MEK_LITUSE_TLSLDM: /* !lituse_tlsldm relocation. */ + case MEK_TLSGD: /* !tlsgd relocation. */ + case MEK_TLSLDM: /* !tlsldm relocation. */ + case MEK_GOTDTPREL16: /* !gotdtprel relocation. */ + case MEK_DTPREL_HI16: /* !dtprelhi relocation. */ + case MEK_DTPREL_LO16: /* !dtprello relocation. */ + case MEK_GOTTPREL16: /* !gottprel relocation. */ + case MEK_TPREL_HI16: /* !tprelhi relocation. */ + case MEK_TPREL_LO16: /* !tprello relocation. */ + case MEK_TPREL16: /* !tprel relocation. */ + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); + break; + } +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h new file mode 100644 index 000000000000..a83efa56ff1a --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h @@ -0,0 +1,97 @@ +//===- Sw64MCExpr.h - Sw64 specific MC expression classes -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCEXPR_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCEXPR_H + +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" + +namespace llvm { +class Sw64MCExpr : public MCTargetExpr { +public: + // for linker relax, add complex relocation + // exprkind here + enum Sw64ExprKind { + // use for relax + MEK_HINT = 0x100, + MEK_LITERAL = 0x200, + MEK_LITUSE = 0x400, + + // do complex relocation + MEK_LITUSE_BASE = MEK_LITERAL | MEK_LITUSE, + MEK_LITUSE_JSRDIRECT = MEK_HINT | MEK_LITUSE, + + // None + MEK_None = 0x000, + + // final reloc + MEK_ELF_LITERAL, /* !literal relocation. */ + MEK_ELF_LITERAL_GOT, /* !literal_got relocation */ + MEK_LITUSE_ADDR, /* !lituse_addr relocation. */ + MEK_LITUSE_BYTOFF, /* !lituse_bytoff relocation. */ + MEK_LITUSE_JSR, /* !lituse_jsr relocation. */ + MEK_LITUSE_TLSGD, /* !lituse_tlsgd relocation. */ + MEK_LITUSE_TLSLDM, /* !lituse_tlsldm relocation. */ + MEK_GPDISP, /* !gpdisp relocation. */ + MEK_GPDISP_HI16, + MEK_GPDISP_LO16, + MEK_GPREL_HI16, /* !gprelhigh relocation. */ + MEK_GPREL_LO16, /* !gprellow relocation. */ + MEK_GPREL16, /* !gprel relocation. */ + MEK_BRSGP, /* !samegp relocation. */ + MEK_TLSGD, /* !tlsgd relocation. */ + MEK_TLSLDM, /* !tlsldm relocation. */ + MEK_GOTDTPREL16, /* !gotdtprel relocation. */ + MEK_DTPREL_HI16, /* !dtprelhi relocation. */ + MEK_DTPREL_LO16, /* !dtprello relocation. */ + MEK_DTPREL16, /* !dtprel relocation. */ + MEK_GOTTPREL16, /* !gottprel relocation. */ + MEK_TPREL_HI16, /* !tprelhi relocation. */ + MEK_TPREL_LO16, /* !tprello relocation. */ + MEK_TPREL16, /* !tprel relocation. */ + }; + +private: + const Sw64ExprKind Kind; + const MCExpr *Expr; + + explicit Sw64MCExpr(Sw64ExprKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} + +public: + static const Sw64MCExpr *create(Sw64ExprKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + // Get the kind of this expression. + Sw64ExprKind getKind() const { return Kind; } + + // Get the child of this expression. 
+ const MCExpr *getSubExpr() const { return Expr; } + + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCEXPR_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp new file mode 100644 index 000000000000..d07dc3ff582d --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp @@ -0,0 +1,189 @@ +//===-- Sw64MCTargetDesc.cpp - Sw64 Target Descriptions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Sw64 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "Sw64MCTargetDesc.h" +#include "InstPrinter/Sw64InstPrinter.h" +#include "Sw64AsmBackend.h" +#include "Sw64ELFStreamer.h" +#include "Sw64MCAsmInfo.h" +#include "Sw64TargetStreamer.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/TargetParser/Triple.h" + +using namespace llvm; +namespace llvm { + +class MCInstrInfo; + +} // end namespace llvm +#define GET_INSTRINFO_MC_DESC +#include "Sw64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "Sw64GenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "Sw64GenRegisterInfo.inc" + +/// Select the Sw64 CPU for the given triple and cpu name. 
+/// FIXME: Merge with the copy in Sw64Subtarget.cpp +StringRef SW64_MC::selectSw64CPU(const Triple &TT, StringRef CPU) { + return CPU = "sw_64"; +} + +static MCInstrInfo *createSw64MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSw64MCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createSw64MCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSw64MCRegisterInfo(X, Sw64::R26); + return X; +} + +static MCSubtargetInfo *createSw64MCSubtargetInfo(const Triple &TT, + StringRef CPU, StringRef FS) { + CPU = SW64_MC::selectSw64CPU(TT, CPU); + return createSw64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); +} + +static MCAsmInfo *createSw64MCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT, + const MCTargetOptions &Options) { + MCAsmInfo *MAI = new Sw64MCAsmInfo(TT, Options); + + unsigned SP = MRI.getDwarfRegNum(Sw64::R30, true); + MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCInstPrinter *createSw64MCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new Sw64InstPrinter(MAI, MII, MRI); +} + +static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter, + bool RelaxAll) { + MCStreamer *S; + S = createSw64ELFStreamer(Context, std::move(MAB), std::move(OW), + std::move(Emitter), RelaxAll); + return S; +} + +static MCTargetStreamer *createSw64AsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new Sw64TargetAsmStreamer(S, OS); +} + +static MCTargetStreamer *createSw64NullTargetStreamer(MCStreamer &S) { + return new Sw64TargetStreamer(S); +} + +static MCTargetStreamer * +createSw64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new Sw64TargetELFStreamer(S, STI); +} + +namespace { + +class Sw64MCInstrAnalysis : public MCInstrAnalysis { +public: + Sw64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override { + unsigned NumOps = Inst.getNumOperands(); + if (NumOps == 0) + return false; + if (Inst.getOpcode() == Sw64::JSR || Inst.getOpcode() == Sw64::JSR) { + Target = Inst.getOperand(NumOps - 1).getImm() != 0 + ? Inst.getOperand(NumOps - 2).getImm() + : Addr + 4; + return true; + } + switch (Info->get(Inst.getOpcode()).operands()[NumOps - 1].OperandType) { + default: + return false; + case MCOI::OPERAND_PCREL: + Target = Addr + Inst.getOperand(NumOps - 1).getImm() * 4 + 4; + return true; + } + } +}; +} // namespace + +static MCInstrAnalysis *createSw64MCInstrAnalysis(const MCInstrInfo *Info) { + return new Sw64MCInstrAnalysis(Info); +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64TargetMC() { + Target *T = &getTheSw64Target(); + + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createSw64MCAsmInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createSw64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createSw64MCRegisterInfo); + + // Register the elf streamer. + TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); + + // Register the asm target streamer. 
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createSw64AsmTargetStreamer); + + TargetRegistry::RegisterNullTargetStreamer(*T, createSw64NullTargetStreamer); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createSw64MCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(*T, createSw64MCInstrAnalysis); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createSw64MCInstPrinter); + + TargetRegistry::RegisterObjectTargetStreamer(*T, + createSw64ObjectTargetStreamer); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(*T, createSw64AsmBackend); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(*T, createSw64MCCodeEmitterEL); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h new file mode 100644 index 000000000000..4ab9d2fff507 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h @@ -0,0 +1,66 @@ +//===-- Sw64MCTargetDesc.h - Sw64 Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Sw64 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCTARGETDESC_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +#include + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectTargetWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class MCTargetOptions; +class StringRef; +class Target; +class Triple; +class raw_ostream; +class raw_pwrite_stream; + +Target &getTheSw64Target(); + +MCCodeEmitter *createSw64MCCodeEmitterEL(const MCInstrInfo &MCII, + MCContext &Ctx); + +MCAsmBackend *createSw64AsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options); + +std::unique_ptr +createSw64ELFObjectWriter(const Triple &TT, bool IsS32); + +namespace SW64_MC { +StringRef selectSw64CPU(const Triple &TT, StringRef CPU); +} + +} // namespace llvm + +// Defines symbolic names for Sw64 registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "Sw64GenRegisterInfo.inc" + +// Defines symbolic names for the Sw64 instructions. +#define GET_INSTRINFO_ENUM +#include "Sw64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "Sw64GenSubtargetInfo.inc" + +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp new file mode 100644 index 000000000000..07bddfbacfb7 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp @@ -0,0 +1,32 @@ +//===- Sw64OptionRecord.cpp - Abstraction for storing information ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
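// Editorial sketch, not part of the patch: the PC-relative case in
// Sw64MCInstrAnalysis::evaluateBranch above resolves a branch target as the
// address of the following instruction plus the word-scaled immediate. Shown
// standalone with an illustrative name.
#include <cstdint>
static uint64_t pcRelBranchTarget(uint64_t InstAddr, int64_t Imm) {
  return InstAddr + 4 + (uint64_t)(Imm * 4); // next PC plus 4 * displacement
}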
+// +//===----------------------------------------------------------------------===// + +#include "Sw64OptionRecord.h" +#include "Sw64ABIInfo.h" +#include "Sw64ELFStreamer.h" +#include "Sw64TargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSectionELF.h" +#include + +using namespace llvm; + +void Sw64RegInfoRecord::EmitSw64OptionRecord() { + + // We need to distinguish between S64 and the rest because at the moment + // we don't emit .Sw64.options for other ELFs other than S64. + // Since .reginfo has the same information as .Sw64.options (ODK_REGINFO), + // we can use the same abstraction (Sw64RegInfoRecord class) to handle both. +} + +void Sw64RegInfoRecord::SetPhysRegUsed(unsigned Reg, + const MCRegisterInfo *MCRegInfo) {} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp new file mode 100644 index 000000000000..19cdbc7d0c5a --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp @@ -0,0 +1,388 @@ +//===-- Sw64TargetStreamer.cpp - Sw64 Target Streamer Methods -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Sw64 specific target streamer methods. +// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetStreamer.h" +#include "InstPrinter/Sw64InstPrinter.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "Sw64ELFStreamer.h" +#include "Sw64MCExpr.h" +#include "Sw64MCTargetDesc.h" +#include "Sw64TargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +namespace llvm { +struct Sw64InstrTable { + MCInstrDesc Insts[4445]; + MCOperandInfo OperandInfo[3026]; + MCPhysReg ImplicitOps[130]; +}; +extern const Sw64InstrTable Sw64Descs; +} // end namespace llvm + +namespace { +static cl::opt RoundSectionSizes( + "sw_64-round-section-sizes", cl::init(false), + cl::desc("Round section sizes up to the section alignment"), cl::Hidden); +} // end anonymous namespace + +Sw64TargetStreamer::Sw64TargetStreamer(MCStreamer &S) + : MCTargetStreamer(S), ModuleDirectiveAllowed(true) { + GPRInfoSet = FPRInfoSet = FrameInfoSet = false; +} +void Sw64TargetStreamer::emitDirectiveSetReorder() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveSetNoReorder() {} +void Sw64TargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveSetNoAt() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveEnd(StringRef Name) {} +void 
Sw64TargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {} +void Sw64TargetStreamer::emitDirectiveNaN2008() {} +void Sw64TargetStreamer::emitDirectiveNaNLegacy() {} +void Sw64TargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) {} + +void Sw64TargetStreamer::emitDirectiveSetCore3b() {} +void Sw64TargetStreamer::emitDirectiveSetCore4() {} + +void Sw64TargetAsmStreamer::emitDirectiveSetCore3b() { + OS << "\t.arch= \t core3b\n"; + forbidModuleDirective(); +} +void Sw64TargetAsmStreamer::emitDirectiveSetCore4() { + OS << "\t.arch= \t core4\n"; + forbidModuleDirective(); +} + +void Sw64TargetStreamer::emitDirectiveSetArch(StringRef Arch) { + forbidModuleDirective(); +} + +void Sw64TargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {} + +Sw64TargetAsmStreamer::Sw64TargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS) + : Sw64TargetStreamer(S), OS(OS) {} + +void Sw64TargetAsmStreamer::emitDirectiveSetReorder() { + Sw64TargetStreamer::emitDirectiveSetReorder(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetNoReorder() { + forbidModuleDirective(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetMacro() { + Sw64TargetStreamer::emitDirectiveSetMacro(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetNoMacro() { + Sw64TargetStreamer::emitDirectiveSetNoMacro(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetAt() { + Sw64TargetStreamer::emitDirectiveSetAt(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetNoAt() { + Sw64TargetStreamer::emitDirectiveSetNoAt(); +} + +void Sw64TargetAsmStreamer::emitDirectiveEnd(StringRef Name) { + OS << "\t.end\t" << Name << '\n'; +} + +void Sw64TargetAsmStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { + OS << "\t.ent\t" << Symbol.getName() << '\n'; +} + +void Sw64TargetAsmStreamer::emitDirectiveNaN2008() { OS << "\t.nan\t2008\n"; } + +void Sw64TargetAsmStreamer::emitDirectiveNaNLegacy() { + OS << "\t.nan\tlegacy\n"; +} + +void Sw64TargetAsmStreamer::emitDirectiveInsn() { + Sw64TargetStreamer::emitDirectiveInsn(); + OS << "\t.insn\n"; +} + +void Sw64TargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) { + OS << "\t.frame\t$" + << StringRef(Sw64InstPrinter::getRegisterName(StackReg)).lower() << "," + << StackSize << ",$" + << StringRef(Sw64InstPrinter::getRegisterName(ReturnReg)).lower() << '\n'; +} + +void Sw64TargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { + OS << "\t.set arch=" << Arch << "\n"; + Sw64TargetStreamer::emitDirectiveSetArch(Arch); +} + +// This part is for ELF object output. +Sw64TargetELFStreamer::Sw64TargetELFStreamer(MCStreamer &S, + const MCSubtargetInfo &STI) + : Sw64TargetStreamer(S), STI(STI) { + MCAssembler &MCA = getStreamer().getAssembler(); + + // It's possible that MCObjectFileInfo isn't fully initialized at this point + // due to an initialization order problem where LLVMTargetMachine creates the + // target streamer before TargetLoweringObjectFile calls + // InitializeMCObjectFileInfo. There doesn't seem to be a single place that + // covers all cases so this statement covers most cases and direct object + // emission must call setPic() once MCObjectFileInfo has been initialized. The + // cases we don't handle here are covered by Sw64AsmPrinter. + Pic = MCA.getContext().getObjectFileInfo()->isPositionIndependent(); + + // Set the header flags that we can in the constructor. + // FIXME: This is a fairly terrible hack. 
We set the rest + // of these in the destructor. The problem here is two-fold: + // + // a: Some of the eflags can be set/reset by directives. + // b: There aren't any usage paths that initialize the ABI + // pointer until after we initialize either an assembler + // or the target machine. + // We can fix this by making the target streamer construct + // the ABI, but this is fraught with wide ranging dependency + // issues as well. + unsigned EFlags = MCA.getELFHeaderEFlags(); + + // FIXME: Fix a dependency issue by instantiating the ABI object to some + // default based off the triple. The triple doesn't describe the target + // fully, but any external user of the API that uses the MCTargetStreamer + // would otherwise crash on assertion failure. + + ABI = Sw64ABIInfo(Sw64ABIInfo::S64()); + + MCA.setELFHeaderEFlags(EFlags); +} + +void Sw64TargetELFStreamer::emitLabel(MCSymbol *S) { + auto *Symbol = cast(S); + getStreamer().getAssembler().registerSymbol(*Symbol); + uint8_t Type = Symbol->getType(); + if (Type != ELF::STT_FUNC) + return; +} + +void Sw64TargetELFStreamer::finish() { + MCAssembler &MCA = getStreamer().getAssembler(); + const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo(); + + // .bss, .text and .data are always at least 16-byte aligned. + MCSection &TextSection = *OFI.getTextSection(); + MCA.registerSection(TextSection); + MCSection &DataSection = *OFI.getDataSection(); + MCA.registerSection(DataSection); + MCSection &BSSSection = *OFI.getBSSSection(); + MCA.registerSection(BSSSection); + + TextSection.ensureMinAlignment(Align(16)); + DataSection.ensureMinAlignment(Align(16)); + BSSSection.ensureMinAlignment(Align(16)); + + if (RoundSectionSizes) { + // Make sections sizes a multiple of the alignment. This is useful for + // verifying the output of IAS against the output of other assemblers but + // it's not necessary to produce a correct object and increases section + // size. + MCStreamer &OS = getStreamer(); + for (MCSection &S : MCA) { + MCSectionELF &Section = static_cast(S); + + Align Alignment = Section.getAlign(); + OS.switchSection(&Section); + if (Section.useCodeAlign()) + OS.emitCodeAlignment(Alignment, &STI, Alignment.value()); + else + OS.emitValueToAlignment(Alignment, 0, 1, Alignment.value()); + } + } + + // Update e_header flags. See the FIXME and comment above in + // the constructor for a full rundown on this. + unsigned EFlags = MCA.getELFHeaderEFlags(); + + if (Pic) + EFlags |= ELF::EF_SW64_PIC | ELF::EF_SW64_CPIC; + + MCA.setELFHeaderEFlags(EFlags); + + // Emit all the option records. + // At the moment we are only emitting .Sw64.options (ODK_REGINFO) and + // .reginfo. 
+ Sw64ELFStreamer &MEF = static_cast(Streamer); + MEF.EmitSw64OptionRecords(); +} + +MCELFStreamer &Sw64TargetELFStreamer::getStreamer() { + return static_cast(Streamer); +} + +void Sw64TargetELFStreamer::emitDirectiveSetNoReorder() { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags |= ELF::EF_SW64_NOREORDER; + MCA.setELFHeaderEFlags(Flags); + forbidModuleDirective(); +} + +void Sw64TargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { + GPRInfoSet = FPRInfoSet = FrameInfoSet = false; + + // .ent also acts like an implicit '.type symbol, STT_FUNC' + static_cast(Symbol).setType(ELF::STT_FUNC); +} + +void Sw64TargetELFStreamer::emitDirectiveNaN2008() { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags |= ELF::EF_SW64_NAN2008; + MCA.setELFHeaderEFlags(Flags); +} + +void Sw64TargetELFStreamer::emitDirectiveNaNLegacy() { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags &= ~ELF::EF_SW64_NAN2008; + MCA.setELFHeaderEFlags(Flags); +} + +void Sw64TargetELFStreamer::emitDirectiveInsn() { + Sw64TargetStreamer::emitDirectiveInsn(); + Sw64ELFStreamer &MEF = static_cast(Streamer); + MEF.createPendingLabelRelocs(); +} + +void Sw64TargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg_) { + MCContext &Context = getStreamer().getAssembler().getContext(); + const MCRegisterInfo *RegInfo = Context.getRegisterInfo(); + + FrameInfoSet = true; + FrameReg = RegInfo->getEncodingValue(StackReg); + FrameOffset = StackSize; + ReturnReg = RegInfo->getEncodingValue(ReturnReg_); +} + +static const char *getRelType(const MCExpr *Expr, const MCSubtargetInfo &STI) { + const Sw64MCExpr *Sw64Expr = cast(Expr); + static int curgpdist = 0; + switch (Sw64Expr->getKind()) { + default: + return ""; + case Sw64MCExpr::MEK_GPDISP_HI16: + case Sw64MCExpr::MEK_GPDISP_LO16: + case Sw64MCExpr::MEK_GPDISP: { + std::string a = + std::string("!gpdisp!") + std::to_string((curgpdist) / 2 + 1); + curgpdist++; + return strdup(a.c_str()); + } + case Sw64MCExpr::MEK_ELF_LITERAL: + return "!literal"; + case Sw64MCExpr::MEK_LITUSE_ADDR: /* !lituse_addr relocation. */ + return "!lituse_addr"; + case Sw64MCExpr::MEK_LITUSE_BASE: /* !lituse_base relocation. */ + return "!literal"; + case Sw64MCExpr::MEK_LITUSE_BYTOFF: /* !lituse_bytoff relocation. */ + return "!lituse_bytoff"; + case Sw64MCExpr::MEK_LITUSE_JSR: /* !lituse_jsr relocation. */ + return "!lituse_jsr"; + case Sw64MCExpr::MEK_LITUSE_TLSGD: /* !lituse_tlsgd relocation. */ + return "!lituse_tlsgd"; + case Sw64MCExpr::MEK_LITUSE_TLSLDM: /* !lituse_tlsldm relocation. */ + return "!lituse_tlsldm"; + // case Sw64MCExpr::MEK_LITUSE_JSRDIRECT: /* !lituse_jsrdirect relocation. + // */ + // return "!lituse_jsrdirect"; + case Sw64MCExpr::MEK_GPREL_HI16: /* !gprelhigh relocation. */ + return "!gprelhigh"; + case Sw64MCExpr::MEK_GPREL_LO16: /* !gprellow relocation. */ + return "!gprellow"; + case Sw64MCExpr::MEK_GPREL16: /* !gprel relocation. */ + return "!gprel"; + case Sw64MCExpr::MEK_BRSGP: /* !samegp relocation. */ + return "!samegp"; + case Sw64MCExpr::MEK_TLSGD: /* !tlsgd relocation. */ + return "!tlsgd"; + case Sw64MCExpr::MEK_TLSLDM: /* !tlsldm relocation. */ + return "!tlsldm"; + case Sw64MCExpr::MEK_GOTDTPREL16: /* !gotdtprel relocation. */ + return "!gotdtprel"; + case Sw64MCExpr::MEK_DTPREL_HI16: /* !dtprelhi relocation. 
*/ + return "!dtprelhi"; + case Sw64MCExpr::MEK_DTPREL_LO16: /* !dtprello relocation. */ + return "!dtprello"; + case Sw64MCExpr::MEK_DTPREL16: /* !dtprel relocation. */ + return "!dtprel"; + case Sw64MCExpr::MEK_GOTTPREL16: /* !gottprel relocation. */ + return "!gottprel"; + case Sw64MCExpr::MEK_TPREL_HI16: /* !tprelhi relocation. */ + return "!tprelhi"; + case Sw64MCExpr::MEK_TPREL_LO16: /* !tprello relocation. */ + return "!tprello"; + case Sw64MCExpr::MEK_TPREL16: /* !tprel relocation. */ + return "!tprel"; + case Sw64MCExpr::MEK_ELF_LITERAL_GOT: /* !literal_got relocation. */ + return "!literal_got"; + } +} + +static void printRelocInst(MCInstPrinter &InstPrinter, const MCInst &Inst, + raw_ostream &OS, const MCSubtargetInfo &STI, + uint64_t Address) { + MCOperand Op = Inst.getOperand(1); + if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::Target) { + const char *RelName = getRelType(Expr, STI); + InstPrinter.printInst(&Inst, Address, RelName, STI, OS); + return; + } + } + InstPrinter.printInst(&Inst, Address, "", STI, OS); +} + +void Sw64TargetStreamer::prettyPrintAsm(MCInstPrinter &InstPrinter, + uint64_t Address, const MCInst &Inst, + const MCSubtargetInfo &STI, + raw_ostream &OS) { + const MCInstrDesc &MCID = + Sw64Descs.Insts[Sw64::INSTRUCTION_LIST_END - 1 - Inst.getOpcode()]; + // while moving mayload flags for ldi/ldih + // adding opcode determine here + if (MCID.mayLoad() || MCID.mayStore() || Inst.getOpcode() == Sw64::LDAH || + Inst.getOpcode() == Sw64::LDA) { + printRelocInst(InstPrinter, Inst, OS, STI, Address); + return; + } + InstPrinter.printInst(&Inst, Address, "", STI, OS); +} diff --git a/llvm/lib/Target/Sw64/README.txt b/llvm/lib/Target/Sw64/README.txt new file mode 100644 index 000000000000..b69205b49b6c --- /dev/null +++ b/llvm/lib/Target/Sw64/README.txt @@ -0,0 +1,7 @@ +To-do +----- + +* Instruction encodings +* Tailcalls +* Investigate loop alignment +* Add builtins diff --git a/llvm/lib/Target/Sw64/Sw64.h b/llvm/lib/Target/Sw64/Sw64.h new file mode 100644 index 000000000000..1d2d3f05bb4f --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64.h @@ -0,0 +1,56 @@ +//===-- Sw64.h - Top-level interface for Sw64 representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Sw64 back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64_H +#define LLVM_LIB_TARGET_SW64_SW64_H + +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +namespace Sw64 { +// These describe LDAx +static const int IMM_LOW = -32768; +static const int IMM_HIGH = 32767; +static const int IMM_MULT = 65536; +} // namespace Sw64 + +class FunctionPass; +class ModulePass; +class TargetMachine; +class Sw64TargetMachine; +class formatted_raw_ostream; + +FunctionPass *createSw64ISelDag(Sw64TargetMachine &TM, + CodeGenOpt::Level OptLevel); + +FunctionPass *createSw64LLRPPass(Sw64TargetMachine &tm); +FunctionPass *createSw64BranchSelectionPass(); +FunctionPass *createSw64BranchSelection(); +FunctionPass *createSw64PreLegalizeCombiner(); // for fmad +FunctionPass *createSw64ExpandPseudoPass(); +FunctionPass *createSw64ExpandPseudo2Pass(); +FunctionPass *createSw64CombineLSPass(); +FunctionPass *createSw64IEEEConstraintPass(); + +bool LowerSw64MachineOperandToMCOperand(const MachineOperand &MO, + MCOperand &MCOp, const AsmPrinter &AP); + +void initializeSw64BranchSelectionPass(PassRegistry &); +void initializeSw64PreLegalizerCombinerPass(PassRegistry &); // for fmad +void initializeSw64DAGToDAGISelPass(PassRegistry &); +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/Sw64.td b/llvm/lib/Target/Sw64/Sw64.td new file mode 100644 index 000000000000..fba48fc8115f --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64.td @@ -0,0 +1,154 @@ +//===- Sw64.td - Describe the Sw64 Target Machine --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +// Get the target-independent interfaces which we are implementing... 
+
+include "llvm/Target/Target.td"
+
+// Sw64 is little endian.
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
+                                  "Enable CIX extensions">;
+
+// argument, type, value, help text
+
+def Featurecore3b : SubtargetFeature<"core3b", "Sw64ArchVersion", "core3b",
+                                     "Enable core3b Feature">;
+
+def Featurecore4 : SubtargetFeature<"core4", "Sw64ArchVersion", "core4",
+                                    "Enable core4 Feature">;
+
+def FeatureRelax : SubtargetFeature<"relax", "relax", "true",
+                                    "Enable relax ld attribute">;
+
+def FeatureEv : SubtargetFeature<"swEv", "Ev", "true",
+                                 "Enable Sw6a Feature test">;
+foreach i = {1-14, 22-25} in
+  def FeatureReserve#i : SubtargetFeature<"reserve-r"#i, "ReserveRegister["#i#"]", "true",
+                                          "Reserve "#i#", making it unavailable "
+                                          "as a GPR">;
+
+def FeatureOptMul : SubtargetFeature<"swOptMul", "Sw64OptMul", "true",
+                                     "Enable Sw6b optimize mul">;
+
+def Featureintarith : SubtargetFeature<"swIntArith", "Sw64EnableIntAri", "true",
+                                       "Enable core4 integer arithmetic instructions">;
+def Featureintshift : SubtargetFeature<"swIntShift", "Sw64EnableIntShift", "true",
+                                       "Enable core4 integer shift instructions">;
+def Featurebyteinst : SubtargetFeature<"swByteInst", "Sw64EnableByteInst", "true",
+                                       "Enable core4 byte manipulation instructions">;
+def Featurefloatarith : SubtargetFeature<"swFloatArith", "Sw64EnableFloatAri", "true",
+                                         "Enable core4 float arithmetic instructions">;
+def Featurefloatround : SubtargetFeature<"swFloatRound", "Sw64EnableFloatRound", "true",
+                                         "Enable core4 float round instructions">;
+def Featurepostinc : SubtargetFeature<"swPostInc", "Sw64EnablePostInc", "true",
+                                      "Enable core4 post-inc load and store instructions">;
+def Featurecrcinst : SubtargetFeature<"swCrcInst", "Sw64EnableCrcInst", "true",
+                                      "Enable core4 crc32 instructions">;
+
+def FeatureSIMD : SubtargetFeature<"simd", "HasSIMD", "true",
+                                   "Sw64 SIMD Instruction">;
+
+//***********************
+// Subtarget Support test
+//***********************
+def HasMieee : Predicate<"MF->getSubtarget<Sw64Subtarget>().hasMieee()">,
+               AssemblerPredicate<(all_of FeatureCIX)>;
+
+def HasCore3b : Predicate<"Subtarget->hasCore3b()">,
+                AssemblerPredicate<(all_of Featurecore3b)>;
+
+def HasCore4 : Predicate<"Subtarget->hasCore4()">,
+               AssemblerPredicate<(all_of Featurecore4)>;
+
+def enRelax : Predicate<"Subtarget->enRelax()">,
+              AssemblerPredicate<(all_of FeatureRelax)>;
+
+def HasEv : Predicate<"Subtarget->hasEv()">,
+            AssemblerPredicate<(all_of FeatureEv)>;
+
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "Sw64RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "Sw64CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Base Schedule Description
+//===----------------------------------------------------------------------===//
+
+include "Sw64Schedule.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include
"Sw64InstrInfo.td" + + +//===----------------------------------------------------------------------===// +// MicroArchitechural Schedule Descriptions +//===----------------------------------------------------------------------===// + +include "Sw64SchedCore3.td" +include "Sw64SchedCore4.td" +include "Sw64SchedCore3SIMD.td" + +def Sw64InstrInfo : InstrInfo { +} + +//===----------------------------------------------------------------------===// +// Sw64 Processor Definitions +//===----------------------------------------------------------------------===// + +//*********************** +// Sw processor test +//*********************** + +class Proc Features> + : ProcessorModel; + +def : Proc<"sw_64", []>; +def : Proc<"sw6a", [Featurecore3b]>; +def : Proc<"sw6b", [Featurecore3b]>; +def : Proc<"sw4d", [Featurecore3b]>; +def : Proc<"sw8a", [Featurecore3b, Featurecore4]>; + +//===----------------------------------------------------------------------===// +// The Sw64 Target +//===----------------------------------------------------------------------===// +def Sw64AsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +def Sw64AsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +def Sw64 : Target { + // Pull in Instruction Info: + let InstructionSet = Sw64InstrInfo; + let AssemblyWriters = [Sw64AsmWriter]; + let AssemblyParsers = [Sw64AsmParser]; +} diff --git a/llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp b/llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp new file mode 100644 index 000000000000..36168e986db4 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp @@ -0,0 +1,308 @@ +//===-- Sw64AsmPrinter.cpp - Sw64 LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the XAS-format Sw64 assembly language. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstPrinter/Sw64InstPrinter.h"
+#include "MCTargetDesc/Sw64BaseInfo.h"
+#include "Sw64.h"
+#include "Sw64InstrInfo.h"
+#include "Sw64MCInstLower.h"
+#include "Sw64Subtarget.h"
+#include "Sw64TargetMachine.h"
+#include "Sw64TargetStreamer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include
+#include
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+class Sw64AsmPrinter : public AsmPrinter {
+  Sw64MCInstLower MCInstLowering;
+  Sw64TargetStreamer &getTargetStreamer();
+  /// InConstantPool - Maintain state when emitting a sequence of constant
+  /// pool entries so we can properly mark them as data regions.
+  bool InConstantPool = false;
+
+public:
+  explicit Sw64AsmPrinter(TargetMachine &TM,
+                          std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {}
+
+  StringRef getPassName() const override { return "Sw64 Assembly Printer"; }
+
+  void printOp(const MachineOperand &MO, raw_ostream &O);
+  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &O) override;
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+                             const char *ExtraCode, raw_ostream &O) override;
+
+  void emitFunctionEntryLabel() override;
+  void emitInstruction(const MachineInstr *MI) override;
+  void emitFunctionBodyStart() override;
+  void emitFunctionBodyEnd() override;
+  void emitStartOfAsmFile(Module &M) override;
+  bool isBlockOnlyReachableByFallthrough(
+      const MachineBasicBlock *MBB) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+                                   const MachineInstr *MI);
+  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
+    return LowerSw64MachineOperandToMCOperand(MO, MCOp, *this);
+  }
+};
+} // end of anonymous namespace
+
+bool Sw64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+
+  // Initialize TargetLoweringObjectFile.
+  AsmPrinter::runOnMachineFunction(MF);
+  return true;
+}
+
+bool Sw64AsmPrinter::isBlockOnlyReachableByFallthrough(
+    const MachineBasicBlock *MBB) const {
+  // The predecessor has to be immediately before this block.
+  const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+  // If the predecessor is a switch statement, assume a jump table
+  // implementation, so it is not a fall through.
+  if (const BasicBlock *bb = Pred->getBasicBlock())
+    if (isa<SwitchInst>(bb->getTerminator()))
+      return false;
+
+  // If this is a landing pad, it isn't a fall through.
If it has no preds, + // then nothing falls through to it. + if (MBB->isEHPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Otherwise, check the last instruction. + // Check if the last terminator is an unconditional branch. + MachineBasicBlock::const_iterator I = Pred->end(); + while (I != Pred->begin() && !(--I)->isTerminator()) + ; + return false; + // return !I->isBarrier(); + // ; +} + +Sw64TargetStreamer &Sw64AsmPrinter::getTargetStreamer() { + return static_cast(*OutStreamer->getTargetStreamer()); +} + +//===----------------------------------------------------------------------===// +// Frame and Set directives +//===----------------------------------------------------------------------===// +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void Sw64AsmPrinter::emitFunctionBodyStart() { + MCInstLowering.Initialize(&MF->getContext()); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void Sw64AsmPrinter::emitFunctionBodyEnd() { + // Emit function end directives + Sw64TargetStreamer &TS = getTargetStreamer(); + + // There are instruction for this macros, but they must + // always be at the function end, and we can't emit and + // break with BB logic. + TS.emitDirectiveSetAt(); + TS.emitDirectiveSetMacro(); + TS.emitDirectiveSetReorder(); + + TS.emitDirectiveEnd(CurrentFnSym->getName()); + // Make sure to terminate any constant pools that were at the end + // of the function. 
+ if (!InConstantPool) + return; + InConstantPool = false; + OutStreamer->emitDataRegion(MCDR_DataRegionEnd); +} + +void Sw64AsmPrinter::emitFunctionEntryLabel() { + Sw64TargetStreamer &TS = getTargetStreamer(); + + TS.emitDirectiveEnt(*CurrentFnSym); + OutStreamer->emitLabel(CurrentFnSym); +} + +void Sw64AsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + + if (MO.isReg()) { + assert(Register::isPhysicalRegister(MO.getReg()) && "Not physreg??"); + O << Sw64InstPrinter::getRegisterName(MO.getReg()); + } else if (MO.isImm()) { + O << MO.getImm(); + } else { + printOp(MO, O); + } +} +void Sw64AsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << Sw64InstPrinter::getRegisterName(MO.getReg()); + return; + + case MachineOperand::MO_Immediate: + assert(0 && "printOp() does not handle immediate values"); + return; + + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(O, MAI); + return; + + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + << MO.getIndex(); + return; + + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + return; + + case MachineOperand::MO_GlobalAddress: + getSymbol(MO.getGlobal())->print(O, MAI); + return; + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' + << MO.getIndex(); + return; + + default: + O << ""; + return; + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +bool Sw64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + // Print the operand if there is no operand modifier. + if (!ExtraCode || !ExtraCode[0]) { + printOperand(MI, OpNo, O); + return false; + } + if (ExtraCode && ExtraCode[0]) + if (ExtraCode[1] != 0) + return true; + + switch (ExtraCode[0]) { + default: + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); + case 'r': + printOperand(MI, OpNo, O); + return false; + } + // Otherwise fallback on the default implementation. + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); +} + +void Sw64AsmPrinter::emitStartOfAsmFile(Module &M) { + if (OutStreamer->hasRawTextSupport()) { + OutStreamer->emitRawText(StringRef("\t.set noreorder")); + OutStreamer->emitRawText(StringRef("\t.set volatile")); + OutStreamer->emitRawText(StringRef("\t.set noat")); + OutStreamer->emitRawText(StringRef("\t.set nomacro")); + } +} + +bool Sw64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + const char *ExtraCode, + raw_ostream &O) { + assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); + + const MachineOperand &BaseMO = MI->getOperand(OpNum); + + assert(BaseMO.isReg() && + "Unexpected base pointer for inline asm memory operand."); + + if (ExtraCode && ExtraCode[0]) { + return true; // Unknown modifier. 
+  }
+
+  O << "0(" << Sw64InstPrinter::getRegisterName(BaseMO.getReg()) << ")";
+
+  return false;
+}
+
+#include "Sw64GenMCPseudoLowering.inc"
+
+void Sw64AsmPrinter::emitInstruction(const MachineInstr *MI) {
+  if (MI->isDebugValue())
+    return;
+  SmallString<128> Str;
+  raw_svector_ostream O(Str);
+
+  if (emitPseudoExpansionLowering(*OutStreamer, MI))
+    return;
+
+  if (MI->getOpcode() == Sw64::STQ_C || MI->getOpcode() == Sw64::STL_C)
+    OutStreamer->emitCodeAlignment(Align(8), &getSubtargetInfo());
+
+  MCInst TmpInst;
+  MCInstLowering.Lower(MI, TmpInst);
+
+  EmitToStreamer(*OutStreamer, TmpInst);
+}
+
+// Force static initialization.
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64AsmPrinter() {
+  RegisterAsmPrinter<Sw64AsmPrinter> X(getTheSw64Target());
+}
diff --git a/llvm/lib/Target/Sw64/Sw64BranchSelector.cpp b/llvm/lib/Target/Sw64/Sw64BranchSelector.cpp
new file mode 100644
index 000000000000..cd1c3c4c3ce8
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64BranchSelector.cpp
@@ -0,0 +1,81 @@
+//===-- Sw64BranchSelector.cpp - Convert Pseudo branches ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace Pseudo COND_BRANCH_* with their appropriate real branch
+// Simplified version of the PPC Branch Selector
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sw64.h"
+#include "Sw64InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include
+#include
+#include
+#include
+#include
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sw_64-branch-expansion"
+
+namespace {
+class Sw64BranchSelection : public MachineFunctionPass {
+public:
+  static char ID;
+
+  Sw64BranchSelection() : MachineFunctionPass(ID) {
+    initializeSw64BranchSelectionPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Sw64 Branch Expansion Pass";
+  }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+};
+} // end of anonymous namespace
+
+char Sw64BranchSelection::ID = 0;
+
+INITIALIZE_PASS(Sw64BranchSelection, DEBUG_TYPE,
+                "Expand out of range branch instructions and fix forbidden"
+                " slot hazards",
+                false, false)
+
+/// Returns a pass that expands out-of-range branch instructions.
+FunctionPass *llvm::createSw64BranchSelection() {
+  return new Sw64BranchSelection();
+}
+
+bool Sw64BranchSelection::runOnMachineFunction(MachineFunction &F) {
+
+  return true;
+}
diff --git a/llvm/lib/Target/Sw64/Sw64CallingConv.td b/llvm/lib/Target/Sw64/Sw64CallingConv.td
new file mode 100644
index 000000000000..7b0275c8c9fb
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64CallingConv.td
@@ -0,0 +1,72 @@
+//===- Sw64CallingConv.td - Calling Conventions for Sw64 -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the Sw64 architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+    : CCIf<!strconcat("static_cast<const Sw64Subtarget&>"
+                      "(State.getMachineFunction().getSubtarget()).",
+                      F), A>;
+
+//===----------------------------------------------------------------------===//
+// Sw64 Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_Sw64 : CallingConv<[
+  // i64 is returned in register R0
+  // R1 is an llvm extension, I don't know what gcc does
+  CCIfType<[i64], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+  // f32 / f64 are returned in F0/F1
+  CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>,
+
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f32, v4f64],
+           CCAssignToReg<[V0, V1]>>,
+
+  CCIfSubtarget<"hasSIMD()",
+                CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f32, v4f64],
+                         CCAssignToReg<[F0, F1]>>>
+]>;
+
+// In soft-mode, register R16+R17, instead of R0+R1, is used to return a long
+// double value.
+def RetCC_F128Soft_Sw64 : CallingConv<[
+  CCIfType<[i64], CCAssignToReg<[R16, R17]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// Sw64 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_Sw64 : CallingConv<[
+  // The first 6 arguments are passed in registers, whether integer or
+  // floating-point
+
+  CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21],
+                                          [F16, F17, F18, F19, F20, F21]>>,
+
+  CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21],
+                                               [R16, R17, R18, R19, R20, R21]>>,
+
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f64, v4f32],
+           CCAssignToRegWithShadow<[V16, V17, V18, V19, V20, V21],
+                                   [R16, R17, R18, R19, R20, R21]>>,
+
+  // Stack slots are 8 bytes in size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>, + + CCIfSubtarget<"hasSIMD()", + CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f32, v4f64], + CCAssignToReg<[F16, F17, F18, F19, F20, F21]>>> +]>; + +// CalleeSavedRegs +def CSR_I64 : CalleeSavedRegs<(add (sequence "R%u", 9, 14), R15, R26)>; + +def CSR_F64 : CalleeSavedRegs<(add CSR_I64, (sequence "F%u", 2, 9))>; diff --git a/llvm/lib/Target/Sw64/Sw64CombineLS.cpp b/llvm/lib/Target/Sw64/Sw64CombineLS.cpp new file mode 100644 index 000000000000..fbf63b69f7ab --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64CombineLS.cpp @@ -0,0 +1,63 @@ +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64FrameLowering.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "sw_64-combineLS" + +using namespace llvm; + +namespace llvm { + +struct Sw64CombineLS : public MachineFunctionPass { + /// Target machine description which we query for reg. names, data + /// layout, etc. + static char ID; + Sw64CombineLS() : MachineFunctionPass(ID) {} + + StringRef getPassName() const { return "Sw64 Combine Load Store insn"; } + + bool runOnMachineFunction(MachineFunction &F) { + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + NMBBI++; + for (; NMBBI != MBB.end(); MBBI++, NMBBI++) { + + MachineInstr &MI = *MBBI, &NMI = *NMBBI; + DebugLoc DL = MI.getDebugLoc(); + const MCInstrDesc &MCID = NMI.getDesc(); + + if (MI.getOpcode() == Sw64::LDA && + (MCID.mayLoad() || MCID.mayStore())) { + LLVM_DEBUG(dbgs() << "combining Load/Store instr\n"; MI.dump(); + dbgs() << "\n"; NMI.dump(); dbgs() << "\n"); + + if (MI.getOperand(0).getReg() == NMI.getOperand(2).getReg() && + NMI.getOperand(2).getReg() != Sw64::R30) { + BuildMI(MBB, MBBI, DL, MCID) + .add(NMI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(0)); + NMI.eraseFromParent(); + MI.eraseFromParent(); + } + } + } + } + return true; + } +}; +char Sw64CombineLS::ID = 0; +} // end namespace llvm + +FunctionPass *llvm::createSw64CombineLSPass() { return new Sw64CombineLS(); } diff --git a/llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp b/llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp new file mode 100644 index 000000000000..42a71f72e4a9 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp @@ -0,0 +1,1141 @@ +//===-- Sw64ExpandPseudoInsts.cpp - Expand pseudo instructions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling, if-conversion, and other late +// optimizations. This pass should be run after register allocation but before +// the post-regalloc scheduling pass. +// +// This is currently only used for expanding atomic pseudos after register +// allocation. We do this to avoid the fast register allocator introducing +// spills between ll and sc. These stores cause some other implementations to +// abort the atomic RMW sequence. 
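+//
+// Roughly, a 32-bit atomic read-modify-write is expected to expand into an
+// LL/SC retry loop of the following shape (pre-core4 subtargets additionally
+// bracket the store with wr_f/rd_f):
+//
+//   loop:  memb
+//          lldw    old, 0(ptr)        # load-locked
+//          <binop> old, incr, new
+//          lstw    new, 0(ptr)        # store-conditional, new = 0 on failure
+//          beq     new, loop          # retry until the store succeeds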
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64InstrInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-pseudo" +namespace llvm { +extern const MCInstrDesc Sw64Insts[]; +} + +namespace { +class Sw64ExpandPseudo : public MachineFunctionPass { +public: + static char ID; + Sw64ExpandPseudo() : MachineFunctionPass(ID) {} + + const Sw64InstrInfo *TII; + const Sw64Subtarget *STI; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "Sw64 pseudo instruction expansion pass"; + } + +private: + bool expandAtomicCmpSwap(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned Size); + bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandAtomicBinOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, unsigned Size); + bool expandAtomicBinOpSubword(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandCurGpdisp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + bool expandLoadAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLoadCPAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLdihInstPair(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned FlagsHi, unsigned SecondOpcode, + unsigned FlagsLo = Sw64II::MO_GPREL_LO, + unsigned srcReg = Sw64::R29); + + bool expandLoadGotAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB); + + bool expandMBB(MachineBasicBlock &MBB); + bool expandIntReduceSum(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandFPReduceSum(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); +}; +char Sw64ExpandPseudo::ID = 0; +} // namespace + +bool Sw64ExpandPseudo::expandAtomicCmpSwapSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + DebugLoc DL = I->getDebugLoc(); + + unsigned LL, SC, BEQ; + unsigned BIC, BIS; + unsigned EXTL, INSL, MASKL; + unsigned mask; + BIS = Sw64::BISr; + BIC = Sw64::BICi; + BEQ = Sw64::BEQ; + LL = Sw64 ::LDQ_L; + SC = Sw64::STQ_C; + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register OldVal = I->getOperand(2).getReg(); + Register NewVal = I->getOperand(3).getReg(); + // add + Register Reg_bic = I->getOperand(4).getReg(); + Register Reg_ins = I->getOperand(5).getReg(); + Register LockVal = I->getOperand(6).getReg(); + Register Reg_cmp = I->getOperand(7).getReg(); + 
Register Reg_mas = I->getOperand(8).getReg(); + switch (I->getOpcode()) { + case Sw64::ATOMIC_CMP_SWAP_I8_POSTRA: + mask = 1; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_CMP_SWAP_I16_POSTRA: + mask = 3; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + default: + llvm_unreachable("Unknown pseudo atomic!"); + } + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(exitMBB); + loopMBB->normalizeSuccProbs(); + + // memb + BuildMI(loopMBB, DL, TII->get(Sw64::MB)); + + // bic + BuildMI(loopMBB, DL, TII->get(BIC), Reg_bic).addReg(Ptr).addImm(7); + + // inslh + BuildMI(loopMBB, DL, TII->get(INSL), Reg_ins).addReg(NewVal).addReg(Ptr); + + // lldl + BuildMI(loopMBB, DL, TII->get(LL), LockVal).addImm(0).addReg(Reg_bic); + + // extlh + BuildMI(loopMBB, DL, TII->get(EXTL), Dest).addReg(LockVal).addReg(Ptr); + + // cmpeq + // zapnot + BuildMI(loopMBB, DL, TII->get(Sw64::ZAPNOTi), OldVal) + .addReg(OldVal) + .addImm(mask); + BuildMI(loopMBB, DL, TII->get(Sw64::ZAPNOTi), Dest).addReg(Dest).addImm(mask); + BuildMI(loopMBB, DL, TII->get(Sw64::CMPEQr), Reg_cmp) + .addReg(OldVal) + .addReg(Dest); + + if (STI->hasCore4()) + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); + else + // wr_f + BuildMI(loopMBB, DL, TII->get(Sw64::WR_F)).addReg(Reg_cmp); + + // masklh + BuildMI(loopMBB, DL, TII->get(MASKL), Reg_mas).addReg(LockVal).addReg(Ptr); + + // bis + BuildMI(loopMBB, DL, TII->get(BIS), Reg_ins).addReg(Reg_mas).addReg(Reg_ins); + + // lstw + BuildMI(loopMBB, DL, TII->get(SC)).addReg(Reg_ins).addImm(0).addReg(Reg_bic); + + if (!STI->hasCore4()) + // rd_f + BuildMI(loopMBB, DL, TII->get(Sw64::RD_F)).addReg(Reg_ins); + + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); + + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Reg_ins).addMBB(loopMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); // The instruction is gone now. 
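+  // The loop and exit blocks were created after register allocation, so
+  // their live-in lists have to be recomputed by hand below.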
+ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loopMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + return true; +} + +bool Sw64ExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, + unsigned Size) { + MachineFunction *MF = BB.getParent(); + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC; + unsigned BEQ = Sw64::BEQ; + + if (Size == 4) { + LL = Sw64 ::LDL_L; + SC = Sw64::STL_C; + } else { + LL = Sw64::LDQ_L; + SC = Sw64::STQ_C; + } + + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register OldVal = I->getOperand(2).getReg(); + Register NewVal = I->getOperand(3).getReg(); + Register Scratch = I->getOperand(4).getReg(); + // add + Register Reg_cmp = I->getOperand(5).getReg(); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loop1MBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), &BB, + std::next(MachineBasicBlock::iterator(I)), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB.addSuccessor(loop1MBB, BranchProbability::getOne()); + + loop1MBB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(exitMBB); + loop1MBB->normalizeSuccProbs(); + + // memb + BuildMI(loop1MBB, DL, TII->get(Sw64::MB)); + + // ldi + BuildMI(loop1MBB, DL, TII->get(Sw64::LDA), Ptr).addImm(0).addReg(Ptr); + + // lldw + BuildMI(loop1MBB, DL, TII->get(LL), Dest).addImm(0).addReg(Ptr); + + // zapnot + if (Size == 4) { + BuildMI(loop1MBB, DL, TII->get(Sw64::ZAPNOTi), OldVal) + .addReg(OldVal) + .addImm(15); + BuildMI(loop1MBB, DL, TII->get(Sw64::ZAPNOTi), Dest) + .addReg(Dest) + .addImm(15); + } + + // cmpeq + BuildMI(loop1MBB, DL, TII->get(Sw64::CMPEQr)) + .addReg(Reg_cmp) + .addReg(OldVal) + .addReg(Dest); + + if (STI->hasCore4()) + // beq + BuildMI(loop1MBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); + else + // wr_f + BuildMI(loop1MBB, DL, TII->get(Sw64::WR_F)).addReg(Reg_cmp); + + // mov + BuildMI(loop1MBB, DL, TII->get(Sw64::BISr), Scratch) + .addReg(NewVal) + .addReg(NewVal); + + // lstw + BuildMI(loop1MBB, DL, TII->get(SC)).addReg(Scratch).addImm(0).addReg(Ptr); + + if (!STI->hasCore4()) + // rd_f + BuildMI(loop1MBB, DL, TII->get(Sw64::RD_F)).addReg(Scratch); + + // beq + BuildMI(loop1MBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); + + BuildMI(loop1MBB, DL, TII->get(BEQ)).addReg(Scratch).addMBB(loop1MBB); + + NMBBI = BB.end(); + I->eraseFromParent(); // The instruction is gone now. 
+ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + return true; +} + +bool Sw64ExpandPseudo::expandAtomicBinOpSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC, ZERO, BEQ; + unsigned EXTL, INSL, MASKL; + + unsigned WR_F, RD_F, LDA, BIS, BIC; + WR_F = Sw64::WR_F; + RD_F = Sw64::RD_F; + LDA = Sw64::LDA; + BIS = Sw64::BISr; + BIC = Sw64::BICi; + LL = Sw64::LDQ_L; + SC = Sw64::STQ_C; + ZERO = Sw64::R31; + BEQ = Sw64::BEQ; + + Register OldVal = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Incr = I->getOperand(2).getReg(); + Register StoreVal = I->getOperand(3).getReg(); + // add + Register LockVal = I->getOperand(4).getReg(); + Register Reg_bic = I->getOperand(5).getReg(); + Register cmpres = I->getOperand(6).getReg(); + + unsigned Opcode = 0; + switch (I->getOpcode()) { + case Sw64::ATOMIC_LOAD_ADD_I8_POSTRA: + Opcode = Sw64::ADDLr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_SUB_I8_POSTRA: + Opcode = Sw64::SUBLr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_AND_I8_POSTRA: + Opcode = Sw64::ANDr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_OR_I8_POSTRA: + Opcode = Sw64::BISr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_XOR_I8_POSTRA: + Opcode = Sw64::XORr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_SWAP_I8_POSTRA: + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_ADD_I16_POSTRA: + Opcode = Sw64::ADDQr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_SUB_I16_POSTRA: + Opcode = Sw64::SUBQr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_AND_I16_POSTRA: + Opcode = Sw64::ANDr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_OR_I16_POSTRA: + Opcode = Sw64::BISr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_XOR_I16_POSTRA: + Opcode = Sw64::XORr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_SWAP_I16_POSTRA: + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I8_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I8_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I8_POSTRA: + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I16_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I16_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I16_POSTRA: + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + default: + llvm_unreachable("Unknown pseudo atomic!"); + } + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It 
= ++BB.getIterator(); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(exitMBB); + loopMBB->normalizeSuccProbs(); + + // memb + BuildMI(loopMBB, DL, TII->get(Sw64::MB)); + + // bic + BuildMI(loopMBB, DL, TII->get(BIC), Reg_bic).addReg(Ptr).addImm(7); + + // lldl + BuildMI(loopMBB, DL, TII->get(LL), LockVal).addImm(0).addReg(Reg_bic); + + // ldi + BuildMI(loopMBB, DL, TII->get(LDA), StoreVal).addImm(1).addReg(ZERO); + + if (!STI->hasCore4()) + // wr_f + BuildMI(loopMBB, DL, TII->get(WR_F)).addReg(StoreVal); + + // extlh + BuildMI(loopMBB, DL, TII->get(EXTL), OldVal).addReg(LockVal).addReg(Ptr); + + BuildMI(loopMBB, DL, TII->get(EXTL), OldVal).addReg(LockVal).addReg(Ptr); + + // BinOpcode + // Use a tmp reg since the src and dst reg of ORNOT op shall not be the same + // one for unknown reason. + switch (I->getOpcode()) { + case Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? OldVal : + // Incr + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_MAX_I8_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, OldVal, Incr, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : + // OldVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_MIN_I8_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, Incr, OldVal, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_NAND_I8_POSTRA: + // and OldVal, Incr, andres + // ornot andres, 0, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) + .addReg(Sw64::R31) + .addReg(cmpres); + break; + case Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? 
OldVal : + // Incr + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_MAX_I16_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, OldVal, Incr, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : + // OldVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_MIN_I16_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, Incr, OldVal, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_NAND_I16_POSTRA: + // and OldVal, Incr, andres + // ornot andres, 0, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) + .addReg(Sw64::R31) + .addReg(cmpres); + break; + default: + if (Opcode) { + BuildMI(loopMBB, DL, TII->get(Opcode), StoreVal) + .addReg(OldVal) + .addReg(Incr); + } else { + BuildMI(loopMBB, DL, TII->get(Sw64::BISr), StoreVal) + .addReg(Incr) + .addReg(Incr); + } + } + + // inslh + BuildMI(loopMBB, DL, TII->get(INSL), StoreVal).addReg(StoreVal).addReg(Ptr); + + // masklh + BuildMI(loopMBB, DL, TII->get(MASKL), LockVal).addReg(LockVal).addReg(Ptr); + + // bis + BuildMI(loopMBB, DL, TII->get(BIS), LockVal).addReg(LockVal).addReg(StoreVal); + + // lstl + BuildMI(loopMBB, DL, TII->get(SC)).addReg(LockVal).addImm(0).addReg(Reg_bic); + + if (!STI->hasCore4()) + // rd_f + BuildMI(loopMBB, DL, TII->get(RD_F)).addReg(LockVal); + + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(LockVal).addMBB(loopMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); // The instruction is gone now. 
+ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loopMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + return true; +} + +bool Sw64ExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, + unsigned Size) { + MachineFunction *MF = BB.getParent(); + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC; + unsigned LDA = Sw64::LDA; + unsigned ZERO = Sw64::R31; + unsigned BEQ = Sw64::BEQ; + + if (Size == 4) { + LL = Sw64::LDL_L; + SC = Sw64::STL_C; + } else { + LL = Sw64::LDQ_L; + SC = Sw64::STQ_C; + } + + Register OldVal = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Incr = I->getOperand(2).getReg(); + Register StoreVal = I->getOperand(3).getReg(); + Register Scratch1 = I->getOperand(4).getReg(); + Register cmpres = I->getOperand(5).getReg(); + + unsigned Opcode = 0; + switch (I->getOpcode()) { + case Sw64::ATOMIC_LOAD_ADD_I32_POSTRA: + Opcode = Sw64::ADDLr; + break; + case Sw64::ATOMIC_LOAD_SUB_I32_POSTRA: + Opcode = Sw64::SUBLr; + break; + case Sw64::ATOMIC_LOAD_AND_I32_POSTRA: + Opcode = Sw64::ANDr; + break; + case Sw64::ATOMIC_LOAD_OR_I32_POSTRA: + Opcode = Sw64::BISr; + break; + case Sw64::ATOMIC_LOAD_XOR_I32_POSTRA: + Opcode = Sw64::XORr; + break; + case Sw64::ATOMIC_SWAP_I32_POSTRA: + break; + case Sw64::ATOMIC_LOAD_ADD_I64_POSTRA: + Opcode = Sw64::ADDQr; + break; + case Sw64::ATOMIC_LOAD_SUB_I64_POSTRA: + Opcode = Sw64::SUBQr; + break; + case Sw64::ATOMIC_LOAD_AND_I64_POSTRA: + Opcode = Sw64::ANDr; + break; + case Sw64::ATOMIC_LOAD_OR_I64_POSTRA: + Opcode = Sw64::BISr; + break; + case Sw64::ATOMIC_LOAD_XOR_I64_POSTRA: + Opcode = Sw64::XORr; + break; + case Sw64::ATOMIC_SWAP_I64_POSTRA: + break; + case Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I32_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I32_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I32_POSTRA: + + case Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I64_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I64_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I64_POSTRA: + break; + default: + llvm_unreachable("Unknown pseudo atomic!"); + } + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(exitMBB); + loopMBB->normalizeSuccProbs(); + + // memb + BuildMI(loopMBB, DL, TII->get(Sw64::MB)); + + // ldi + BuildMI(loopMBB, DL, TII->get(Sw64::LDA), Ptr).addImm(0).addReg(Ptr); + + // lldw + BuildMI(loopMBB, DL, TII->get(LL), OldVal).addImm(0).addReg(Ptr); + + // ldi + BuildMI(loopMBB, DL, TII->get(LDA), Scratch1).addImm(1).addReg(ZERO); + + if (!STI->hasCore4()) + // wr_f + BuildMI(loopMBB, DL, TII->get(Sw64::WR_F)).addReg(Scratch1); + + // BinOpcode + + // Use a tmp reg since the src and dst reg of ORNOT op shall not be the same + // one for unknown reason. 
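+  // In the NAND cases below, cmpres serves as that temporary: it receives
+  // OldVal & Incr, and the following ORNOTr against the zero register R31
+  // yields its complement, i.e. StoreVal = ~(OldVal & Incr).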
+ switch (I->getOpcode()) { + case Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? OldVal : + // Incr + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_MAX_I64_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, OldVal, Incr, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : + // OldVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_MIN_I64_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, Incr, OldVal, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_NAND_I64_POSTRA: + // and OldVal, Incr, cmpres + // ornot cmpres, 0, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) + .addReg(Sw64::R31) + .addReg(cmpres); + break; + case Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? OldVal : + // Incr + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_MAX_I32_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, OldVal, Incr, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? 
Incr : + // OldVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_MIN_I32_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, Incr, OldVal, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_NAND_I32_POSTRA: + // and OldVal, Incr, cmpres + // ornot cmpres, 0, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) + .addReg(Sw64::R31) + .addReg(cmpres); + break; + default: + if (Opcode) { + BuildMI(loopMBB, DL, TII->get(Opcode), StoreVal) + .addReg(OldVal) + .addReg(Incr); + } else { + BuildMI(loopMBB, DL, TII->get(Sw64::BISr), StoreVal) + .addReg(Incr) + .addReg(Incr); + } + } + + // lstw + BuildMI(loopMBB, DL, TII->get(SC)).addReg(StoreVal).addImm(0).addReg(Ptr); + + if (!STI->hasCore4()) + // rd_f + BuildMI(loopMBB, DL, TII->get(Sw64::RD_F)).addReg(StoreVal); + + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(StoreVal).addMBB(loopMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); // The instruction is gone now. + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loopMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + return true; +} + +bool Sw64ExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB) { + + bool Modified = false; + + switch (MBBI->getOpcode()) { + case Sw64::ATOMIC_CMP_SWAP_I32_POSTRA: + return expandAtomicCmpSwap(MBB, MBBI, NMBB, 4); + case Sw64::ATOMIC_CMP_SWAP_I64_POSTRA: + return expandAtomicCmpSwap(MBB, MBBI, NMBB, 8); + + case Sw64::ATOMIC_CMP_SWAP_I8_POSTRA: + case Sw64::ATOMIC_CMP_SWAP_I16_POSTRA: + return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB); + + case Sw64::ATOMIC_SWAP_I8_POSTRA: + case Sw64::ATOMIC_SWAP_I16_POSTRA: + case Sw64::ATOMIC_LOAD_ADD_I8_POSTRA: + case Sw64::ATOMIC_LOAD_ADD_I16_POSTRA: + case Sw64::ATOMIC_LOAD_SUB_I8_POSTRA: + case Sw64::ATOMIC_LOAD_SUB_I16_POSTRA: + case Sw64::ATOMIC_LOAD_AND_I8_POSTRA: + case Sw64::ATOMIC_LOAD_AND_I16_POSTRA: + case Sw64::ATOMIC_LOAD_OR_I8_POSTRA: + case Sw64::ATOMIC_LOAD_OR_I16_POSTRA: + case Sw64::ATOMIC_LOAD_XOR_I8_POSTRA: + case Sw64::ATOMIC_LOAD_XOR_I16_POSTRA: + + case Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I16_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I16_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I16_POSTRA: + case Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I8_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I8_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I8_POSTRA: + return expandAtomicBinOpSubword(MBB, MBBI, NMBB); + + case Sw64::ATOMIC_LOAD_ADD_I32_POSTRA: + case Sw64::ATOMIC_LOAD_SUB_I32_POSTRA: + case Sw64::ATOMIC_LOAD_AND_I32_POSTRA: + case Sw64::ATOMIC_LOAD_OR_I32_POSTRA: + case Sw64::ATOMIC_LOAD_XOR_I32_POSTRA: + case Sw64::ATOMIC_SWAP_I32_POSTRA: + case Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I32_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I32_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I32_POSTRA: + return expandAtomicBinOp(MBB, MBBI, NMBB, 4); + + case Sw64::ATOMIC_LOAD_ADD_I64_POSTRA: + 
case Sw64::ATOMIC_LOAD_SUB_I64_POSTRA: + case Sw64::ATOMIC_LOAD_AND_I64_POSTRA: + case Sw64::ATOMIC_LOAD_OR_I64_POSTRA: + case Sw64::ATOMIC_LOAD_XOR_I64_POSTRA: + case Sw64::ATOMIC_SWAP_I64_POSTRA: + case Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I64_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I64_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I64_POSTRA: + return expandAtomicBinOp(MBB, MBBI, NMBB, 8); + case Sw64::MOVProgPCGp: + case Sw64::MOVaddrPCGp: + return expandCurGpdisp(MBB, MBBI); + case Sw64::LOADlitSym: + case Sw64::LOADlit: + return expandLoadGotAddress(MBB, MBBI, NMBB); + case Sw64::LOADconstant: + return expandLoadCPAddress(MBB, MBBI, NMBB); + case Sw64::MOVaddrCP: + case Sw64::MOVaddrBA: + case Sw64::MOVaddrGP: + case Sw64::MOVaddrEXT: + case Sw64::MOVaddrJT: + return expandLoadAddress(MBB, MBBI, NMBB); + default: + return Modified; + } +} + +bool Sw64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool Sw64ExpandPseudo::expandCurGpdisp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + MachineOperand addr = MI.getOperand(0); + MachineOperand dstReg = MI.getOperand(2); + + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), Sw64::R29) + .addGlobalAddress(addr.getGlobal(), 0, Sw64II::MO_GPDISP_HI) + .add(dstReg); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDA), Sw64::R29) + .addGlobalAddress(addr.getGlobal(), 0, Sw64II::MO_GPDISP_LO) + .addReg(Sw64::R29); + + MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo::expandLoadCPAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, + Sw64::LDL); +} + +bool Sw64ExpandPseudo::expandLoadGotAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + LLVM_DEBUG(dbgs() << "expand Loadlit LoadlitSym" << *MBBI); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + unsigned DestReg = MI.getOperand(0).getReg(); + const MachineOperand &Symbol = MI.getOperand(1); + + MachineFunction *MF = MBB.getParent(); + switch (MF->getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model for lowering"); + case CodeModel::Small: { + if (Symbol.isSymbol()) + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + else + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + break; + } + + case CodeModel::Medium: { + if (Symbol.isSymbol()) { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(DestReg); + } else { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL) + .addReg(DestReg); + } + break; + } + } + 
MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo::expandLoadAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, + Sw64::LDA); +} + +bool Sw64ExpandPseudo::expandLdihInstPair(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned FlagsHi, + unsigned SecondOpcode, + unsigned FlagsLo, unsigned srcReg) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + unsigned DestReg = MI.getOperand(0).getReg(); + const MachineOperand &Symbol = MI.getOperand(1); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .add(Symbol) + .addReg(srcReg); + MachineInstrBuilder MIB1 = + BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg) + .add(Symbol) + .addReg(DestReg); + + MachineInstr *tmpInst = MIB.getInstr(); + MachineInstr *tmpInst1 = MIB1.getInstr(); + + MachineOperand &SymbolHi = tmpInst->getOperand(1); + MachineOperand &SymbolLo = tmpInst1->getOperand(1); + + SymbolHi.addTargetFlag(FlagsHi); + SymbolLo.addTargetFlag(FlagsLo); + + MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + STI = &static_cast(MF.getSubtarget()); + TII = STI->getInstrInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; + ++MFI) + Modified |= expandMBB(*MFI); + + if (Modified) + MF.RenumberBlocks(); + + return Modified; +} + +/// createSw64ExpandPseudoPass - returns an instance of the pseudo instruction +/// expansion pass. +FunctionPass *llvm::createSw64ExpandPseudoPass() { + return new Sw64ExpandPseudo(); +} diff --git a/llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp b/llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp new file mode 100644 index 000000000000..550c2f52036f --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp @@ -0,0 +1,334 @@ +//===-- Sw64ExpandPseudoInsts.cpp - Expand pseudo instructions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling, if-conversion, and other late +// optimizations. This pass should be run after register allocation but before +// the post-regalloc scheduling pass. +// +// This is currently only used for expanding atomic pseudos after register +// allocation. We do this to avoid the fast register allocator introducing +// spills between ll and sc. These stores cause some other implementations to +// abort the atomic RMW sequence. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64InstrInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-pseudo2" +namespace llvm { +extern const MCInstrDesc Sw64Insts[]; +} + +static cl::opt + ExpandPre("expand-presched", + cl::desc("Expand pseudo Inst before PostRA schedule"), + cl::init(true), cl::Hidden); + +namespace { +class Sw64ExpandPseudo2 : public MachineFunctionPass { +public: + static char ID; + Sw64ExpandPseudo2() : MachineFunctionPass(ID) {} + + const Sw64InstrInfo *TII; + const Sw64Subtarget *STI; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "Sw64 pseudo instruction expansion pass2"; + } + +private: + bool expandPseudoCall(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLoadAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLoadCPAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLdihInstPair(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned FlagsHi, unsigned SecondOpcode, + unsigned FlagsLo = Sw64II::MO_GPREL_LO, + unsigned srcReg = Sw64::R29); + + bool expandLoadGotAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB); + + bool expandMBB(MachineBasicBlock &MBB); +}; +char Sw64ExpandPseudo2::ID = 0; +} // namespace + +bool Sw64ExpandPseudo2::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB) { + bool Modified = false; + + if (ExpandPre) { + switch (MBBI->getOpcode()) { + case Sw64::LOADlitSym: + case Sw64::LOADlit: + return expandLoadGotAddress(MBB, MBBI, NMBB); + case Sw64::LOADconstant: + return expandLoadCPAddress(MBB, MBBI, NMBB); + case Sw64::MOVaddrCP: + case Sw64::MOVaddrBA: + case Sw64::MOVaddrGP: + case Sw64::MOVaddrEXT: + case Sw64::MOVaddrJT: + return expandLoadAddress(MBB, MBBI, NMBB); + case Sw64::PseudoCall: + return expandPseudoCall(MBB, MBBI, NMBB); + default: + return Modified; + } + } else { + switch (MBBI->getOpcode()) { + case Sw64::PseudoCall: + return expandPseudoCall(MBB, MBBI, NMBB); + default: + return Modified; + } + } +} + +bool Sw64ExpandPseudo2::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool Sw64ExpandPseudo2::expandLoadCPAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, + Sw64::LDL); +} + +bool 
Sw64ExpandPseudo2::expandLoadAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, + Sw64::LDA); +} + +bool Sw64ExpandPseudo2::expandLdihInstPair( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi, + unsigned SecondOpcode, unsigned FlagsLo, unsigned srcReg) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + unsigned DestReg = MI.getOperand(0).getReg(); + const MachineOperand &Symbol = MI.getOperand(1); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .add(Symbol) + .addReg(srcReg); + MachineInstrBuilder MIB1 = + BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg) + .add(Symbol) + .addReg(DestReg); + + MachineInstr *tmpInst = MIB.getInstr(); + MachineInstr *tmpInst1 = MIB1.getInstr(); + + MachineOperand &SymbolHi = tmpInst->getOperand(1); + MachineOperand &SymbolLo = tmpInst1->getOperand(1); + + SymbolHi.addTargetFlag(FlagsHi); + SymbolLo.addTargetFlag(FlagsLo); + + MI.eraseFromParent(); + return true; +} + +// while expanding call, we can choose adding lituse +// for linker relax or not. Adding flags for sortRelocs +bool Sw64ExpandPseudo2::expandPseudoCall( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + LLVM_DEBUG(dbgs() << "expand PseudoCall" << *MBBI); + + MachineFunction *MF = MBB.getParent(); + const auto &STI = MF->getSubtarget(); + const Sw64FrameLowering *SFL = STI.getFrameLowering(); + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + unsigned Lflags = 0; // load flags + unsigned Cflags = 0; // Call flags + + MachineOperand Symbol = MI.getOperand(0); + switch (MF->getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model for lowering"); + case CodeModel::Small: { + if (Symbol.isGlobal()) { + int64_t Offs = Symbol.getOffset(); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), Offs, + Lflags | Sw64II::MO_LITERAL | + Sw64II::MO_LITERAL_BASE) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) + .addReg(Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), 0, + Cflags | Sw64II::MO_HINT | Sw64II::MO_LITUSE); + } else if (Symbol.isSymbol()) { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + const Sw64TargetLowering *STL = STI.getTargetLowering(); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) + .addReg(Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName()); + } + break; + } + + case CodeModel::Medium: { + if (Symbol.isGlobal()) { + int64_t Offs = Symbol.getOffset(); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), Offs, Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), Offs, + Lflags | Sw64II::MO_LITERAL | + Sw64II::MO_LITERAL_BASE) + .addReg(Sw64::R27); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) + .addReg(Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), 0, + Cflags | Sw64II::MO_HINT | Sw64II::MO_LITUSE); + } else if (Symbol.isSymbol()) { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, 
DL, TII->get(Sw64::LDL), Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(Sw64::R27); + const Sw64TargetLowering *STL = STI.getTargetLowering(); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) + .addReg(Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName()); + } + break; + } + } + + MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo2::expandLoadGotAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + LLVM_DEBUG(dbgs() << "expand Loadlit LoadlitSym" << *MBBI); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + unsigned DestReg = MI.getOperand(0).getReg(); + const MachineOperand &Symbol = MI.getOperand(1); + + MachineFunction *MF = MBB.getParent(); + switch (MF->getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model for lowering"); + case CodeModel::Small: { + if (Symbol.isSymbol()) + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + else + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + break; + } + + case CodeModel::Medium: { + if (Symbol.isSymbol()) { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(DestReg); + } else { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL) + .addReg(DestReg); + } + break; + } + } + MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo2::runOnMachineFunction(MachineFunction &MF) { + STI = &static_cast(MF.getSubtarget()); + TII = STI->getInstrInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; + ++MFI) + Modified |= expandMBB(*MFI); + + if (Modified) + MF.RenumberBlocks(); + + return Modified; +} + +/// createSw64ExpandPseudoPass - returns an instance of the pseudo instruction +/// expansion pass. +FunctionPass *llvm::createSw64ExpandPseudo2Pass() { + return new Sw64ExpandPseudo2(); +} diff --git a/llvm/lib/Target/Sw64/Sw64FrameLowering.cpp b/llvm/lib/Target/Sw64/Sw64FrameLowering.cpp new file mode 100644 index 000000000000..9030d8ba99c2 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64FrameLowering.cpp @@ -0,0 +1,456 @@ +//=====- Sw64FrameLowering.cpp - Sw64 Frame Information ------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of TargetFrameLowering class. 
+// +//===----------------------------------------------------------------------===// +#include "Sw64FrameLowering.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64InstrInfo.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetOptions.h" +#include // std::sort + +using namespace llvm; + +cl::opt Sw64PG("pg", cl::desc("Support the pg"), cl::init(false)); + +static long getUpper16(long l) { + long y = l / Sw64::IMM_MULT; + if (l % Sw64::IMM_MULT > Sw64::IMM_HIGH) + ++y; + else if (l % Sw64::IMM_MULT < Sw64::IMM_LOW) + --y; + return y; +} + +static long getLower16(long l) { + long h = getUpper16(l); + return l - h * Sw64::IMM_MULT; +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +// +bool Sw64FrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || + TRI->hasStackRealignment(MF); +} + +// hasReservedCallFrame - Under normal circumstances, when a frame pointer is +// not required, we reserve argument space for call sites in the function +// immediately on entry to the current function. This eliminates the need for +// add/sub sp brackets around call sites. Returns true if the call frame is +// included as part of the stack frame. +bool Sw64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + return !MF.getFrameInfo().hasVarSizedObjects(); +} + +bool Sw64FrameLowering::isLeafProc(MachineFunction &MF) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + return !MRI.isPhysRegUsed(Sw64::R29); +} + +bool Sw64FrameLowering::hasBP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); +} + +void Sw64FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + + MachineBasicBlock::iterator MBBI = MBB.begin(); // Prolog goes in entry BB + MachineFrameInfo &MFI = MF.getFrameInfo(); + + const Sw64InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const Sw64RegisterInfo &RegInfo = *static_cast( + MF.getSubtarget().getRegisterInfo()); + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + + // First, compute final stack size. + uint64_t StackSize = MFI.getStackSize(); + + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + + MBB.addLiveIn(Sw64::R27); + int curgpdist = STI.getCurgpdist(); + // Handle GOT offset + // Now sw_64 won't emit this unless it is necessary. 
+ // While it is also useful for DebugInfo test. + if (!isLeafProc(MF)) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::MOVProgPCGp)) + .addGlobalAddress(&(MF.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R27); + + BuildMI(MBB, MBBI, dl, TII.get(Sw64::ALTENT)) + .addGlobalAddress(&(MF.getFunction())); + } + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI.adjustsStack()) + return; + + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, MBBI, dl, TII.get(Sw64::NOP)); + } + if (Sw64PG) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDL), Sw64::R28) + .addExternalSymbol("_mcount") + .addReg(Sw64::R29); + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, MBBI, dl, TII.get(Sw64::NOP)); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::JSR)) + .addReg(Sw64::R28) + .addReg(Sw64::R28) + .addExternalSymbol("_mcount"); + if (!Sw64DeleteNop) + BuildMI(MBB, MBBI, dl, TII.get(Sw64::NOP)); + } else + BuildMI(MBB, MBBI, dl, TII.get(Sw64::JSR)) + .addReg(Sw64::R28) + .addReg(Sw64::R28) + .addExternalSymbol("_mcount"); + } + + unsigned Align = getStackAlignment(); + StackSize = (StackSize + Align - 1) / Align * Align; + + // Update frame info to pretend that this is part of the stack... + MFI.setStackSize(StackSize); + + // adjust stack pointer: r30 -= numbytes + int AdjustStackSize = -StackSize; + if (AdjustStackSize >= Sw64::IMM_LOW) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) + .addImm(AdjustStackSize) + .addReg(Sw64::R30); + } else if (getUpper16(AdjustStackSize) >= Sw64::IMM_LOW) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDAH), Sw64::R30) + .addImm(getUpper16(AdjustStackSize)) + .addReg(Sw64::R30); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) + .addImm(getLower16(AdjustStackSize)) + .addReg(Sw64::R30); + } else { + report_fatal_error("Too big a stack frame at " + Twine(-AdjustStackSize)); + } + + // emit ".cfi_def_cfa_offset StackSize" + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::cfiDefCfaOffset(nullptr, -AdjustStackSize)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + std::vector &CSI = MFI.getCalleeSavedInfo(); + + if (!CSI.empty()) { + // Find the instruction past the last instruction that saves a + // callee-saved register to the stack. + for (unsigned i = 0; i < CSI.size(); ++i) + ++MBBI; + + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. + for (std::vector::const_iterator I = CSI.begin(), + E = CSI.end(); + I != E; ++I) { + int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + unsigned DReg = MRI->getDwarfRegNum(Reg, true); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DReg, Offset)); + + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } + + // if framepointer enabled, set it to point to the stack pointer. 
+ // Now if we need to, save the old FP and set the new + if (hasFP(MF)) { + // This must be the last instr in the prolog + BuildMI(MBB, MBBI, dl, TII.get(Sw64::BISr), Sw64::R15) + .addReg(Sw64::R30) + .addReg(Sw64::R30); + + // emit ".cfi_def_cfa_register $fp" + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(Sw64::R15, true))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + if (RegInfo.hasStackRealignment(MF)) { + // ldi -MaxAlign + // and -MaxAlign for sp + Register VR = MF.getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + + assert((Log2(MFI.getMaxAlign()) < 16) && + "Function's alignment size requirement is not supported."); + int64_t MaxAlign = -(int64_t)MFI.getMaxAlign().value(); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), VR) + .addImm(MaxAlign) + .addReg(Sw64::R31); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::ANDr), Sw64::R30) + .addReg(Sw64::R30) + .addReg(VR); + + if (hasBP(MF)) + // mov $sp, $14 + BuildMI(MBB, MBBI, dl, TII.get(Sw64::BISr), Sw64::R14) + .addReg(Sw64::R30) + .addReg(Sw64::R30); + } + } +} + +void Sw64FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + + MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const Sw64InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + DebugLoc dl = MBBI->getDebugLoc(); + + assert((MBBI->getOpcode() == Sw64::PseudoRet) && + "Can only insert epilog into returning blocks"); + + // Get the number of bytes allocated from the FrameInfo... + uint64_t StackSize = MFI.getStackSize(); + // now if we need to, restore the old FP + if (hasFP(MF)) { + // Find the first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) { + --I; + } + + // copy the FP into the SP (discards allocas) + BuildMI(MBB, I, dl, TII.get(Sw64::BISr), Sw64::R30) + .addReg(Sw64::R15) + .addReg(Sw64::R15); + } + + if (StackSize != 0) { + if (StackSize <= Sw64::IMM_HIGH) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) + .addImm(StackSize) + .addReg(Sw64::R30); + } else if (getUpper16(StackSize) <= Sw64::IMM_HIGH) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDAH), Sw64::R30) + .addImm(getUpper16(StackSize)) + .addReg(Sw64::R30); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) + .addImm(getLower16(StackSize)) + .addReg(Sw64::R30); + } else { + report_fatal_error("Too big a stack frame at " + Twine(StackSize)); + } + } +} + +StackOffset +Sw64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + if (MFI.isFixedObjectIndex(FI)) + FrameReg = hasFP(MF) ? Sw64::R15 : Sw64::R30; + else + FrameReg = hasBP(MF) ? Sw64::R14 : Sw64::R30; + + return StackOffset::getFixed(MFI.getObjectOffset(FI) + MFI.getStackSize() - + getOffsetOfLocalArea() + + MFI.getOffsetAdjustment()); +} + +// TODO: must be rewrite. 
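// A minimal sketch of what the spill loop below typically produces for two
// callee-saved GPRs (the frame indices and the store mnemonic are
// illustrative; the real opcode is chosen by storeRegToStackSlot from the
// register class of each saved register):
//   stl $26, <fi#0>($sp)
//   stl $9,  <fi#1>($sp)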
+bool Sw64FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return true; + + const TargetInstrInfo &TII = *STI.getInstrInfo(); + + DebugLoc DL; + if (MI != MBB.end() && !MI->isDebugInstr()) + DL = MI->getDebugLoc(); + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + MBB.addLiveIn(Reg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC, TRI, + Register()); + } + return true; +} + +bool Sw64FrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); + bool AtStart = MI == MBB.begin(); + MachineBasicBlock::iterator BeforeI = MI; + if (!AtStart) + --BeforeI; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI, + Register()); + assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert multiple + // instructions. + if (AtStart) + MI = MBB.begin(); + else { + MI = BeforeI; + ++MI; + } + } + return true; +} + +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +MachineBasicBlock::iterator Sw64FrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + + const Sw64InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + + if (!hasReservedCallFrame(MF)) { + // Turn the adjcallstackdown instruction into 'ldi sp,-sp' and the + // adjcallstackup instruction into 'ldi sp,sp' + MachineInstr &Old = *I; + // FIXME: temporary modify the old value is: Old.getOperand(0).getImm(); + uint64_t Amount = Old.getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount + Align - 1) / Align * Align; + + MachineInstr *New; + if (Old.getOpcode() == Sw64::ADJUSTSTACKDOWN) { + New = BuildMI(MF, Old.getDebugLoc(), TII.get(Sw64::LDA), Sw64::R30) + .addImm(-Amount) + .addReg(Sw64::R30); + } else { + assert(Old.getOpcode() == Sw64::ADJUSTSTACKUP); + New = BuildMI(MF, Old.getDebugLoc(), TII.get(Sw64::LDA), Sw64::R30) + .addImm(Amount) + .addReg(Sw64::R30); + } + // Replace the pseudo instruction with a new instruction... + MBB.insert(I, New); + } + } + + return MBB.erase(I); +} + +/// Mark \p Reg and all registers aliasing it in the bitset. +static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, + unsigned Reg) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + SavedRegs.set(*AI); +} + +// TODO: must be rewrite. +void Sw64FrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + // Mark $fp as used if function has dedicated frame pointer. 
+ if (hasFP(MF)) + setAliasRegs(MF, SavedRegs, Sw64::R15); + if (hasBP(MF)) + setAliasRegs(MF, SavedRegs, Sw64::R14); + + // Set scavenging frame index if necessary. + uint64_t MaxSPOffset = estimateStackSize(MF); + + // If there is a variable sized object on the stack, the estimation cannot + // account for it. + if (isIntN(16, MaxSPOffset) && !MF.getFrameInfo().hasVarSizedObjects()) + return; +} + +// Estimate the size of the stack, including the incoming arguments. We need to +// account for register spills, local objects, reserved call frame and incoming +// arguments. This is required to determine the largest possible positive offset +// from $sp so that it can be determined if an emergency spill slot for stack +// addresses is required. +uint64_t Sw64FrameLowering::estimateStackSize(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + + int64_t Size = 0; + + // Iterate over fixed sized objects which are incoming arguments. + for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) + if (MFI.getObjectOffset(I) > 0) + Size += MFI.getObjectSize(I); + + // Conservatively assume all callee-saved registers will be saved. + for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { + unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); + Size = alignTo(Size + RegSize, RegSize); + } + + // Get the size of the rest of the frame objects and any possible reserved + // call frame, accounting for alignment. + return Size + MFI.estimateStackSize(MF); +} + +void Sw64FrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + const Sw64RegisterInfo *RegInfo = + MF.getSubtarget().getRegisterInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &Sw64::GPRCRegClass; + if (!isInt<16>(MFI.estimateStackSize(MF))) { + int RegScavFI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC), + RegInfo->getSpillAlign(*RC), false); + RS->addScavengingFrameIndex(RegScavFI); + } + assert(RS && "requiresRegisterScavenging failed"); +} diff --git a/llvm/lib/Target/Sw64/Sw64FrameLowering.h b/llvm/lib/Target/Sw64/Sw64FrameLowering.h new file mode 100644 index 000000000000..ef0613b44618 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64FrameLowering.h @@ -0,0 +1,82 @@ +//===-- Sw64FrameLowering.h - Frame info for Sw64 Target ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains Sw64 frame information that doesn't fit anywhere else +// cleanly... +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64FRAMELOWERING_H +#define LLVM_LIB_TARGET_SW64_SW64FRAMELOWERING_H + +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class Sw64Subtarget; + +class Sw64FrameLowering : public TargetFrameLowering { + +protected: + const Sw64Subtarget &STI; + +public: + explicit Sw64FrameLowering(const Sw64Subtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0), + STI(sti) { + // Do nothing + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
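  /// As a rough sketch (using the opcodes emitPrologue in
  /// Sw64FrameLowering.cpp above builds, not literal assembler output), a
  /// non-leaf function with a large frame gets:
  ///   MOVProgPCGp / ALTENT                  ; materialize the GP
  ///   LDAH $sp, upper16(-StackSize)($sp)
  ///   LDA  $sp, lower16(-StackSize)($sp)
  ///   CFI_INSTRUCTION def_cfa_offset StackSize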
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + ArrayRef CSI, + const TargetRegisterInfo *TRI) const override; + + bool + restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + MutableArrayRef CSI, + const TargetRegisterInfo *TRI) const override; + + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; + + bool hasFP(const MachineFunction &MF) const override; + bool hasReservedCallFrame(const MachineFunction &MF) const override; + + bool hasBP(const MachineFunction &MF) const; + +private: + void emitMieee(MachineFunction &MF) const; + + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; + + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + + //! Stack slot size (4 bytes) + static int stackSlotSize() { return 4; } + + // Returns true if MF is a leaf procedure. + bool isLeafProc(MachineFunction &MF) const; + +protected: + uint64_t estimateStackSize(const MachineFunction &MF) const; +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp b/llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp new file mode 100644 index 000000000000..6689f7c256d3 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp @@ -0,0 +1,138 @@ +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64FrameLowering.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "sw_64-ieee-contrain" + +using namespace llvm; + +namespace llvm { + +struct Sw64IEEEConstraint : public MachineFunctionPass { + /// Target machine description which we query for reg. names, data + /// layout, etc. 
+ static char ID; + Sw64IEEEConstraint() : MachineFunctionPass(ID) {} + + StringRef getPassName() const { return "Sw64 Add IEEE Contrain"; } + + bool runOnMachineFunction(MachineFunction &F); +}; +char Sw64IEEEConstraint::ID = 0; +} // end namespace llvm + +static bool isNeedIEEEConstraint(unsigned opcode) { + switch (opcode) { + case Sw64::ADDS: + case Sw64::SUBS: + case Sw64::MULS: + case Sw64::DIVS: + case Sw64::FMAS: + case Sw64::FMSS: + case Sw64::FNMAS: + case Sw64::FNMSS: + case Sw64::ADDD: + case Sw64::SUBD: + case Sw64::MULD: + case Sw64::DIVD: + case Sw64::FMAD: + case Sw64::FMSD: + case Sw64::FNMAD: + case Sw64::FNMSD: + case Sw64::CVTQS: + case Sw64::CVTQT: + case Sw64::CVTTQ: + case Sw64::CVTTS: + case Sw64::CVTST: + case Sw64::FCVTWL: + case Sw64::FCVTLW: + case Sw64::VADDS: + case Sw64::VADDD: + case Sw64::VSUBS: + case Sw64::VSUBD: + case Sw64::VMULS: + case Sw64::VMULD: + case Sw64::VDIVS: + case Sw64::VDIVD: + case Sw64::VSQRTS: + case Sw64::VSQRTD: + case Sw64::SQRTSS: + case Sw64::SQRTSD: + case Sw64::CMPTEQ: + case Sw64::CMPTLE: + case Sw64::CMPTLT: + case Sw64::CMPTUN: + case Sw64::VFCMPEQ: + case Sw64::VFCMPLE: + case Sw64::VFCMPLT: + case Sw64::VFCMPUN: + case Sw64::VMAS: + case Sw64::VMAD: + case Sw64::VMSS: + case Sw64::VMSD: + case Sw64::VNMAS: + case Sw64::VNMAD: + case Sw64::VNMSS: + case Sw64::VNMSD: + case Sw64::FSELEQS: + case Sw64::FSELNES: + case Sw64::FSELLTS: + case Sw64::FSELLES: + case Sw64::FSELGTS: + case Sw64::FSELGES: + case Sw64::FSELEQD: + case Sw64::FSELNED: + case Sw64::FSELLTD: + case Sw64::FSELLED: + case Sw64::FSELGTD: + case Sw64::FSELGED: + case Sw64::FCTTDL_G: + case Sw64::FCTTDL_P: + case Sw64::FCTTDL_N: + case Sw64::FCTTDL: + return true; + } + return false; +} + +bool Sw64IEEEConstraint::runOnMachineFunction(MachineFunction &F) { + const Sw64Subtarget &ST = F.getSubtarget(); + if (ST.hasCore4()) + return false; + + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock &MBB = *FI; + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + NMBBI++; + for (; MBBI != MBB.end(); MBBI++) { + if (isNeedIEEEConstraint(MBBI->getOpcode())) { + MachineOperand &MO = MBBI->getOperand(0); + if (MO.isEarlyClobber()) { + LLVM_DEBUG(dbgs() << "getting is EarlyClobber Flag" + << MO.isEarlyClobber() << "\n"; + MBBI->dump()); + continue; + } + + MO.setIsEarlyClobber(); + LLVM_DEBUG(dbgs() << "setting is EarlyClobber Flag" + << MBBI->getOperand(0).isEarlyClobber() << "\n"; + MBBI->dump()); + } + } + } + return true; +} + +FunctionPass *llvm::createSw64IEEEConstraintPass() { + return new Sw64IEEEConstraint(); +} diff --git a/llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp b/llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp new file mode 100644 index 000000000000..d684a9aa25d7 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp @@ -0,0 +1,1016 @@ +//===-- Sw64ISelDAGToDAG.cpp - Sw64 pattern matching inst selector ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pattern matching instruction selector for Sw64, +// converting from a legalized dag to a Sw64 dag. 
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "sw_64-isel" +#define PASS_NAME "Sw64 DAG->DAG Pattern Instruction Selection" + +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64Subtarget.h" +#include "Sw64TargetMachine.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +#include +using namespace llvm; + +namespace { + +//===--------------------------------------------------------------------===// +/// Sw64DAGToDAGISel - Sw64 specific code to select Sw64 machine +/// instructions for SelectionDAG operations. +class Sw64DAGToDAGISel : public SelectionDAGISel { + const Sw64Subtarget *Subtarget; + + static const int64_t IMM_LOW = -32768; + static const int64_t IMM_HIGH = 32767; + static const int64_t IMM_MULT = 65536; + static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT; + static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT; + + static int64_t get_ldah16(int64_t x) { + int64_t y = x / IMM_MULT; + if (x % IMM_MULT > IMM_HIGH) + ++y; + if (x % IMM_MULT < IMM_LOW) + --y; + return y; + } + + static int64_t get_lda16(int64_t x) { return x - get_ldah16(x) * IMM_MULT; } + + /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot + /// instruction (if not, return 0). Note that this code accepts partial + /// zap masks. For example (and LHS, 1) is a valid zap, as long we know + /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are + /// in checking mode. If LHS is null, we assume that the mask has already + /// been validated before. + uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const { + uint64_t BitsToCheck = 0; + unsigned Result = 0; + for (unsigned i = 0; i != 8; ++i) { + if (((Constant >> 8 * i) & 0xFF) == 0) { + // nothing to do. + } else { + Result |= 1 << i; + if (((Constant >> 8 * i) & 0xFF) == 0xFF) { + // If the entire byte is set, zapnot the byte. + } else if (LHS.getNode() == 0) { + // Otherwise, if the mask was previously validated, we know its okay + // to zapnot this entire byte even though all the bits aren't set. + } else { + // Otherwise we don't know that the it's okay to zapnot this entire + // byte. Only do this iff we can prove that the missing bits are + // already null, so the bytezap doesn't need to really null them. + BitsToCheck |= ~Constant & (0xFFULL << 8 * i); + } + } + } + + // If there are missing bits in a byte (for example, X & 0xEF00), check to + // see if the missing bits (0x1000) are already known zero if not, the zap + // isn't okay to do, as it won't clear all the required bits. 
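      // Illustrative values (not taken from the patch): Constant = 0x0000FF00
      // yields Result = 0b00000010 with nothing in BitsToCheck, while
      // Constant = 0x0000EF00 sets the same Result bit but records 0x1000 in
      // BitsToCheck, so the zap is only usable if bit 12 of LHS is already
      // known to be zero.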
+ if (BitsToCheck && !CurDAG->MaskedValueIsZero( + LHS, APInt(LHS.getValueSizeInBits(), BitsToCheck))) + return 0; + + return Result; + } + + static uint64_t get_zapImm(uint64_t x) { + unsigned build = 0; + for (int i = 0; i != 8; ++i) { + if ((x & 0x00FF) == 0x00FF) + build |= 1 << i; + else if ((x & 0x00FF) != 0) + return 0; + x >>= 8; + } + return build; + } + + static uint64_t getNearPower2(uint64_t x) { + if (!x) + return 0; + unsigned at = __builtin_clzll(x); + uint64_t complow = 1ULL << (63 - at); + uint64_t comphigh = complow << 1; + if (x - complow <= comphigh - x) + return complow; + else + return comphigh; + } + + static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) { + uint64_t y = getNearPower2(x); + if (swap) + return (y - x) == r; + else + return (x - y) == r; + } + +public: + static char ID; + + Sw64DAGToDAGISel() = delete; + + explicit Sw64DAGToDAGISel(Sw64TargetMachine &TM, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(ID, TM, OptLevel), Subtarget(nullptr) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. + inline SDValue getI64Imm(int64_t Imm, const SDLoc &dl) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i64); + } + + inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i32); + } + + static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm); + // Select - Convert the specified operand from a target-independent to a + // target-specific node if it hasn't already been changed. + void Select(SDNode *N) override; + StringRef getPassName() const override { + return "Sw64 DAG->DAG Pattern Instruction Selection"; + } + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector &OutOps) override; + + template + bool SelectAddSubImm(SDValue N, SDValue &Imm) { + return SelectAddSubImm(N, VT, Imm); + } + + bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; + bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, + unsigned OffsetBits, + unsigned ShiftAmount) const; + bool selectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset) const; + bool selectAddrRegImm16(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + /// abs64 - absolute value of a 64-bit int. Not all environments support + /// "abs" on whatever their name for the 64-bit int type is. The absolute + /// value of the largest negative number is undefined, as with "abs". + inline int64_t abs64(int64_t x) { return (x < 0) ? -x : x; } + +// Include the pieces autogenerated from the target description. +#include "Sw64GenDAGISel.inc" + +private: + /// getTargetMachine - Return a reference to the TargetMachine, casted + /// to the target-specific type. + const Sw64TargetMachine &getTargetMachine() { + return static_cast(TM); + } + + bool SelectAddSubImm(SDValue N, MVT VT, SDValue &Imm); + bool SelectComplexImm(SDValue N, SDValue &Imm); + + SDNode *getGlobalBaseReg(); + SDNode *getGlobalRetAddr(); + void SelectCALL(SDNode *Op); + bool tryIndexedLoad(SDNode *N); + bool tryIndexedStore(SDNode *N); + bool selectSExti32(SDValue N, SDValue &Val); + bool selectZExti32(SDValue N, SDValue &Val); + + /// Select constant vector splats. 
+ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; + /// Select constant vector splats whose value fits in a given integer. + bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const; + /// Select constant vector splats whose value fits in a uimm8. + bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; + + bool selectVSplatSimm8(SDValue N, SDValue &Imm) const; + bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + bool selectIntAddrSImm16(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + bool selectIntAddrSImm12(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + bool SelectAddrFI(SDValue Addr, SDValue &Base); +}; +} // end anonymous namespace +char Sw64DAGToDAGISel::ID = 0; + +INITIALIZE_PASS(Sw64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) + +/// getGlobalBaseReg - Output the instructions required to put the +/// GOT address into a register. +/// +SDNode *Sw64DAGToDAGISel::getGlobalBaseReg() { + unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF); + return CurDAG + ->getRegister(GlobalBaseReg, + getTargetLowering()->getPointerTy(CurDAG->getDataLayout())) + .getNode(); +} + +/// getGlobalRetAddr - Grab the return address. +/// +SDNode *Sw64DAGToDAGISel::getGlobalRetAddr() { + unsigned GlobalRetAddr = Subtarget->getInstrInfo()->getGlobalRetAddr(MF); + return CurDAG + ->getRegister(GlobalRetAddr, + getTargetLowering()->getPointerTy(CurDAG->getDataLayout())) + .getNode(); +} + +bool Sw64DAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { + if (auto FIN = dyn_cast(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + return true; + } + + return false; +} + +// Select - Convert the specified operand from a target-independent to a +// target-specific node if it hasn't already been changed. +void Sw64DAGToDAGISel::Select(SDNode *N) { + + // Dump information about the Node being selected + LLVM_DEBUG(errs() << "Selecting: "; N->dump(CurDAG); errs() << "\n"); + + // If we have a custom node, we already have selected! + if (N->isMachineOpcode()) { + LLVM_DEBUG(errs() << "== "; N->dump(CurDAG); errs() << "\n"); + return; + } + SDLoc dl(N); + switch (N->getOpcode()) { + default: + break; + case ISD::LOAD: + if (tryIndexedLoad(N)) + return; + // Other cases are autogenerated. + break; + case ISD::STORE: + if (tryIndexedStore(N)) + return; + // Other cases are autogenerated. + break; + case Sw64ISD::CALL: + SelectCALL(N); + if (N->use_empty()) // Don't delete EntryToken, etc. 
+ CurDAG->RemoveDeadNode(N); + return; + case ISD::FrameIndex: { + assert(N->getValueType(0) == MVT::i64); + int FI = cast(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); + if (N->hasOneUse()) { + N->setDebugLoc((*(N->use_begin()))->getDebugLoc()); + CurDAG->SelectNodeTo(N, Sw64::LDA, MVT::i64, TFI, + CurDAG->getTargetConstant(0, dl, MVT::i64)); + return; + } + ReplaceNode( + N, CurDAG->getMachineNode(Sw64::LDA, dl, MVT::i64, TFI, + CurDAG->getTargetConstant(0, dl, MVT::i64))); + return; + } + case ISD::GLOBAL_OFFSET_TABLE: + ReplaceNode(N, getGlobalBaseReg()); + return; + case Sw64ISD::GlobalRetAddr: + ReplaceNode(N, getGlobalRetAddr()); + return; + + case Sw64ISD::DivCall: { + SDValue Chain = CurDAG->getEntryNode(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R24, N1, SDValue(0, 0)); + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R25, N2, Chain.getValue(1)); + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R27, N0, Chain.getValue(1)); + SDNode *CNode = CurDAG->getMachineNode(Sw64::PseudoCallDiv, dl, MVT::Other, + MVT::Glue, Chain, Chain.getValue(1)); + Chain = CurDAG->getCopyFromReg(Chain, dl, Sw64::R27, MVT::i64, + SDValue(CNode, 1)); + ReplaceNode(N, + CurDAG->getMachineNode(Sw64::BISr, dl, MVT::i64, Chain, Chain)); + return; + } + + case ISD::READCYCLECOUNTER: { + SDValue Chain = N->getOperand(0); + ReplaceNode( + N, CurDAG->getMachineNode(Sw64::RPCC, dl, MVT::i64, MVT::Other, Chain)); + return; + } + + case ISD::Constant: { + auto ConstNode = cast(N); + if (ConstNode->isZero()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + Sw64::R31, MVT::i64); + ReplaceUses(SDValue(N, 0), Result); + return; + } + uint64_t uval = cast(N)->getZExtValue(); + int64_t Imm = ConstNode->getSExtValue(); + int64_t val = Imm; + int32_t val32 = (int32_t)val; + if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT && + val >= IMM_LOW + IMM_LOW * IMM_MULT) + break; //(LDAH (LDA)) + if ((uval >> 32) == 0 && // empty upper bits + val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT) + break; //(zext (LDAH (LDA))) + // Else use the constant pool + + ConstantInt *C = + ConstantInt::get(Type::getInt64Ty(*CurDAG->getContext()), uval); + SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64); + SDNode *Load = + CurDAG->getMachineNode(Sw64::LOADconstant, dl, MVT::i64, CPI); + ReplaceNode(N, Load); + + return; + } + case ISD::TargetConstantFP: + case ISD::ConstantFP: { + ConstantFPSDNode *CN = cast(N); + bool isDouble = N->getValueType(0) == MVT::f64; + EVT T = isDouble ? MVT::f64 : MVT::f32; + if (CN->getValueAPF().isPosZero()) { + ReplaceNode( + N, CurDAG->getMachineNode(isDouble ? Sw64::CPYSD : Sw64::CPYSS, dl, T, + CurDAG->getRegister(Sw64::F31, T), + CurDAG->getRegister(Sw64::F31, T))); + return; + } else if (CN->getValueAPF().isNegZero()) { + ReplaceNode( + N, CurDAG->getMachineNode(isDouble ? 
Sw64::CPYSND : Sw64::CPYSNS, dl, + T, CurDAG->getRegister(Sw64::F31, T), + CurDAG->getRegister(Sw64::F31, T))); + return; + } else { + report_fatal_error("Unhandled FP constant type"); + } + break; + } + + case ISD::SETCC: + if (N->getSimpleValueType(0).SimpleTy == MVT::v4i64) + break; + if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) { + ISD::CondCode CC = cast(N->getOperand(2))->get(); + + unsigned Opc = Sw64::WTF; + bool rev = false; + bool inv = false; + bool ordonly = false; + if (Sw64Mieee) { + switch (CC) { + default: + LLVM_DEBUG(N->dump(CurDAG)); + llvm_unreachable("Unknown FP comparison!"); + case ISD::SETEQ: + case ISD::SETOEQ: + case ISD::SETUEQ: + Opc = Sw64::CMPTEQ; + break; + case ISD::SETLT: + case ISD::SETOLT: + case ISD::SETULT: + Opc = Sw64::CMPTLT; + break; + case ISD::SETLE: + case ISD::SETOLE: + case ISD::SETULE: + Opc = Sw64::CMPTLE; + break; + case ISD::SETGT: + case ISD::SETOGT: + case ISD::SETUGT: + Opc = Sw64::CMPTLT; + rev = true; + break; + case ISD::SETGE: + case ISD::SETOGE: + case ISD::SETUGE: + Opc = Sw64::CMPTLE; + rev = true; + break; + case ISD::SETNE: + case ISD::SETONE: + case ISD::SETUNE: + Opc = Sw64::CMPTEQ; + inv = true; + break; + case ISD::SETO: + Opc = Sw64::CMPTUN; + inv = true; + ordonly = true; + break; + case ISD::SETUO: + Opc = Sw64::CMPTUN; + ordonly = true; + break; + }; + + /* + unordered: + FCMPUN $f1, $f2, $f3 + FCMPxx $f1, $f2, $f3 + FSELNE $f3, $f3, $f4, $f4 + + ordered: + FCMPUN $f1, $f2, $f3 + FCMPxx $f1, $f2, $f3 + FSELEQ $f3, $f4, $f31, $f4 + + SETO/SETUO: + FCMPxx $f1, $f2, $f3 + */ + bool ordered = true; + switch (CC) { + case ISD::SETUEQ: + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + ordered = false; + break; + default: + break; + } + SDValue opr0 = N->getOperand(rev ? 1 : 0); + SDValue opr1 = N->getOperand(rev ? 0 : 1); + SDNode *cmpu = + CurDAG->getMachineNode(Sw64::CMPTUN, dl, MVT::f64, opr0, opr1); + SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, opr0, opr1); + if (inv) + cmp = CurDAG->getMachineNode( + Sw64::CMPTEQ, dl, MVT::f64, SDValue(cmp, 0), + CurDAG->getRegister(Sw64::F31, MVT::f64)); + + SDNode *sel = NULL; + if (ordonly) + sel = cmp; + else if (ordered) + sel = CurDAG->getMachineNode(Sw64::FSELEQD, dl, MVT::f64, + CurDAG->getRegister(Sw64::F31, MVT::f64), + SDValue(cmp, 0), SDValue(cmpu, 0)); + else + sel = CurDAG->getMachineNode(Sw64::FSELNED, dl, MVT::f64, + SDValue(cmp, 0), SDValue(cmpu, 0), + SDValue(cmpu, 0)); + + MVT VT = N->getSimpleValueType(0).SimpleTy == MVT::v4i64 ? 
MVT::v4i64 + : MVT::i64; + SDNode *LD = + CurDAG->getMachineNode(Sw64::FTOIT, dl, VT, SDValue(sel, 0)); + + ReplaceNode(N, CurDAG->getMachineNode( + Sw64::CMPULTr, dl, VT, + CurDAG->getRegister(Sw64::R31, VT), SDValue(LD, 0))); + return; + } else { + switch (CC) { + default: + LLVM_DEBUG(N->dump(CurDAG)); + llvm_unreachable("Unknown FP comparison!"); + case ISD::SETEQ: + case ISD::SETOEQ: + case ISD::SETUEQ: + Opc = Sw64::CMPTEQ; + break; + case ISD::SETLT: + case ISD::SETOLT: + case ISD::SETULT: + Opc = Sw64::CMPTLT; + break; + case ISD::SETLE: + case ISD::SETOLE: + case ISD::SETULE: + Opc = Sw64::CMPTLE; + break; + case ISD::SETGT: + case ISD::SETOGT: + case ISD::SETUGT: + Opc = Sw64::CMPTLT; + rev = true; + break; + case ISD::SETGE: + case ISD::SETOGE: + case ISD::SETUGE: + Opc = Sw64::CMPTLE; + rev = true; + break; + case ISD::SETNE: + case ISD::SETONE: + case ISD::SETUNE: + Opc = Sw64::CMPTEQ; + inv = true; + break; + case ISD::SETO: + Opc = Sw64::CMPTUN; + inv = true; + break; + case ISD::SETUO: + Opc = Sw64::CMPTUN; + break; + }; + SDValue tmp1 = N->getOperand(rev ? 1 : 0); + SDValue tmp2 = N->getOperand(rev ? 0 : 1); + SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2); + if (inv) + cmp = CurDAG->getMachineNode( + Sw64::CMPTEQ, dl, MVT::f64, SDValue(cmp, 0), + CurDAG->getRegister(Sw64::F31, MVT::f64)); + switch (CC) { + case ISD::SETUEQ: + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: { + SDNode *cmp2 = + CurDAG->getMachineNode(Sw64::CMPTUN, dl, MVT::f64, tmp1, tmp2); + cmp = CurDAG->getMachineNode(Sw64::ADDD, dl, MVT::f64, + SDValue(cmp2, 0), SDValue(cmp, 0)); + break; + } + default: + break; + } + SDNode *LD = + CurDAG->getMachineNode(Sw64::FTOIT, dl, MVT::i64, SDValue(cmp, 0)); + + ReplaceNode( + N, CurDAG->getMachineNode(Sw64::CMPULTr, dl, MVT::i64, + CurDAG->getRegister(Sw64::R31, MVT::i64), + SDValue(LD, 0))); + return; + } + } + break; + case ISD::AND: { + ConstantSDNode *SC = NULL; + ConstantSDNode *MC = NULL; + if (N->getOperand(0).getOpcode() == ISD::SRL && + (MC = dyn_cast(N->getOperand(1))) && + (SC = dyn_cast(N->getOperand(0).getOperand(1)))) { + uint64_t sval = SC->getZExtValue(); + uint64_t mval = MC->getZExtValue(); + // If the result is a zap, let the autogened stuff handle it. + if (get_zapImm(N->getOperand(0), mval)) + break; + // given mask X, and shift S, we want to see if there is any zap in the + // mask if we play around with the botton S bits + uint64_t dontcare = (~0ULL) >> (64 - sval); + uint64_t mask = mval << sval; + + if (get_zapImm(mask | dontcare)) + mask = mask | dontcare; + + if (get_zapImm(mask)) { + SDValue Z = + SDValue(CurDAG->getMachineNode(Sw64::ZAPNOTi, dl, MVT::i64, + N->getOperand(0).getOperand(0), + getI64Imm(get_zapImm(mask), dl)), + 0); + ReplaceNode(N, CurDAG->getMachineNode(Sw64::SRLi, dl, MVT::i64, Z, + getI64Imm(sval, dl))); + return; + } + } + break; + } + case ISD::BUILD_VECTOR: { + + BuildVectorSDNode *BVN = cast(N); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT ViaVecTy; + + if (!Subtarget->hasSIMD() || !BVN->getValueType(0).is256BitVector()) + return; + + if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, 8, false)) + break; + } + } + // Select the default instruction + SelectCode(N); +} + +void Sw64DAGToDAGISel::SelectCALL(SDNode *N) { + // TODO: add flag stuff to prevent nondeturministic breakage! 
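  // In outline (a sketch; operand order and glue wiring follow the node
  // builders below, and the mnemonics are illustrative): a GPRelLo callee
  // reloads the GP into $r29 and is reached with a direct BSR, anything else
  // is moved into $r27 and called indirectly:
  //   bsr $r26, callee            ; GP-relative target
  //   jsr $r26, ($r27)            ; all other callees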
+ + SDValue Chain = N->getOperand(0); + SDValue Addr = N->getOperand(1); + SDValue InFlag = N->getOperand(N->getNumOperands() - 1); + SDLoc dl(N); + if (Addr.getOpcode() == Sw64ISD::GPRelLo) { + SDValue GOT = SDValue(getGlobalBaseReg(), 0); + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R29, GOT, InFlag); + InFlag = Chain.getValue(1); + Chain = SDValue(CurDAG->getMachineNode(Sw64::BSR, dl, MVT::Other, MVT::Glue, + Addr.getOperand(0), Chain, InFlag), + 0); + } else { + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R27, Addr, InFlag); + InFlag = Chain.getValue(1); + SDValue Ops[] = {Chain, CurDAG->getRegister(Sw64::R27, MVT::i64), + N->getOperand(2), InFlag}; + Chain = SDValue( + CurDAG->getMachineNode(Sw64::JSR, dl, MVT::Other, MVT::Glue, Ops), 0); + } + InFlag = Chain.getValue(1); + + ReplaceUses(SDValue(N, 0), Chain); + ReplaceUses(SDValue(N, 1), InFlag); +} + +/// Match frameindex +bool Sw64DAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + EVT ValTy = Addr.getValueType(); + + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); + return true; + } + return false; +} + +/// Match frameindex+offset and frameindex|offset +bool Sw64DAGToDAGISel::selectAddrFrameIndexOffset( + SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits, + unsigned ShiftAmount = 0) const { + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) { + EVT ValTy = Addr.getValueType(); + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = + dyn_cast(Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else { + Base = Addr.getOperand(0); + // If base is a FI, additional offset calculation is done in + // eliminateFrameIndex, otherwise we need to check the alignment + const Align Alignment(1ULL << ShiftAmount); + if (!isAligned(Alignment, CN->getZExtValue())) + return false; + } + + Offset = + CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), ValTy); + return true; + } + } + return false; +} + +bool Sw64DAGToDAGISel::selectAddrRegImm9(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9)) + return true; + + return false; +} + +bool Sw64DAGToDAGISel::selectAddrRegImm16(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 16)) + return true; + + return false; +} + +bool Sw64DAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { + SDValue Base, Offset; + + switch (ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + // We need to make sure that this one operand does not end up in XZR, thus + // require the address to be in a PointerRegClass register. 
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); + SDLoc dl(Op); + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); + SDValue NewOp = + SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + Op.getValueType(), Op, RC), + 0); + OutOps.push_back(NewOp); + return false; + } + return true; +} + +bool Sw64DAGToDAGISel::tryIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM != ISD::POST_INC) + return false; + SDLoc dl(N); + MVT VT = LD->getMemoryVT().getSimpleVT(); + bool isFloat = false; + unsigned Opcode = 0; + switch (VT.SimpleTy) { + case MVT::i8: + Opcode = Sw64::LDBU_A; + break; + case MVT::i16: + Opcode = Sw64::LDHU_A; + break; + case MVT::i32: + Opcode = Sw64::LDW_A; + break; + case MVT::i64: + Opcode = Sw64::LDL_A; + break; + case MVT::f32: + Opcode = Sw64::LDS_A; + isFloat = true; + break; + case MVT::f64: + Opcode = Sw64::LDD_A; + isFloat = true; + break; + default: + return false; + } + SDValue Offset = LD->getOffset(); + int64_t Inc = cast(Offset.getNode())->getSExtValue(); + ReplaceNode( + N, CurDAG->getMachineNode(Opcode, SDLoc(N), isFloat ? VT : MVT::i64, + MVT::i64, MVT::Other, LD->getBasePtr(), + CurDAG->getTargetConstant(Inc, dl, MVT::i64), + LD->getChain())); + return true; +} + +bool Sw64DAGToDAGISel::tryIndexedStore(SDNode *N) { + StoreSDNode *ST = cast(N); + ISD::MemIndexedMode AM = ST->getAddressingMode(); + if (AM != ISD::POST_INC) + return false; + SDLoc dl(N); + MVT VT = ST->getMemoryVT().getSimpleVT(); + unsigned Opcode = 0; + switch (VT.SimpleTy) { + case MVT::i8: + Opcode = Sw64::STB_A; + break; + case MVT::i16: + Opcode = Sw64::STH_A; + break; + case MVT::i32: + Opcode = Sw64::STW_A; + break; + case MVT::i64: + Opcode = Sw64::STL_A; + break; + case MVT::f32: + Opcode = Sw64::STS_A; + break; + case MVT::f64: + Opcode = Sw64::STD_A; + break; + default: + return false; + } + MachineMemOperand *MemOp = ST->getMemOperand(); + SDValue From[2] = {SDValue(ST, 0), SDValue(ST, 1)}; + SDValue To[2]; + int64_t Inc = cast(ST->getOffset().getNode())->getSExtValue(); + SDValue Ops[] = {ST->getValue(), ST->getBasePtr(), + CurDAG->getTargetConstant(Inc, dl, MVT::i64), + ST->getChain()}; + MachineSDNode *S = + CurDAG->getMachineNode(Opcode, dl, MVT::i64, MVT::Other, Ops); + CurDAG->setNodeMemRefs(S, {MemOp}); + To[0] = SDValue(S, 0); + To[1] = SDValue(S, 1); + ReplaceUses(From, To, 2); + CurDAG->RemoveDeadNode(ST); + return true; +} + +/// ComplexPattern used on Sw64InstrInfo +/// Used on Sw64 Load/Store instructions +bool Sw64DAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); + return true; +} + +// Select constant vector splats. 
+// +// Returns true and sets Imm if: +// * MSA is enabled +// * N is a ISD::BUILD_VECTOR representing a constant splat +bool Sw64DAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, + unsigned MinSizeInBits) const { + BuildVectorSDNode *Node = dyn_cast(N); + + if (!Node) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + MinSizeInBits, false)) + return false; + + Imm = SplatValue; + + return true; +} + +bool Sw64DAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + + if ((Signed && ImmValue.isSignedIntN(ImmBitSize)) || + (!Signed && ImmValue.isIntN(ImmBitSize))) { + Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy); + return true; + } + } + + return false; +} + +// Select constant vector splats. +bool Sw64DAGToDAGISel::selectVSplatSimm8(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, true, 8); +} + +bool Sw64DAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 8); +} + +bool Sw64DAGToDAGISel::selectIntAddrSImm16(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool Sw64DAGToDAGISel::selectIntAddrSImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool Sw64DAGToDAGISel::SelectAddSubImm(SDValue N, MVT VT, SDValue &Imm) { + if (auto CNode = dyn_cast(N)) { + const int64_t ImmVal = CNode->getSExtValue(); + SDLoc DL(N); + + switch (VT.SimpleTy) { + case MVT::i8: + // Can always select i8s, no shift, mask the immediate value to + // deal with sign-extended value from lowering. + if (!isUInt<8>(ImmVal)) + return false; + Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i64); + return true; + case MVT::i16: + // i16 values get sign-extended to 32-bits during lowering. + Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i64); + return true; + break; + case MVT::i32: + case MVT::i64: + return false; + break; + default: + break; + } + } + + return false; +} + +bool Sw64DAGToDAGISel::SelectComplexImm(SDValue N, SDValue &Imm) { + if (auto CNode = dyn_cast(N)) { + const int64_t ImmVal = CNode->getSExtValue(); + SDLoc DL(N); + if (!isUInt<5>(ImmVal)) + return false; + Imm = CurDAG->getTargetConstant(ImmVal & 0x1F, DL, MVT::i64); + return true; + } + return false; +} + +/// createSw64ISelDag - This pass converts a legalized DAG into a +/// Sw64-specific DAG, ready for instruction scheduling. 
+/// +FunctionPass *llvm::createSw64ISelDag(Sw64TargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new Sw64DAGToDAGISel(TM, OptLevel); +} + +bool Sw64DAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { + if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast(N.getOperand(1))->getVT() == MVT::i32) { + Val = N.getOperand(0); + return true; + } + MVT VT = N.getSimpleValueType(); + if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { + Val = N; + return true; + } + + return false; +} + +bool Sw64DAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { + if (N.getOpcode() == ISD::AND) { + auto *C = dyn_cast(N.getOperand(1)); + if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { + Val = N.getOperand(0); + return true; + } + } + MVT VT = N.getSimpleValueType(); + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); + if (CurDAG->MaskedValueIsZero(N, Mask)) { + Val = N; + return true; + } + + return false; +} diff --git a/llvm/lib/Target/Sw64/Sw64ISelLowering.cpp b/llvm/lib/Target/Sw64/Sw64ISelLowering.cpp new file mode 100644 index 000000000000..2cccb036374e --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ISelLowering.cpp @@ -0,0 +1,3984 @@ +//===-- Sw64ISelLowering.cpp - Sw64 DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "Sw64ISelLowering.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64Subtarget.h" +#include "Sw64TargetMachine.h" +#include "Sw64TargetObjectFile.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSw64.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-lower" + +/// AddLiveIn - This helper function adds the specified physical register to the +/// MachineFunction as a live in value. It also creates a corresponding virtual +/// register for it. 
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, + const TargetRegisterClass *RC) { + assert(RC->contains(PReg) && "Not the correct regclass!"); + Register VReg = MF.getRegInfo().createVirtualRegister(RC); + MF.getRegInfo().addLiveIn(PReg, VReg); + return VReg; +} + +const char *Sw64TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((Sw64ISD::NodeType)Opcode) { + default: + return 0; + case Sw64ISD::CVTQT_: + return "Sw64::CVTQT_"; + case Sw64ISD::CVTQS_: + return "Sw64::CVTQS_"; + case Sw64ISD::CVTTQ_: + return "Sw64::CVTTQ_"; + case Sw64ISD::CVTST_: + return "Sw64::CVTST_"; + case Sw64ISD::CVTTS_: + return "Sw64::CVTTS_"; + case Sw64ISD::JmpLink: + return "Sw64::JmpLink"; + case Sw64ISD::Ret: + return "Sw64::Ret"; + case Sw64ISD::TPRelLo: + return "Sw64::TPRelLo"; + case Sw64ISD::TPRelHi: + return "Sw64::TPRelHi"; + case Sw64ISD::SysCall: + return "Sw64::SysCall"; + case Sw64ISD::LDAWC: + return "Sw64::Sw64_LDAWC"; + + case Sw64ISD::TLSGD: + return "Sw64::TLSGD"; + case Sw64ISD::DTPRelLo: + return "Sw64::DTPRelLo"; + case Sw64ISD::DTPRelHi: + return "Sw64::DTPRelHi"; + case Sw64ISD::TLSLDM: + return "Sw64::TLSLDM"; + case Sw64ISD::RelGottp: + return "Sw64::RelGottp"; + case Sw64ISD::GPRelHi: + return "Sw64::GPRelHi"; + case Sw64ISD::GPRelLo: + return "Sw64::GPRelLo"; + case Sw64ISD::RelLit: + return "Sw64::RelLit"; + case Sw64ISD::GlobalRetAddr: + return "Sw64::GlobalRetAddr"; + case Sw64ISD::CALL: + return "Sw64::CALL"; + case Sw64ISD::DivCall: + return "Sw64::DivCall"; + case Sw64ISD::RET_FLAG: + return "Sw64::RET_FLAG"; + case Sw64ISD::COND_BRANCH_I: + return "Sw64::COND_BRANCH_I"; + case Sw64ISD::COND_BRANCH_F: + return "Sw64::COND_BRANCH_F"; + case Sw64ISD::MEMBARRIER: + return "Sw64ISD::MEMBARRIER"; + + case Sw64ISD::GPRel: + return "Sw64ISD::GPRel"; + case Sw64ISD::TPRel: + return "Sw64ISD::TPRel"; + case Sw64ISD::DTPRel: + return "Sw64ISD::DTPRel"; + case Sw64ISD::LDIH: + return "Sw64ISD::LDIH"; + case Sw64ISD::LDI: + return "Sw64ISD::LDI"; + + case Sw64ISD::Z_S_FILLCS: + return "Sw64ISD::Z_S_FILLCS"; + case Sw64ISD::Z_S_FILLDE: + return "Sw64ISD::Z_S_FILLDE"; + case Sw64ISD::Z_FILLDE: + return "Sw64ISD::Z_FILLDE"; + case Sw64ISD::Z_FILLDE_E: + return "Sw64ISD::Z_FILLDE_E"; + case Sw64ISD::Z_FILLCS: + return "Sw64ISD::Z_FILLCS"; + case Sw64ISD::Z_FILLCS_E: + return "Sw64ISD::Z_FILLCS_E"; + case Sw64ISD::Z_E_FILLCS: + return "Sw64ISD::Z_E_FILLCS"; + case Sw64ISD::Z_E_FILLDE: + return "Sw64ISD::Z_E_FILLDE"; + case Sw64ISD::Z_FLUSHD: + return "Sw64ISD::Z_FLUSHD"; + + case Sw64ISD::FRECS: + return "Sw64ISD::FRECS"; + case Sw64ISD::FRECD: + return "Sw64ISD::FRECD"; + case Sw64ISD::SBT: + return "Sw64ISD::SBT"; + case Sw64ISD::REVBH: + return "Sw64ISD::REVBH"; + case Sw64ISD::REVBW: + return "Sw64ISD::REVBW"; + + case Sw64ISD::ROLW: + return "Sw64ISD::ROLW"; + case Sw64ISD::CRC32B: + return "Sw64ISD::CRC32B"; + case Sw64ISD::CRC32H: + return "Sw64ISD::CRC32H"; + case Sw64ISD::CRC32W: + return "Sw64ISD::CRC32W"; + case Sw64ISD::CRC32L: + return "Sw64ISD::CRC32L"; + case Sw64ISD::CRC32CB: + return "Sw64ISD::CRC32CB"; + case Sw64ISD::CRC32CH: + return "Sw64ISD::CRC32CH"; + case Sw64ISD::CRC32CW: + return "Sw64ISD::CRC32CW"; + case Sw64ISD::CRC32CL: + return "Sw64ISD::CRC32CL"; + + case Sw64ISD::VLDWE: + return "Sw64ISD::VLDWE"; + case Sw64ISD::VLDSE: + return "Sw64ISD::VLDSE"; + case Sw64ISD::VLDDE: + return "Sw64ISD::VLDDE"; + + case Sw64ISD::VNOR: + return "Sw64ISD::VNOR"; + case Sw64ISD::VEQV: + return "Sw64ISD::VEQV"; + case Sw64ISD::VORNOT: + return 
"Sw64ISD::VORNOT"; + case Sw64ISD::VSHF: + return "Sw64ISD::VSHF"; + case Sw64ISD::SHF: + return "Sw64ISD::SHF"; + case Sw64ISD::ILVEV: + return "Sw64ISD::ILVEV"; + case Sw64ISD::ILVOD: + return "Sw64ISD::ILVOD"; + case Sw64ISD::ILVL: + return "Sw64ISD::ILVL"; + case Sw64ISD::ILVR: + return "Sw64ISD::ILVR"; + case Sw64ISD::PCKEV: + return "Sw64ISD::PCKEV"; + case Sw64ISD::PCKOD: + return "Sw64ISD::PCKOD"; + case Sw64ISD::VMAX: + return "Sw64ISD::VMAX"; + case Sw64ISD::VMIN: + return "Sw64ISD::VMIN"; + case Sw64ISD::VUMAX: + return "Sw64ISD::VUMAX"; + case Sw64ISD::VUMIN: + return "Sw64ISD::VUMIN"; + case Sw64ISD::VFREC: + return "Sw64ISD::VFREC"; + case Sw64ISD::VFCMPEQ: + return "Sw64ISD::VFCMPEQ"; + case Sw64ISD::VFCMPLE: + return "Sw64ISD::VFCMPLE"; + case Sw64ISD::VFCMPLT: + return "Sw64ISD::VFCMPLT"; + case Sw64ISD::VFCMPUN: + return "Sw64ISD::VFCMPUN"; + case Sw64ISD::VFCVTSD: + return "Sw64ISD::VFCVTSD"; + case Sw64ISD::VFCVTDS: + return "Sw64ISD::VFCVTDS"; + case Sw64ISD::VFCVTLS: + return "Sw64ISD::VFCVTLS"; + case Sw64ISD::VFCVTLD: + return "Sw64ISD::VFCVTLD"; + case Sw64ISD::VFCVTSH: + return "Sw64ISD::VFCVTSH"; + case Sw64ISD::VFCVTHS: + return "Sw64ISD::VFCVTHS"; + case Sw64ISD::VFCVTDL: + return "Sw64ISD::VFCVTDL"; + case Sw64ISD::VFCVTDLG: + return "Sw64ISD::VFCVTDLG"; + case Sw64ISD::VFCVTDLP: + return "Sw64ISD::VFCVTDLP"; + case Sw64ISD::VFCVTDLZ: + return "Sw64ISD::VFCVTDLZ"; + case Sw64ISD::VFCVTDLN: + return "Sw64ISD::VFCVTDLN"; + case Sw64ISD::VFRIS: + return "Sw64ISD::VFRIS"; + case Sw64ISD::VFRISG: + return "Sw64ISD::VFRISG"; + case Sw64ISD::VFRISP: + return "Sw64ISD::VFRISP"; + case Sw64ISD::VFRISZ: + return "Sw64ISD::VFRISZ"; + case Sw64ISD::VFRISN: + return "Sw64ISD::VFRISN"; + case Sw64ISD::VFRID: + return "Sw64ISD::VFRID"; + case Sw64ISD::VFRIDG: + return "Sw64ISD::VFRIDG"; + case Sw64ISD::VFRIDP: + return "Sw64ISD::VFRIDP"; + case Sw64ISD::VFRIDZ: + return "Sw64ISD::VFRIDZ"; + case Sw64ISD::VFRIDN: + return "Sw64ISD::VFRIDN"; + case Sw64ISD::VMAXF: + return "Sw64ISD::VMAXF"; + case Sw64ISD::VMINF: + return "Sw64ISD::VMINF"; + case Sw64ISD::VCPYB: + return "Sw64ISD::VCPYB"; + case Sw64ISD::VCPYH: + return "Sw64ISD::VCPYH"; + + case Sw64ISD::VCON_W: + return "Sw64ISD::VCON_W"; + case Sw64ISD::VCON_S: + return "Sw64ISD::VCON_S"; + case Sw64ISD::VCON_D: + return "Sw64ISD::VCON_D"; + + case Sw64ISD::INSVE: + return "Sw64ISD::INSVE"; + case Sw64ISD::VCOPYF: + return "Sw64ISD::VCOPYF"; + case Sw64ISD::V8SLL: + return "Sw64ISD::V8SLL"; + case Sw64ISD::V8SLLi: + return "Sw64ISD::V8SLLi"; + case Sw64ISD::V8SRL: + return "Sw64ISD::V8SRL"; + case Sw64ISD::V8SRLi: + return "Sw64ISD::V8SRLi"; + case Sw64ISD::VROTR: + return "Sw64ISD::VROTR"; + case Sw64ISD::VROTRi: + return "Sw64ISD::VROTRi"; + case Sw64ISD::V8SRA: + return "Sw64ISD::V8SRA"; + case Sw64ISD::V8SRAi: + return "Sw64ISD::V8SRAi"; + case Sw64ISD::VROLB: + return "Sw64ISD::VROLB"; + case Sw64ISD::VROLBi: + return "Sw64ISD::VROLBi"; + case Sw64ISD::VROLH: + return "Sw64ISD::VROLH"; + case Sw64ISD::VROLHi: + return "Sw64ISD::VROLHi"; + case Sw64ISD::VROLL: + return "Sw64ISD::VROLL"; + case Sw64ISD::VROLLi: + return "Sw64ISD::VROLLi"; + case Sw64ISD::VCTPOP: + return "Sw64ISD::VCTPOP"; + case Sw64ISD::VCTLZ: + return "Sw64ISD::VCTLZ"; + + case Sw64ISD::VLOG: + return "Sw64ISD::VLOG"; + case Sw64ISD::VSETGE: + return "Sw64ISD::VSETGE"; + + case Sw64ISD::VSELEQW: + return "Sw64ISD::VSELEQW"; + case Sw64ISD::VSELLTW: + return "Sw64ISD::VSELLTW"; + case Sw64ISD::VSELLEW: + return "Sw64ISD::VSELLEW"; + case 
Sw64ISD::VSELLBCW: + return "Sw64ISD::VSELLBCW"; + + case Sw64ISD::VFCMOVEQ: + return "Sw64ISD::VFCMOVEQ"; + case Sw64ISD::VFCMOVLE: + return "Sw64ISD::VFCMOVLE"; + case Sw64ISD::VFCMOVLT: + return "Sw64ISD::VFCMOVLT"; + + case Sw64ISD::VECT_VUCADDW: + return "Sw64ISD::VECT_VUCADDW"; + case Sw64ISD::VECT_VUCADDH: + return "Sw64ISD::VECT_VUCADDH"; + case Sw64ISD::VECT_VUCADDB: + return "Sw64ISD::VECT_VUCADDB"; + case Sw64ISD::VECT_VUCSUBW: + return "Sw64ISD::VECT_VUCSUBW"; + case Sw64ISD::VECT_VUCSUBH: + return "Sw64ISD::VECT_VUCSUBH"; + case Sw64ISD::VECT_VUCSUBB: + return "Sw64ISD::VECT_VUCSUBB"; + + case Sw64ISD::VECREDUCE_FADD: + return "Sw64ISD::VECREDUCE_FADD"; + case Sw64ISD::VSHL_BY_SCALAR: + return "Sw64ISD::VSHL_BY_SCALAR"; + case Sw64ISD::VSRL_BY_SCALAR: + return "Sw64ISD::VSRL_BY_SCALAR"; + case Sw64ISD::VSRA_BY_SCALAR: + return "Sw64ISD::VSRA_BY_SCALAR"; + case Sw64ISD::VEXTRACT_SEXT_ELT: + return "Sw64ISD::VEXTRACT_SEXT_ELT"; + case Sw64ISD::VBROADCAST: + return "Sw64ISD::VBROADCAST"; + case Sw64ISD::VBROADCAST_LD: + return "Sw64ISD::VBROADCAST_LD"; + case Sw64ISD::VTRUNCST: + return "Sw64ISD::VTRUNCST"; + } + + return nullptr; +} + +Sw64TargetLowering::Sw64TargetLowering(const TargetMachine &TM, + const Sw64Subtarget &Subtarget) + : TargetLowering(TM), TM(TM), Subtarget(Subtarget) { + if (Subtarget.hasSIMD()) { + // Expand all truncating stores and extending loads. + for (MVT VT0 : MVT::vector_valuetypes()) { + for (MVT VT1 : MVT::vector_valuetypes()) { + setTruncStoreAction(VT0, VT1, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); + } + } + } + + // Set up the TargetLowering object. + // I am having problems with shr n i8 1 + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + addRegisterClass(MVT::i64, &Sw64::GPRCRegClass); + addRegisterClass(MVT::f64, &Sw64::F8RCRegClass); + addRegisterClass(MVT::f32, &Sw64::F4RCRegClass); + // We want to custom lower some of our intrinsics. 
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // Loads + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + } + + setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i8, Expand); // ldbu + setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i16, Expand); // ldhu + setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i32, Expand); // ldwu + + if (Subtarget.hasCore4() && Subtarget.enablePostInc()) { + for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64}) { + setIndexedLoadAction(ISD::POST_INC, VT, Legal); + setIndexedStoreAction(ISD::POST_INC, VT, Legal); + } + } + + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + + for (MVT VT : MVT::fp_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + } + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + + // Sw64 wants to turn select_cc of INT/FP into sel/fsel when possible. 
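+  // Expanding SELECT_CC lets the generic legalizer rewrite it as a SETCC
+  // feeding a SELECT, which the sel/fsel-style patterns can then match.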
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + if (Subtarget.hasCore4() && Subtarget.enableFloatCmov()) { + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + } else { + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + } + + setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom); + + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + setOperationAction(ISD::CTTZ, MVT::i64, Expand); + setOperationAction(ISD::CTLZ, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::UDIV, MVT::i128, Custom); + setOperationAction(ISD::SDIV, MVT::i128, Custom); + setOperationAction(ISD::UREM, MVT::i128, Custom); + setOperationAction(ISD::SREM, MVT::i128, Custom); + + if (!Subtarget.hasCore4() || !Subtarget.enableIntAri()) { + setOperationAction(ISD::SREM, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + setOperationAction(ISD::SDIV, MVT::i64, Custom); + setOperationAction(ISD::UDIV, MVT::i64, Custom); + } + + if (Subtarget.hasCore4() && Subtarget.enableByteInst()) { + setOperationAction(ISD::BSWAP, MVT::i64, Legal); + setOperationAction(ISD::BSWAP, MVT::i32, Legal); + setOperationAction(ISD::BSWAP, MVT::i16, Legal); + } else { + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + } + + if (Subtarget.hasCore4() && Subtarget.enableFloatRound()) { + for (MVT Ty : {MVT::f32, MVT::f64}) { + setOperationAction(ISD::FFLOOR, Ty, Legal); + setOperationAction(ISD::FNEARBYINT, Ty, Legal); + setOperationAction(ISD::FCEIL, Ty, Legal); + setOperationAction(ISD::FTRUNC, Ty, Legal); + setOperationAction(ISD::FROUND, Ty, Legal); + } + } + + setOperationAction(ISD::ADDC, MVT::i64, Expand); + setOperationAction(ISD::ADDE, MVT::i64, Expand); + setOperationAction(ISD::SUBC, MVT::i64, Expand); + setOperationAction(ISD::SUBE, MVT::i64, Expand); + + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + + setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); + + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // We don't support sin/cos/sqrt/pow + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + + setOperationAction(ISD::FSQRT, MVT::f64, Legal); + setOperationAction(ISD::FSQRT, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); + + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + + // We have fused 
multiply-addition for f32 and f64 but not f128. + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMA, MVT::f128, Expand); + + setOperationAction(ISD::SETCC, MVT::f32, Promote); + + setOperationAction(ISD::BITCAST, MVT::f32, Promote); + // Not implemented yet. + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + // We want to legalize GlobalAddress and ConstantPool and + // ExternalSymbols nodes into the appropriate instructions to + // materialize the address. + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i32, Custom); + + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i8, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i8, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i16, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i16, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom); + + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FADD, MVT::i128, Custom); + setStackPointerRegisterToSaveRestore(Sw64::R30); + + if (Subtarget.hasSIMD() || Subtarget.hasCore4()) { + // We want to custom lower some of our intrinsics. 
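+    // INTRINSIC_W_CHAIN is required for the chained load-style builtins
+    // (e.g. builtin_sw64_load below); INTRINSIC_VOID covers intrinsics that
+    // are invoked only for their side effects.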
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, + Custom); // for builtin_sw64_load + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + } + + if (Subtarget.hasSIMD()) { + addSIMDIntType(MVT::v32i8, &Sw64::V256LRegClass); + addSIMDIntType(MVT::v16i16, &Sw64::V256LRegClass); + addSIMDIntType(MVT::v8i32, &Sw64::V256LRegClass); + addSIMDIntType(MVT::v4i64, &Sw64::V256LRegClass); + addSIMDFloatType(MVT::v4f32, &Sw64::V256LRegClass); + addSIMDFloatType(MVT::v4f64, &Sw64::V256LRegClass); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine(ISD::XOR); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i16, Expand); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i8, Expand); + + setOperationAction(ISD::SETCC, MVT::v8i32, Legal); + setOperationAction(ISD::SETCC, MVT::v4i64, Expand); + setOperationAction(ISD::SETCC, MVT::v4f32, Legal); + setOperationAction(ISD::SETCC, MVT::v4f64, Expand); + + if (Subtarget.hasCore4()) + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) { + addRegisterClass(VT, &Sw64::V256LRegClass); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Expand); + } + else { + addRegisterClass(MVT::v8i32, &Sw64::V256LRegClass); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SRA, MVT::v8i32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i32, Custom); + } + } + + setOperationAction(ISD::FNEG, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v4f64, Legal); + + setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); + + if (Subtarget.hasCore4() && Subtarget.enableIntShift()) { + setOperationAction(ISD::ROTR, MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Custom); + } else { + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); + } + if (Subtarget.hasCore4() && Subtarget.enableFloatAri()) { + setOperationAction(ISD::FDIV, MVT::f32, Legal); + setOperationAction(ISD::FDIV, MVT::f64, Legal); + } + + // return R + setLibcallName(RTLIB::OEQ_F128, "_OtsEqlX"); + setLibcallName(RTLIB::UNE_F128, "_OtsNeqX"); + setLibcallName(RTLIB::UO_F128, "_OtsNeqX"); + setLibcallName(RTLIB::OLE_F128, "_OtsLeqX"); + setLibcallName(RTLIB::OLT_F128, "_OtsLssX"); + setLibcallName(RTLIB::OGE_F128, "_OtsGeqX"); + setLibcallName(RTLIB::OGT_F128, "_OtsGtrX"); + // return R16+R17 + setLibcallName(RTLIB::FPEXT_F64_F128, "_OtsConvertFloatTX"); + setLibcallName(RTLIB::FPEXT_F32_F128, "_OtsConvertFloatTX"); + setLibcallName(RTLIB::UINTTOFP_I64_F128, "_OtsCvtQUX"); + setLibcallName(RTLIB::UINTTOFP_I32_F128, "_OtsCvtQUX"); + setLibcallName(RTLIB::SINTTOFP_I32_F128, "_OtsCvtQX"); + setLibcallName(RTLIB::SINTTOFP_I64_F128, "_OtsCvtQX"); + // add round return R + setLibcallName(RTLIB::FPTOSINT_F128_I64, "_OtsCvtXQ"); + setLibcallName(RTLIB::FPTOUINT_F128_I64, "_OtsCvtXQ"); + setLibcallName(RTLIB::FPROUND_F128_F64, 
"_OtsConvertFloatXT"); + setLibcallName(RTLIB::FPROUND_F128_F32, "_OtsConvertFloatXT"); + // add round return R16+R17 + setLibcallName(RTLIB::ADD_F128, "_OtsAddX"); + setLibcallName(RTLIB::SUB_F128, "_OtsSubX"); + setLibcallName(RTLIB::MUL_F128, "_OtsMulX"); + setLibcallName(RTLIB::DIV_F128, "_OtsDivX"); + setOperationAction(ISD::CTPOP, MVT::i32, Promote); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + + setMinStackArgumentAlignment(Align(32)); + setMinFunctionAlignment(Align(8)); + setTargetDAGCombine(ISD::MUL); + + computeRegisterProperties(Subtarget.getRegisterInfo()); + MaxStoresPerMemsetOptSize = 16; + MaxStoresPerMemset = 16; + MaxStoresPerMemcpy = 4; + MaxStoresPerMemcpyOptSize = 4; +} + +bool Sw64TargetLowering::generateFMAsInMachineCombiner( + EVT VT, CodeGenOpt::Level OptLevel) const { + return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector(); +} + +EVT Sw64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { + // Refer to other. + if (!VT.isVector()) + return MVT::i64; + + return VT.changeVectorElementTypeToInteger(); +} + +#include "Sw64GenCallingConv.inc" + +static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + + return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); +} + +static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), + Flags); +} + +static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flag) { + + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), + N->getOffset(), Flags); +} + +// This function returns true if CallSym is a long double emulation routine. +static bool isF128SoftLibCall_void(const char *CallSym) { + const char *const LibCalls[] = { + "_OtsAddX", "_OtsConvertFloatTX", "_OtsCvtQUX", "_OtsCvtQX", + "_OtsDivX", "_OtsMulX", "_OtsSubX"}; + + // Check that LibCalls is sorted betically. + auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; + assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); + + return std::binary_search(std::begin(LibCalls), std::end(LibCalls), CallSym, + Comp); +} + +// This function returns true if CallSym is a long double emulation routine. +static bool isF128SoftLibCall_round(const char *CallSym) { + const char *const LibCalls[] = { + "_OtsAddX", "_OtsConvertFloatTX", "_OtsConvertFloatXT", + "_OtsCvtXQ", "_OtsDivX", "_OtsMulX", + "_OtsSubX"}; + + // Check that LibCalls is sorted betically. + auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; + assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); + + return std::binary_search(std::begin(LibCalls), std::end(LibCalls), CallSym, + Comp); +} + +// Enable SIMD support for the given integer type and Register class. +void Sw64TargetLowering::addSIMDIntType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + // for vfcmpxxs + setTruncStoreAction(MVT::v4i64, MVT::v4i32, Custom); + + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); + + setOperationAction(ISD::ROTL, Ty, Custom); + setOperationAction(ISD::ROTR, Ty, Expand); + setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::AND, Ty, Legal); + setOperationAction(ISD::MUL, Ty, Legal); + setOperationAction(ISD::OR, Ty, Legal); + setOperationAction(ISD::SDIV, Ty, Legal); + setOperationAction(ISD::SREM, Ty, Legal); + setOperationAction(ISD::SUB, Ty, Legal); + setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::UREM, Ty, Legal); + setOperationAction(ISD::UMAX, Ty, Legal); + setOperationAction(ISD::UMIN, Ty, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); + setOperationAction(ISD::XOR, Ty, Legal); + + setOperationAction(ISD::VECREDUCE_ADD, Ty, Legal); + + if (Ty == MVT::v8i32 || Ty == MVT::v4i64) { + setOperationAction(ISD::FP_TO_SINT, Ty, Legal); + setOperationAction(ISD::FP_TO_UINT, Ty, Legal); + setOperationAction(ISD::SINT_TO_FP, Ty, Legal); + setOperationAction(ISD::UINT_TO_FP, Ty, Legal); + } + setCondCodeAction(ISD::SETNE, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); +} + +// Enable SIMD support for the given floating-point type and Register class. +void Sw64TargetLowering::addSIMDFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + + setOperationAction(ISD::FCOPYSIGN, Ty, Legal); + + if (Ty != MVT::v16f16) { + setOperationAction(ISD::FABS, Ty, Expand); + setOperationAction(ISD::FADD, Ty, Legal); + setOperationAction(ISD::FDIV, Ty, Legal); + setOperationAction(ISD::FEXP2, Ty, Legal); + setOperationAction(ISD::FLOG2, Ty, Legal); + setOperationAction(ISD::FMA, Ty, Legal); + setOperationAction(ISD::FMUL, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + setOperationAction(ISD::FSQRT, Ty, Legal); + setOperationAction(ISD::FSUB, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETO, Ty, Custom); + setCondCodeAction(ISD::SETOGE, Ty, Expand); + setCondCodeAction(ISD::SETOGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); + } +} + +// Fold zero extensions into Sw64ISD::VEXTRACT_[SZ]EXT_ELT +// +// Performs the following transformations: +// - Changes Sw64ISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its +// sign/zero-extension is completely overwritten by the new one performed by +// the ISD::AND. 
+// - Removes redundant zero extensions performed by an ISD::AND.
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const Sw64Subtarget &Subtarget) {
+  return SDValue();
+}
+
+// Perform combines where ISD::OR is the root node.
+//
+// Performs the following transformations:
+// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
+//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
+//   vector type.
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+                                TargetLowering::DAGCombinerInfo &DCI,
+                                const Sw64Subtarget &Subtarget) {
+  return SDValue();
+}
+
+static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
+                                               SelectionDAG &DAG,
+                                               const Sw64Subtarget &Subtarget) {
+  unsigned MaxSteps = 4;
+  SmallVector<APInt, 16> WorkStack(1, C);
+  unsigned Steps = 0;
+  unsigned BitWidth = C.getBitWidth();
+
+  while (!WorkStack.empty()) {
+    APInt Val = WorkStack.pop_back_val();
+
+    if (Val == 0 || Val == 1)
+      continue;
+
+    if (Steps >= MaxSteps)
+      return false;
+
+    if (Val.isPowerOf2()) {
+      ++Steps;
+      continue;
+    }
+
+    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
+    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
+                                  : APInt(BitWidth, 1) << Val.ceilLogBase2();
+    if ((Val - Floor).ule(Ceil - Val)) {
+      WorkStack.push_back(Floor);
+      WorkStack.push_back(Val - Floor);
+    } else {
+      WorkStack.push_back(Ceil);
+      WorkStack.push_back(Ceil - Val);
+    }
+
+    ++Steps;
+  }
+  // If the value being multiplied is not supported natively, we have to pay
+  // an additional legalization cost, conservatively assume an increase in the
+  // cost of 3 instructions per step. The values for this heuristic were
+  // determined experimentally.
+  unsigned RegisterSize = DAG.getTargetLoweringInfo()
+                              .getRegisterType(*DAG.getContext(), VT)
+                              .getSizeInBits();
+  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
+  if (Steps > 27)
+    return false;
+
+  return true;
+}
+
+static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
+                            EVT ShiftTy, SelectionDAG &DAG) {
+  // Return 0.
+  if (C == 0)
+    return DAG.getConstant(0, DL, VT);
+
+  // Return x.
+  if (C == 1)
+    return X;
+
+  // If c is power of 2, return (shl x, log2(c)).
+  if (C.isPowerOf2())
+    return DAG.getNode(ISD::SHL, DL, VT, X,
+                       DAG.getConstant(C.logBase2(), DL, ShiftTy));
+
+  unsigned BitWidth = C.getBitWidth();
+  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
+  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0)
+                              : APInt(BitWidth, 1) << C.ceilLogBase2();
+
+  // If |c - floor_c| <= |c - ceil_c|,
+  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
+  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
+  if ((C - Floor).ule(Ceil - C)) {
+    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
+    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
+    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
+  }
+
+  // If |c - floor_c| > |c - ceil_c|,
+  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
+ SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); + SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); + return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); +} + +static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + EVT VT = N->getValueType(0); + + if (Subtarget.enOptMul()) + if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) + if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( + C->getAPIntValue(), VT, DAG, Subtarget)) + return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, + MVT::i64, DAG); + + return SDValue(N, 0); +} + +static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { + return SDValue(); +} + +static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { + return SDValue(); +} + +static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +SDValue Sw64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDValue Val; + + switch (N->getOpcode()) { + case ISD::AND: + Val = performANDCombine(N, DAG, DCI, Subtarget); + break; + case ISD::OR: + Val = performORCombine(N, DAG, DCI, Subtarget); + break; + case ISD::MUL: + return performMULCombine(N, DAG, DCI, Subtarget); + case ISD::SHL: + Val = performSHLCombine(N, DAG, DCI, Subtarget); + break; + case ISD::SRA: + return performSRACombine(N, DAG, DCI, Subtarget); + case ISD::SRL: + return performSRLCombine(N, DAG, DCI, Subtarget); + case ISD::VSELECT: + return performVSELECTCombine(N, DAG); + case ISD::XOR: + Val = performXORCombine(N, DAG, Subtarget); + break; + case ISD::SETCC: + Val = performSETCCCombine(N, DAG); + break; + } + + if (Val.getNode()) { + LLVM_DEBUG(dbgs() << "\nSw64 DAG Combine:\n"; + N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n"; + Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n"); + return Val; + } + + return Sw64TargetLowering::PerformDAGCombineV(N, DCI); +} + +/// ------------------------- scaler ------------------------------ /// + +static SDValue performDivRemCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSELECTCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performANDCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performORCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performADDCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSHLCombineV(SDNode *N, SelectionDAG &DAG, + 
                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const Sw64Subtarget &Subtarget) {
+  return SDValue();
+}
+
+SDValue Sw64TargetLowering::PerformDAGCombineV(SDNode *N,
+                                               DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  unsigned Opc = N->getOpcode();
+
+  switch (Opc) {
+  default:
+    break;
+  case ISD::SDIVREM:
+  case ISD::UDIVREM:
+    return performDivRemCombineV(N, DAG, DCI, Subtarget);
+  case ISD::SELECT:
+    return performSELECTCombineV(N, DAG, DCI, Subtarget);
+  case ISD::AND:
+    return performANDCombineV(N, DAG, DCI, Subtarget);
+  case ISD::OR:
+    return performORCombineV(N, DAG, DCI, Subtarget);
+  case ISD::ADD:
+    return performADDCombineV(N, DAG, DCI, Subtarget);
+  case ISD::SHL:
+    return performSHLCombineV(N, DAG, DCI, Subtarget);
+  }
+
+  return SDValue();
+}
+
+SDValue Sw64TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                                      SmallVectorImpl<SDValue> &InVals) const {
+
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc &dl = CLI.DL;
+  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  bool &isTailCall = CLI.IsTailCall;
+  CallingConv::ID CallConv = CLI.CallConv;
+  bool isVarArg = CLI.IsVarArg;
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  // Sw64 target does not yet support tail call optimization.
+  isTailCall = false;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+                 *DAG.getContext());
+
+  CCInfo.AnalyzeCallOperands(Outs, CC_Sw64);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getStackSize();
+  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+  SDValue StackPtr;
+  RegsToPass.push_back(std::make_pair((unsigned)Sw64::R27, Callee));
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+
+    SDValue Arg = OutVals[i];
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default:
+      assert(0 && "Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    }
+    // Arguments that can be passed on register must be kept at RegsToPass
+    // vector
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else {
+      assert(VA.isMemLoc());
+
+      if (StackPtr.getNode() == 0)
+        StackPtr = DAG.getCopyFromReg(Chain, dl, Sw64::R30, MVT::i64);
+
+      SDValue PtrOff =
+          DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr,
+                      DAG.getIntPtrConstant(VA.getLocMemOffset(), dl));
+
+      MemOpChains.push_back(
+          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
+    }
+  }
+  const ExternalSymbolSDNode *ES =
+      dyn_cast_or_null<ExternalSymbolSDNode>(Callee.getNode());
+  if (ES && isF128SoftLibCall_round(ES->getSymbol())) {
+    RegsToPass.push_back(std::make_pair(((unsigned)Sw64::R16) + ArgLocs.size(),
+                                        DAG.getConstant(2, dl, MVT::i64)));
+  }
+
+  // FIXME: Fix the error for clang-repl.
+
+  // Transform all store nodes into one single node because all store nodes are
+  // independent of each other.
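+  // A single TokenFactor joins the independent store chains, so the call
+  // only has to depend on one chain value rather than on each store.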
+
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag is
+  // necessary since all emitted instructions must be stuck together.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  // Fix the error for clang-repl.
+  // Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  if (!isTailCall) {
+    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+    assert(Mask && "Missing call preserved mask for calling convention");
+    Ops.push_back(DAG.getRegisterMask(Mask));
+  }
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+  Chain = DAG.getNode(Sw64ISD::JmpLink, dl, NodeTys, Ops);
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  Chain = DAG.getCALLSEQ_END(
+      Chain,
+      DAG.getConstant(NumBytes, dl, getPointerTy(DAG.getDataLayout()), true),
+      DAG.getConstant(0, dl, getPointerTy(DAG.getDataLayout()), true), InFlag,
+      dl);
+  InFlag = Chain.getValue(1);
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+                         InVals, CLI.Callee.getNode(), CLI.RetTy);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue Sw64TargetLowering::LowerCallResult(
+    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc &dl, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals, const SDNode *CallNode,
+    const Type *RetTy) const {
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+                 *DAG.getContext());
+  const ExternalSymbolSDNode *ES =
+      dyn_cast_or_null<ExternalSymbolSDNode>(CallNode);
+
+  if (ES && isF128SoftLibCall_void(ES->getSymbol()))
+    CCInfo.AnalyzeCallResult(Ins, RetCC_F128Soft_Sw64);
+  else
+    CCInfo.AnalyzeCallResult(Ins, RetCC_Sw64);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+
+    Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag)
+                .getValue(1);
+
+    SDValue RetValue = Chain.getValue(0);
+    InFlag = Chain.getValue(2);
+
+    // If this is an 8/16/32-bit value, it is really passed promoted to 64
+    // bits. Insert an assert[sz]ext to capture this, then truncate to the
+    // right size.
+
+    if (VA.getLocInfo() == CCValAssign::SExt)
+      RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+                             DAG.getValueType(VA.getValVT()));
+    else if (VA.getLocInfo() == CCValAssign::ZExt)
+      RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+                             DAG.getValueType(VA.getValVT()));
+
+    if (VA.getLocInfo() != CCValAssign::Full)
+      RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+    InVals.push_back(RetValue);
+  }
+
+  return Chain;
+}
+
+SDValue Sw64TargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  Sw64MachineFunctionInfo *FuncInfo = MF.getInfo<Sw64MachineFunctionInfo>();
+
+  unsigned args_int[] = {Sw64::R16, Sw64::R17, Sw64::R18,
+                         Sw64::R19, Sw64::R20, Sw64::R21};
+  unsigned args_float[] = {Sw64::F16, Sw64::F17, Sw64::F18,
+                           Sw64::F19, Sw64::F20, Sw64::F21};
+  unsigned args_vector[] = {Sw64::V16, Sw64::V17, Sw64::V18,
+                            Sw64::V19, Sw64::V20, Sw64::V21};
+
+  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+    SDValue argt;
+    EVT ObjectVT = Ins[ArgNo].VT;
+    SDValue ArgVal;
+    if (ArgNo < 6) {
+      switch (ObjectVT.getSimpleVT().SimpleTy) {
+      default:
+        assert(false && "Invalid value type!");
+      case MVT::f64:
+        args_float[ArgNo] =
+            AddLiveIn(MF, args_float[ArgNo], &Sw64::F8RCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+        break;
+      case MVT::f32:
+        args_float[ArgNo] =
+            AddLiveIn(MF, args_float[ArgNo], &Sw64::F4RCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+        break;
+      case MVT::i64:
+        args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo], &Sw64::GPRCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64);
+        break;
+      case MVT::v32i8:
+      case MVT::v16i16:
+      case MVT::v8i32:
+      case MVT::v4i64:
+      case MVT::v4f32:
+      case MVT::v4f64:
+        args_vector[ArgNo] =
+            AddLiveIn(MF, args_vector[ArgNo], &Sw64::V256LRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_vector[ArgNo], ObjectVT);
+        break;
+      }
+    } else { // more args
+      // Create the frame index object for this incoming parameter...
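+      // The first six arguments arrive in registers, so stack argument N
+      // lives in the fixed 8-byte slot at offset 8 * (N - 6) from the
+      // incoming stack pointer.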
+      int FI = MFI.CreateFixedObject(8, 8 * (ArgNo - 6), true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter
+      SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
+    }
+    InVals.push_back(ArgVal);
+  }
+
+  // If the function takes a variable number of arguments, copy all regs to
+  // the stack.
+  if (isVarArg) {
+    FuncInfo->setVarArgsOffset(Ins.size() * 8);
+    std::vector<SDValue> LS;
+    for (int i = 0; i < 6; ++i) {
+      if (Register::isPhysicalRegister(args_int[i]))
+        args_int[i] = AddLiveIn(MF, args_int[i], &Sw64::GPRCRegClass);
+      SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
+      int FI = MFI.CreateFixedObject(8, -8 * (6 - i), true);
+      if (i == 0)
+        FuncInfo->setVarArgsBase(FI);
+      SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo()));
+      if (Register::isPhysicalRegister(args_float[i]))
+        args_float[i] = AddLiveIn(MF, args_float[i], &Sw64::F8RCRegClass);
+      argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
+      FI = MFI.CreateFixedObject(8, -8 * (12 - i), true);
+      SDFI = DAG.getFrameIndex(FI, MVT::i64);
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo()));
+    }
+    // Set up a token factor with all the stack traffic
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LS);
+  }
+
+  return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+bool Sw64TargetLowering::CanLowerReturn(
+    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+  return CCInfo.CheckReturn(Outs, RetCC_Sw64);
+}
+
+SDValue
+Sw64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+                                bool isVarArg,
+                                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                const SmallVectorImpl<SDValue> &OutVals,
+                                const SDLoc &dl, SelectionDAG &DAG) const {
+
+  SDValue Copy = DAG.getCopyToReg(
+      Chain, dl, Sw64::R26, DAG.getNode(Sw64ISD::GlobalRetAddr, dl, MVT::i64),
+      SDValue());
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+
+  SDValue Flag;
+  unsigned outSize = Outs.size();
+  SmallVector<unsigned, 8> ArgReg(outSize);
+  for (unsigned j = 0, r = 0, f = 0, v = 0; j != outSize; j++) {
+    EVT ArgVT = Outs[j].VT;
+    switch (ArgVT.getSimpleVT().SimpleTy) {
+    default:
+      if (ArgVT.isInteger())
+        ArgReg[j] = Sw64::R0 + r++;
+      else
+        ArgReg[j] = Sw64::F0 + f++;
+      Copy =
+          DAG.getCopyToReg(Copy, dl, ArgReg[j], OutVals[j], Copy.getValue(1));
+
+      if (ArgVT.isInteger())
+        RetOps.push_back(DAG.getRegister(ArgReg[j], MVT::i64));
+      else
+        RetOps.push_back(DAG.getRegister(ArgReg[j], ArgVT.getSimpleVT()));
+      break;
+
+    case MVT::v32i8:
+    case MVT::v16i16:
+    case MVT::v8i32:
+    case MVT::v4i64:
+    case MVT::v4f32:
+    case MVT::v4f64:
+      ArgReg[j] = Sw64::V0 + v++;
+      Copy =
+          DAG.getCopyToReg(Copy, dl, ArgReg[j], OutVals[j], Copy.getValue(1));
+      RetOps.push_back(DAG.getRegister(ArgReg[j], ArgVT.getSimpleVT()));
+      break;
+    }
+  }
+
+  RetOps[0] = Copy;
+  RetOps.push_back(Copy.getValue(1));
+  return DAG.getNode(Sw64ISD::Ret, dl, MVT::Other, RetOps);
+}
+
+void Sw64TargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
+                                    SelectionDAG &DAG) const {
+
+  SDLoc dl(N);
+  Chain = N->getOperand(0);
+  SDValue VAListP = N->getOperand(1);
+  const Value *VAListS =
cast(N->getOperand(2))->getValue(); + unsigned Align = cast(N->getOperand(3))->getZExtValue(); + Align = std::max(Align,8u); + + SDValue Base = + DAG.getLoad(MVT::i64, dl, Chain, VAListP, MachinePointerInfo(VAListS)); + SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, + DAG.getConstant(8, dl, MVT::i64)); + SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), + Tmp, MachinePointerInfo(), MVT::i32); + DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); + if (N->getValueType(0).isFloatingPoint()) { + // if fp && Offset < 6*8, then subtract 6*8 from DataPtr + SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr, + DAG.getConstant(8 * 6, dl, MVT::i64)); + SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset, + DAG.getConstant(8 * 6, dl, MVT::i64), ISD::SETLT); + DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr); + } + SDValue NewOffset = DAG.getNode( + ISD::ADD, dl, MVT::i64, Offset, + DAG.getConstant(Align, dl, MVT::i64)); + Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, + MachinePointerInfo(), MVT::i32); +} + +/// LowerOperation - Provide custom lowering hooks for some operations. +SDValue Sw64TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + switch (Op.getOpcode()) { + default: + llvm_unreachable("Wasn't expecting to be able to lower this!"); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_VOID: + return LowerINTRINSIC_VOID(Op, DAG); + case ISD::SRL_PARTS: + return LowerSRL_PARTS(Op, DAG); + case ISD::SRA_PARTS: + return LowerSRA_PARTS(Op, DAG); + case ISD::SHL_PARTS: + return LowerSHL_PARTS(Op, DAG); + case ISD::SINT_TO_FP: + return LowerSINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: + return LowerFP_TO_SINT(Op, DAG); + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + return LowerFP_TO_INT_SAT(Op, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); + case ISD::BlockAddress: + return LowerBlockAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::ExternalSymbol: + return LowerExternalSymbol(Op, DAG); + case ISD::ATOMIC_FENCE: + return LowerATOMIC_FENCE(Op, DAG); + case ISD::ATOMIC_LOAD: + return LowerATOMIC_LOAD(Op, DAG); + case ISD::ATOMIC_STORE: + return LowerATOMIC_STORE(Op, DAG); + case ISD::OR: + return LowerOR(Op, DAG); + case ISD::UREM: + case ISD::SREM: + return LowerSUREM(Op, DAG); + // fall through + case ISD::SDIV: + case ISD::UDIV: + return LowerSUDIV(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case ISD::VACOPY: + return LowerVACOPY(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::RETURNADDR: + return DAG.getNode(Sw64ISD::GlobalRetAddr, dl, MVT::i64); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG); + case ISD::PREFETCH: + return LowerPREFETCH(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + return LowerVectorShift(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SETCC: + return LowerSETCC(Op, DAG); + case 
ISD::STORE:
+    return LowerSTORE(Op, DAG);
+  }
+
+  return SDValue();
+}
+
+SDValue Sw64TargetLowering::LowerVectorShift(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  // Look for cases where a vector shift can use the *_BY_SCALAR form.
+  // SDValue Op0 = Op.getOperand(0);
+  // SDValue Op1 = Op.getOperand(1);
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+
+  // See whether the shift vector is a splat represented as BUILD_VECTOR.
+  switch (Op.getOpcode()) {
+  default:
+    llvm_unreachable("unexpected vector opcode");
+  case ISD::ROTL:
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                       DAG.getConstant(Intrinsic::sw64_vrol, DL, MVT::i64),
+                       Op.getOperand(0), Op.getOperand(1));
+  case ISD::SHL:
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                       DAG.getConstant(Intrinsic::sw64_vsll, DL, MVT::i64),
+                       Op.getOperand(0), Op.getOperand(1));
+  case ISD::SRL:
+  case ISD::SRA:
+    unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::sw64_vsra
+                                                : Intrinsic::sw64_vsrl;
+
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                       DAG.getConstant(Opc, DL, MVT::i64), Op.getOperand(0),
+                       Op.getOperand(1));
+  }
+
+  // Otherwise just treat the current form as legal.
+  return Op;
+}
+
+// Lower Operand specifics
+SDValue Sw64TargetLowering::LowerJumpTable(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64:: begin lowJumpTable----\n");
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  // FIXME there isn't really any debug info here
+  SDLoc dl(Op);
+  return getAddr(JT, DAG);
+}
+
+SDValue Sw64TargetLowering::LowerConstantPool(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64:: begin lowConstantPool----\n");
+  SDLoc dl(Op);
+  SDLoc DL(Op);
+  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+  // FIXME there isn't really any debug info here
+  return getAddr(N, DAG);
+}
+
+SDValue Sw64TargetLowering::LowerBlockAddress(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64:: begin lowBlockAddress----\n");
+  SDLoc dl(Op);
+  SDLoc DL(Op);
+
+  BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
+  return getAddr(BA, DAG);
+}
+
+SDValue Sw64TargetLowering::LowerGlobalAddress(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64:: begin lowGlobalAddress----\n");
+  SDLoc dl(Op);
+  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *GV = GSDN->getGlobal();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, GSDN->getOffset());
+  // FIXME there isn't really any debug info here
+  if (GV->hasLocalLinkage()) {
+    return getAddr(GSDN, DAG);
+  } else
+    return DAG.getNode(Sw64ISD::RelLit, dl, MVT::i64, GA,
+                       DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+}
+
+template <class NodeTy>
+SDValue Sw64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64TargetLowering:: getAddr");
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  SDLoc DL(N);
+
+  switch (getTargetMachine().getCodeModel()) {
+  default:
+    report_fatal_error("Unsupported code model for lowering");
+  case CodeModel::Small:
+  case CodeModel::Medium: {
+    SDValue Hi = getTargetNode(N, DL, Ty, DAG, Sw64II::MO_GPREL_HI);
+    SDValue Lo = getTargetNode(N, DL, Ty, DAG, Sw64II::MO_GPREL_LO);
+    SDValue MNHi = DAG.getNode(Sw64ISD::LDIH, DL, Ty, Hi);
+    return DAG.getNode(Sw64ISD::LDI, DL, Ty, MNHi, Lo);
+  }
+  }
+}
+
+SDValue Sw64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+
+  // If the relocation model is PIC, use the General Dynamic TLS Model or
+  // Local Dynamic TLS model, otherwise use the Initial Exec or
+  // Local Exec TLS Model.
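+  //
+  // General and local dynamic call __tls_get_addr with a TLSGD/TLSLDM
+  // relocated argument; initial exec loads the tp-relative offset through a
+  // GOTTPREL entry, and local exec materializes it with TPREL_HI/TPREL_LO.
+  // Both of the exec models add the offset to the thread-pointer value
+  // produced by the SysCall node emitted below.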
+ + GlobalAddressSDNode *GSDN = cast(Op); + if (DAG.getTarget().useEmulatedTLS()) + return LowerToTLSEmulatedModel(GSDN, DAG); + + SDLoc dl(Op); + const GlobalValue *GV = GSDN->getGlobal(); + + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + TLSModel::Model model = getTargetMachine().getTLSModel(GV); + + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { + // General Dynamic == tlsgd + // LocalDynamic == tlsldm + // GA == TGA + SDValue Argument; + if (model == TLSModel::GeneralDynamic) { + SDValue Addr = + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Sw64II::MO_TLSGD); + Argument = + SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, Addr, + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)), + 0); + } else { + SDValue Addr = + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Sw64II::MO_TLSLDM); + Argument = + SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, Addr, + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)), + 0); + } + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Argument; + Entry.Ty = PtrTy; + Args.push_back(Entry); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args)); + std::pair CallResult = LowerCallTo(CLI); + + SDValue Ret = CallResult.first; + if (model != TLSModel::LocalDynamic) + return Ret; + + SDValue DTPHi = DAG.getTargetGlobalAddress( + GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_DTPREL_HI); + SDValue DTPLo = DAG.getTargetGlobalAddress( + GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_DTPREL_LO); + + SDValue Hi = + SDValue(DAG.getMachineNode(Sw64::LDAH, dl, MVT::i64, DTPHi, Ret), 0); + return SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, DTPLo, Hi), 0); + } + + if (model == TLSModel::InitialExec) { + // Initial Exec TLS Model //gottprel + SDValue Gp = DAG.getGLOBAL_OFFSET_TABLE(MVT::i64); + SDValue Addr = + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Sw64II::MO_GOTTPREL); + SDValue RelDisp = + SDValue(DAG.getMachineNode(Sw64::LDL, dl, MVT::i64, Addr, Gp), 0); + SDValue SysCall = DAG.getNode(Sw64ISD::SysCall, dl, MVT::i64, + DAG.getConstant(0x9e, dl, MVT::i64)); + return SDValue( + DAG.getMachineNode(Sw64::ADDQr, dl, MVT::i64, RelDisp, SysCall), 0); + } else { + // Local Exec TLS Model //tprelHi tprelLo + assert(model == TLSModel::LocalExec); + SDValue SysCall = DAG.getNode(Sw64ISD::SysCall, dl, MVT::i64, + DAG.getConstant(0x9e, dl, MVT::i64)); + SDValue TPHi = DAG.getTargetGlobalAddress( + GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_TPREL_HI); + SDValue TPLo = DAG.getTargetGlobalAddress( + GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_TPREL_LO); + SDValue Hi = + SDValue(DAG.getMachineNode(Sw64::LDAH, dl, MVT::i64, TPHi, SysCall), 0); + return SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, TPLo, Hi), 0); + } +} + +static bool isCrossINSMask(ArrayRef M, EVT VT) { + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned i = 0; i < NumElts; i++) { + unsigned idx = i / 2; + if (M[i] < 0) + return false; + if (M[i] != idx && (M[i] - NumElts) != idx) + return false; + } + return true; +} + +static SDValue GenerateVectorShuffle(SDValue Op, EVT VT, SelectionDAG &DAG, + SDLoc dl) { + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + ArrayRef ShuffleMask = SVN->getMask(); + if (ShuffleMask.size() > 8) + return SDValue(); + + unsigned NewMask; + if 
(VT == MVT::v8i32) { + for (int i = (ShuffleMask.size() - 1); i >= 0; i--) { + NewMask = NewMask << 4; + int idx = ShuffleMask[i]; + int bits = idx > 7 ? 1 : 0; + idx = idx > 7 ? (idx - 8) : idx; + NewMask |= (bits << 3) | idx; + } + } else if (VT == MVT::v4i64 || VT == MVT::v4f32 || VT == MVT::v4f64) { + for (int i = ShuffleMask.size() * 2 - 1; i >= 0; i--) { + NewMask = NewMask << 4; + int idx = ShuffleMask[i / 2]; + int bits = idx > 3 ? 1 : 0; + int mod = i % 2; + idx = idx > 3 ? (idx * 2 + mod - 8) : idx * 2 + mod; + NewMask |= (bits << 3) | idx; + } + } + + SDValue ConstMask = DAG.getConstant(NewMask, dl, MVT::i64); + return DAG.getNode(Sw64ISD::VSHF, dl, VT, Op.getOperand(0), Op.getOperand(1), + ConstMask); +} + +SDValue Sw64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + // Convert shuffles that are directly supported on NEON to target-specific + // DAG nodes, instead of keeping them as shuffles and matching them again + // during code selection. This is more efficient and avoids the possibility + // of inconsistencies between legalization and selection. + ArrayRef ShuffleMask = SVN->getMask(); + + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!"); + assert(ShuffleMask.size() == VT.getVectorNumElements() && + "Unexpected VECTOR_SHUFFLE mask size!"); + + if (SVN->isSplat()) { + int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane == -1) + Lane = 0; + + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) + return DAG.getNode(Sw64ISD::VBROADCAST, dl, V1.getValueType(), + V1.getOperand(0)); + + // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non- + // constant. If so, we can just reference the lane's definition directly. + if (V1.getOpcode() == ISD::BUILD_VECTOR && + !isa(V1.getOperand(Lane))) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, V1.getValueType(), + V1.getOperand(Lane)); + return DAG.getNode(Sw64ISD::VBROADCAST, dl, VT, Ext); + } + } + if (isCrossINSMask(ShuffleMask, VT)) + return DAG.getNode(Sw64ISD::VINSECTL, dl, VT, V1, V2); + + // SmallVector NewMask; + SDValue Tmp1 = GenerateVectorShuffle(Op, VT, DAG, dl); + + return Tmp1; +} + +SDValue Sw64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + unsigned NewIntrinsic; + EVT VT = Op.getValueType(); + switch (IntNo) { + default: + break; // Don't custom lower most intrinsics. 
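+  // Note on the cases below: the crc32*/sbt/cbt handling relies on
+  // LLVM_FALLTHROUGH, so when the Core4 subtarget checks fail, control falls
+  // through to the final "return Op" after the sw64_cbt case and the
+  // intrinsic is left for default handling.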
+ case Intrinsic::sw64_umulh: + return DAG.getNode(ISD::MULHU, dl, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32b: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32B, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32h: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32H, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32w: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32W, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32l: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32L, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32cb: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32CB, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32ch: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32CH, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32cw: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32CW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32cl: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32CL, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_sbt: + if (Subtarget.hasCore4() && Subtarget.enableSCbtInst()) + return DAG.getNode(Sw64ISD::SBT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_cbt: + if (Subtarget.hasCore4() && Subtarget.enableSCbtInst()) + return DAG.getNode(Sw64ISD::CBT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + return Op; + case Intrinsic::sw64_vsllb: + case Intrinsic::sw64_vsllh: + case Intrinsic::sw64_vsllw: + case Intrinsic::sw64_vslll: + NewIntrinsic = Intrinsic::sw64_vsll; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::sw64_vsrlb: + case Intrinsic::sw64_vsrlh: + case Intrinsic::sw64_vsrlw: + case Intrinsic::sw64_vsrll: + NewIntrinsic = Intrinsic::sw64_vsrl; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(1), Op.getOperand(2)); + // Fallthough + case Intrinsic::sw64_vsrab: + case Intrinsic::sw64_vsrah: + case Intrinsic::sw64_vsraw: + case Intrinsic::sw64_vsral: + NewIntrinsic = Intrinsic::sw64_vsra; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::sw64_vrolb: + case Intrinsic::sw64_vrolh: + case Intrinsic::sw64_vrolw: + case Intrinsic::sw64_vroll: + NewIntrinsic = Intrinsic::sw64_vrol; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::sw64_vlogzz: + return DAG.getNode(Sw64ISD::VLOG, dl, VT, Op.getOperand(1), + 
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); + case Intrinsic::sw64_vmaxb: + case Intrinsic::sw64_vmaxh: + case Intrinsic::sw64_vmaxw: + case Intrinsic::sw64_vmaxl: + return DAG.getNode(Sw64ISD::VMAX, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vumaxb: + case Intrinsic::sw64_vumaxh: + case Intrinsic::sw64_vumaxw: + case Intrinsic::sw64_vumaxl: + return DAG.getNode(Sw64ISD::VUMAX, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vminb: + case Intrinsic::sw64_vminh: + case Intrinsic::sw64_vminw: + case Intrinsic::sw64_vminl: + return DAG.getNode(Sw64ISD::VMIN, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vuminb: + case Intrinsic::sw64_vuminh: + case Intrinsic::sw64_vuminw: + case Intrinsic::sw64_vuminl: + return DAG.getNode(Sw64ISD::VUMIN, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vmaxs: + case Intrinsic::sw64_vmaxd: + return DAG.getNode(Sw64ISD::VMAXF, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vmins: + case Intrinsic::sw64_vmind: + return DAG.getNode(Sw64ISD::VMINF, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + + case Intrinsic::sw64_vseleqw: + case Intrinsic::sw64_vseleqwi: + return DAG.getNode(Sw64ISD::VSELEQW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vselltw: + case Intrinsic::sw64_vselltwi: + return DAG.getNode(Sw64ISD::VSELLTW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vsellew: + case Intrinsic::sw64_vsellewi: + return DAG.getNode(Sw64ISD::VSELLEW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vsellbcw: + case Intrinsic::sw64_vsellbcwi: + return DAG.getNode(Sw64ISD::VSELLBCW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vsqrts: + case Intrinsic::sw64_vsqrtd: + return DAG.getNode(Sw64ISD::VSQRT, dl, Op->getValueType(0), + Op->getOperand(1)); + + case Intrinsic::sw64_vsums: + case Intrinsic::sw64_vsumd: + return DAG.getNode(Sw64ISD::VSUMF, dl, Op->getValueType(0), + Op->getOperand(1)); + + case Intrinsic::sw64_vfrecs: + case Intrinsic::sw64_vfrecd: + return DAG.getNode(Sw64ISD::VFREC, dl, Op->getValueType(0), + Op->getOperand(1)); + + case Intrinsic::sw64_vfcmpeqs: + case Intrinsic::sw64_vfcmpeqd: + return DAG.getNode(Sw64ISD::VFCMPEQ, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vfcmples: + case Intrinsic::sw64_vfcmpled: + return DAG.getNode(Sw64ISD::VFCMPLE, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vfcmplts: + case Intrinsic::sw64_vfcmpltd: + return DAG.getNode(Sw64ISD::VFCMPLT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vfcmpuns: + case Intrinsic::sw64_vfcmpund: + return DAG.getNode(Sw64ISD::VFCMPUN, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + + case Intrinsic::sw64_vfcvtsd: + return DAG.getNode(Sw64ISD::VFCVTSD, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtds: + return DAG.getNode(Sw64ISD::VFCVTDS, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtls: + return DAG.getNode(Sw64ISD::VFCVTLS, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtld: 
+ return DAG.getNode(Sw64ISD::VFCVTLD, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtsh: + return DAG.getNode(Sw64ISD::VFCVTSH, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vfcvths: + return DAG.getNode(Sw64ISD::VFCVTHS, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + + case Intrinsic::sw64_vfcvtdl: + return DAG.getNode(Sw64ISD::VFCVTDL, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtdl_g: + return DAG.getNode(Sw64ISD::VFCVTDLG, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtdl_p: + return DAG.getNode(Sw64ISD::VFCVTDLP, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtdl_z: + return DAG.getNode(Sw64ISD::VFCVTDLZ, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtdl_n: + return DAG.getNode(Sw64ISD::VFCVTDLN, dl, Op->getValueType(0), + Op->getOperand(1)); + + case Intrinsic::sw64_vfris: + return DAG.getNode(Sw64ISD::VFRIS, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfris_g: + return DAG.getNode(Sw64ISD::VFRISG, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfris_p: + return DAG.getNode(Sw64ISD::VFRISP, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfris_z: + return DAG.getNode(Sw64ISD::VFRISZ, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfris_n: + return DAG.getNode(Sw64ISD::VFRISN, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid: + return DAG.getNode(Sw64ISD::VFRID, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid_g: + return DAG.getNode(Sw64ISD::VFRIDG, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid_p: + return DAG.getNode(Sw64ISD::VFRIDP, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid_z: + return DAG.getNode(Sw64ISD::VFRIDZ, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid_n: + return DAG.getNode(Sw64ISD::VFRIDN, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vextw: + case Intrinsic::sw64_vextl: + case Intrinsic::sw64_vextfs: + case Intrinsic::sw64_vextfd: + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vfseleqs: + case Intrinsic::sw64_vfseleqd: + return DAG.getNode(Sw64ISD::VFCMOVEQ, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vfselles: + case Intrinsic::sw64_vfselled: + return DAG.getNode(Sw64ISD::VFCMOVLE, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vfsellts: + case Intrinsic::sw64_vfselltd: + return DAG.getNode(Sw64ISD::VFCMOVLT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vshfw: + return DAG.getNode(Sw64ISD::VSHF, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + } + return Op; +} + +SDValue Sw64TargetLowering::LowerVectorMemIntr(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + EVT VT = Op.getValueType(); + LLVM_DEBUG(dbgs() << "Custom Lower Vector Memory Intrinsics\n"; Op.dump();); + SDValue Args = Op.getOperand(2); + switch (IntNo) { + default: + break; + case Intrinsic::sw64_vload: + return DAG.getNode(ISD::LOAD, dl, VT, Args); + 
} + return Op; +} + +SDValue Sw64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = Op.getConstantOperandVal(1); + unsigned NewIntrinsic; + EVT VT = Op.getValueType(); + switch (IntNo) { + default: + break; // Don't custom lower most intrinsics. + case Intrinsic::sw64_vloadu: { + SDValue Chain = Op->getOperand(0); + SDVTList VTs = DAG.getVTList(VT.getSimpleVT().SimpleTy, MVT::Other); + NewIntrinsic = Intrinsic::sw64_vload_u; + SDValue VLOAD_U1 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Chain, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(2)); + SDValue Hiaddr = + DAG.getNode(ISD::ADD, dl, MVT::i64, + DAG.getConstant((VT == MVT::v4f32 ? 16 : 32), dl, MVT::i64), + Op->getOperand(2)); + SDValue VLOAD_U2 = + DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Chain, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), Hiaddr); + + switch (VT.getSimpleVT().SimpleTy) { + default: + break; + case MVT::v8i32: + NewIntrinsic = Intrinsic::sw64_vconw; + break; + case MVT::v4f32: + NewIntrinsic = Intrinsic::sw64_vcons; + break; + case MVT::v4f64: + case MVT::v4i64: + NewIntrinsic = Intrinsic::sw64_vcond; + break; + } + return DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Chain, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), VLOAD_U1, + VLOAD_U2, Op->getOperand(2)); + } + } + return SDValue(); +} + +SDValue Sw64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + unsigned NewIntrinsic; + EVT VT = Op.getValueType(); + EVT VTOperand2 = Op.getOperand(2).getValueType(); + switch (IntNo) { + case Intrinsic::sw64_vstoreu: { + NewIntrinsic = Intrinsic::sw64_vstoreul; + SDValue VSTOREUL = + DAG.getNode(ISD::INTRINSIC_VOID, dl, VT, Op.getOperand(0), + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(2), Op.getOperand(3)); + + SDValue Hiaddr = DAG.getNode( + ISD::ADD, dl, MVT::i64, + DAG.getConstant((VTOperand2 == MVT::v4f32 ? 
16 : 32), dl, MVT::i64), + Op->getOperand(3)); + NewIntrinsic = Intrinsic::sw64_vstoreuh; + return DAG.getNode(ISD::INTRINSIC_VOID, dl, VT, VSTOREUL, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(2), Hiaddr); + } + default: + break; + } + return Op; +} + +SDValue Sw64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + MVT VecVT = Vec.getSimpleValueType(); + SDValue Idx = Op.getOperand(1); + MVT EltVT = VecVT.getVectorElementType(); + if (EltVT != MVT::i32 && EltVT != MVT::f32 && EltVT != MVT::f64) + return SDValue(); + + if (!dyn_cast(Idx)) + return SDValue(); + + SDValue tmp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Vec, Idx); + return tmp; + // return DAG.getAnyExtOrTrunc(tmp, dl, MVT::i32); +} + +SDValue Sw64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue Idx = Op.getOperand(2); + + if (!dyn_cast(Idx)) + return SDValue(); + + return Op; +} + +static bool isConstantOrUndef(const SDValue Op) { + if (Op->isUndef()) + return true; + if (isa(Op)) + return true; + if (isa(Op)) + return true; + return false; +} + +static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { + for (unsigned i = 0; i < Op->getNumOperands(); ++i) + if (isConstantOrUndef(Op->getOperand(i))) + return true; + return false; +} + +SDValue Sw64TargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + BuildVectorSDNode *Node = cast(Op); + SDLoc dl(Op); + MVT VecVT = Op.getSimpleValueType(); + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Subtarget.hasSIMD() || !ResTy.is256BitVector()) + return SDValue(); + + if (VecVT.isInteger()) { + // Certain vector constants, used to express things like logical NOT and + // arithmetic NEG, are passed through unmodified. This allows special + // patterns for these operations to match, which will lower these constants + // to whatever is proven necessary. + BuildVectorSDNode *BVN = cast(Op.getNode()); + if (BVN->isConstant()) + if (ConstantSDNode *Const = BVN->getConstantSplatNode()) { + unsigned BitSize = VecVT.getVectorElementType().getSizeInBits(); + APInt Val(BitSize, + Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue()); + if (Val.isZero() || Val.isAllOnes()) + return Op; + } + } + MVT ElemTy = Op->getSimpleValueType(0).getScalarType(); + unsigned ElemBits = ElemTy.getSizeInBits(); + + if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, false) && + SplatBitSize <= 64 && ElemBits == SplatBitSize) { + // We can only cope with 8, 16, 32, or 64-bit elements + if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && + SplatBitSize != 64) + return SDValue(); + + // If the value isn't an integer type we will have to bitcast + // from an integer type first. Also, if there are any undefs, we must + // lower them to defined values first. + if (ResTy.isInteger() && !HasAnyUndefs) { + return DAG.getNode(Sw64ISD::VBROADCAST, dl, ResTy, Op.getOperand(1)); + } + + EVT ViaVecTy; + + switch (SplatBitSize) { + default: + return SDValue(); + case 8: + ViaVecTy = MVT::v32i8; + break; + case 16: + ViaVecTy = MVT::v16i16; + break; + case 32: + ViaVecTy = MVT::v8i32; + break; + case 64: + ViaVecTy = MVT::v4i64; + break; + } + + // SelectionDAG::getConstant will promote SplatValue appropriately. 
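+    // Example: a v4f64 constant splat reaches this point with
+    // SplatBitSize == 64, so ViaVecTy is v4i64; the splat is materialized as
+    // a v4i64 integer constant and bitcast back to v4f64 below.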
+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); + + // Bitcast to the type we originally wanted + if (ViaVecTy != ResTy) + Result = DAG.getNode(ISD::BITCAST, dl, ResTy, Result); + + return Result; + } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) { + return DAG.getNode(Sw64ISD::VBROADCAST, dl, ResTy, Op.getOperand(1)); + } else if (!isConstantOrUndefBUILD_VECTOR(Node)) { + // Use INSERT_VECTOR_ELT operations rather than expand to stores. + // The resulting code is the same length as the expansion, but it doesn't + // use memory operations. + EVT ResTy = Node->getValueType(0); + + assert(ResTy.isVector()); + + unsigned NumElts = ResTy.getVectorNumElements(); + SDValue Vector = DAG.getUNDEF(ResTy); + for (unsigned i = 0; i < NumElts; ++i) { + Vector = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, + Node->getOperand(i), DAG.getConstant(i, DL, MVT::i64)); + } + return Vector; + } + + return SDValue(); +} + +SDValue Sw64TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode &Nd = *cast<StoreSDNode>(Op); + + if (Nd.getMemoryVT() != MVT::v4i32) + return Op; + + // Replace a v4i64-to-v4i32 truncating store with a VTRUNCST node. + SDLoc DL(Op); + + SDValue Val = Op->getOperand(1); + + return DAG.getMemIntrinsicNode(Sw64ISD::VTRUNCST, DL, + DAG.getVTList(MVT::Other), + {Nd.getChain(), Val, Nd.getBasePtr()}, + Nd.getMemoryVT(), Nd.getMemOperand()); +} + +SDValue Sw64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + // Sw64 does not produce a generic v4i64 setcc result, but a v4f64/f32 + // result of 2.0. An additional compare is needed to reverse the result. + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + SDLoc DL(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + // TODO: Truncate the v4i64 compare to v4f64. + // Sw64 doesn't have a v4i64 compare. Due to how LLVM legalizes vectors, all + // comparisons are processed as integers, so a 64-bit-element vector compare + // result is v4i64. + // So we have to do it for now.
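+  // For SETO ("ordered"), the code below emits a SETUO compare, bitcasts the
+  // v4i64 mask to v4f64, and compares it for ordered-equality against
+  // register V31 (assumed to read as zero), which inverts the mask.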
+ if (LHS.getValueType() == MVT::v4i64 && RHS.getValueType() == MVT::v4i64) { + return SDValue(); + } + + if (CC != ISD::SETO) + return Op; + + SDValue Res = DAG.getSetCC(DL, MVT::v4i64, Op.getOperand(0), Op.getOperand(1), + ISD::SETUO); + SDValue Zero = DAG.getRegister(Sw64::V31, MVT::v4f64); + SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::v4f64, Res); + return DAG.getSetCC(DL, MVT::v4i64, Cast, Zero, ISD::SETOEQ); +} + +SDValue Sw64TargetLowering::LowerSHL_PARTS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = MVT::i64; + + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + // if shamt < (VT.bits): + // lo = (shl lo, shamt) + // hi = (or (shl hi, shamt) (srl (srl lo, 1), (xor shamt, (VT.bits-1))) + // else: + // lo = 0 + // hi = (shl lo, shamt[4:0]) + SDValue Not = + DAG.getNode(ISD::XOR, DL, MVT::i64, Shamt, + DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i64)); + SDValue ShiftRight1Lo = + DAG.getNode(ISD::SRL, DL, VT, Lo, DAG.getConstant(1, DL, VT)); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); + SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i64, Shamt, + DAG.getConstant(VT.getSizeInBits(), DL, MVT::i64)); + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, DAG.getConstant(0, DL, VT), + ShiftLeftLo); + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, DL); +} + +SDValue Sw64TargetLowering::LowerSRL_PARTS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64, + DAG.getConstant(64, dl, MVT::i64), ShAmt); + SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm, + DAG.getConstant(0, dl, MVT::i64), ISD::SETLE); + // if 64 - shAmt <= 0 + SDValue Hi_Neg = DAG.getConstant(0, dl, MVT::i64); + SDValue ShAmt_Neg = + DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(0, dl, MVT::i64), bm); + SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg); + // else + SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm); + SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt); + SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt); + Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries); + // Merge + SDValue Hit = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos); + SDValue Lot = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos); + SDValue BMCC1 = DAG.getSetCC(dl, MVT::i64, ShAmt, + DAG.getConstant(0, dl, MVT::i64), ISD::SETEQ); + SDValue BMCC2 = DAG.getSetCC(dl, MVT::i64, ShAmt, + DAG.getConstant(64, dl, MVT::i64), ISD::SETEQ); + SDValue Hit1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpHi, Hit); + SDValue Lot1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpLo, Lot); + SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, + DAG.getConstant(0, dl, MVT::i64), Hit1); + SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, ShOpHi, Lot1); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, dl); +} + +SDValue Sw64TargetLowering::LowerSRA_PARTS(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + 
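+  // Same structure as LowerSRL_PARTS, but shifting in sign bits:
+  //   bm = 64 - shamt
+  //   if bm <= 0:  lo = (sra hi, -bm),                  hi = (sra hi, 63)
+  //   else:        lo = (srl lo, shamt) | (shl hi, bm), hi = (sra hi, shamt)
+  //   shamt == 0 and shamt == 64 are then fixed up with explicit selects.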
SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64, + DAG.getConstant(64, dl, MVT::i64), ShAmt); + SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm, + DAG.getConstant(0, dl, MVT::i64), ISD::SETLE); + // if 64 - shAmt <= 0 + SDValue Hi_Neg = DAG.getNode(ISD::SRA, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, dl, MVT::i64)); + SDValue ShAmt_Neg = + DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(0, dl, MVT::i64), bm); + SDValue Lo_Neg = DAG.getNode(ISD::SRA, dl, MVT::i64, ShOpHi, ShAmt_Neg); + // else + SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm); + SDValue Hi_Pos = DAG.getNode(ISD::SRA, dl, MVT::i64, ShOpHi, ShAmt); + SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt); + Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries); + // Merge + SDValue Hit = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos); + SDValue Lot = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos); + SDValue BMCC1 = DAG.getSetCC(dl, MVT::i64, ShAmt, + DAG.getConstant(0, dl, MVT::i64), ISD::SETEQ); + SDValue BMCC2 = DAG.getSetCC(dl, MVT::i64, ShAmt, + DAG.getConstant(64, dl, MVT::i64), ISD::SETEQ); + SDValue Hit1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpHi, Hit); + SDValue Lot1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpLo, Lot); + SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, + DAG.getNode(ISD::SRA, dl, MVT::i64, ShOpHi, + DAG.getConstant(63, dl, MVT::i64)), + Hit1); + SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, ShOpHi, Lot1); + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, dl); +} + +SDValue Sw64TargetLowering::LowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + assert(Op.getOperand(0).getValueType() == MVT::i64 && + "Unhandled SINT_TO_FP type in custom expander!"); + SDValue LD; + bool isDouble = Op.getValueType() == MVT::f64; + LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); + SDValue FP = DAG.getNode(isDouble ? Sw64ISD::CVTQT_ : Sw64ISD::CVTQS_, dl, + isDouble ? 
MVT::f64 : MVT::f32, LD); + return FP; +} + +SDValue Sw64TargetLowering::LowerFP_TO_SINT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + bool isDouble = Op.getOperand(0).getValueType() == MVT::f64; + SDValue src = Op.getOperand(0); + + if (!isDouble) // Promote + src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src); + + src = DAG.getNode(Sw64ISD::CVTTQ_, dl, MVT::f64, src); + + return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src); +} + +SDValue Sw64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, + SelectionDAG &DAG) const { + SDValue width = Op.getOperand(1); + + if (width.getValueType() != MVT::i64) + width = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), MVT::i64, width); + + return expandFP_TO_INT_SAT(Op.getNode(), DAG); +} + +// ---------------------------------------------------------- +// Construct a new chained call to libgcc to replace the old chain, +// turning udiv/sdiv (i128, i128) into call %sret, i128, i128 +// +// ---------------------------------------------------------- +SDValue Sw64TargetLowering::LowerSUDIVI128(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + + if (!Op.getValueType().isInteger()) + return SDValue(); + RTLIB::Libcall LC; + bool isSigned; + switch (Op->getOpcode()) { + default: + llvm_unreachable("Unexpected request for libcall!"); + case ISD::SDIV: + isSigned = true; + LC = RTLIB::SDIV_I128; + break; + case ISD::UDIV: + isSigned = false; + LC = RTLIB::UDIV_I128; + break; + case ISD::SREM: + isSigned = true; + LC = RTLIB::SREM_I128; + break; + case ISD::UREM: + isSigned = false; + LC = RTLIB::UREM_I128; + break; + } + SDValue InChain = DAG.getEntryNode(); + + // Create an extra stack object to store the libcall result + SDValue DemoteStackSlot; + TargetLowering::ArgListTy Args; + auto &DL = DAG.getDataLayout(); + uint64_t TySize = 16; + MachineFunction &MF = DAG.getMachineFunction(); + int DemoteStackIdx = + MF.getFrameInfo().CreateStackObject(TySize, Align(8), false); + EVT ArgVT = Op->getOperand(0).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *StackSlotPtrType = PointerType::get(ArgTy, DL.getAllocaAddrSpace()); + // Save the sret information + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.IsSRet = true; + Entry.Alignment = Align(8); + Args.push_back(Entry); + + // Pass the udiv/sdiv operands as arguments + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + ArgListEntry Entry; + ArgVT = Op->getOperand(i).getValueType(); + assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 && + "Unexpected argument type for lowering"); + Entry.Node = Op->getOperand(i); + Entry.Ty = IntegerType::get(*DAG.getContext(), 128); + Entry.IsInReg = true; + Entry.IsSExt = isSigned; + Entry.IsZExt = false; + Args.push_back(Entry); + } + + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy(DAG.getDataLayout())); + // Create a new libcall to perform the udiv/sdiv + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(InChain) + .setLibCallee( + getLibcallCallingConv(LC), + static_cast<EVT>(MVT::isVoid).getTypeForEVT(*DAG.getContext()), + Callee, std::move(Args)) + .setNoReturn(true) + .setSExtResult(isSigned) + .setZExtResult(!isSigned); + + SDValue CallInfo = LowerCallTo(CLI).second; + return LowerCallExtraResult(CallInfo, DemoteStackSlot, DemoteStackIdx, DAG) + .first; +} + +// -------------------------------------------------------------------- +// when a call using sret 
arugments pass in register, the call result +// must be handled, create a load node and tokenfactor to pass the call +// result +// -------------------------------------------------------------------- +std::pair Sw64TargetLowering::LowerCallExtraResult( + SDValue &Chain, SDValue &DemoteStackSlot, unsigned DemoteStackIdx, + SelectionDAG &DAG) const { + SmallVector Chains(1), ReturnValues(1); + SDLoc DL(Chain); + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, DemoteStackSlot, + DAG.getConstant(0, DL, MVT::i64), Flags); + SDValue L = DAG.getLoad(MVT::i128, DL, Chain, Add, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), DemoteStackIdx, 0), + /* Alignment = */ 8); + Chains[0] = L.getValue(1); + ReturnValues[0] = L; + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(MVT::i128), + ReturnValues); + return std::make_pair(Res, Chain); +} + +SDValue Sw64TargetLowering::LowerExternalSymbol(SDValue Op, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Sw64:: begin lowExternalSymbol----\n"); + SDLoc dl(Op); + return DAG.getNode(Sw64ISD::RelLit, dl, MVT::i64, + DAG.getTargetExternalSymbol( + cast(Op)->getSymbol(), MVT::i64), + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); +} + +SDValue Sw64TargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + // FIXME: Need pseudo-fence for 'singlethread' fences + // FIXME: Set SType for weaker fences where supported/appropriate. + SDLoc DL(Op); + return DAG.getNode(Sw64ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + +SDValue Sw64TargetLowering::LowerATOMIC_LOAD(SDValue Op, + SelectionDAG &DAG) const { + AtomicSDNode *N = cast(Op); + assert(N->getOpcode() == ISD::ATOMIC_LOAD && "Bad Atomic OP"); + assert((N->getSuccessOrdering() == AtomicOrdering::Unordered || + N->getSuccessOrdering() == AtomicOrdering::Monotonic) && + "setInsertFencesForAtomic(true) expects unordered / monotonic"); + EVT VT = N->getMemoryVT(); + SDValue Result; + if (VT != MVT::i64) + Result = + DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(Op), MVT::i64, N->getChain(), + N->getBasePtr(), N->getPointerInfo(), VT, N->getAlign(), + N->getMemOperand()->getFlags(), N->getAAInfo()); + else + Result = DAG.getLoad(MVT::i64, SDLoc(Op), N->getChain(), N->getBasePtr(), + N->getPointerInfo(), N->getAlign(), + N->getMemOperand()->getFlags(), N->getAAInfo(), + N->getRanges()); + return Result; +} + +SDValue Sw64TargetLowering::LowerATOMIC_STORE(SDValue Op, + SelectionDAG &DAG) const { + AtomicSDNode *N = cast(Op); + assert(N->getOpcode() == ISD::ATOMIC_STORE && "Bad Atomic OP"); + assert((N->getSuccessOrdering() == AtomicOrdering::Unordered || + N->getSuccessOrdering() == AtomicOrdering::Monotonic) && + "setInsertFencesForAtomic(true) expects unordered / monotonic"); + + return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(), + N->getPointerInfo(), N->getAlign(), + N->getMemOperand()->getFlags(), N->getAAInfo()); +} +MachineMemOperand::Flags +Sw64TargetLowering::getTargetMMOFlags(const Instruction &I) const { + // Because of how we convert atomic_load and atomic_store to normal loads and + // stores in the DAG, we need to ensure that the MMOs are marked volatile + // since DAGCombine hasn't been updated to account for atomic, but non + // volatile loads. 
(See D57601) + if (auto *SI = dyn_cast<StoreInst>(&I)) + if (SI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *LI = dyn_cast<LoadInst>(&I)) + if (LI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *AI = dyn_cast<AtomicRMWInst>(&I)) + if (AI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I)) + if (AI->isAtomic()) + return MachineMemOperand::MOVolatile; + return MachineMemOperand::MONone; +} + +SDValue Sw64TargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const { + SDValue N0 = Op->getOperand(0); + SDValue N1 = Op->getOperand(1); + EVT VT = N1.getValueType(); + SDLoc dl(Op); + if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { + const APInt &C1Val = C1->getAPIntValue(); + if (C1Val.isPowerOf2()) { + SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), dl, VT); + return DAG.getNode(Sw64ISD::SBT, dl, VT, N0, ShAmtC); + } + } + // if ((or (srl, shl)) || (or (shl, srl))) then rolw + if ((N0->getOpcode() == ISD::SRL && N1->getOpcode() == ISD::SRL) || + (N0->getOpcode() == ISD::SRL && N1->getOpcode() == ISD::SHL)) + if (N0->getOperand(1)->getOperand(0)->getOpcode() == ISD::SUB && + N0->getOperand(1)->getOperand(0)->getConstantOperandVal(0) == 32) + return DAG.getNode(Sw64ISD::ROLW, dl, VT, N1->getOperand(0), + N1->getOperand(1)->getOperand(0)); + return SDValue(); +} + +SDValue Sw64TargetLowering::LowerSUREM(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + // Expand only in the constant case, + // and only when the constant divisor is non-zero + if (Op.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op.getNode()->getOperand(1))->getAPIntValue() != 0) { + + EVT VT = Op.getNode()->getValueType(0); + + SmallVector<SDNode *, 8> Built; + SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM + ? BuildUDIV(Op.getNode(), DAG, false, Built) + : BuildSDIV(Op.getNode(), DAG, false, Built); + + Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1)); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1); + + return Tmp1; + } + + return LowerSUDIV(Op, DAG); +} + +SDValue Sw64TargetLowering::LowerSUDIV(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + + if (!Op.getValueType().isInteger()) + return SDValue(); + + // Expand only for a constant, non-zero divisor + if (Op.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op.getNode()->getOperand(1))->getAPIntValue() != 0) { + SmallVector<SDNode *, 8> Built; + return Op.getOpcode() == ISD::SDIV + ? 
BuildSDIV(Op.getNode(), DAG, true, Built) + : BuildUDIV(Op.getNode(), DAG, true, Built); + } + + const char *opstr = 0; + switch (Op.getOpcode()) { + case ISD::UREM: + opstr = "__remlu"; + break; + case ISD::SREM: + opstr = "__reml"; + break; + case ISD::UDIV: + opstr = "__divlu"; + break; + case ISD::SDIV: + opstr = "__divl"; + break; + } + + SDValue Tmp1 = Op.getOperand(0); + SDValue Tmp2 = Op.getOperand(1); + SDValue Addr = DAG.getExternalSymbol(opstr, MVT::i64); + return DAG.getNode(Sw64ISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2); +} + +SDValue Sw64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue Chain, DataPtr; + LowerVAARG(Op.getNode(), Chain, DataPtr, DAG); + SDValue Result; + if (Op.getValueType() == MVT::i32) + Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, + MachinePointerInfo(), MVT::i32); + else if (Op.getValueType() == MVT::f32) { + Result = DAG.getLoad(MVT::f64, dl, Chain, DataPtr, MachinePointerInfo()); + SDValue InFlags = Result.getValue(1); + SmallVector Ops; + Ops.push_back(InFlags); + Ops.push_back(Result); + SDVTList NodeTys = DAG.getVTList(MVT::f32, MVT::Other); + Result = DAG.getNode(Sw64ISD::CVTTS_, dl, NodeTys, Ops); + } else { + Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, + MachinePointerInfo()); + } + return Result; +} + +SDValue Sw64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue Chain = Op.getOperand(0); + SDValue DestP = Op.getOperand(1); + SDValue SrcP = Op.getOperand(2); + const Value *DestS = cast(Op.getOperand(3))->getValue(); + const Value *SrcS = cast(Op.getOperand(4))->getValue(); + SDValue Val = DAG.getLoad(getPointerTy(DAG.getDataLayout()), dl, Chain, SrcP, + MachinePointerInfo(SrcS)); + SDValue Result = + DAG.getStore(Val.getValue(1), dl, Val, DestP, MachinePointerInfo(DestS)); + SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, + DAG.getConstant(8, dl, MVT::i64)); + Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, NP, + MachinePointerInfo(), MVT::i32); + SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, + DAG.getConstant(8, dl, MVT::i64)); + return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, MachinePointerInfo(), + MVT::i32); +} + +SDValue Sw64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + MachineFunction &MF = DAG.getMachineFunction(); + Sw64MachineFunctionInfo *FuncInfo = MF.getInfo(); + + SDValue Chain = Op.getOperand(0); + SDValue VAListP = Op.getOperand(1); + const Value *VAListS = cast(Op.getOperand(2))->getValue(); + + // vastart stores the address of the VarArgsBase and VarArgsOffset + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64); + SDValue S1 = + DAG.getStore(Chain, dl, FR, VAListP, MachinePointerInfo(VAListS)); + SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, + DAG.getConstant(8, dl, MVT::i64)); + + return DAG.getTruncStore( + S1, dl, DAG.getConstant(FuncInfo->getVarArgsOffset(), dl, MVT::i64), SA2, + MachinePointerInfo(), MVT::i32); +} + +// Prefetch operands are: +// 1: Address to prefetch +// 2: bool isWrite +// 3: int locality (0 = no locality ... 3 = extreme locality) +// 4: bool isDataCache +SDValue Sw64TargetLowering::LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + unsigned IsWrite = cast(Op.getOperand(2))->getZExtValue(); + // unsigned Locality = cast(Op.getOperand(3))->getZExtValue(); + unsigned IsData = cast(Op.getOperand(4))->getZExtValue(); + + unsigned Code = IsData ? 
Sw64ISD::Z_S_FILLCS : Sw64ISD::Z_FILLCS; + if (IsWrite == 1 && IsData == 1) + Code = Sw64ISD::Z_FILLDE; + if (IsWrite == 0 && IsData == 1) + Code = Sw64ISD::Z_FILLCS; + if (IsWrite == 1 && IsData == 0) + Code = Sw64ISD::Z_S_FILLDE; + if (IsWrite == 0 && IsData == 0) + Code = Sw64ISD::Z_FILLCS; + + unsigned PrfOp = 0; + + return DAG.getNode(Code, DL, MVT::Other, Op.getOperand(0), + DAG.getConstant(PrfOp, DL, MVT::i64), Op.getOperand(1)); +} + +SDValue Sw64TargetLowering::LowerROLW(SDNode *N, SelectionDAG &DAG) const { + SDLoc DL(N); + + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewRes = DAG.getNode(Sw64ISD::ROLW, DL, MVT::i64, NewOp0, NewOp1); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); +} + +SDValue Sw64TargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + // check the depth + if (cast(Op.getOperand(0))->getZExtValue() != 0) { + DAG.getContext()->emitError( + "return address can be determined only for current frame"); + return SDValue(); + } + + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Sw64::R15, VT); + return FrameAddr; +} + +void Sw64TargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const { + SDLoc dl(N); + switch (N->getOpcode()) { + default: + break; + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: { + SDValue Res = LowerSUDIVI128(SDValue(N, 0), DAG); + Results.push_back(Res); + return; + } + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_STORE: + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + return; + case ISD::FP_TO_SINT: { + SDValue NewRes = + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, N->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), NewRes)); + return; + } + case ISD::ROTL: + SDValue Res = LowerROLW(N, DAG); + Results.push_back(Res); + return; + } + assert(N->getValueType(0) == MVT::i32 && N->getOpcode() == ISD::VAARG && + "Unknown node to custom promote!"); + + SDValue Chain, DataPtr; + LowerVAARG(N, Chain, DataPtr, DAG); + + SDValue Res = + DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, MachinePointerInfo()); + + Results.push_back(Res); + Results.push_back(SDValue(Res.getNode(), 1)); +} + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
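+/// For this target only 'f' and 'r' are handled here (both map to
+/// C_RegisterClass); everything else defers to the generic implementation.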
+Sw64TargetLowering::ConstraintType +Sw64TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'f': + case 'r': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +unsigned Sw64TargetLowering::MatchRegName(StringRef Name) const { + unsigned Reg = StringSwitch(Name.lower()) + .Case("$0", Sw64::R0) + .Case("$1", Sw64::R1) + .Case("$2", Sw64::R2) + .Case("$3", Sw64::R3) + .Case("$4", Sw64::R4) + .Case("$5", Sw64::R5) + .Case("$6", Sw64::R6) + .Case("$7", Sw64::R7) + .Case("$8", Sw64::R8) + .Case("$9", Sw64::R9) + .Case("$10", Sw64::R10) + .Case("$11", Sw64::R11) + .Case("$12", Sw64::R12) + .Case("$13", Sw64::R13) + .Case("$14", Sw64::R14) + .Case("$15", Sw64::R15) + .Case("$16", Sw64::R16) + .Case("$17", Sw64::R17) + .Case("$18", Sw64::R18) + .Case("$19", Sw64::R19) + .Case("$20", Sw64::R20) + .Case("$21", Sw64::R21) + .Case("$22", Sw64::R22) + .Case("$23", Sw64::R23) + .Case("$24", Sw64::R24) + .Case("$25", Sw64::R25) + .Case("$26", Sw64::R26) + .Case("$27", Sw64::R27) + .Case("$28", Sw64::R28) + .Case("$29", Sw64::R29) + .Case("$30", Sw64::R30) + .Case("$31", Sw64::R31) + .Default(0); + return Reg; +} +Register +Sw64TargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { + Register Reg = MatchRegName(StringRef(RegName)); + if (Reg) + return Reg; + + report_fatal_error("Sw Invalid register name global variable"); +} +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +Sw64TargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'f': + weight = CW_Register; + break; + } + return weight; +} + +Instruction *Sw64TargetLowering::emitLeadingFence(IRBuilderBase &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + if (isa(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) + return Builder.CreateFence(AtomicOrdering::AcquireRelease); + if (isa(Inst) && isReleaseOrStronger(Ord)) + return Builder.CreateFence(AtomicOrdering::Release); + return nullptr; +} + +Instruction *Sw64TargetLowering::emitTrailingFence(IRBuilderBase &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + if (isa(Inst) && isAcquireOrStronger(Ord)) + return Builder.CreateFence(AtomicOrdering::AcquireRelease); + if (isa(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) + return Builder.CreateFence(AtomicOrdering::Release); + return nullptr; +} + +/// This is a helper function to parse a physical register string and split it +/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag +/// that is returned indicates whether parsing was successful. The second flag +/// is true if the numeric part exists. 
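+/// For example, "{$17}" yields Prefix "$" and Reg 17, and "{$f3}" yields
+/// Prefix "$f" and Reg 3.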
+static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, + unsigned long long &Reg) { + if (C.front() != '{' || C.back() != '}') + return std::make_pair(false, false); + + // Search for the first numeric character. + StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; + I = std::find_if(B, E, isdigit); + + Prefix = StringRef(B, I - B); + + // The second flag is set to false if no numeric characters were found. + if (I == E) + return std::make_pair(true, false); + + // Parse the numeric characters. + return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg), + true); +} + +std::pair +Sw64TargetLowering::parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { + const TargetRegisterClass *RC; + StringRef Prefix; + unsigned long long Reg; + + std::pair R = parsePhysicalReg(C, Prefix, Reg); + + if (!R.first) + return std::make_pair(0U, nullptr); + + if (!R.second) + return std::make_pair(0U, nullptr); + + if (Prefix == "$f") { // Parse $f0-$f31. + // The size of FP registers is 64-bit or Reg is an even number, select + // the 64-bit register class. + if (VT == MVT::Other) + VT = MVT::f64; + + RC = getRegClassFor(VT); + + } else { // Parse $0-$31. + assert(Prefix == "$"); + // Sw64 has only i64 register. + RC = getRegClassFor(MVT::i64); + StringRef name((C.data() + 1), (C.size() - 2)); + + return std::make_pair(MatchRegName(name), RC); + } + + assert(Reg < RC->getNumRegs()); + return std::make_pair(*(RC->begin() + Reg), RC); +} +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. +std::pair +Sw64TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return std::make_pair(0U, &Sw64::GPRCRegClass); + case 'f': + return VT == MVT::f64 ? 
std::make_pair(0U, &Sw64::F8RCRegClass) + : std::make_pair(0U, &Sw64::F4RCRegClass); + } + } + + std::pair R; + R = parseRegForInlineAsmConstraint(Constraint, VT); + + if (R.second) + return R; + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +//===----------------------------------------------------------------------===// +// Other Lowering Code +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +Sw64TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected instr type to insert"); + + case Sw64::FILLCS: + case Sw64::FILLDE: + case Sw64::S_FILLCS: + case Sw64::S_FILLDE: + return emitPrefetch(MI, BB); + + // I64 && I32 + case Sw64::ATOMIC_LOAD_ADD_I32: + case Sw64::LAS32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_ADD_I64: + case Sw64::LAS64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_SWAP_I32: + case Sw64::SWAP32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_SWAP_I64: + case Sw64::SWAP64: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_CMP_SWAP_I32: + case Sw64::CAS32: + return emitAtomicCmpSwap(MI, BB, 4); + case Sw64::ATOMIC_CMP_SWAP_I64: + case Sw64::CAS64: + return emitAtomicCmpSwap(MI, BB, 8); + + case Sw64::ATOMIC_LOAD_AND_I32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_AND_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_OR_I32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_OR_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_SUB_I32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_SUB_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_XOR_I32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_XOR_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_UMAX_I64: + case Sw64::ATOMIC_LOAD_MAX_I64: + case Sw64::ATOMIC_LOAD_UMIN_I64: + case Sw64::ATOMIC_LOAD_MIN_I64: + case Sw64::ATOMIC_LOAD_NAND_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_UMAX_I32: + case Sw64::ATOMIC_LOAD_MAX_I32: + case Sw64::ATOMIC_LOAD_UMIN_I32: + case Sw64::ATOMIC_LOAD_MIN_I32: + case Sw64::ATOMIC_LOAD_NAND_I32: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_UMAX_I16: + case Sw64::ATOMIC_LOAD_MAX_I16: + case Sw64::ATOMIC_LOAD_UMIN_I16: + case Sw64::ATOMIC_LOAD_MIN_I16: + case Sw64::ATOMIC_LOAD_NAND_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + + case Sw64::ATOMIC_LOAD_UMAX_I8: + case Sw64::ATOMIC_LOAD_MAX_I8: + case Sw64::ATOMIC_LOAD_UMIN_I8: + case Sw64::ATOMIC_LOAD_MIN_I8: + case Sw64::ATOMIC_LOAD_NAND_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + + // I8 + case Sw64::ATOMIC_LOAD_ADD_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_SWAP_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_LOAD_AND_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_LOAD_OR_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_LOAD_SUB_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_LOAD_XOR_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_CMP_SWAP_I8: + return emitAtomicCmpSwapPartword(MI, BB, 1); + + // I16 + case Sw64::ATOMIC_LOAD_ADD_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_SWAP_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_LOAD_AND_I16: + return 
emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_LOAD_OR_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_LOAD_SUB_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_LOAD_XOR_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_CMP_SWAP_I16: + return emitAtomicCmpSwapPartword(MI, BB, 2); + } +} + +MachineBasicBlock * +Sw64TargetLowering::emitPrefetch(MachineInstr &MI, + MachineBasicBlock *BB) const { + + Register RA, RB, RC; + MachineFunction *MF = BB->getParent(); + // MachineRegisterInfo &RegInfo = MF->getRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + + // %11:gprc = PHI %10:gprc, %bb.1, %15:gprc, %bb.4 + // FILLCS 128, %11:gprc + // it should be directed return. + if (!(DefMI->getOpcode() == Sw64::LDA && DefMI->getOperand(1).isImm())) + return BB; + + int Imm = DefMI->getOperand(1).getImm(); + int Distance = Imm + MI.getOperand(0).getImm(); + Register Address = DefMI->getOperand(2).getReg(); + + MachineInstr *MII = MI.getNextNode(); + if (MII) + MII = MII->getNextNode(); + else + return BB; + + if (MII) { + if (MII->getOpcode() == Sw64::LDL || MII->getOpcode() == Sw64::LDW || + MII->getOpcode() == Sw64::LDHU || MII->getOpcode() == Sw64::LDBU) { + int MIImm = MII->getOperand(1).getImm(); + if (MIImm > 1000 || MIImm < -1000) { + MI.eraseFromParent(); + return BB; + } + } + } + + if (Distance > 1500 || Distance < -1500) { + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; + } + + BuildMI(*BB, MI, DL, TII->get(MI.getOpcode())) + .addImm(Distance) + .addReg(Address); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock * +Sw64TargetLowering::emitReduceSum(MachineInstr &MI, + MachineBasicBlock *BB) const { + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + Register RB = MI.getOperand(0).getReg(); + Register RA = MI.getOperand(1).getReg(); + + Register RC = RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); + Register RD = RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); + Register RE = RegInfo.createVirtualRegister(&Sw64::GPRCRegClass); + + MachineBasicBlock::iterator II(MI); + + BuildMI(*BB, II, DL, TII->get(MI.getOpcode())) + .addReg(RB, RegState::Define | RegState::EarlyClobber) + .addReg(RA, RegState::Kill) + .addReg(RC, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RD, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RE, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. 
+ + return BB; +} + +MachineBasicBlock * +Sw64TargetLowering::emitITOFSInstruct(MachineInstr &MI, + MachineBasicBlock *BB) const { + return BB; +} + +MachineBasicBlock * +Sw64TargetLowering::emitFSTOIInstruct(MachineInstr &MI, + MachineBasicBlock *BB) const { + + Register RA, RC; + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Opc = Sw64::CTPOPOW; + Register Scratch = RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); + + RC = MI.getOperand(0).getReg(); + RA = MI.getOperand(1).getReg(); + + if (MI.getOpcode() != Opc) + Opc = Sw64::CTLZOW; + + BuildMI(*BB, MI, DL, TII->get(Opc)) + .addReg(Scratch, RegState::Define) + .addReg(RA); + BuildMI(*BB, MI, DL, TII->get(Sw64::FTOIS)) + .addReg(RC, RegState::Define) + .addReg(Scratch); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Sw64TargetLowering::emitAtomicBinaryPartword( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { + assert((Size == 1 || Size == 2) && + "Unsupported size for EmitAtomicBinaryPartial."); + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i64); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned OldVal = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned Incr = MI.getOperand(2).getReg(); + + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned LockVal = RegInfo.createVirtualRegister(RC); + unsigned Reg_bic = RegInfo.createVirtualRegister(RC); + unsigned Scratch = RegInfo.createVirtualRegister(RC); + + unsigned AtomicOp = 0; + switch (MI.getOpcode()) { + case Sw64::ATOMIC_LOAD_ADD_I8: + AtomicOp = Sw64::ATOMIC_LOAD_ADD_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_SUB_I8: + AtomicOp = Sw64::ATOMIC_LOAD_SUB_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_AND_I8: + AtomicOp = Sw64::ATOMIC_LOAD_AND_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_OR_I8: + AtomicOp = Sw64::ATOMIC_LOAD_OR_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_XOR_I8: + AtomicOp = Sw64::ATOMIC_LOAD_XOR_I8_POSTRA; + break; + case Sw64::ATOMIC_SWAP_I8: + AtomicOp = Sw64::ATOMIC_SWAP_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_ADD_I16: + AtomicOp = Sw64::ATOMIC_LOAD_ADD_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_SUB_I16: + AtomicOp = Sw64::ATOMIC_LOAD_SUB_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_AND_I16: + AtomicOp = Sw64::ATOMIC_LOAD_AND_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_OR_I16: + AtomicOp = Sw64::ATOMIC_LOAD_OR_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_XOR_I16: + AtomicOp = Sw64::ATOMIC_LOAD_XOR_I16_POSTRA; + break; + case Sw64::ATOMIC_SWAP_I16: + AtomicOp = Sw64::ATOMIC_SWAP_I16_POSTRA; + break; + + case Sw64::ATOMIC_LOAD_UMAX_I16: + AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MAX_I16: + AtomicOp = Sw64::ATOMIC_LOAD_MAX_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMIN_I16: + AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MIN_I16: + AtomicOp = Sw64::ATOMIC_LOAD_MIN_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_NAND_I16: + AtomicOp = Sw64::ATOMIC_LOAD_NAND_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMAX_I8: + AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MAX_I8: + AtomicOp = Sw64::ATOMIC_LOAD_MAX_I8_POSTRA; + break; + case 
Sw64::ATOMIC_LOAD_UMIN_I8: + AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MIN_I8: + AtomicOp = Sw64::ATOMIC_LOAD_MIN_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_NAND_I8: + AtomicOp = Sw64::ATOMIC_LOAD_NAND_I8_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); + } + + MachineBasicBlock::iterator II(MI); + + unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); + + unsigned t_Incr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Incr).addReg(Incr).addReg(Incr); + + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(OldVal, RegState::Define | RegState::EarlyClobber) + .addReg(t_Ptr, RegState::EarlyClobber) + .addReg(t_Incr, RegState::EarlyClobber) + .addReg(StoreVal, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(LockVal, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_bic, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Scratch, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock *Sw64TargetLowering::emitAtomicCmpSwapPartword( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { + assert((Size == 1 || Size == 2) && + "Unsupported size for EmitAtomicCmpSwapPartial."); + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i64); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned OldVal = MI.getOperand(2).getReg(); + unsigned NewVal = MI.getOperand(3).getReg(); + + unsigned Reg_bic = RegInfo.createVirtualRegister(RC); + unsigned Reg_ins = RegInfo.createVirtualRegister(RC); + unsigned LockVal = RegInfo.createVirtualRegister(RC); + unsigned Reg_cmp = RegInfo.createVirtualRegister(RC); + unsigned Reg_mas = RegInfo.createVirtualRegister(RC); + + unsigned AtomicOp = 0; + switch (MI.getOpcode()) { + case Sw64::ATOMIC_CMP_SWAP_I8: + AtomicOp = Sw64::ATOMIC_CMP_SWAP_I8_POSTRA; + break; + case Sw64::ATOMIC_CMP_SWAP_I16: + AtomicOp = Sw64::ATOMIC_CMP_SWAP_I16_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); + } + + MachineBasicBlock::iterator II(MI); + + unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); + unsigned t_OldVal = + MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_OldVal) + .addReg(OldVal) + .addReg(OldVal); + unsigned t_NewVal = + MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_NewVal) + .addReg(NewVal) + .addReg(NewVal); + + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(t_Ptr, RegState::EarlyClobber) + .addReg(t_OldVal, RegState::EarlyClobber) + .addReg(t_NewVal, RegState::EarlyClobber) + .addReg(Reg_bic, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_ins, RegState::Define | 
RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(LockVal, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_cmp, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_mas, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +} + +// This function also handles Sw64::ATOMIC_SWAP_I32 (when BinOpcode == 0), and +// Sw64::SWAP32 +MachineBasicBlock * +Sw64TargetLowering::emitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *BB) const { + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i64); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned AtomicOp; + switch (MI.getOpcode()) { + case Sw64::ATOMIC_LOAD_ADD_I32: + case Sw64::LAS32: + AtomicOp = Sw64::ATOMIC_LOAD_ADD_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_SUB_I32: + AtomicOp = Sw64::ATOMIC_LOAD_SUB_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_AND_I32: + AtomicOp = Sw64::ATOMIC_LOAD_AND_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_OR_I32: + AtomicOp = Sw64::ATOMIC_LOAD_OR_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_XOR_I32: + AtomicOp = Sw64::ATOMIC_LOAD_XOR_I32_POSTRA; + break; + case Sw64::ATOMIC_SWAP_I32: + case Sw64::SWAP32: + AtomicOp = Sw64::ATOMIC_SWAP_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_ADD_I64: + case Sw64::LAS64: + AtomicOp = Sw64::ATOMIC_LOAD_ADD_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_SUB_I64: + AtomicOp = Sw64::ATOMIC_LOAD_SUB_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_AND_I64: + AtomicOp = Sw64::ATOMIC_LOAD_AND_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_OR_I64: + AtomicOp = Sw64::ATOMIC_LOAD_OR_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_XOR_I64: + AtomicOp = Sw64::ATOMIC_LOAD_XOR_I64_POSTRA; + break; + case Sw64::ATOMIC_SWAP_I64: + case Sw64::SWAP64: + AtomicOp = Sw64::ATOMIC_SWAP_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMAX_I64: + AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA; + break; + + case Sw64::ATOMIC_LOAD_MAX_I64: + AtomicOp = Sw64::ATOMIC_LOAD_MAX_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMIN_I64: + AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MIN_I64: + AtomicOp = Sw64::ATOMIC_LOAD_MIN_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_NAND_I64: + AtomicOp = Sw64::ATOMIC_LOAD_NAND_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMAX_I32: + AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MAX_I32: + AtomicOp = Sw64::ATOMIC_LOAD_MAX_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMIN_I32: + AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MIN_I32: + AtomicOp = Sw64::ATOMIC_LOAD_MIN_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_NAND_I32: + AtomicOp = Sw64::ATOMIC_LOAD_NAND_I32_POSTRA; + break; + + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); + } + + unsigned OldVal = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned Incr = MI.getOperand(2).getReg(); + + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned Scratch = RegInfo.createVirtualRegister(RC); + unsigned Scratch1 = RegInfo.createVirtualRegister(RC); + + MachineBasicBlock::iterator II(MI); + + unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, 
TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); + + unsigned t_Incr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Incr).addReg(Incr).addReg(Incr); + + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(OldVal, RegState::Define | RegState::EarlyClobber) + .addReg(t_Ptr, RegState::EarlyClobber) + .addReg(t_Incr, RegState::EarlyClobber) + .addReg(StoreVal, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Scratch, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Scratch1, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock *Sw64TargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const { + assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i64); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned AtomicOp; + + switch (MI.getOpcode()) { + case Sw64::CAS32: + case Sw64::ATOMIC_CMP_SWAP_I32: + AtomicOp = Sw64::ATOMIC_CMP_SWAP_I32_POSTRA; + break; + case Sw64::CAS64: + case Sw64::ATOMIC_CMP_SWAP_I64: + AtomicOp = Sw64::ATOMIC_CMP_SWAP_I64_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); + } + + /* + $0=Dest $16=Ptr $17=OldVal $18=NewVal + + memb + $BB0_1: + ldi $0,0($16) + lldw $0,0($0) + cmpeq $17,$0,$1 + wr_f $1 + bis $18,$18,$2 + lstw $2,0($16) + rd_f $2 + beq $1,$BB0_2 + beq $2,$BB0_1 + $BB0_2: + */ + + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned OldVal = MI.getOperand(2).getReg(); + unsigned NewVal = MI.getOperand(3).getReg(); + unsigned Scratch = RegInfo.createVirtualRegister(RC); + unsigned Reg_cmp = RegInfo.createVirtualRegister(RC); + + MachineBasicBlock::iterator II(MI); + + unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); + unsigned t_OldVal = + MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_OldVal) + .addReg(OldVal) + .addReg(OldVal); + unsigned t_NewVal = + MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_NewVal) + .addReg(NewVal) + .addReg(NewVal); + + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(t_Ptr, RegState::EarlyClobber) + .addReg(t_OldVal, RegState::EarlyClobber) + .addReg(t_NewVal, RegState::EarlyClobber) + .addReg(Scratch, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_cmp, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MVT Sw64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, + EVT LHSTy) const { + return MVT::i64; +} + +bool Sw64TargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // The Sw64 target isn't yet aware of offsets. 
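+  // Returning false keeps a "global + constant" address as an explicit
+  // ISD::ADD rather than folding the addend into the GlobalAddress node,
+  // which is the conservative and always-safe choice.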
+ return false; +} + +EVT Sw64TargetLowering::getOptimalMemOpType( + const MemOp &Op, const AttributeList & /*FuncAttributes*/) const { + if (Subtarget.enOptMemset()) + return MVT::i64; + return MVT::Other; +} + +bool Sw64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + if (VT != MVT::f32 && VT != MVT::f64) + return false; + // +0.0 F31 + // +0.0f F31 + // -0.0 -F31 + // -0.0f -F31 + return Imm.isZero() || Imm.isNegZero(); +} + +SDValue Sw64TargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, + int &RefinementSteps) const { + EVT VT = Operand.getValueType(); + if ((VT == MVT::f32 || VT == MVT::f64) && Subtarget.hasCore4() && + Subtarget.enableFloatAri()) { + if (RefinementSteps == ReciprocalEstimate::Unspecified) { + if (VT.getScalarType() == MVT::f32) + RefinementSteps = 2; + if (VT.getScalarType() == MVT::f64) + RefinementSteps = 3; + } + if (VT.getScalarType() == MVT::f32) + return DAG.getNode(Sw64ISD::FRECS, SDLoc(Operand), VT, Operand); + if (VT.getScalarType() == MVT::f64) + return DAG.getNode(Sw64ISD::FRECD, SDLoc(Operand), VT, Operand); + } + return SDValue(); +} + +bool Sw64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + EVT VT; + SDValue Ptr; + LSBaseSDNode *LSN = dyn_cast(N); + if (!LSN) + return false; + VT = LSN->getMemoryVT(); + bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || + VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64; + if (!IsLegalType) + return false; + if (Op->getOpcode() != ISD::ADD) + return false; + if (LoadSDNode *LD = dyn_cast(N)) { + VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast(N)) { + VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); + } else + return false; + + if (ConstantSDNode *RHS = dyn_cast(Op->getOperand(1))) { + uint64_t RHSC = RHS->getZExtValue(); + Base = Ptr; + Offset = DAG.getConstant(RHSC, SDLoc(N), MVT::i64); + AM = ISD::POST_INC; + return true; + } + + return false; +} + +const TargetRegisterClass *Sw64TargetLowering::getRepRegClassFor(MVT VT) const { + if (VT == MVT::Other) + return &Sw64::GPRCRegClass; + if (VT == MVT::i32) + return &Sw64::FPRC_loRegClass; + return TargetLowering::getRepRegClassFor(VT); +} + +bool Sw64TargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS, + Instruction *I) const { + if (!Subtarget.hasCore4() || !Subtarget.enablePostInc()) + return llvm::TargetLoweringBase::isLegalAddressingMode(DL, AM, Ty, AS, I); + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // Require a 12-bit signed offset. + if (!isInt<12>(AM.BaseOffs)) + return false; + + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (!AM.HasBaseReg) // allow "r+i". + break; + return false; // disallow "r+r" or "r+r+i". 
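+  // Any other scale implies a scaled-index form such as "base + 4*index";
+  // those are rejected below, so this path only accepts "i", "r", and
+  // "r+i" with a 12-bit signed displacement.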
+ default: + return false; + } + + return true; +} + +bool Sw64TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + default: + break; + } + + return false; +} + +bool Sw64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, + Type *Ty) const { + switch (Ty->getScalarType()->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + return false; + } +} + +bool Sw64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { + // Zexts are free if they can be combined with a load. + if (Subtarget.enOptExt()) { + if (auto *LD = dyn_cast(Val)) { + EVT MemVT = LD->getMemoryVT(); + if ((MemVT == MVT::i8 || MemVT == MVT::i16 || + (Subtarget.is64Bit() && MemVT == MVT::i32)) && + (LD->getExtensionType() == ISD::NON_EXTLOAD || + LD->getExtensionType() == ISD::ZEXTLOAD)) + return true; + } + } + + return TargetLowering::isZExtFree(Val, VT2); +} + +bool Sw64TargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { + if (Subtarget.enOptExt()) + return SrcVT == MVT::i32 && DstVT == MVT::i64; + return false; +} + +bool Sw64TargetLowering::isLegalICmpImmediate(int64_t Imm) const { + if (Subtarget.enOptExt()) + return Imm >= 0 && Imm <= 255; + return false; +} + +bool Sw64TargetLowering::isLegalAddImmediate(int64_t Imm) const { + if (Subtarget.enOptExt()) + return Imm >= 0 && Imm <= 255; + return false; +} diff --git a/llvm/lib/Target/Sw64/Sw64ISelLowering.h b/llvm/lib/Target/Sw64/Sw64ISelLowering.h new file mode 100644 index 000000000000..836abe2774af --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ISelLowering.h @@ -0,0 +1,476 @@ +//===-- Sw64ISelLowering.h - Sw64 DAG Lowering Interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Sw64 uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64ISELLOWERING_H +#define LLVM_LIB_TARGET_SW64_SW64ISELLOWERING_H + +#include "Sw64.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLowering.h" + +namespace llvm { + +// Forward delcarations +class Sw64Subtarget; +class Sw64TargetMachine; + +namespace Sw64ISD { +enum NodeType : unsigned { + // Start the numbering where the builtin ops and target ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // These corrospond to the identical Instruction + CVTQT_, + CVTQS_, + CVTTQ_, + CVTTS_, + CVTST_, + /// GPRelHi/GPRelLo - These represent the high and low 16-bit + /// parts of a global address respectively. + GPRelHi, + GPRelLo, + /// TPRelHi/TPRelLo - These represent the high and low 16-bit + /// parts of a TLS global address respectively. + TPRelHi, + TPRelLo, + TLSGD, // SW + TLSLDM, // SW + DTPRelHi, + DTPRelLo, + RelGottp, // SW + SysCall, + /// RetLit - Literal Relocation of a Global + RelLit, + + /// GlobalRetAddr - used to restore the return address + GlobalRetAddr, + + /// CALL - Normal call. 
+ CALL, + + /// Jump and link (call) + JmpLink, + /// DIVCALL - used for special library calls for div and rem + DivCall, + /// return flag operand + RET_FLAG, + Ret, + LDAWC, + MEMBARRIER, + /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This + /// corresponds to the COND_BRANCH pseudo instruction. + /// *PRC is the input register to compare to zero, + /// OPC is the branch opcode to use (e.g. Sw64::BEQ), + /// DESTBB is the destination block to branch to, and INFLAG is + /// an optional input flag argument. + COND_BRANCH_I, + COND_BRANCH_F, + + Z_S_FILLCS, + Z_S_FILLDE, + Z_FILLDE, + Z_FILLDE_E, + Z_FILLCS, + Z_FILLCS_E, + Z_E_FILLCS, + Z_E_FILLDE, + Z_FLUSHD, + + GPRel, + TPRel, + DTPRel, + LDIH, + LDI, + + FRECS, + FRECD, + ADDPI, + ADDPIS, + SBT, + CBT, + REVBH, + REVBW, + + ROLW, + CRC32B, + CRC32H, + CRC32W, + CRC32L, + CRC32CB, + CRC32CH, + CRC32CW, + CRC32CL, + + VBROADCAST_LD, + VBROADCAST, + + // Vector load. + VLDWE, + VLDSE, + VLDDE, + + // Vector comparisons. + // These take a vector and return a boolean. + VALL_ZERO, + VANY_ZERO, + VALL_NONZERO, + VANY_NONZERO, + + // This is vcmpgew. + VSETGE, + + // These take a vector and return a vector bitmask. + VCEQ, + VCLE_S, + VCLE_U, + VCLT_S, + VCLT_U, + // These is vector select. + VFCMOVEQ, + VFCMOVLE, + VFCMOVLT, + VSELEQW, + VSELLTW, + VSELLEW, + VSELLBCW, + + VMAX, + VMIN, + VUMAX, + VUMIN, + VSQRT, + VSUMF, + VFREC, + VFCMPEQ, + VFCMPLE, + VFCMPLT, + VFCMPUN, + VFCVTSD, + VFCVTDS, + VFCVTLS, + VFCVTLD, + VFCVTSH, + VFCVTHS, + VFCVTDL, + VFCVTDLG, + VFCVTDLP, + VFCVTDLZ, + VFCVTDLN, + VFRIS, + VFRISG, + VFRISP, + VFRISZ, + VFRISN, + VFRID, + VFRIDG, + VFRIDP, + VFRIDZ, + VFRIDN, + VMAXF, + VMINF, + VINSECTL, + VCPYB, + VCPYH, + // Vector Shuffle with mask as an operand + VSHF, // Generic shuffle + SHF, // 4-element set shuffle. 
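+  // The ILV*/PCK* nodes below are the even/odd interleave and pack forms of
+  // the element-shuffle family introduced by VSHF/SHF above.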
+ ILVEV, // Interleave even elements + ILVOD, // Interleave odd elements + ILVL, // Interleave left elements + ILVR, // Interleave right elements + PCKEV, // Pack even elements + PCKOD, // Pack odd elements + VCON_W, + VCON_S, + VCON_D, + + VSHL_BY_SCALAR, + VSRL_BY_SCALAR, + VSRA_BY_SCALAR, + // Vector Lane Copy + INSVE, // Copy element from one vector to another + + // Combined (XOR (OR $a, $b), -1) + VNOR, + VEQV, + VORNOT, + + VCTPOP, + VCTLZ, + + VLOG, + VCOPYF, + V8SLL, + V8SLLi, + V8SRL, + V8SRLi, + VROTR, + VROTRi, + V8SRA, + V8SRAi, + VROLB, + VROLBi, + VROLH, + VROLHi, + VROLL, + VROLLi, + VECREDUCE_FADD, + VECT_VUCADDW, + VECT_VUCADDH, + VECT_VUCADDB, + VECT_VUCSUBW, + VECT_VUCSUBH, + VECT_VUCSUBB, + // Extended vector element extraction + VEXTRACT_SEXT_ELT, + VEXTRACT_ZEXT_ELT, + + VTRUNCST = ISD::FIRST_TARGET_MEMORY_OPCODE +}; +} // namespace Sw64ISD + +//===--------------------------------------------------------------------===// +// TargetLowering Implementation +//===--------------------------------------------------------------------===// +class Sw64TargetLowering : public TargetLowering { + const TargetMachine &TM; + const Sw64Subtarget &Subtarget; + +public: + explicit Sw64TargetLowering(const TargetMachine &TM, + const Sw64Subtarget &Subtarget); + + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT LHSTy) const override; + + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }; + + bool generateFMAsInMachineCombiner(EVT VT, + CodeGenOpt::Level OptLevel) const override; + + /// getSetCCResultType - Get the SETCC result ValueType + virtual EVT getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const override; + bool isLegalICmpImmediate(int64_t Imm) const override; + bool isLegalAddImmediate(int64_t Imm) const override; + bool isZExtFree(SDValue Val, EVT VT2) const override; + bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; + + /// LowerOperation - Provide custom lowering hooks for some operations. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + /// getTargetNodeName - This method returns the name of a target specific + /// DAG node. + const char *getTargetNodeName(unsigned Opcode) const override; + template SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals, + const SDNode *CallNode, const Type *RetTy) const; + ConstraintType getConstraintType(const std::string &Constraint) const; + + unsigned MatchRegName(StringRef Name) const; + Register getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const override; + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. 
+ ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const override; + + // Inline asm support + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; + + virtual bool + isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + EVT getOptimalMemOpType( + const MemOp &Op, const AttributeList & /*FuncAttributes*/) const override; + + /// isFPImmLegal - Returns true if the target can instruction select the + /// specified FP immediate natively. If false, the legalizer will + /// materialize the FP immediate as a load from a constant pool. + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + struct LTStr { + bool operator()(const char *S1, const char *S2) const { + return strcmp(S1, S2) < 0; + } + }; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + Register + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return Sw64::R16; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + Register + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return Sw64::R17; + } + SDValue PerformDAGCombineV(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + /// Enable SIMD support for the given integer type and Register + /// class. + void addSIMDIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + + /// Enable SIMD support for the given floating-point type and + /// Register class. + void addSIMDFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + +private: + // Helpers for custom lowering. 
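+  // These are the targets of the LowerOperation()/ReplaceNodeResults()
+  // hooks declared above; the dispatch looks roughly like
+  //   switch (Op.getOpcode()) {
+  //   case ISD::VASTART:       return LowerVASTART(Op, DAG);
+  //   case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+  //   ...
+  //   }
+  // (an illustrative sketch; the actual switch is in Sw64ISelLowering.cpp).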
+ void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, + SelectionDAG &DAG) const; + + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + + virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SDLoc &dl, + SelectionDAG &DAG) const override; + + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl &ArgsFlags, + LLVMContext &Context) const override; + + // Lower Operand specifics + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUDIVI128(SDValue Op, SelectionDAG &DAG) const; + std::pair LowerCallExtraResult(SDValue &Chain, + SDValue &DemoteStackSlot, + unsigned DemoteStackIdx, + SelectionDAG &DAG) const; + SDValue LowerROLW(SDNode *N, SelectionDAG &DAG) const; + + SDValue LowerVectorShift(SDValue Op, SelectionDAG &DAG) const; + + ISD::NodeType getExtendForAtomicOps() const override { + return ISD::ANY_EXTEND; + } + + SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + + MachineMemOperand::Flags + getTargetMMOFlags(const Instruction &I) const override; + + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return true; + } + Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + /// This function parses registers that appear in inline-asm constraints. + /// It returns pair (0, 0) on failure. 
+ + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; + + MachineBasicBlock *emitReduceSum(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitITOFSInstruct(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitFSTOIInstruct(MachineInstr &MI, + MachineBasicBlock *BB) const; + SDValue LowerVectorMemIntr(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + /// Return true if an FMA operation is faster than a pair of fmul and fadd + /// instructions. fmuladd intrinsics will be expanded to FMAs when this method + /// returns true, otherwise fmuladd is expanded to fmul + fadd. + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; + + std::pair + parseRegForInlineAsmConstraint(StringRef C, MVT VT) const; + + MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *emitPrefetch(MachineInstr &MI, + MachineBasicBlock *BB) const; + + SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const override; + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, + SDValue &Offset, ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; + const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; + + SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS, + Instruction *I = nullptr) const override; +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64InstrFormats.td b/llvm/lib/Target/Sw64/Sw64InstrFormats.td new file mode 100644 index 000000000000..c7ec61ea5b5b --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrFormats.td @@ -0,0 +1,452 @@ +//===- Sw64InstrFormats.td - Sw64 Instruction Formats ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +def u5imm : Operand{ + let DecoderMethod = "decodeUImmOperand<5>"; +} +def u6imm : Operand{ + let DecoderMethod = "decodeUImmOperand<6>"; +} +def u8imm : Operand{ + let DecoderMethod = "decodeUImmOperand<8>"; +} +def u8immHex : Operand{ + let DecoderMethod = "decodeUImmOperand<8>"; + let PrintMethod = "printHexImm"; +} +def s8imm : Operand{ + let DecoderMethod = "decodeSImmOperand<8>"; +} +def s13imm : Operand{ + let DecoderMethod = "decodeSImmOperand<13>"; +} +def s12imm : Operand{ + let DecoderMethod = "decodeSImmOperand<12>"; +} +def s14imm : Operand{ + let DecoderMethod = "decodeSImmOperand<14>"; +} +def s16imm : Operand{ + let DecoderMethod = "decodeSImmOperand<16>"; + let OperandType = "OPERAND_PCREL"; +} +def s21imm : Operand{ + let DecoderMethod = "decodeSImmOperand<21>"; + let OperandType = "OPERAND_PCREL"; +} +def u26imm : Operand{ + let DecoderMethod = "decodeSImmOperand<26>"; +} +def s64imm : Operand{ + let DecoderMethod = "decodeSImmOperand<64>"; + let PrintMethod = "printMemoryArg"; +} +def u64imm : Operand{ + let DecoderMethod = "decodeSImmOperand<64>"; +} + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// +// Sw64 instruction baseline +class InstSw64 op, string opstr, string operands> : Instruction { + field bits<32> Inst; + let Namespace = "Sw64"; + let Inst{31-26} = op; + + let AsmString = opstr # " " # operands; + // Add Size: Number of bytes in encoding + let Size = 4; + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. 
+ field bits<32> SoftFail = 0; +} + +//Chapter2.6.1 +// sys_call |31 26|25 0| +// | Opcode | Func | +class PALForm opcode, dag iops, dag oops, + string opstr, string operands> + : InstSw64 { + let OutOperandList = oops; + let InOperandList = iops; + bits<26> disp; + + let Inst{25-0} = disp; +} + +// Branching beq/bge/bgt Chapter2.6.2 +// COND_BRANCH |31 26|25 21|20 0| +// | Opcode | RA/Fa | disp | + +def JmpTargetAsmOperand : AsmOperandClass { + let Name = "JmpImm"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseJmpImm"; +} + +def target : Operand { + let ParserMatchClass = JmpTargetAsmOperand; + let EncoderMethod = "getBranchTargetOpValue"; + let DecoderMethod = "decodeSImmOperand<21>"; + let OperandType = "OPERAND_PCREL"; + let MCOperandPredicate = [{ + int64_t Imm; + if (MCOp.evaluateAsConstantImm(Imm)) + return isShiftedInt<22, 2>(Imm); + return MCOp.isBareSymbolRef(); + }]; +} + +class BForm opcode, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + bits<64> Opc; //dummy + bits<5> RA; + bits<21> disp; + + let Inst{25-21} = RA; + let Inst{20-0} = disp; +} + +// LDL/LDW Chapter2.6.3 +// Memory |31 26|25 21|20 16|15 0| +// | Opcode | RA/Fa | RB | disp | +class MForm opcode, dag iops, dag oops, + string opstr, string operands="", list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RA; + bits<16> DISP; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-0} = DISP; +} + +class MfcForm opcode, bits<16> Func, dag iops, dag oops, + string opstr, string operands="", list pattern=[]> + : InstSw64 { + bits<16> Function=Func; + bits<5> RA; + bits<5> RB; + + let OutOperandList = oops; + let InOperandList = iops; + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-0} = Function; +} + + +// New Add, for atomic-op +// Memory + Func |31 26|25 21|20 16|15 12|11 0| +// | Opcode | RA/Fa | RB | Func | disp | +class MFuncForm opcode, bits<4> func, dag iops, dag oops, + string opstr, string operands="", list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let InOperandList = iops; + let OutOperandList = oops; + + bits<5> RA; + bits<12> disp; + bits<5> RB; + bits<4> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-12} = Function; + let Inst{11-0} = disp; +} + +// New Add, for privilege inst +// Memory + Func |31 26|25 21|20 16|15 12|11 0| +// | Opcode | TH | RB | Func | disp | +class MPrvlForm opcode, bits<4> func, dag iops, dag oops, + string opstr, string operands="", list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let InOperandList = iops; + let OutOperandList = oops; + + bits<5> TH; + bits<12> disp; + bits<5> RB; + bits<4> Function = func; + + let Inst{25-21} = TH; + let Inst{20-16} = RB; + let Inst{15-12} = Function; + let Inst{11-0} = disp; +} + +// Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 16|15 13|12 5|4 0| +// r + r : | Opcode | RA | RB | SBZ | Func | RC | +class OForm opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + + +// 
Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 13|12 5|4 0| +// r + i : | Opcode | RA | imm | Func | RC | +class OFormL opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RC; + bits<5> RA; + bits<8> L; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-13} = L; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +// Chapter2.6.4 +// simple_operation_form |31 26|25 13|12 5|4 0| +// r + i : | Opcode | imm | Func | RC | +class OFormI opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RC; + bits<13> L; + bits<8> Function = fun; + + let Inst{25-13} = L; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + + + +// seleq/selne... Chapter2.6.5(1) +// int_complex_operation_form |31 26|25 21|20 16|15 13|12 10|9 5|4 0| +// r + r : | Opcode | RA | RB | SBZ | Func | RC | RD | +class OForm4 opcode, bits<3> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<3> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +// seleq/selne... Chapter2.6.5(2) +// int_complex_operation_form |31 26|25 21|20 13|12 10|9 5|4 0| +// r + i : | Opcode | RA | imm | Func | RC | RD | +class OForm4L opcode, bits<3> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RD; + bits<5> RA; + bits<8> L; + bits<3> Function = fun; + bits<5> RC; + + let Inst{25-21} = RA; + let Inst{20-13} = L; + let Inst{12-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +// fadds/faddd... 
Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 16|15 13|12 5|4 0| +// r + r : | Opcode | Fa | Fb | SBZ | Func | Fc | +class FPForm opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let InOperandList = iops; + let OutOperandList = oops; + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +class FPForm1 opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let InOperandList = iops; + let OutOperandList = oops; + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +// New add fselXX Chapter2.6.5(3) +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + r : | Opcode | Fa | Fb | Func | Fc | Fd | +class FForm4 opcode, bits<6> func, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RD; + bits<5> RC; + bits<5> RB; + bits<5> RA; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +// New add fselXX Chapter2.6.5(4) +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Fa | Fb | Func | imm | Fd | +class FForm4L opcode, bits<6> func, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + let Constraints = "$RFALSE = $RDEST"; + let DisableEncoding = "$RFALSE"; + + bits<5> RA; + bits<5> RB; + bits<5> LIT; + bits<5> RD; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = LIT; + let Inst{4-0} = RD; +} + +// New add CSRXX Chapter4.9.2 +// fp_complex_operation_form |31 26|25 21|20 16|15 8|7 0| +// : | Opcode | Ra | Rb | Func | Index | +class CSRForm opcode, bits<8> func, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RB; + bits<5> RA; + bits<8> Function = func; + bits<8> L; + + let Inst{25-21} = RA; + let Inst{20-16} = 0x1f; + let Inst{15-8} = Function; + let Inst{7-0} = L; +} + +// New add FCVTSH Chapter 4.6.3.3 +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Fa | Fb | Func | imm | Fd | +class FCForm4L opcode, bits<6> func, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RA; + bits<5> RB; + bits<5> LIT; + bits<5> RD; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = LIT; + let Inst{4-0} = RD; +} + + +// Pseudo instructions. 
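+// Pseudos use opcode 0 and have no fixed encoding of their own; setting
+// isCodeGenOnly = 1 keeps them out of the assembler and disassembler
+// tables, and they are expanded to real instructions before emission.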
+class PseudoInstSw64 pattern> + : InstSw64<0, opstr, ""> { + let OutOperandList = oops; + let InOperandList = iops; + let Pattern = pattern; + let isCodeGenOnly = 1; +} diff --git a/llvm/lib/Target/Sw64/Sw64InstrFormatsV.td b/llvm/lib/Target/Sw64/Sw64InstrFormatsV.td new file mode 100644 index 000000000000..5339b7864a28 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrFormatsV.td @@ -0,0 +1,389 @@ +//===- Sw64InstrFormats.td - Sw64 Instruction Formats ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// SIMD Instruction format superclass +//===----------------------------------------------------------------------===// + + +class SIMDPseudo pattern>: + PseudoInstSw64 { +} + + +class InstSw64V op> : Instruction { + field bits<32> Inst; + let Namespace = "Sw64"; + let Inst{31-26} = op; + let Size = 4; + field bits<32> SoftFail = 0; +} + +class InstSw64VLog op> : Instruction { + field bits<32> Inst; + let Namespace = "Sw64"; + let Inst{31-28} = op; + let Size = 4; + field bits<32> SoftFail = 0; +} + +// VLDD/VLDW Chapter2.6.3 +// Memory |31 26|25 21|20 16|15 0| +// | Opcode | RA/RA | RB | disp | +class MFormV opcode> : InstSw64V { + + bits<5> RA; + bits<21> addr; + + let Inst{25-21} = RA; + let Inst{20-16} = addr{20-16}; + let Inst{15-0} = addr{15-0}; +} + +// Memory + Func |31 26|25 21|20 16|15 12|11 0| +// | Opcode | RA/RA | RB | Func | disp | +class MFuncFormV opcode, bits<4> func> : InstSw64V { + + bits<5> RA; + bits<21> addr; + bits<4> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = addr{20-16}; + let Inst{15-12} = Function; + let Inst{11-0} = addr{11-0}; +} + +// fadds/faddd... 
Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 16|15 13|12 5|4 0| +// r + r : | Opcode | RA | RB | SBZ | Func | RC | +class FPFormV opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +class FPFormV_2RV opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +class FPFormV_2RV1 opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +class FPFormV_CT opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + + +// Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 13|12 5|4 0| +// r + i : | Opcode | RA | imm | Func | RC | +class FPFormIV opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<8> Imm; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-13} = Imm; + let Inst{12-11} = Function{7-6}; + let Inst{10} = 1; + let Inst{9-5} = Function{4-0}; + let Inst{4-0} = RC; +} + + +// New add fselXX Chapter2.6.5(3) +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + r : | Opcode | Va | Vb | Func | Vc | Vd | +class FForm4V opcode, bits<6> func> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +class FForm4VINSECTL opcode, bits<6> func> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = 31; +} + +class FForm4VCPY opcode, bits<6> func> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = 31; + let Inst{15-10} = Function; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +// vcpyw/vcpys +class FForm2V opcode, bits<6> func> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = 0; + let Inst{15-10} = Function; + let Inst{9-5} = 0; + let Inst{4-0} = RC; +} + + +// New add fselXX Chapter2.6.5(4) +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Va | Vb | Func | imm | Vd | +class FForm4LV opcode, bits<6> func> : InstSw64V { + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> Imm; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = Imm; + let Inst{4-0} = RD; +} + +class FForm4LV1 opcode, bits<6> func> : InstSw64V { + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> Imm; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = 31; + let Inst{15-10} = Function; + let Inst{9-5} = Imm; + let Inst{4-0} = RD; +} + +class FForm4LV2 opcode, bits<6> func> : InstSw64V { + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + 
let Inst{15-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} +// New add vext Fix the RD to RC +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Va | Vb | Func | imm | Vd | +class FForm4LVV opcode, bits<6> func> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<5> Imm; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = 0; + let Inst{15-10} = Function; + let Inst{9-5} = Imm; + let Inst{4-0} = RC; +} + +// New add vlogxx +// vlogxx: |31 28|27 26|25 21|20 16|15 10|9 5|4 0| +// | Opcode | zz[7:6] | Va | Vb | zz[5:0] | Vc | Vd | +class FForm_VANDW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VBICW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x30; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VBISW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x3c; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VXORW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x3c; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VEQVW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x03; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VORNOTW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x33; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + + +class FForm4LVLogZZ opcode> : InstSw64VLog { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<8> Imm; + + let Inst{27-26} = Imm{7-6}; + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Imm{5-0}; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +class FForm4LVLog opcode, bits<8> zz> : InstSw64VLog { + bits<5> RC; + bits<5> RA; + bits<5> RB; + + let Inst{27-26} = zz{7-6}; + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = zz{5-0}; + let Inst{9-5} = 31; + let Inst{4-0} = RC; +} + +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Va | Vb | Func | imm | Vd | +class FForm4_VSELi opcode, bits<6> func> : InstSw64V { + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> Imm; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15} = 1; + let Inst{14-10} = Function{4-0}; + let Inst{9-5} = Imm; + let Inst{4-0} = RD; +} + +class VectorIndex : Operand, ImmLeaf; + +def VectorIndexB : VectorIndex; +def VectorIndexH : VectorIndex; +def VectorIndexS : VectorIndex; +def VectorIndexD : VectorIndex; diff --git a/llvm/lib/Target/Sw64/Sw64InstrInfo.cpp b/llvm/lib/Target/Sw64/Sw64InstrInfo.cpp new file mode 100644 index 000000000000..8107c009230e --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrInfo.cpp @@ -0,0 +1,1012 @@ +//===-- Sw64InstrInfo.cpp - Sw64 Instruction Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Sw64InstrInfo.h" +#include "Sw64.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64OptionRecord.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineCombinerPattern.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +#define DEBUG_TYPE "Sw64combinefma" + +#define GET_INSTRINFO_CTOR_DTOR +#include "Sw64GenInstrInfo.inc" + +// Pin the vtable to this file. +void Sw64InstrInfo::anchor() {} + +Sw64InstrInfo::Sw64InstrInfo() + : Sw64GenInstrInfo(Sw64::ADJUSTSTACKDOWN, Sw64::ADJUSTSTACKUP), RI() {} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned Sw64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + switch (MI.getOpcode()) { + case Sw64::LDL: + case Sw64::LDW: + case Sw64::LDHU: + case Sw64::LDBU: + case Sw64::LDS: + case Sw64::LDD: + if (MI.getOperand(1).isFI()) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); + } + break; + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned Sw64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + switch (MI.getOpcode()) { + case Sw64::STL: + case Sw64::STH: + case Sw64::STB: + case Sw64::STW: + case Sw64::STS: + case Sw64::STD: + if (MI.getOperand(1).isFI()) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned Sw64InstrInfo::insertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "Sw64 branch conditions have two components!"); + + // Unconditional branch. 
+ if (Cond.empty()) { + MachineInstr &MI = *BuildMI(&MBB, DL, get(Sw64::PseudoBR)).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); + return 1; + } + + // Either a one or two-way conditional branch. + unsigned Opc = Cond[0].getImm(); + MachineInstr &CondMI = *BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(CondMI); + + // One-way conditional branch. + if (!FBB) + return 1; + + // Two-way conditional branch. + MachineInstr &MI = *BuildMI(&MBB, DL, get(Sw64::PseudoBR)).addMBB(FBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); + return 2; +} + +void Sw64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, MCRegister DestReg, + MCRegister SrcReg, bool KillSrc) const { + if ((Sw64::F4RCRegClass.contains(DestReg) || + Sw64::FPRC_loRegClass.contains(DestReg)) && // for rust and SIMD + Sw64::GPRCRegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::ITOFS), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::F4RCRegClass.contains(SrcReg) && // for rust and SIMD + Sw64::GPRCRegClass.contains(DestReg)) { + BuildMI(MBB, MI, DL, get(Sw64::FTOIS), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(SrcReg) && // for rust and SIMD + Sw64::GPRCRegClass.contains(DestReg)) { + BuildMI(MBB, MI, DL, get(Sw64::FTOIT), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(DestReg) && // for rust and SIMD + Sw64::GPRCRegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::ITOFT), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(DestReg) && // for rust and SIMD + Sw64::FPRC_loRegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(SrcReg) && // for rust and SIMD + Sw64::FPRC_loRegClass.contains(DestReg)) { + BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::GPRCRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::BISr), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::F4RCRegClass.contains(DestReg, SrcReg)) { + unsigned int RC = MI->getOperand(1).getReg(); + unsigned int Opc = Sw64::CPYSS; + for (MachineBasicBlock::iterator MBBI = MI; MBBI != MBB.begin(); --MBBI) { + if (MBBI->getOpcode() == Sw64::VLDS || MBBI->getOpcode() == Sw64::VLDD) { + unsigned int RD = MBBI->getOperand(0).getReg(); + if (RC == RD) + Opc = Sw64::VCPYS; + break; + } + } + BuildMI(MBB, MI, DL, get(Opc), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::F8RCRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::V256LRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::VOR), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + llvm_unreachable("Attempt to copy register that is not GPR or FPR"); + } +} + +void Sw64InstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 
Register SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { + + DebugLoc DL; + if (MI != MBB.end()) + DL = MI->getDebugLoc(); + + unsigned Opc = 0; + + if (RC == &Sw64::F4RCRegClass) + Opc = Sw64::STS; + else if (RC == &Sw64::F8RCRegClass) + Opc = Sw64::STD; + else if (RC == &Sw64::GPRCRegClass) + Opc = Sw64::STL; + else if (TRI->isTypeLegalForClass(*RC, MVT::i64) || + TRI->isTypeLegalForClass(*RC, MVT::f64)) + Opc = Sw64::STD; + else if (TRI->isTypeLegalForClass(*RC, MVT::i32) || + TRI->isTypeLegalForClass(*RC, MVT::f32)) + Opc = Sw64::STS; + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32)) + Opc = Sw64::VSTD; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4f32)) + Opc = Sw64::VSTS; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64)) + Opc = Sw64::VSTD; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4f64)) + Opc = Sw64::VSTD; + else + llvm_unreachable("Unhandled register class"); + + BuildMI(MBB, MI, DL, get(Opc)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIdx) + .addReg(Sw64::R31); +} + +void Sw64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { + DebugLoc DL; + if (MI != MBB.end()) + DL = MI->getDebugLoc(); + + unsigned Opc = 0; + + if (RC == &Sw64::F4RCRegClass) + Opc = Sw64::LDS; + else if (RC == &Sw64::F8RCRegClass) + Opc = Sw64::LDD; + else if (RC == &Sw64::GPRCRegClass) + Opc = Sw64::LDL; + else if (TRI->isTypeLegalForClass(*RC, MVT::i64) || + TRI->isTypeLegalForClass(*RC, MVT::f64)) + Opc = Sw64::LDD; + else if (TRI->isTypeLegalForClass(*RC, MVT::i32) || + TRI->isTypeLegalForClass(*RC, MVT::f32)) + Opc = Sw64::LDS; + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32)) + Opc = Sw64::VLDD; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4f32)) + Opc = Sw64::VLDS; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64)) + Opc = Sw64::VLDD; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4f64)) + Opc = Sw64::VLDD; + else + llvm_unreachable("Unhandled register class"); + + BuildMI(MBB, MI, DL, get(Opc), DestReg) + .addFrameIndex(FrameIdx) + .addReg(Sw64::R31); +} + +static unsigned Sw64RevCondCode(unsigned Opcode) { + switch (Opcode) { + case Sw64::BEQ: + return Sw64::BNE; + case Sw64::BNE: + return Sw64::BEQ; + case Sw64::BGE: + return Sw64::BLT; + case Sw64::BGT: + return Sw64::BLE; + case Sw64::BLE: + return Sw64::BGT; + case Sw64::BLT: + return Sw64::BGE; + case Sw64::BLBC: + return Sw64::BLBS; + case Sw64::BLBS: + return Sw64::BLBC; + case Sw64::FBEQ: + return Sw64::FBNE; + case Sw64::FBNE: + return Sw64::FBEQ; + case Sw64::FBGE: + return Sw64::FBLT; + case Sw64::FBGT: + return Sw64::FBLE; + case Sw64::FBLE: + return Sw64::FBGT; + case Sw64::FBLT: + return Sw64::FBGE; + default: + llvm_unreachable("Unknown opcode"); + } + return 0; // Not reached +} + +//===----------------------------------------------------------------------===// +// Branch Analysis +//===----------------------------------------------------------------------===// +// + +static bool isCondOpCode(unsigned Opcode) { + switch (Opcode) { + default: + return false; + case Sw64::BEQ: + case Sw64::BNE: + case Sw64::BGE: + case Sw64::BGT: + case Sw64::BLE: + case Sw64::BLT: + case Sw64::BLBC: + case Sw64::BLBS: + case Sw64::FBEQ: + case Sw64::FBNE: + case Sw64::FBGE: + case Sw64::FBGT: + case Sw64::FBLE: + case Sw64::FBLT: + return true; + } + return false; // Not 
reached +} + +static bool isUnCondOpCode(unsigned Opcode) { return Opcode == Sw64::PseudoBR; } + +static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, + SmallVectorImpl &Cond) { + + Target = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); +} + +bool Sw64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return false; + + if (!isUnpredicatedTerminator(*I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + unsigned LastOpc = LastInst->getOpcode(); + // If there is only one terminator instruction, process it. + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + if (isUnCondOpCode(LastOpc)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (isCondOpCode(LastOpc)) { + parseCondBranch(LastInst, TBB, Cond); + return false; + } // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = &*I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. + if (AllowModify && isUnCondOpCode(LastOpc)) { + while (isUnCondOpCode(SecondLastOpc)) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + SecondLastInst = &*I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + } + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) + return true; + + if (isCondOpCode(SecondLastOpc) && isUnCondOpCode(LastOpc)) { + parseCondBranch(SecondLastInst, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two Sw64::BRs, handle it. The second one is not + // executed, so remove it. + if (isUnCondOpCode(SecondLastOpc) && isUnCondOpCode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +unsigned Sw64InstrInfo::removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved) const { + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return false; + + if (I->getOpcode() != Sw64::PseudoBR && !isCondOpCode(I->getOpcode())) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) { + if (BytesRemoved) + *BytesRemoved = 4; + return 1; + } + --I; + if (!isCondOpCode(I->getOpcode())) { + if (BytesRemoved) + *BytesRemoved = 4; + return 1; + } + + // Remove the branch. 
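+  // At this point the remaining terminator is a conditional branch: erase it
+  // as well and report both removed instructions (8 bytes).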
+ I->eraseFromParent(); + if (BytesRemoved) + *BytesRemoved = 8; + return 2; +} + +void Sw64InstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + DebugLoc DL; + BuildMI(MBB, MI, DL, get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); +} + +bool Sw64InstrInfo::ReverseBranchCondition( + SmallVectorImpl &Cond) const { + assert(Cond.size() == 2 && "Invalid Sw64 branch opcode!"); + Cond[0].setImm(Sw64RevCondCode(Cond[0].getImm())); + return false; +} + +/// getGlobalBaseReg - Return a virtual register initialized with the +/// the global base register value. Output instructions required to +/// initialize the register in the function entry block, if necessary. +/// +unsigned Sw64InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { + Sw64MachineFunctionInfo *Sw64FI = MF->getInfo(); + unsigned GlobalBaseReg = Sw64FI->getGlobalBaseReg(*MF); + if (GlobalBaseReg != 0) + return GlobalBaseReg; + + // Insert the set of GlobalBaseReg into the first MBB of the function + GlobalBaseReg = Sw64::R29; + Sw64FI->setGlobalBaseReg(GlobalBaseReg); + return GlobalBaseReg; +} + +/// getGlobalRetAddr - Return a virtual register initialized with the +/// the global base register value. Output instructions required to +/// initialize the register in the function entry block, if necessary. +/// +unsigned Sw64InstrInfo::getGlobalRetAddr(MachineFunction *MF) const { + Sw64MachineFunctionInfo *Sw64FI = MF->getInfo(); + unsigned GlobalRetAddr = Sw64FI->getGlobalRetAddr(*MF); + if (GlobalRetAddr != 0) + return GlobalRetAddr; + + // Insert the set of GlobalRetAddr into the first MBB of the function + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + GlobalRetAddr = Sw64::R26; + RegInfo.addLiveIn(Sw64::R26); + Sw64FI->setGlobalRetAddr(GlobalRetAddr); + return GlobalRetAddr; +} + +MachineInstr *Sw64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Sw64::DBG_VALUE)) + .addFrameIndex(FrameIx) + .addImm(0) + .addImm(Offset) + .addMetadata(MDPtr); + return &*MIB; +} + +// for vector optimize. +// Utility routine that checks if \param MO is defined by an +// \param CombineOpc instruction in the basic block \param MBB +static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, + unsigned CombineOpc) { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + MachineInstr *MI = nullptr; + + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) + MI = MRI.getUniqueVRegDef(MO.getReg()); + + LLVM_DEBUG(dbgs() << "is MO reg?" << MO.isReg(); + dbgs() << "is Register Virtual?" + << Register::isVirtualRegister(MO.getReg())); + + // And it needs to be in the trace (otherwise, it won't have a depth). + if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) + return false; + + // Must only used by the user we combine with. + if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) + return false; + + return true; +} + +// +// Is \param MO defined by a floating-point multiply and can be combined? +static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, + unsigned MulOpc) { + return canCombine(MBB, MO, MulOpc); +} + +// TODO: There are many more machine instruction opcodes to match: +// 1. Other data types (integer, vectors) +// 2. Other math / logic operations (xor, or) +// 3. 
Other forms of the same operation (intrinsics and other variants) +bool Sw64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, + bool Invert) const { + if (Invert) + return false; + switch (Inst.getOpcode()) { + case Sw64::ADDD: + case Sw64::ADDS: + case Sw64::MULD: + case Sw64::MULS: + case Sw64::VADDS: + case Sw64::VADDD: + case Sw64::VMULS: + case Sw64::VMULD: + return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; + default: + return false; + } +} + +// FP Opcodes that can be combined with a FMUL +static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { + switch (Inst.getOpcode()) { + default: + break; + case Sw64::ADDS: + case Sw64::ADDD: + case Sw64::SUBS: + case Sw64::SUBD: { + TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; + return (Options.UnsafeFPMath || + Options.AllowFPOpFusion == FPOpFusion::Fast); + } + case Sw64::VADDS: + case Sw64::VADDD: + case Sw64::VSUBS: + case Sw64::VSUBD: + return true; + } + return false; +} + +/// Find instructions that can be turned into madd. +static bool getFMAPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns) { + + if (!isCombineInstrCandidateFP(Root)) + return false; + + MachineBasicBlock &MBB = *Root.getParent(); + bool Found = false; + + switch (Root.getOpcode()) { + default: + assert(false && "Unsupported FP instruction in combiner\n"); + break; + case Sw64::ADDS: + assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && + "FADDS does not have register operands"); + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULS)) { + Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULS)) { + Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); + Found = true; + } + break; + + case Sw64::ADDD: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULD)) { + Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULD)) { + Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); + Found = true; + } + break; + + case Sw64::SUBS: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULS)) { + Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULS)) { + Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); + Found = true; + } + break; + + case Sw64::SUBD: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULD)) { + Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULD)) { + Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); + Found = true; + } + break; + case Sw64::VADDS: + assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && + "FADDS does not have register operands"); + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULS)) { + Patterns.push_back(MachineCombinerPattern::VMULADDS_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULS)) { + Patterns.push_back(MachineCombinerPattern::VMULADDS_OP2); + Found = true; + } + break; + + case Sw64::VADDD: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULD)) { + Patterns.push_back(MachineCombinerPattern::VMULADDD_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULD)) { + Patterns.push_back(MachineCombinerPattern::VMULADDD_OP2); + Found = true; + } + break; + + case 
Sw64::VSUBS: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULS)) { + Patterns.push_back(MachineCombinerPattern::VMULSUBS_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULS)) { + Patterns.push_back(MachineCombinerPattern::VMULSUBS_OP2); + Found = true; + } + break; + case Sw64::VSUBD: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULD)) { + Patterns.push_back(MachineCombinerPattern::VMULSUBD_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULD)) { + Patterns.push_back(MachineCombinerPattern::VMULSUBD_OP2); + Found = true; + } + break; + } + return Found; +} + +/// Return true when a code sequence can improve throughput. It +/// should be called only for instructions in loops. +/// \param Pattern - combiner pattern +bool Sw64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { + switch (Pattern) { + default: + break; + case MachineCombinerPattern::FMULADDS_OP1: + case MachineCombinerPattern::FMULADDS_OP2: + case MachineCombinerPattern::FMULSUBS_OP1: + case MachineCombinerPattern::FMULSUBS_OP2: + case MachineCombinerPattern::FMULADDD_OP1: + case MachineCombinerPattern::FMULADDD_OP2: + case MachineCombinerPattern::FMULSUBD_OP1: + case MachineCombinerPattern::FMULSUBD_OP2: + case MachineCombinerPattern::FNMULSUBS_OP1: + case MachineCombinerPattern::FNMULSUBD_OP1: + case MachineCombinerPattern::VMULADDS_OP1: + case MachineCombinerPattern::VMULADDS_OP2: + case MachineCombinerPattern::VMULADDD_OP1: + case MachineCombinerPattern::VMULADDD_OP2: + case MachineCombinerPattern::VMULSUBS_OP1: + case MachineCombinerPattern::VMULSUBS_OP2: + case MachineCombinerPattern::VMULSUBD_OP1: + case MachineCombinerPattern::VMULSUBD_OP2: + return true; + } // end switch (Pattern) + return false; +} + +/// Return true when there is potentially a faster code sequence for an +/// instruction chain ending in \p Root. All potential patterns are listed in +/// the \p Pattern vector. Pattern should be sorted in priority order since the +/// pattern evaluator stops checking as soon as it finds a faster sequence. +bool Sw64InstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const { + // Floating point patterns + if (getFMAPatterns(Root, Patterns)) + return true; + + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, + DoRegPressureReduce); +} + +enum class FMAInstKind { Default, Indexed, Accumulator }; +/// genFusedMultiply - Generate fused multiply instructions. +/// This function supports both integer and floating point instructions. +/// A typical example: +/// F|MUL I=A,B,0 +/// F|ADD R,I,C +/// ==> F|MADD R,A,B,C +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information +/// \param Root is the F|ADD instruction +/// \param [out] InsInstrs is a vector of machine instructions and will +/// contain the generated madd instruction +/// \param IdxMulOpd is index of operand in Root that is the result of +/// the F|MUL. In the example above IdxMulOpd is 1. +/// \param MaddOpc the opcode fo the f|madd instruction +/// \param RC Register class of operands +/// \param kind of fma instruction (addressing mode) to be generated +/// \param ReplacedAddend is the result register from the instruction +/// replacing the non-combined operand, if any. 
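+/// For the *_OP2 patterns IdxMulOpd is 2, i.e. the multiply feeds the second
+/// operand of Root:
+///   F|MUL I=A,B
+///   F|ADD R,C,I
+///   ==> F|MADD R,A,B,C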
+static MachineInstr * +genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, MachineInstr &Root, + SmallVectorImpl &InsInstrs, unsigned IdxMulOpd, + unsigned MaddOpc, const TargetRegisterClass *RC, + FMAInstKind kind = FMAInstKind::Default, + const unsigned *ReplacedAddend = nullptr) { + assert(IdxMulOpd == 1 || IdxMulOpd == 2); + + LLVM_DEBUG(dbgs() << "creating fma insn \n"); + LLVM_DEBUG(dbgs() << MaddOpc); + LLVM_DEBUG(dbgs() << "\n"); + + unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1; + MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); + unsigned ResultReg = Root.getOperand(0).getReg(); + unsigned SrcReg0 = MUL->getOperand(1).getReg(); + bool Src0IsKill = MUL->getOperand(1).isKill(); + unsigned SrcReg1 = MUL->getOperand(2).getReg(); + bool Src1IsKill = MUL->getOperand(2).isKill(); + + unsigned SrcReg2; + bool Src2IsKill; + if (ReplacedAddend) { + // If we just generated a new addend, we must be it's only use. + SrcReg2 = *ReplacedAddend; + Src2IsKill = true; + } else { + SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); + Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); + } + if (Register::isVirtualRegister(ResultReg)) + MRI.constrainRegClass(ResultReg, RC); + if (Register::isVirtualRegister(SrcReg0)) + MRI.constrainRegClass(SrcReg0, RC); + if (Register::isVirtualRegister(SrcReg1)) + MRI.constrainRegClass(SrcReg1, RC); + if (Register::isVirtualRegister(SrcReg2)) + MRI.constrainRegClass(SrcReg2, RC); + + MachineInstrBuilder MIB; + if (kind == FMAInstKind::Default) + MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)) + .addReg(SrcReg2, getKillRegState(Src2IsKill)); + else if (kind == FMAInstKind::Indexed) + MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) + .addReg(SrcReg2, getKillRegState(Src2IsKill)) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)) + .addImm(MUL->getOperand(3).getImm()); + else if (kind == FMAInstKind::Accumulator) + MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) + .addReg(SrcReg2, getKillRegState(Src2IsKill)) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)); + else + assert(false && "Invalid FMA instruction kind \n"); + // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL) + InsInstrs.push_back(MIB); + return MUL; +} + +/// When getMachineCombinerPatterns() finds potential patterns, +/// this function generates the instructions that could replace the +/// original code sequence +void Sw64InstrInfo::genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { + + LLVM_DEBUG(dbgs() << "combining float instring\n"); + MachineBasicBlock &MBB = *Root.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + MachineInstr *MUL; + const TargetRegisterClass *RC; + unsigned Opc; + switch (Pattern) { + default: + // Reassociate instructions. 
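+    // Patterns not handled below (e.g. the generic REASSOC_* patterns) are
+    // delegated to the target-independent implementation.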
+ TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, + DelInstrs, InstrIdxForVirtReg); + return; + // Floating Point Support + case MachineCombinerPattern::FMULADDS_OP1: + case MachineCombinerPattern::FMULADDD_OP1: + // FMUL I=A,B + // FADD R,I,C + // ==> FMAx R,A,B,C + // --- Create(FMAx); + if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { + Opc = Sw64::FMAS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FMAD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::FMULADDS_OP2: + case MachineCombinerPattern::FMULADDD_OP2: + // FMUL I=A,B + // FADD R,C,I + // ==> FMAx R,A,B,C + // --- Create(FMAx); + if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { + Opc = Sw64::FMAS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FMAD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + + case MachineCombinerPattern::FMULSUBS_OP1: + case MachineCombinerPattern::FMULSUBD_OP1: { + // FMUL I=A,B,0 + // FSUB R,I,C + // ==> FMSx R,A,B,C // = A*B - C + // --- Create(FMSx); + if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { + Opc = Sw64::FMSS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FMSD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + case MachineCombinerPattern::FMULSUBS_OP2: + case MachineCombinerPattern::FMULSUBD_OP2: { + // FMUL I=A,B,0 + // FSUB R,I,C + // ==> FNMAx R,A,B,C // = -A*B + C + // --- Create(FNMAx); + if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) { + Opc = Sw64::FNMAS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FNMAD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + } + + case MachineCombinerPattern::FNMULSUBS_OP1: + case MachineCombinerPattern::FNMULSUBD_OP1: { + // FNMUL I=A,B,0 + // FSUB R,I,C + // ==> FNMSx R,A,B,C // = -A*B - C + // --- Create(FNMSx); + if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { + Opc = Sw64::FNMSS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FNMSD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + + case MachineCombinerPattern::VMULADDS_OP1: + case MachineCombinerPattern::VMULADDD_OP1: { + // VMULx I=A,B + // VADDx I,C,R + // ==> VMAx A,B,C,R // = A*B+C + // --- Create(VMAx); + Opc = Pattern == MachineCombinerPattern::VMULADDS_OP1 ? Sw64::VMAS + : Sw64::VMAD; + RC = &Sw64::V256LRegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + case MachineCombinerPattern::VMULADDS_OP2: + case MachineCombinerPattern::VMULADDD_OP2: { + // VMUL I=A,B + // VADD C,R,I + // ==> VMA A,B,C,R (computes C + A*B) + // --- Create(FMSUB); + Opc = Pattern == MachineCombinerPattern::VMULADDS_OP2 ? Sw64::VMAS + : Sw64::VMAD; + RC = &Sw64::V256LRegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + } + + case MachineCombinerPattern::VMULSUBS_OP1: + case MachineCombinerPattern::VMULSUBD_OP1: { + // VMULx I=A,B + // VSUBx I,C,R + // ==> VMSx A,B,C,R // = A*B-C + // --- Create(VMSx); + Opc = Pattern == MachineCombinerPattern::VMULSUBS_OP1 ? 
Sw64::VMSS + : Sw64::VMSD; + RC = &Sw64::V256LRegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + case MachineCombinerPattern::VMULSUBS_OP2: + case MachineCombinerPattern::VMULSUBD_OP2: { + // FMUL I=A,B,0 + // FSUB R,C,I + // ==> FMSUB R,A,B,C (computes C - A*B) + // --- Create(FMSUB); + Opc = Pattern == MachineCombinerPattern::VMULSUBS_OP2 ? Sw64::VNMAS + : Sw64::VNMAD; + RC = &Sw64::V256LRegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + } + } // end switch (Pattern) + // Record MUL and ADD/SUB for deletion + DelInstrs.push_back(MUL); + DelInstrs.push_back(&Root); +} + +bool Sw64InstrInfo::isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) + return true; + + switch (MI.getOpcode()) { + case Sw64::MOVProgPCGp: + case Sw64::MOVaddrPCGp: + case Sw64::WMEMB: + case Sw64::IMEMB: + case Sw64::MB: + return true; + } + return false; +} diff --git a/llvm/lib/Target/Sw64/Sw64InstrInfo.h b/llvm/lib/Target/Sw64/Sw64InstrInfo.h new file mode 100644 index 000000000000..c20d2b117bdd --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrInfo.h @@ -0,0 +1,143 @@ +//===-- Sw64InstrInfo.h - Sw64 Instruction Information --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64INSTRINFO_H +#define LLVM_LIB_TARGET_SW64_SW64INSTRINFO_H + +#include "Sw64RegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "Sw64GenInstrInfo.inc" + +namespace llvm { + +class Sw64InstrInfo : public Sw64GenInstrInfo { + const Sw64RegisterInfo RI; + virtual void anchor(); + +public: + Sw64InstrInfo(); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const Sw64RegisterInfo &getRegisterInfo() const { return RI; } + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + unsigned isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
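+  /// Mirrors isLoadFromStackSlot above for the STx family of stores.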
+  unsigned isStoreToStackSlot(const MachineInstr &MI,
+                              int &FrameIndex) const override;
+
+  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify) const override;
+
+  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+                        const DebugLoc &DL,
+                        int *BytesAdded = nullptr) const override;
+
+  unsigned removeBranch(MachineBasicBlock &MBB,
+                        int *BytesRemoved = nullptr) const override;
+
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
+                   bool KillSrc) const override;
+
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI, Register SrcReg,
+                           bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI,
+                           Register VReg) const override;
+
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI, Register DestReg,
+                            int FrameIndex, const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI,
+                            Register VReg) const override;
+
+  // Emit code before MBBI to load an immediate value into physical register
+  // Reg. Returns an iterator to the new instruction.
+  MachineBasicBlock::iterator loadImmediate(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator MI,
+                                            unsigned Reg, uint64_t Value) const;
+  void insertNoop(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator MI) const override;
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  /// getGlobalBaseReg - Return a virtual register initialized with the
+  /// global base register value. Output instructions required to
+  /// initialize the register in the function entry block, if necessary.
+  ///
+  unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+  /// getGlobalRetAddr - Return a virtual register initialized with the
+  /// global return address register value. Output instructions required to
+  /// initialize the register in the function entry block, if necessary.
+  ///
+  unsigned getGlobalRetAddr(MachineFunction *MF) const;
+
+  bool isSchedulingBoundary(const MachineInstr &MI,
+                            const MachineBasicBlock *MBB,
+                            const MachineFunction &MF) const override;
+
+  /// Return true when a code sequence can improve throughput. It
+  /// should be called only for instructions in loops.
+  /// \param Pattern - combiner pattern
+  bool isThroughputPattern(MachineCombinerPattern Pattern) const override;
+
+  /// Return true when there is potentially a faster code sequence
+  /// for an instruction chain ending in ``Root``. All potential patterns are
+  /// listed in the ``Patterns`` array.
+  bool
+  getMachineCombinerPatterns(MachineInstr &Root,
+                             SmallVectorImpl<MachineCombinerPattern> &Patterns,
+                             bool DoRegPressureReduce) const override;
+
+  /// Return true when Inst is associative and commutative so that it can be
+  /// reassociated.
+  bool isAssociativeAndCommutative(const MachineInstr &Inst,
+                                   bool Invert) const override;
+
+  /// When getMachineCombinerPatterns() finds patterns, this function generates
+  /// the instructions that could replace the original code sequence.
+  void genAlternativeCodeSequence(
+      MachineInstr &Root, MachineCombinerPattern Pattern,
+      SmallVectorImpl<MachineInstr *> &InsInstrs,
+      SmallVectorImpl<MachineInstr *> &DelInstrs,
+      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+  // Sw64 supports MachineCombiner.
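+  // Returning true lets the MachineCombiner pass exercise the pattern hooks
+  // declared above.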
+ bool useMachineCombiner() const override { return true; } + + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; +}; +} // namespace llvm +#endif // END LLVM_LIB_TARGET_SW64_SW64INSTRINFO_H diff --git a/llvm/lib/Target/Sw64/Sw64InstrInfo.td b/llvm/lib/Target/Sw64/Sw64InstrInfo.td new file mode 100644 index 000000000000..c69b805ce622 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrInfo.td @@ -0,0 +1,2084 @@ +//===- Sw64InstrInfo.td - The Sw64 Instruction Set -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +include "Sw64InstrFormats.td" + +//===----------------------------------------------------------------------===// +// Sw64 Instruction Predicate Definitions. +// +def EnableIntShift : Predicate<"Subtarget->enableIntShift()">, + AssemblerPredicate<(all_of Featureintshift), "swIntShift">; + +def EnableFloatCmov : Predicate<"Subtarget->enableFloatCmov()">; + +def EnableCrcInst : Predicate<"Subtarget->enableCrcInst()">; + +def EnableWmembInst : Predicate<"Subtarget->enableWmembInst()">; + +def EnableCasInst : Predicate<"Subtarget->enableCasInst()">; + +def HasSIMD : Predicate<"Subtarget->hasSIMD()">, + AssemblerPredicate<(all_of FeatureSIMD)>; +//******************** +//Custom DAG Nodes +//******************** + +def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [ + SDTCisFP<1>, SDTCisFP<0> +]>; + +def SDTIntTriOp : SDTypeProfile<1, 3, [ + SDTCisInt<1>, SDTCisInt<0>, SDTCisInt<2>, SDTCisInt<3> +]>; + +def Sw64_cvtqt : SDNode<"Sw64ISD::CVTQT_", SDTFPUnaryOpUnC, []>; +def Sw64_cvtqs : SDNode<"Sw64ISD::CVTQS_", SDTFPUnaryOpUnC, []>; +def Sw64_cvttq : SDNode<"Sw64ISD::CVTTQ_" , SDTFPUnaryOp, []>; +def Sw64_cvtts : SDNode<"Sw64ISD::CVTTS_", SDTFPUnaryOpUnC, + [SDNPHasChain]>; +def Sw64_cvtst : SDNode<"Sw64ISD::CVTST_", SDTFPUnaryOpUnC, + [SDNPHasChain]>; +def Sw64_tprello : SDNode<"Sw64ISD::TPRelLo", SDTIntBinOp, []>; +def Sw64_tprelhi : SDNode<"Sw64ISD::TPRelHi", SDTIntBinOp, []>; + +def Sw64_tlsgd : SDNode<"Sw64ISD::TLSGD", SDTIntTriOp, []>; +def Sw64_tlsldm : SDNode<"Sw64ISD::TLSLDM", SDTIntBinOp, []>; +def Sw64_dtprello : SDNode<"Sw64ISD::DTPRelLo", SDTIntBinOp, []>; +def Sw64_dtprelhi : SDNode<"Sw64ISD::DTPRelHi", SDTIntBinOp, []>; + +def Sw64_syscall : SDNode<"Sw64ISD::SysCall", SDTIntUnaryOp, []>; +def Sw64_LDAWithChain : SDNode<"Sw64ISD::LDAWC", SDTIntBinOp, [SDNPHasChain]>; +def Sw64_gprello : SDNode<"Sw64ISD::GPRelLo", SDTIntUnaryOp>; +def Sw64_gprelhi : SDNode<"Sw64ISD::GPRelHi", SDTIntUnaryOp>; +def Sw64_rellit : SDNode<"Sw64ISD::RelLit", SDTIntUnaryOp>; + +def Sw64_gprel : SDNode<"Sw64ISD::GPRel", SDTIntUnaryOp>; +def Sw64_tprel : SDNode<"Sw64ISD::TPRel", SDTIntUnaryOp>; +def Sw64_dtprel : SDNode<"Sw64ISD::DTPRel", SDTIntUnaryOp>; + +def Sw64ldih : SDNode<"Sw64ISD::LDIH", SDTIntUnaryOp, []>; +def Sw64ldi : SDNode<"Sw64ISD::LDI", SDTIntBinOp, [SDNPOutGlue]>; + +def Sw64_relgottp : SDNode<"Sw64ISD::RelGottp", SDTIntBinOp, [SDNPMayLoad]>; +def retflag : SDNode<"Sw64ISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +// These are target-independent nodes, but have target-specific formats. 
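+// callseq_start/callseq_end are selected into the ADJUSTSTACKDOWN and
+// ADJUSTSTACKUP pseudos defined further down.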
+def SDT_Sw64CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>; +def SDT_Sw64CallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>, + SDTCisVT<1, i64> ]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_Sw64CallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_Sw64CallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def Sw64_frecs : SDNode<"Sw64ISD::FRECS", SDTFPUnaryOp, []>; +def Sw64_frecd : SDNode<"Sw64ISD::FRECD", SDTFPUnaryOp, []>; +def Sw64_sbt : SDNode<"Sw64ISD::SBT", SDTIntBinOp, []>; +def Sw64_cbt : SDNode<"Sw64ISD::CBT", SDTIntBinOp, []>; +def Sw64_addpi : SDNode<"Sw64ISD::ADDPI", SDTIntUnaryOp, []>; +def Sw64_addpis : SDNode<"Sw64ISD::ADDPIS", SDTIntUnaryOp, []>; + +def Sw64_revbh : SDNode<"Sw64ISD::REVBH", SDTIntUnaryOp, []>; +def Sw64_revbw : SDNode<"Sw64ISD::REVBW", SDTIntUnaryOp, []>; + +def Sw64_rolw : SDNode<"Sw64ISD::ROLW", SDTIntBinOp, []>; + +def Sw64_crc32b : SDNode<"Sw64ISD::CRC32B", SDTIntBinOp, []>; +def Sw64_crc32h : SDNode<"Sw64ISD::CRC32H", SDTIntBinOp, []>; +def Sw64_crc32w : SDNode<"Sw64ISD::CRC32W", SDTIntBinOp, []>; +def Sw64_crc32l : SDNode<"Sw64ISD::CRC32L", SDTIntBinOp, []>; +def Sw64_crc32cb : SDNode<"Sw64ISD::CRC32CB", SDTIntBinOp, []>; +def Sw64_crc32ch : SDNode<"Sw64ISD::CRC32CH", SDTIntBinOp, []>; +def Sw64_crc32cw : SDNode<"Sw64ISD::CRC32CW", SDTIntBinOp, []>; +def Sw64_crc32cl : SDNode<"Sw64ISD::CRC32CL", SDTIntBinOp, []>; + +def Sw64_casl : SDNode<"Sw64ISD::CASL", SDTIntBinOp, []>; +def Sw64_casw : SDNode<"Sw64ISD::CASW", SDTIntBinOp, []>; + +let Constraints = "$RD = $RC" in +class inst_cas opc, bits<8> fun, string opstr> + : OForm ; + +def CASW : inst_cas<0x10, 0x5e, "casw">; +def CASL : inst_cas<0x10, 0x5f, "casl">; + +def : Pat<(atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp), + (CASL GPRC:$cmp, GPRC:$ptr, GPRC:$swp)>, + Requires<[EnableCasInst, HasCore4]>; + +def : Pat<(atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp), + (CASW GPRC:$cmp, GPRC:$ptr, GPRC:$swp)>, + Requires<[EnableCasInst, HasCore4]>; + +def call_symbol : Operand; +//******************** +//Paterns for matching +//******************** +def invX : SDNodeXFormgetZExtValue(), SDLoc(N)); +}]>; +def negX : SDNodeXFormgetZExtValue() + 1, SDLoc(N)); +}]>; +def SExt32 : SDNodeXFormgetZExtValue() << 32) >> 32, SDLoc(N)); +}]>; +def SExt16 : SDNodeXFormgetZExtValue() << 48) >> 48, SDLoc(N)); +}]>; +def LL16 : SDNodeXFormgetZExtValue()), SDLoc(N)); +}]>; +def LH16 : SDNodeXFormgetZExtValue()), SDLoc(N)); +}]>; +def iZAPX : SDNodeXForm(N->getOperand(1)); + return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()), SDLoc(N)); +}]>; +def nearP2X : SDNodeXFormgetZExtValue())), SDLoc(N)); +}]>; +def nearP2RemX : SDNodeXFormgetZExtValue() - getNearPower2((uint64_t)N->getZExtValue())); + return getI64Imm(Log2_64(x), SDLoc(N)); +}]>; + +def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field + return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue(); +}]>; +def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field + return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue(); +}], invX>; +def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field + return ((uint64_t)~N->getZExtValue() + 1) == + (uint8_t)((uint64_t)~N->getZExtValue() + 1); +}], negX>; + +def immUExt13 : PatLeaf<(imm), [{ + return (uint32_t)N->getZExtValue() < (1 << 13); +}]>; + +def immSExt12 : PatLeaf<(imm), [{ //imm fits in 12 bit sign extended field + return ((int64_t)N->getZExtValue() << 
52) >> 52 == + (int64_t)N->getZExtValue(); +}]>; + +def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field + return ((int64_t)N->getZExtValue() << 48) >> 48 == + (int64_t)N->getZExtValue(); +}]>; + +def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{ + ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); + if (!RHS) return 0; + uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue()); + return build != 0; +}]>; + +def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0 + (void)N; // silence warning. + return true; +}]>; + +def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>; +def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>; +def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>; +def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>; +def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>; +def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>; +def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>; +def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>; +def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>; +def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>; + +def immRemP2n : PatLeaf<(imm), [{ + return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) - + N->getZExtValue()); +}]>; +def immRemP2 : PatLeaf<(imm), [{ + return isPowerOf2_64(N->getZExtValue() - + getNearPower2((uint64_t)N->getZExtValue())); +}]>; +def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi + int64_t d = abs64((int64_t)N->getZExtValue() - + (int64_t)getNearPower2((uint64_t)N->getZExtValue())); + if (isPowerOf2_64(d)) return false; + switch (d) { + case 1: case 3: case 5: return false; + default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue(); + }; +}]>; + +def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>; +def add4 : PatFrag<(ops node:$op1, node:$op2), + (add (shl node:$op1, (i64 2)), node:$op2)>; +def sub4 : PatFrag<(ops node:$op1, node:$op2), + (sub (shl node:$op1, (i64 2)), node:$op2)>; +def add8 : PatFrag<(ops node:$op1, node:$op2), + (add (shl node:$op1, (i64 3)), node:$op2)>; +def sub8 : PatFrag<(ops node:$op1, node:$op2), + (sub (shl node:$op1, (i64 3)), node:$op2)>; + +class ThridOpFrag : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; +class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; +class CmpOpFrag : PatFrag<(ops node:$R), res>; + +def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ + return isOrEquivalentToAdd(N); +}]>; +def AddrFI : ComplexPattern; + +//Pseudo ops for selection + +def WTF : PseudoInstSw64<(outs), (ins variable_ops), "#wtf", []>, Sched<[]>; + +let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in { +def ADJUSTSTACKUP : PseudoInstSw64<(outs), (ins s64imm:$amt1, s64imm:$amt2), + "; ADJUP $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>, Sched<[]>; +def ADJUSTSTACKDOWN : PseudoInstSw64<(outs), (ins s64imm:$amt1, s64imm:$amt2), + "; ADJDOWN $amt1", + [(callseq_start (i64 timm:$amt1), (i64 timm:$amt2))]>, Sched<[]>; +} + +let isCodeGenOnly = 1 in { +def ALTENT : PseudoInstSw64<(outs), (ins s64imm:$TARGET), "$$${TARGET}..ng:\n", + []>, Sched<[]>; +def PCLABEL : PseudoInstSw64<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[]>, + Sched<[]>; +def MEMLABEL : PseudoInstSw64<(outs), (ins 
s64imm:$i, s64imm:$j, s64imm:$k, + s64imm:$m), + "LSMARKER$$$i$$$j$$$k$$$m:", []>, Sched<[]>; +} +let hasNoSchedulingInfo = 1 in { +let usesCustomInserter = 1 in { // Expanded after instruction selection. +def ATOMIC_CMP_SWAP_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic cmpare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + +def ATOMIC_CMP_SWAP_I64 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + + +def ATOMIC_LOAD_ADD_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load add", + [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_ADD_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load add", + [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_UMAX_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load umax", + [(set GPRC:$dst, (atomic_load_umax_64 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MAX_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load max", + [(set GPRC:$dst, (atomic_load_max_64 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_UMIN_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load umin", + [(set GPRC:$dst, (atomic_load_umin_64 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MIN_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load min", + [(set GPRC:$dst, (atomic_load_min_64 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_NAND_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load nand", + [(set GPRC:$dst, (atomic_load_nand_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_UMAX_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load umax", + [(set GPRC:$dst, (atomic_load_umax_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MAX_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load max", + [(set GPRC:$dst, (atomic_load_max_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_UMIN_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load umin", + [(set GPRC:$dst, (atomic_load_umin_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MIN_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load min", + [(set GPRC:$dst, (atomic_load_min_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_NAND_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load nand", + [(set GPRC:$dst, (atomic_load_nand_32 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_UMAX_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load umax", + [(set GPRC:$dst, (atomic_load_umax_16 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MAX_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load max", + [(set GPRC:$dst, (atomic_load_max_16 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_UMIN_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load umin", + [(set GPRC:$dst, (atomic_load_umin_16 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MIN_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load min", + [(set GPRC:$dst, (atomic_load_min_16 GPRC:$ptr, 
GPRC:$swp))]>; +def ATOMIC_LOAD_NAND_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load nand", + [(set GPRC:$dst, (atomic_load_nand_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_UMAX_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load umax", + [(set GPRC:$dst, (atomic_load_umax_8 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MAX_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load max", + [(set GPRC:$dst, (atomic_load_max_8 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_UMIN_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load umin", + [(set GPRC:$dst, (atomic_load_umin_8 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MIN_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load min", + [(set GPRC:$dst, (atomic_load_min_8 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_NAND_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load nand", + [(set GPRC:$dst, (atomic_load_nand_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_SWAP_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_SWAP_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))]>; + + +def ATOMIC_LOAD_AND_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load and", + [(set GPRC:$dst, (atomic_load_and_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_AND_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load and", + [(set GPRC:$dst, (atomic_load_and_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_OR_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load or", + [(set GPRC:$dst, (atomic_load_or_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_OR_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load and", + [(set GPRC:$dst, (atomic_load_or_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_SUB_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load sub", + [(set GPRC:$dst, (atomic_load_sub_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_SUB_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load sub", + [(set GPRC:$dst, (atomic_load_sub_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_XOR_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load xor", + [(set GPRC:$dst, (atomic_load_xor_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_XOR_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load xor", + [(set GPRC:$dst, (atomic_load_xor_64 GPRC:$ptr, GPRC:$swp))]>; + + +//I8 +def ATOMIC_LOAD_ADD_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load add", + [(set GPRC:$dst, (atomic_load_add_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_SWAP_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_AND_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load and", + [(set GPRC:$dst, (atomic_load_and_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_OR_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic 
load or", + [(set GPRC:$dst, (atomic_load_or_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_SUB_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load sub", + [(set GPRC:$dst, (atomic_load_sub_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_XOR_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load xor", + [(set GPRC:$dst, (atomic_load_xor_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_CMP_SWAP_I8 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_8 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + +//I16 +def ATOMIC_LOAD_ADD_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic load add", + [(set GPRC:$dst, (atomic_load_add_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_SWAP_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_AND_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic and", + [(set GPRC:$dst, (atomic_load_and_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_OR_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic load or", + [(set GPRC:$dst, (atomic_load_or_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_SUB_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic load and sub", + [(set GPRC:$dst, (atomic_load_sub_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_XOR_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic xor", + [(set GPRC:$dst, (atomic_load_xor_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_CMP_SWAP_I16 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_16 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + + +def CAS32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; +def CAS64 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + +def LAS32 : PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), + "# 32-bit atomic load and sub", + [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))]>; +def LAS64 :PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), + "# 64-bit atomic load and sub", + [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))]>; + +def SWAP32 : PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), + "# 32-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))]>; +def SWAP64 :PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), + "# 64-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))]>; +} + +let mayLoad = 1, mayStore = 1 in { + def ATOMIC_LOAD_ADD_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_ADD_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_SWAP_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_SWAP_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + 
def ATOMIC_LOAD_AND_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_AND_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_LOAD_OR_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_OR_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_LOAD_SUB_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_SUB_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_LOAD_XOR_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_XOR_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_CMP_SWAP_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_CMP_SWAP_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +} +def ATOMIC_LOAD_ADD_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_ADD_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_SWAP_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_SWAP_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_AND_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_AND_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_OR_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_OR_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_SUB_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_SUB_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_XOR_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_XOR_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_CMP_SWAP_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_CMP_SWAP_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_UMAX_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_MAX_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_UMIN_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_MIN_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_NAND_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins 
GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; + +def ATOMIC_LOAD_UMAX_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; +def ATOMIC_LOAD_MAX_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; +def ATOMIC_LOAD_UMIN_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; +def ATOMIC_LOAD_MIN_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; +def ATOMIC_LOAD_NAND_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_UMAX_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; +def ATOMIC_LOAD_MAX_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; +def ATOMIC_LOAD_UMIN_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; +def ATOMIC_LOAD_MIN_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; +def ATOMIC_LOAD_NAND_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; + +def ATOMIC_LOAD_UMAX_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +def ATOMIC_LOAD_MAX_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +def ATOMIC_LOAD_UMIN_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +def ATOMIC_LOAD_MIN_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +def ATOMIC_LOAD_NAND_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; + + + +}// for atomic load/store. 
set hasNoSchedulingInfo + +//*********************** +//Real instructions +//*********************** + +//4 The base instruction system +//4.1 system call instruction +let hasSideEffects = 1 in +class inst_syscall opcode, string opstr, string operands=""> + : PALForm; + +let Defs=[R0] in +def SYS_CALL : inst_syscall<0x00, "sys_call", "$disp">; + +def LBR : inst_syscall<0x1d, "lbr", "$disp">; + +def : Pat<(Sw64_syscall (i64 immUExt8:$N)),(SYS_CALL immUExt8:$N )>; + +//4.2 control instruction + +//4.2.1 jump +// Call + +def SDT_Sw64JmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; +def Sw64JmpLink : SDNode<"Sw64ISD::JmpLink", SDT_Sw64JmpLink, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; + +class arg_jmp opcode, list pattern> + : MForm; + +class branch_i opcode, list pattern> + : BForm; + +let isCall = 1, Defs = [R26], Uses = [R27] in +def JSR : arg_jmp<"call", 0x01, []>; + +let isReturn = 1, isTerminator = 1, isBarrier = 1 in +def RET : arg_jmp<"ret", 0x02, []>; + +def Sw64Ret : SDNode<"Sw64ISD::Ret", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +let RA = 31, DISP=0 in +def JMP : arg_jmp<"jmp", 0x03, []>; + +let isBranch = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in +def BR : branch_i<"br", 0x04, []>; +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in +def BSR : branch_i<"bsr", 0x05, []>; + +// for expand Call target, we create two Insns like: +// load R27,symbol(GP) defs R27 +// call R26,R27,symbol use R27 +// so we dont need to use R27, we actually def R27 +let isBarrier = 1, isCall = 1, Defs = [R26, R27], Uses = [R29] in { +def PseudoCall : PseudoInstSw64<(outs), (ins call_symbol:$func), "", + []>,Sched<[WriteJmp]>; +} + +let isBarrier = 1, isCall = 1, Defs = [R26], Uses = [R27, R29] in { + def PseudoCallIndirect : PseudoInstSw64<(outs), (ins GPRC:$RB), "", + [(Sw64JmpLink GPRC:$RB)]>, + PseudoInstExpansion<(JSR R26, GPRC:$RB, 0)>, + Sched<[WriteJmp]>; +} + +let isBarrier = 1, isBranch = 1, isTerminator = 1 in +def PseudoBrind : PseudoInstSw64<(outs), (ins GPRC:$RB), "", + [(brind GPRC:$RB)]>, + PseudoInstExpansion<(JMP R31, GPRC:$RB, 0)>, + Sched<[WriteJmp]>; + +//to match libgcc _div _rem +let isBarrier = 1, isCall = 1, Defs = [R23, R24, R25, R27, R28] + , Uses = [R24, R25, R27] in +def PseudoCallDiv : PseudoInstSw64<(outs), (ins), "", []>, + PseudoInstExpansion<(JSR R23, R27, 0)>, + Sched<[WriteJmp]>; + +let isBranch = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in +def PseudoBR : PseudoInstSw64<(outs), (ins target:$disp), "", [(br bb:$disp)]>, + PseudoInstExpansion<(BR R31, target:$disp)>, Sched<[WriteJmp]>; + +let isBarrier = 1, isReturn = 1, isTerminator = 1 in +def PseudoRet : PseudoInstSw64<(outs), (ins), "", [(Sw64Ret)]>, + PseudoInstExpansion<(RET R31, R26, 1)>, Sched<[WriteJmp]>; + +////4.2.2 uncondition shift +///////////////////////////////////////////////////////// +//Branching +///////////////////////////////////////////////////////// +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in{ +class br_icc opc, string asmstr> + : BForm; +class br_fcc opc, string asmstr> + : BForm; +} +def BEQ : br_icc<0x30, "beq">; +def BGE : br_icc<0x35, "bge">; +def BGT : br_icc<0x34, "bgt">; +def BLBC : br_icc<0x36, "blbc">; +def BLBS : br_icc<0x37, "blbs">; +def BLE : br_icc<0x33, "ble">; +def BLT : br_icc<0x32, "blt">; +def BNE : br_icc<0x31, "bne">; + +//Branches, float +def FBEQ : br_fcc<0x38, "fbeq">; +def FBGE : br_fcc<0x3D, "fbge">; +def FBGT : br_fcc<0x3C, "fbgt">; +def FBLE : br_fcc<0x3B, "fble">; +def FBLT : br_fcc<0x3A, 
"fblt">; +def FBNE : br_fcc<0x39, "fbne">; +//4.3 load and store instruction +//4.3.1 load integer + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +class load_ri opcode, RegisterClass regtype, + SDPatternOperator loadop> + : MForm; + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 ,Constraints = "$RB = $wback,@earlyclobber $wback" in +class load_ri1 opcode, bits<4> func, + RegisterClass regtype, SDPatternOperator loadop> + : MFuncForm; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +class store_ri opcode, RegisterClass regtype, + SDPatternOperator storeop> + : MForm; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 ,Constraints = "$RB = $wback,@earlyclobber $wback" in +class store_ri1 opcode, bits<4> func, + RegisterClass regtype, SDPatternOperator storeop> + : MFuncForm; + +// integer load +def LDL : load_ri<"ldl", 0x23, GPRC, load>; +def LDW : load_ri<"ldw", 0x22, GPRC, sextloadi32>; +def LDHU : load_ri<"ldhu", 0x21, GPRC, zextloadi16>; +def LDBU : load_ri<"ldbu", 0x20, GPRC, zextloadi8>; +def LDL_A : load_ri1<"ldl_a", 0x1E, 0x3, GPRC, load>; +def LDW_A : load_ri1<"ldw_a", 0x1E, 0x2, GPRC, sextloadi32>; +def LDHU_A : load_ri1<"ldhu_a", 0x1E, 0x1, GPRC, zextloadi16>; +def LDBU_A : load_ri1<"ldbu_a", 0x1E, 0x0, GPRC, zextloadi8>; + +// float load +def LDS : load_ri<"flds", 0x26, F4RC, load>; +def LDD : load_ri<"fldd", 0x27, F8RC, load>; +def LDS_A : load_ri1<"flds_a", 0x1E, 0x4, F4RC, load>; +def LDD_A : load_ri1<"fldd_a", 0x1E, 0x5, F8RC, load>; + +// integer store +def STL : store_ri<"stl", 0x2B, GPRC, store>; +def STW : store_ri<"stw", 0x2A, GPRC, truncstorei32>; +def STH : store_ri<"sth", 0x29, GPRC, truncstorei16>; +def STB : store_ri<"stb", 0x28, GPRC, truncstorei8>; +def STL_A : store_ri1<"stl_a", 0x1E, 0x9, GPRC, store>; +def STW_A : store_ri1<"stw_a", 0x1E, 0x8, GPRC, truncstorei32>; +def STH_A : store_ri1<"sth_a", 0x1E, 0x7, GPRC, truncstorei16>; +def STB_A : store_ri1<"stb_a", 0x1E, 0x6, GPRC, truncstorei8>; + +// float store +def STS : store_ri<"fsts", 0x2E, F4RC, store>; +def STD : store_ri<"fstd", 0x2F, F8RC, store>; +def STS_A : store_ri1<"fsts_a", 0x1E, 0xA, F4RC, store>; +def STD_A : store_ri1<"fstd_a", 0x1E, 0xB, F8RC, store>; + +// imm inst +def LDA : MForm<0x3E,(ins s64imm:$DISP, GPRC:$RB) , (outs GPRC:$RA), + "ldi", "$RA,${DISP}(${RB})", + [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))]>; +def LDAH : MForm<0x3F,(ins s64imm:$DISP, GPRC:$RB) , (outs GPRC:$RA), + "ldih", "$RA,${DISP}(${RB})", []>; + +let Uses = [R29] in { +def LOADgprel : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", + [(set GPRC:$dst, (Sw64_gprel tglobaladdr:$addr))]>, Sched<[WriteLD]>; + +def LOADconstant : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", []>, + Sched<[WriteAdrLD]>; + +def LOADlit : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", + [(set GPRC:$dst, (Sw64_rellit tglobaladdr:$addr))]>, Sched<[WriteLD]>; + +def LOADlitSym : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", + [(set GPRC:$dst, (Sw64_rellit texternalsym:$addr))]>, Sched<[WriteLD]>; + + +// The MOVaddr instruction should match only when the add is not folded +// into a load or store address. 
+def MOVaddrGP +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih tglobaladdr:$hi), + tglobaladdr:$low))]>, Sched<[WriteAdrAdr]>; + +def MOVaddrCP +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih tconstpool:$hi), + tconstpool:$low))]>, Sched<[WriteAdrAdr]>; + +def MOVaddrBA +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih tblockaddress:$hi), + tblockaddress:$low))]>, Sched<[WriteAdrAdr]>; + +def MOVaddrEXT +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih texternalsym:$hi), + texternalsym:$low))]>, Sched<[WriteAdrAdr]>; + +def MOVaddrJT +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih tjumptable:$hi), + tjumptable:$low))]>, Sched<[WriteAdrAdr]>; +} + +//TODO: for core3 target, sw64 need gpdisp to get global address table +// we cannot change Prologue disp, or it will cause error +let isBarrier = 1, hasNoSchedulingInfo = 1, Defs = [R29] in +def MOVProgPCGp : PseudoInstSw64<(outs), + (ins s16imm:$DISP, s16imm:$NUM , GPRC:$dst_reg), "", []>, + Sched<[WriteAdrAdr]>; + +let Defs = [R29] in +def MOVaddrPCGp : PseudoInstSw64<(outs), + (ins s16imm:$DISP, s16imm:$NUM , GPRC:$dst_reg), "", []>, + Sched<[WriteAdrAdr]>; + +// def patterns + +def : Pat<(Sw64JmpLink tglobaladdr:$func), + (PseudoCall tglobaladdr:$func)>; +def : Pat<(Sw64JmpLink texternalsym:$func), + (PseudoCall texternalsym:$func)>; + +def : Pat<(Sw64_LDAWithChain GPRC:$RB, immSExt16:$DISP), + (LDA immSExt16:$DISP, GPRC:$RB)>; + + +def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))), + (LDBU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))), + (LDHU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))), + (LDW immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (zextloadi8 (add GPRC:$RB, immSExt16:$DISP))), + (LDBU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (zextloadi16 (add GPRC:$RB, immSExt16:$DISP))), + (LDHU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (sextloadi32 (add GPRC:$RB, immSExt16:$DISP))), + (LDW immSExt16:$DISP, GPRC:$RB)>; + +def : Pat<(i64 (load GPRC:$addr)), + (LDL 0, GPRC:$addr)>; +def : Pat<(i64 (sextloadi32 GPRC:$addr)), + (LDW 0, GPRC:$addr)>; +def : Pat<(i64 (extloadi32 GPRC:$addr)), + (LDW 0, GPRC:$addr)>; +def : Pat<(i64 (zextloadi16 GPRC:$addr)), + (LDHU 0, GPRC:$addr)>; +def : Pat<(i64 (extloadi16 GPRC:$addr)), + (LDHU 0, GPRC:$addr)>; +def : Pat<(i64 (zextloadi8 GPRC:$addr)), + (LDBU 0, GPRC:$addr)>; +def : Pat<(i64 (extloadi8 GPRC:$addr)), + (LDBU 0, GPRC:$addr)>; + +//4.3.5 s float load +def : Pat<(f32 (load GPRC:$addr)), + (LDS 0, GPRC:$addr)>; +def : Pat<(f64 (load GPRC:$addr)), + (LDD 0, GPRC:$addr)>; +//4.3.3 store integer +def : Pat<(store GPRC:$DATA, GPRC:$addr), + (STL GPRC:$DATA, 0, GPRC:$addr)>; +def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr), + (STW GPRC:$DATA, 0, GPRC:$addr)>; +def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr), + (STH GPRC:$DATA, 0, GPRC:$addr)>; +def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr), + (STB GPRC:$DATA, 0, GPRC:$addr)>; +def : Pat<(store F4RC:$DATA, GPRC:$addr), + (STS F4RC:$DATA, 0, GPRC:$addr)>; +def : Pat<(store F8RC:$DATA, GPRC:$addr), + (STD F8RC:$DATA, 0, GPRC:$addr)>; +multiclass LdPat { + def : Pat<(VT (LoadOp GPRC:$addr)), (Inst 0, GPRC:$addr)>; + def : Pat<(VT (LoadOp AddrFI:$addr)), 
(Inst 0, AddrFI:$addr)>; + def : Pat<(VT (LoadOp (add GPRC:$addr, immSExt16:$DISP))), + (Inst immSExt16:$DISP, GPRC:$addr)>; + def : Pat<(VT (LoadOp (add AddrFI:$addr, immSExt16:$DISP))), + (Inst immSExt16:$DISP, AddrFI:$addr)>; + def : Pat<(VT (LoadOp (IsOrAdd AddrFI:$addr, immSExt16:$DISP))), + (Inst immSExt16:$DISP, AddrFI:$addr)>; +} + +multiclass StPat { + def : Pat<(StoreOp StTy:$rs2, GPRC:$rs1), (Inst StTy:$rs2, 0, GPRC:$rs1)>; + def : Pat<(StoreOp StTy:$rs2, AddrFI:$rs1), (Inst StTy:$rs2, 0, AddrFI:$rs1)>; + def : Pat<(StoreOp StTy:$rs2, (add GPRC:$rs1, immSExt16:$DISP)), + (Inst StTy:$rs2, immSExt16:$DISP, GPRC:$rs1)>; + def : Pat<(StoreOp StTy:$rs2, (add AddrFI:$rs1, immSExt16:$DISP)), + (Inst StTy:$rs2, immSExt16:$DISP, AddrFI:$rs1)>; + def : Pat<(StoreOp StTy:$rs2, (IsOrAdd AddrFI:$rs1, immSExt16:$DISP)), + (Inst StTy:$rs2, immSExt16:$DISP, AddrFI:$rs1)>; +} + +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; + +defm : StPat; +defm : StPat; +defm : StPat; +defm : StPat; +defm : StPat; +defm : StPat; +//4.5 int caculate instruction +multiclass inst_rr_ri fun, string opstr, + SDPatternOperator OpNode = null_frag> { + def r : OForm <0x10, fun, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), + opstr, "$RA,$RB,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; + def i : OFormL<0x12, fun,(ins GPRC:$RA, u8imm:$L), (outs GPRC:$RC), + opstr, "$RA,$L,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, (i64 immUExt8:$L)))]>; +} + +let Predicates = [EnableCrcInst] in +class inst_rr_r fun, string opstr, + SDPatternOperator OpNode> + : OForm <0x10, fun, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), + opstr, "$RA,$RB,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; + +multiclass inst_rr_rin funL, bits<8> funQ, string opstr, + SDPatternOperator OpNode > { + def Q : OForm <0x10, funQ, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), + opstr # "l", "$RA,$RB,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; + def L : OForm <0x10, funL, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), + opstr # "w", "$RA,$RB,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; +} + +class inst_rr_13 fun, string opstr, + SDPatternOperator OpNode=null_frag> + : OFormI <0x10, fun, (ins s13imm:$L), (outs GPRC:$RC), + opstr, "$L, $RC", + [(set GPRC:$RC, (OpNode immUExt13:$L))]>; + +let RA = 31 in +class inst_rr_2 fun, string opstr, ValueType vt> + : OForm <0x10, fun, (ins GPRC:$RB), (outs GPRC:$RC), + opstr, "$RB, $RC", + [(set GPRC:$RC, (sext_inreg GPRC:$RB, vt))]>; + +let RA = 31 in +class inst_rr_3 fun, string opstr, + SDPatternOperator OpNode=null_frag> + : OForm <0x10, fun, (ins GPRC:$RB), (outs GPRC:$RC), + opstr, "$RB, $RC", + [(set GPRC:$RC, (OpNode GPRC:$RB))]>; + +multiclass inst_lw funL, bits<8> funQ, string opstr, + SDPatternOperator OpNode> { + defm L : inst_rr_ri; + defm Q : inst_rr_ri; +} + +defm ADD : inst_lw<0x00, 0x08,"add", add>; +defm SUB : inst_lw<0x01, 0x09,"sub", sub>; +defm S4ADD: inst_lw<0x02, 0x0a,"s4add", add4>; +defm S4SUB: inst_lw<0x03, 0x0b,"s4sub", sub4>; +defm S8ADD: inst_lw<0x04, 0x0c,"s8add", add8>; +defm S8SUB: inst_lw<0x05, 0x0d,"s8sub", sub8>; +defm MUL : inst_lw<0x10, 0x18,"mul", mul>; + +let mayRaiseFPException = 1, hasSideEffects = 1 in { + defm DIV : inst_rr_rin<0x11, 0x1a, "div", sdiv>; + defm UDIV : inst_rr_rin<0x12, 0x1b, "udiv", udiv>; + defm REM : inst_rr_rin<0x13, 0x1c, "rem", srem>; + defm UREM : inst_rr_rin<0x14, 0x1d, "urem", urem>; +} + +def ADDPI : inst_rr_13<0x1e, "addpi", Sw64_addpi>; +def 
ADDPIS : inst_rr_13<0x1f, "addpis", Sw64_addpis>; + +defm SBT : inst_rr_ri<0x2d, "sbt", Sw64_sbt>; +defm CBT : inst_rr_ri<0x2e, "cbt", Sw64_cbt>; + +defm UMULH : inst_rr_ri<0x19, "umulh", mulhu>; + +let RA=31 in +class inst_ct func, string opstr, SDNode OpNode> + : OForm<0x10, func, (ins GPRC:$RB), (outs GPRC:$RC), + opstr, "$RB,$RC", [(set GPRC:$RC, (OpNode GPRC:$RB))]>; + +def CTTZ : inst_ct<0x5A, "cttz", cttz>; +def CTLZ : inst_ct<0x59, "ctlz", ctlz>; +def CTPOP : inst_ct<0x58, "ctpop", ctpop>; + +defm ZAP : inst_rr_ri<0x68, "zap">; +defm ZAPNOT : inst_rr_ri<0x69, "zapnot">; + +def SEXTB : inst_rr_2<0x6A, "sextb", i8>; +def SEXTH : inst_rr_2<0x6B, "sexth", i16>; + +//4.5.2 integer cmp +defm CMPEQ : inst_rr_ri<0x28, "cmpeq", seteq>; +defm CMPLT : inst_rr_ri<0x29, "cmplt", setlt>; +defm CMPLE : inst_rr_ri<0x2A, "cmple", setle>; +defm CMPULT : inst_rr_ri<0x2B, "cmpult", setult>; +defm CMPULE : inst_rr_ri<0x2C, "cmpule", setule>; + +//4.5.3 integer order +defm AND : inst_rr_ri<0x38, "and", and>; +defm BIC : inst_rr_ri<0x39, "bic", + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm BIS : inst_rr_ri<0x3A, "bis", or>; +defm ORNOT : inst_rr_ri<0x3B, "ornot", + BinOpFrag<(or node:$LHS, (not node:$RHS))>>; +defm XOR : inst_rr_ri<0x3C, "xor", xor>; +defm EQV : inst_rr_ri<0x3D, "eqv", + BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; + +//4.5.4 integer move position +defm SL : inst_rr_ri<0x48, "sll", shl>; +defm SRA : inst_rr_ri<0x4A, "sra", sra>; +defm SRL : inst_rr_ri<0x49, "srl", srl>; +defm ROLL : inst_rr_ri<0x4B, "roll", rotl>; +defm SLLW : inst_rr_ri<0x4C, "sllw">; +defm SRLW : inst_rr_ri<0x4D, "srlw">; +defm SRAW : inst_rr_ri<0x4E, "sraw">; +defm ROLW : inst_rr_ri<0x4F, "rolw", Sw64_rolw>; + +def sexti32 : ComplexPattern; + +def zexti32 : ComplexPattern; + +def : Pat<(srem (sexti32 (i64 GPRC:$RA)), (sexti32 (i64 GPRC:$RB))), + (REML GPRC:$RA, GPRC:$RB)>; + +def : Pat<(sext_inreg (sdiv GPRC:$RA, GPRC:$RB), i32), + (DIVL GPRC:$RA, GPRC:$RB)>; + +def : Pat<(sext_inreg (udiv (and GPRC:$RA,0xffffffff), (and GPRC:$RB,0xffffffff)), i32), + (UDIVL GPRC:$RA, GPRC:$RB)>; + +def : Pat<(sext_inreg (shl GPRC:$RA, GPRC:$RB), i32), + (SLLWr GPRC:$RA, GPRC:$RB)>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sext_inreg (shl GPRC:$RA, (i64 immUExt8:$L)), i32), + (SLLWi GPRC:$RA, (i64 immUExt8:$L))>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sext_inreg (srl GPRC:$RA, GPRC:$RB), i32), + (SRLWr GPRC:$RA, GPRC:$RB)>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(srl (i64 (zexti32 GPRC:$RA)), (i64 immUExt8:$L)), + (SRLWi GPRC:$RA, (i64 immUExt8:$L))>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sra (i64 (sexti32 GPRC:$RA)), (i64 GPRC:$RB)), + (SRAWr GPRC:$RA, GPRC:$RB)>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sra (i64 (sexti32 GPRC:$RA)), (i64 immUExt8:$L)), + (SRAWi GPRC:$RA, (i64 immUExt8:$L))>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sext_inreg (rotl GPRC:$RA, GPRC:$RB), i32), + (ROLWr GPRC:$RA, GPRC:$RB)>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sext_inreg (rotl GPRC:$RA, (i64 immUExt8:$L)), i32), + (ROLWi GPRC:$RA, (i64 immUExt8:$L))>, + Requires<[EnableIntShift, HasCore4]>; + +multiclass PatGprInst{ +def : Pat<(OpNode GPRC:$rs1, GPRC:$rs2), + (!cast(Inst # "Qr") + GPRC:$rs1, GPRC:$rs2)>; + +def : Pat<(intop (OpNode GPRC:$rs1, GPRC:$rs2)), + (!cast(Inst # "Lr") + GPRC:$rs1, GPRC:$rs2)>; + +def : Pat<(OpNode GPRC:$rs1, immUExt8:$imm8), + (!cast(Inst # "Qi") + GPRC:$rs1, immUExt8:$imm8)>; + +def : Pat<(intop (OpNode GPRC:$rs1, 
immUExt8:$imm8)), + (!cast(Inst # "Li") + GPRC:$rs1, immUExt8:$imm8)>; +} + +multiclass PatGprInstn{ +def : Pat<(OpNode GPRC:$rs1, GPRC:$rs2), + (!cast(Inst # "Q") + GPRC:$rs1, GPRC:$rs2)>; + +def : Pat<(intop (OpNode GPRC:$rs1, GPRC:$rs2)), + (!cast(Inst # "L") + GPRC:$rs1, GPRC:$rs2)>; +} + +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; + +//Const cases since legalize does sub x, int -> add x, inv(int) + 1 +def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), + (SUBLi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), + (S4SUBLi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), + (S8SUBLi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>; +// Define the pattern that produces ZAPNOTi. + +def : Pat<(cttz_zero_undef i64:$Rn), (CTTZ $Rn)>; + +def : Pat<(zappat:$imm GPRC:$RA), + (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>; + +def : Pat<(sext_inreg GPRC:$RB, i32), + (ADDLi GPRC:$RB, 0)>; + +def : Pat<(sext_inreg (add GPRC:$RA, GPRC:$RB), i32), + (ADDLr GPRC:$RA, GPRC:$RB)>; + +def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQr GPRC:$X, GPRC:$Y)>; +def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>; +def : Pat<(setueq immUExt8:$Y, GPRC:$X), (CMPEQi GPRC:$X, immUExt8:$Y)>; + +def : Pat<(seteq GPRC:$X, GPRC:$Y), (CMPEQr GPRC:$X, GPRC:$Y)>; +def : Pat<(seteq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>; +def : Pat<(seteq immUExt8:$Y, GPRC:$X), (CMPEQi GPRC:$X, immUExt8:$Y)>; + +def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULTr GPRC:$Y, GPRC:$X)>; +def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>; +def : Pat<(brcond (i64 (setugt GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPULEi GPRC:$Y, immUExt8:$X), bb:$DISP)>; + +def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULEr GPRC:$Y, GPRC:$X)>; +def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>; +def : Pat<(brcond (i64 (setuge GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPULTi GPRC:$Y, immUExt8:$X), bb:$DISP)>; + +def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLTr GPRC:$Y, GPRC:$X)>; +def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>; +def : Pat<(brcond (i64 (setgt GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPLEi GPRC:$Y, immUExt8:$X), bb:$DISP)>; + +def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLEr GPRC:$Y, GPRC:$X)>; +def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>; +def : Pat<(brcond (i64 (setgt GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPLTi GPRC:$Y, immUExt8:$X), bb:$DISP)>; + +def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQr GPRC:$X, GPRC:$Y), 0)>; +def : Pat<(setne GPRC:$X, immUExt8:$Y), + (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>; +def : Pat<(brcond (i64 (setne GPRC:$X, immUExt8:$Y)), bb:$DISP), (BEQ (CMPEQi GPRC:$X, immUExt8:$Y), bb:$DISP)>; +def : Pat<(brcond (i64 (setne immUExt8:$Y, GPRC:$X)), bb:$DISP), (BEQ (CMPEQi GPRC:$X, immUExt8:$Y), bb:$DISP)>; + +def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQr GPRC:$X, GPRC:$Y), 0)>; +def : Pat<(setune GPRC:$X, immUExt8:$Y), + (CMPEQi (CMPEQr GPRC:$X, immUExt8:$Y), 0)>; + +//after put here because SLi +//Stupid crazy arithmetic stuff: +let AddedComplexity = 1 in { +def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(sext_inreg (mul 
GPRC:$RA, 5), i32), (S4ADDLr GPRC:$RA, GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(sext_inreg (mul GPRC:$RA, 9), i32), (S8ADDLr GPRC:$RA, GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(sext_inreg (mul GPRC:$RA, 3), i32), (S4SUBLr GPRC:$RA, GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(sext_inreg (mul GPRC:$RA, 7), i32), (S8SUBLr GPRC:$RA, GPRC:$RA)>; +//slight tree expansion if we are multiplying near to a power of 2 + +def : Pat<(mul GPRC:$RA, immRem1:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, immRem2:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRem2:$imm)), + (ADDQr GPRC:$RA, GPRC:$RA))>; +def : Pat<(mul GPRC:$RA, immRem3:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRem3:$imm)), + (S4SUBQr GPRC:$RA, GPRC:$RA))>; +def : Pat<(mul GPRC:$RA, immRem4:$imm), + (S4ADDQr GPRC:$RA, (SLi GPRC:$RA, (nearP2X immRem4:$imm)))>; +def : Pat<(mul GPRC:$RA, immRem5:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRem5:$imm)), + (S4ADDQr GPRC:$RA, GPRC:$RA))>; +def : Pat<(mul GPRC:$RA, immRemP2:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRemP2:$imm)), + (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>; + +} //Added complexity +//4.5.5 integer cond select +// RA:COND RB:TRUE RC:FALSE RD:DEST +multiclass select_inst< bits<3> fun, string opstr, SDPatternOperator OpNode> { +def r : OForm4 <0x11, fun, (ins GPRC:$RA, GPRC:$RB, GPRC:$RC), + (outs GPRC:$RD), opstr, "$RA,$RB,$RC,$RD", + [(set GPRC:$RD, + (select (i64 (OpNode GPRC:$RA)), GPRC:$RB, GPRC:$RC))]>; +def i : OForm4L<0x13, fun, (ins GPRC:$RA, u8imm:$L, GPRC:$RC), + (outs GPRC:$RD), opstr, "$RA,$L,$RC,$RD", + [(set GPRC:$RD, + (select (i64 (OpNode GPRC:$RA)), immUExt8:$L, GPRC:$RC))]>; +} + +defm SELEQ : select_inst<0x0, "seleq", CmpOpFrag<(seteq node:$R, 0)>>; +defm SELNE : select_inst<0x5, "selne", CmpOpFrag<(setne node:$R, 0)>>; +defm SELLT : select_inst<0x4, "sellt", CmpOpFrag<(setlt node:$R, 0)>>; +defm SELLE : select_inst<0x3, "selle", CmpOpFrag<(setle node:$R, 0)>>; +defm SELGT : select_inst<0x2, "selgt", CmpOpFrag<(setgt node:$R, 0)>>; +defm SELGE : select_inst<0x1, "selge", CmpOpFrag<(setge node:$R, 0)>>; +defm SELLBC : select_inst<0x6, "sellbc", CmpOpFrag<(xor node:$R, 1)>>; +defm SELLBS : select_inst<0x7, "sellbs", CmpOpFrag<(and node:$R, 1)>>; + +//General pattern for select +def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2), + (SELNEr GPRC:$which, GPRC:$src1, GPRC:$src2)>; +def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2), + (SELEQi GPRC:$which, immUExt8:$src2, GPRC:$src1)>; +def : Pat<(select (i64 (setne GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (i64 (setgt GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (i64 (setge GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (i64 (setlt GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (i64 (setle GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; + +def : Pat<(mulhs GPRC:$RA, GPRC:$RB), + (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), + (ADDQr (SELGEr GPRC:$RB, R31, GPRC:$RA), + (SELGEr GPRC:$RA, R31, GPRC:$RB)))>; + +defm CMPBGE : inst_rr_ri<0x6C, "cmpgeb">; + +defm EXTLB : 
inst_rr_ri<0x50, "extlb">; +defm EXTLH : inst_rr_ri<0x51, "extlh">; +defm EXTLW : inst_rr_ri<0x52, "extlw">; +defm EXTLL : inst_rr_ri<0x53, "extll">; + +defm EXTHB : inst_rr_ri<0x54, "exthb">; +defm EXTHH : inst_rr_ri<0x55, "exthh">; +defm EXTHW : inst_rr_ri<0x56, "exthw">; +defm EXTHL : inst_rr_ri<0x57, "exthl">; + +defm INSLB: inst_rr_ri<0x40, "inslb">; +defm INSLH: inst_rr_ri<0x41, "inslh">; +defm INSLW: inst_rr_ri<0x42, "inslw">; +defm INSLL: inst_rr_ri<0x43, "insll">; +defm INSHB: inst_rr_ri<0x44, "inshb">; +defm INSHH: inst_rr_ri<0x45, "inshh">; +defm INSHW: inst_rr_ri<0x46, "inshw">; +defm INSHL: inst_rr_ri<0x47, "inshl">; + +def REVBH: inst_rr_3<0x5B, "revbh", Sw64_revbh>; +def REVBW: inst_rr_3<0x5C, "revbw", Sw64_revbw>; +def REVBL: inst_rr_3<0x5D, "revbl", bswap>; + +def : Pat<(sra (bswap GPRC:$RB), (i64 32)), + (REVBW GPRC:$RB)>; +def : Pat<(sra (bswap GPRC:$RB), (i64 48)), + (REVBH GPRC:$RB)>; +def : Pat<(srl (bswap GPRC:$RB), (i64 32)), + (REVBW GPRC:$RB)>; +def : Pat<(srl (bswap GPRC:$RB), (i64 48)), + (REVBH GPRC:$RB)>; + +defm MASKLB: inst_rr_ri<0x60, "masklb">; +defm MASKLH: inst_rr_ri<0x61, "masklh">; +defm MASKLW: inst_rr_ri<0x62, "masklw">; +defm MASKLL: inst_rr_ri<0x63, "maskll">; +defm MASKHB: inst_rr_ri<0x64, "maskhb">; +defm MASKHH: inst_rr_ri<0x65, "maskhh">; +defm MASKHW: inst_rr_ri<0x66, "maskhw">; +defm MASKHL: inst_rr_ri<0x67, "maskhl">; + +// 4.5.7 crc32 instruction +def CRC32B: inst_rr_r<0x20, "crc32b", Sw64_crc32b>; +def CRC32H: inst_rr_r<0x21, "crc32h", Sw64_crc32h>; +def CRC32W: inst_rr_r<0x22, "crc32w", Sw64_crc32w>; +def CRC32L: inst_rr_r<0x23, "crc32l", Sw64_crc32l>; +def CRC32CB: inst_rr_r<0x24, "crc32cb", Sw64_crc32cb>; +def CRC32CH: inst_rr_r<0x25, "crc32ch", Sw64_crc32ch>; +def CRC32CW: inst_rr_r<0x26, "crc32cw", Sw64_crc32cw>; +def CRC32CL: inst_rr_r<0x27, "crc32cl", Sw64_crc32cl>; + +def : Pat<(Sw64_crc32b GPRC:$rs1, GPRC:$rs2), + (CRC32B GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32h GPRC:$rs1, GPRC:$rs2), + (CRC32H GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32w GPRC:$rs1, GPRC:$rs2), + (CRC32W GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32l GPRC:$rs1, GPRC:$rs2), + (CRC32L GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32cb GPRC:$rs1, GPRC:$rs2), + (CRC32CB GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32ch GPRC:$rs1, GPRC:$rs2), + (CRC32CH GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32cw GPRC:$rs1, GPRC:$rs2), + (CRC32CW GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32cl GPRC:$rs1, GPRC:$rs2), + (CRC32CL GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; + +//4.6 float caculate instruction +//4.6.1 float caculate +class inst_fpufunc , string opstr, RegisterClass regtype> + : FPForm<0x18, func, (ins regtype:$RA, regtype:$RB), + (outs regtype:$RC), opstr, "$RA,$RB,$RC">; + +class inst_fpu_rrfunc , string opstr, RegisterClass regtype , + SDPatternOperator OpNode> + : FPForm<0x18, func, (ins regtype:$RA, regtype:$RB), + (outs regtype:$RC), opstr, "$RA,$RB,$RC", + [(set regtype:$RC, (OpNode regtype:$RA, regtype:$RB))]>; + +class inst_fpu_rr_rev64func , string opstr, + SDPatternOperator OpNode> + : FPForm<0x18, func, (ins F8RC:$RA, F8RC:$RB), + (outs F8RC:$RC), opstr, "$RA,$RB,$RC", + [(set F8RC:$RC, (OpNode F8RC:$RB, F8RC:$RA))]>; + +let DecoderNamespace = "FP32" in { +class 
inst_fpu_rr_rev32func , string opstr, + SDPatternOperator OpNode> + : FPForm<0x18, func, (ins F4RC:$RA, F4RC:$RB), + (outs F4RC:$RC), opstr, "$RA,$RB,$RC", + [(set F4RC:$RC, (OpNode F4RC:$RB, F4RC:$RA))]>; +} + +multiclass inst_fpu_sdfuncS, bits<8>funcD , + string opstr, SDPatternOperator OpNode>{ + def S : inst_fpu_rr; + def D : inst_fpu_rr; +} + +defm ADD : inst_fpu_sd<0x00, 0x01, "fadd", fadd>; +defm SUB : inst_fpu_sd<0x02, 0x03, "fsub", fsub>; +defm MUL : inst_fpu_sd<0x04, 0x05, "fmul", fmul>; +defm DIV : inst_fpu_sd<0x06, 0x07, "fdiv", fdiv>; + +def CPYSD : inst_fpu_rr_rev64<0x30, "fcpys", fcopysign>; +def CPYSED : inst_fpu_rr_rev64<0x31, "fcpyse", fcopysign>; +def CPYSND : inst_fpu_rr_rev64<0x32, "fcpysn", null_frag>; +def CPYSS : inst_fpu_rr_rev32<0x30, "fcpys", fcopysign>; +def CPYSES : inst_fpu_rr_rev32<0x31, "fcpyse", fcopysign>; +def CPYSNS : inst_fpu_rr_rev32<0x32, "fcpysn", null_frag>; + +def CMPTEQ : inst_fpu<0x10, "fcmpeq", F8RC>; +def CMPTLE : inst_fpu<0x11, "fcmple", F8RC>; +def CMPTLT : inst_fpu<0x12, "fcmplt", F8RC>; +def CMPTUN : inst_fpu<0x13, "fcmpun", F8RC>; + +//4.6.2 float cmp + +def : Pat<(fabs F8RC:$RB), + (CPYSD F31, F8RC:$RB)>; +def : Pat<(fabs F4RC:$RB), + (CPYSS F31, F4RC:$RB)>; +def : Pat<(fneg F8RC:$RB), + (CPYSND F8RC:$RB, F8RC:$RB)>; +def : Pat<(fneg F4RC:$RB), + (CPYSNS F4RC:$RB, F4RC:$RB)>; + +def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)), + (CPYSNS F4RC:$B, F4RC:$A)>; +def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)), + (CPYSND F8RC:$B, F8RC:$A)>; + +def : Pat<(fneg (fcopysign F4RC:$A, F4RC:$B)), + (CPYSNS F4RC:$B, F4RC:$A)>; +def : Pat<(fneg (fcopysign F8RC:$A, F8RC:$B)), + (CPYSND F8RC:$B, F8RC:$A)>; + +def : Pat<(fneg (fcopysign F4RC:$A, F8RC:$B)), + (CPYSNS (COPY_TO_REGCLASS F8RC:$B, F4RC), F4RC:$A)>; +def : Pat<(fneg (fcopysign F8RC:$A, F4RC:$B)), + (CPYSND (COPY_TO_REGCLASS F4RC:$B, F8RC), F8RC:$A)>; + +def : Pat<(fcopysign F4RC:$A, F8RC:$B), + (CPYSS (COPY_TO_REGCLASS F8RC:$B, F4RC), F4RC:$A)>; +def : Pat<(fcopysign F8RC:$A, F4RC:$B), + (CPYSD (COPY_TO_REGCLASS F4RC:$B, F8RC), F8RC:$A)>; + +//4.6.3 float convert + +let RA = 31 in +class inst_fpu_cvt func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode=null_frag> + : FPForm<0x18, func, (ins regorg:$RB), (outs regdst:$RC), + opstr, "$RB,$RC", + [(set regdst:$RC, (OpNode regorg:$RB))]>; + +let RA = 31 in +class inst_fpu_cmov func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode=null_frag> + : FPForm<0x10, func, (ins regorg:$RB), (outs regdst:$RC), + opstr, "$RB,$RC", + [(set (i64 regdst:$RC), (OpNode regorg:$RB))]>; + +let RA = 31 in +class inst_flwfunc , string opstr, RegisterClass regtype> + : FPForm<0x18, func, (ins regtype:$RB), + (outs regtype:$RC), opstr, "$RB,$RC">; + +let RB = 31 in +class inst_fpu_cvt1 func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode=null_frag> + : FPForm<0x18, func, (ins regorg:$RA), (outs regdst:$RC), + opstr, "$RA,$RC", + [(set regdst:$RC, (OpNode regorg:$RA))]>; + +def CVTQS : inst_fpu_cvt<0x2D, "fcvtls", F8RC, F4RC, Sw64_cvtqs>; +def CVTQT : inst_fpu_cvt<0x2F, "fcvtld", F8RC, F8RC, Sw64_cvtqt>; +def CVTTQ : inst_fpu_cvt<0x24, "fcvtdl_z", F8RC, F8RC, Sw64_cvttq>; +def CVTST : inst_fpu_cvt<0x20, "fcvtsd", F4RC, F8RC, fpextend>; +def CVTTS : inst_fpu_cvt<0x21, "fcvtds", F8RC, F4RC, fpround>; +def : Pat<(Sw64_cvtts F8RC:$RB), (CVTTS F8RC:$RB)>; +def : Pat<(Sw64_cvtst F4RC:$RB), (CVTST F4RC:$RB)>; + +def FCVTWL : inst_flw<0x28, "fcvtwl", F8RC>; +def 
FCVTLW : inst_flw<0x29, "fcvtlw", F8RC>; + +def FCTTDL_G : inst_fpu_cvt<0x22, "fcvtdl_g", F8RC, F8RC>; +def FCTTDL_P : inst_fpu_cvt<0x23, "fcvtdl_p", F8RC, F8RC>; +def FCTTDL_N : inst_fpu_cvt<0x25, "fcvtdl_n", F8RC, F8RC>; +def FCTTDL : inst_fpu_cvt<0x27, "fcvtdl", F8RC, F8RC>; + +def FCVTHS : inst_fpu_cvt<0x2E, "fcvths", F8RC, F8RC>; +def FCVTSH : FCForm4L<0x1B, 0x37, (ins F8RC:$RA, F8RC:$RB, u6imm:$LIT), + (outs F8RC:$RD), "fcvtsh", "$RA,$RB,$LIT,$RD">; + +def CMOVDL : inst_fpu_cmov<0x72, "cmovdl", F8RC, GPRC>; +def CMOVDL_G : inst_fpu_cmov<0x74, "cmovdl_g", F8RC, GPRC>; +def CMOVDL_P : inst_fpu_cmov<0x7A, "cmovdl_p", F8RC, GPRC>; +def CMOVDL_Z : inst_fpu_cmov<0x7C, "cmovdl_z", F8RC, GPRC>; +def CMOVDL_N : inst_fpu_cmov<0x80, "cmovdl_n", F8RC, GPRC>; +def CMOVDLU : inst_fpu_cmov<0x81, "cmovdlu", F8RC, GPRC>; +def CMOVDLU_G : inst_fpu_cmov<0x82, "cmovdlu_g", F8RC, GPRC>; +def CMOVDLU_P : inst_fpu_cmov<0x83, "cmovdlu_p", F8RC, GPRC>; +def CMOVDLU_Z : inst_fpu_cmov<0x84, "cmovdlu_z", F8RC, GPRC>; +def CMOVDLU_N : inst_fpu_cmov<0x85, "cmovdlu_n", F8RC, GPRC>; +def CMOVDWU : inst_fpu_cmov<0x86, "cmovdwu", F8RC, GPRC>; +def CMOVDWU_G : inst_fpu_cmov<0x87, "cmovdwu_g", F8RC, GPRC>; +def CMOVDWU_P : inst_fpu_cmov<0x88, "cmovdwu_p", F8RC, GPRC>; +def CMOVDWU_Z : inst_fpu_cmov<0x89, "cmovdwu_z", F8RC, GPRC>; +def CMOVDWU_N : inst_fpu_cmov<0x8A, "cmovdwu_n", F8RC, GPRC>; +def CMOVDW : inst_fpu_cmov<0x8B, "cmovdw", F8RC, GPRC>; +def CMOVDW_G : inst_fpu_cmov<0x8C, "cmovdw_g", F8RC, GPRC>; +def CMOVDW_P : inst_fpu_cmov<0x8D, "cmovdw_p", F8RC, GPRC>; +def CMOVDW_Z : inst_fpu_cmov<0x8E, "cmovdw_z", F8RC, GPRC>; +def CMOVDW_N : inst_fpu_cmov<0x8F, "cmovdw_n", F8RC, GPRC>; + +// f64 to i64 +def : Pat<(i64 (fp_to_sint F8RC:$Rn)), (CMOVDL_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fround F8RC:$Rn))), (CMOVDL_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fceil F8RC:$Rn))), (CMOVDL_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (ffloor F8RC:$Rn))), (CMOVDL_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fnearbyint F8RC:$Rn))), (CMOVDL F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +// f64 to u64 +def : Pat<(i64 (fp_to_uint F8RC:$Rn)), (CMOVDLU_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fround F8RC:$Rn))), (CMOVDLU_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fceil F8RC:$Rn))), (CMOVDLU_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (ffloor F8RC:$Rn))), (CMOVDLU_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fnearbyint F8RC:$Rn))), (CMOVDLU F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +// f32 to i64 +def : Pat<(i64 (fp_to_sint F4RC:$Rn)), (CMOVDL_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fround F4RC:$Rn))), (CMOVDL_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fceil F4RC:$Rn))), (CMOVDL_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (ffloor F4RC:$Rn))), (CMOVDL_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fnearbyint F4RC:$Rn))), (CMOVDL (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; + +// f32 to u64 +def : Pat<(i64 (fp_to_uint F4RC:$Rn)), (CMOVDLU_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 
(fp_to_uint (fround F4RC:$Rn))), (CMOVDLU_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fceil F4RC:$Rn))), (CMOVDLU_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (ffloor F4RC:$Rn))), (CMOVDLU_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fnearbyint F4RC:$Rn))), (CMOVDLU (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; + +// f64 to u32 +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint F8RC:$Rn))), i32), + (CMOVDWU_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fround F8RC:$Rn)))), i32), + (CMOVDWU_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fceil F8RC:$Rn)))), i32), + (CMOVDWU_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (ffloor F8RC:$Rn)))), i32), + (CMOVDWU_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fnearbyint F8RC:$Rn)))), i32), + (CMOVDWU F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +// f64 to i32 +def : Pat<(sext_inreg (fp_to_sint F8RC:$Rn), i32), + (CMOVDW_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fround F8RC:$Rn)), i32), + (CMOVDW_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fceil F8RC:$Rn)), i32), + (CMOVDW_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (ffloor F8RC:$Rn)), i32), + (CMOVDW_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fnearbyint F8RC:$Rn)), i32), + (CMOVDW F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +// f32 to u32 +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint F4RC:$Rn))), i32), + (CMOVDWU_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fround F4RC:$Rn)))), i32), + (CMOVDWU_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fceil F4RC:$Rn)))), i32), + (CMOVDWU_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (ffloor F4RC:$Rn)))), i32), + (CMOVDWU_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fnearbyint F4RC:$Rn)))), i32), + (CMOVDWU (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; + +// f32 to i32 +def : Pat<(sext_inreg (fp_to_sint F4RC:$Rn), i32), + (CMOVDW_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fround F4RC:$Rn)), i32), + (CMOVDW_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fceil F4RC:$Rn)), i32), + (CMOVDW_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (ffloor F4RC:$Rn)), i32), + (CMOVDW_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fnearbyint F4RC:$Rn)), i32), + (CMOVDW (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; + +def CMOVLS : inst_fpu_cvt<0x48, "cmovls", GPRC, F4RC>; +def CMOVWS : inst_fpu_cvt<0x49, "cmovws", GPRC, F4RC>; +def CMOVLD : inst_fpu_cvt<0x4a, "cmovld", GPRC, F8RC>; +def CMOVWD : inst_fpu_cvt<0x4b, "cmovwd", GPRC, F8RC>; +def CMOVULS : inst_fpu_cvt<0x4c, "cmovuls", GPRC, F4RC>; +def CMOVULD 
: inst_fpu_cvt<0x4e, "cmovuld", GPRC, F8RC>; +def CMOVUWS : inst_fpu_cvt<0x4d, "cmovuws", GPRC, F4RC>; +def CMOVUWD : inst_fpu_cvt<0x4f, "cmovuwd", GPRC, F8RC>; + +def : Pat<(f32 (sint_to_fp GPRC:$Rn)), (CMOVLS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f32 (sint_to_fp (assertsext GPRC:$Rn))), (CMOVWS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f64 (sint_to_fp GPRC:$Rn)), (CMOVLD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f64 (sint_to_fp (assertsext GPRC:$Rn))), (CMOVWD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f32 (uint_to_fp GPRC:$Rn)), (CMOVULS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f64 (uint_to_fp GPRC:$Rn)), (CMOVULD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f32 (uint_to_fp (and (assertsext GPRC:$Rn), 0xffffffff))), (CMOVUWS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f64 (uint_to_fp (and (assertsext GPRC:$Rn), 0xffffffff))), (CMOVUWD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +def FRIS : inst_fpu_cvt<0x5A, "fris", F4RC, F4RC, fnearbyint>; +def FRIS_G : inst_fpu_cvt<0x5B, "fris_g", F4RC, F4RC, fround>; +def FRIS_P : inst_fpu_cvt<0x5C, "fris_p", F4RC, F4RC, fceil>; +def FRIS_Z : inst_fpu_cvt<0x5D, "fris_z", F4RC, F4RC, ftrunc>; +def FRIS_N : inst_fpu_cvt<0x5F, "fris_n", F4RC, F4RC, ffloor>; + +def FRID : inst_fpu_cvt<0x60, "frid", F8RC, F8RC, fnearbyint>; +def FRID_G : inst_fpu_cvt<0x61, "frid_g", F8RC, F8RC, fround>; +def FRID_P : inst_fpu_cvt<0x62, "frid_p", F8RC, F8RC, fceil>; +def FRID_Z : inst_fpu_cvt<0x63, "frid_z", F8RC, F8RC, ftrunc>; +def FRID_N : inst_fpu_cvt<0x64, "frid_n", F8RC, F8RC, ffloor>; + +def SQRTSS : inst_flw<0x08, "fsqrts", F4RC>; +def SQRTSD : inst_flw<0x09, "fsqrtd", F8RC>; + +def FRECS : inst_fpu_cvt1<0x58, "frecs", F4RC, F4RC, Sw64_frecs>; +def FRECD : inst_fpu_cvt1<0x59, "frecd", F8RC, F8RC, Sw64_frecd>; + +def : Pat<(fsqrt F4RC:$RB), (SQRTSS F4RC:$RB)>; +def : Pat<(fsqrt F8RC:$RB), (SQRTSD F8RC:$RB)>; + +//4.6.6 int 2 float or float 2 int + +let RB = 31 in +class inst_fpu_fi opc, bits<8> func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode> + : FPForm; + +def ITOFS : inst_fpu_fi<0x18, 0x40, "ifmovs", GPRC, F4RC, bitconvert>; +def ITOFT : inst_fpu_fi<0x18, 0x41, "ifmovd", GPRC, F8RC, bitconvert>; +def FTOIS : inst_fpu_fi<0x10, 0x70, "fimovs", F4RC, GPRC, bitconvert>; +def FTOIT : inst_fpu_fi<0x10, 0x78, "fimovd", F8RC, GPRC, bitconvert>; + +let DecoderNamespace = "SIMD" in { +class inst_fpu_fi_simd opc, bits<8> func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode> + : FPForm1; +} + +def ITOFStmp : inst_fpu_fi_simd<0x18, 0x40, "ifmovs", GPRC, FPRC, null_frag>; +def ITOFTtmp : inst_fpu_fi_simd<0x18, 0x41, "ifmovd", GPRC, FPRC, null_frag>; +def FTOIStmp : inst_fpu_fi_simd<0x10, 0x70, "fimovs", FPRC, GPRC, null_frag>; +def FTOITtmp : inst_fpu_fi_simd<0x10, 0x78, "fimovd", FPRC, GPRC, null_frag>; + +multiclass inst_fpu_fma funcS, bits<6> funcD, string opstr> { + def S : FForm4<0x19, funcS, (ins F4RC:$RA, F4RC:$RB, F4RC:$RC), + (outs F4RC:$RD), opstr # "s", "$RA,$RB,$RC,$RD">; + + def D : FForm4<0x19, funcD, (ins F8RC:$RA, F8RC:$RB, F8RC:$RC), + (outs F8RC:$RD), opstr # "d", "$RA,$RB,$RC,$RD">; +} + +defm FMA : inst_fpu_fma<0x00, 0x01, "fma">; +defm FMS : inst_fpu_fma<0x02, 0x03, "fms">; +defm FNMA : inst_fpu_fma<0x04, 0x05, "fnma">; +defm FNMS : inst_fpu_fma<0x06, 0x07, "fnms">; + +multiclass fma_pat { +def : Pat<(fma regtype:$RA, 
regtype:$RB, regtype:$RC), + (!cast("FMA" # type) + regtype:$RA, regtype:$RB, regtype:$RC)>; + +def : Pat<(fma regtype:$RA, regtype:$RB, + (fneg regtype:$RC)), + (!cast("FMS" # type) + regtype:$RA, regtype:$RB, regtype:$RC)>; + +def : Pat<(fneg + (fma regtype:$RA, regtype:$RB, + (fneg regtype:$RC))), + (!cast("FNMA" # type) + regtype:$RA, regtype:$RB, regtype:$RC)>; + +def : Pat<(fneg + (fma regtype:$RA, regtype:$RB, regtype:$RC)), + (!cast("FNMS" # type) + regtype:$RA, regtype:$RB, regtype:$RC)>; + +} + +defm : fma_pat; +defm : fma_pat; + +class inst_fpu_select64 fun, string opstr> + : FForm4 <0x19, fun, (ins F8RC:$RC, F8RC:$RB, F8RC:$RA), + (outs F8RC:$RD), opstr, "$RA,$RB,$RC,$RD">; + +let DecoderNamespace = "FP32" in { +class inst_fpu_select32 fun, string opstr> + : FForm4 <0x19, fun, (ins F4RC:$RC, F4RC:$RB, F8RC:$RA), + (outs F4RC:$RD), opstr, "$RA,$RB,$RC,$RD">; +} +def FSELEQD : inst_fpu_select64<0x10, "fseleq">; +def FSELNED : inst_fpu_select64<0x11, "fselne">; +def FSELLTD : inst_fpu_select64<0x12, "fsellt">; +def FSELLED : inst_fpu_select64<0x13, "fselle">; +def FSELGTD : inst_fpu_select64<0x14, "fselgt">; +def FSELGED : inst_fpu_select64<0x15, "fselge">; +def FSELEQS : inst_fpu_select32<0x10, "fseleq">; +def FSELNES : inst_fpu_select32<0x11, "fselne">; +def FSELLTS : inst_fpu_select32<0x12, "fsellt">; +def FSELLES : inst_fpu_select32<0x13, "fselle">; +def FSELGTS : inst_fpu_select32<0x14, "fselgt">; +def FSELGES : inst_fpu_select32<0x15, "fselge">; + +multiclass f_select_pat { +def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F4RC:$st, F4RC:$sf), + (!cast(Inst # "S") + F4RC:$sf, F4RC:$st, (InstCmp F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F8RC:$st, F8RC:$sf), + (!cast(Inst # "D") + F8RC:$sf, F8RC:$st, (InstCmp F8RC:$RA, F8RC:$RB))>; +} + +multiclass f_select_pat_c { +def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F4RC:$st, F4RC:$sf), + (!cast(Inst # "S") + F4RC:$sf, F4RC:$st, (InstCmp F8RC:$RB, F8RC:$RA))>; +def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F8RC:$st, F8RC:$sf), + (!cast(Inst # "D") + F8RC:$sf, F8RC:$st, (InstCmp F8RC:$RB, F8RC:$RA))>; +} +defm : f_select_pat; +defm : f_select_pat; +defm : f_select_pat; +defm : f_select_pat; + +defm : f_select_pat_c; +defm : f_select_pat_c; + +def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf), + (f64 (FSELEQD F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>; +def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf), + (f32 (FSELEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>; + +//4.6.9 read and write float register +let RB=31, RC=31 in { +def RFPCR : FPForm<0x18,0x50,(ins), (outs F8RC:$RA), "rfpcr", "$RA">; +def WFPCR : FPForm<0x18,0x51,(ins F8RC:$RA), (outs), "wfpcr", "$RA">; +} + +let RA=31, RB = 31, RC=31 in { +def SETFPEC0 : FPForm<0x18,0x54,(ins), (outs),"setfpec0","">; +def SETFPEC1 : FPForm<0x18,0x55,(ins), (outs),"setfpec1","">; +def SETFPEC2 : FPForm<0x18,0x56,(ins), (outs),"setfpec2","">; +def SETFPEC3 : FPForm<0x18,0x57,(ins), (outs),"setfpec3","">; + +def NOP : FPForm<0x10,0x3a,(ins), (outs),"nop", "">; +} +//4.7 sundry instruction +let RA = 0, RB= 0, hasNoSchedulingInfo=1 in{ +def WMEMB : MfcForm<0x06, 0x0002,(ins), (outs), "wmemb">; +def IMEMB : MfcForm<0x06, 0x0001,(ins), (outs), "imemb">; +def MB : MfcForm<0x06, 0x0000,(ins), (outs), "memb">; //memory barrier +def HALT : MfcForm<0x06, 0x0080,(ins), (outs), "halt">; +} +def : Pat<(trap), (SYS_CALL 0x80)>; +def : Pat<(atomic_fence (i64 5), (timm)), (WMEMB)>,Requires<[EnableWmembInst, HasCore4]>; +def : Pat<(atomic_fence (timm), (timm)), 
(MB)>; +let RB = 31 in { +//4.7.3 read time counter +def RPCC : MfcForm<0x06, 0x0020,(ins), (outs GPRC:$RA), "rtc", "$RA">; //Read process cycle counter +//4.7.4 read cpu core id +def RCID : MfcForm<0x06, 0x0040,(ins), (outs GPRC:$RA), "rcid", "$RA">; +} +//4.7.6 atom operate instruction + +let mayLoad = 1 in { +def LDQ_L : MFuncForm<0x08,0x1,(ins s64imm:$disp, GPRC:$RB),(outs GPRC:$RA), + "lldl", "$RA,${disp}(${RB})">; +def LDL_L : MFuncForm<0x08,0x0,(ins s64imm:$disp, GPRC:$RB),(outs GPRC:$RA), + "lldw", "$RA,${disp}(${RB})">; +} + + +let mayStore = 1 in { +def STQ_C : MFuncForm<0x08,0x9, (ins GPRC:$RA, s64imm:$disp, GPRC:$RB), (outs), + "lstl","$RA,${disp}(${RB})">; +def STL_C : MFuncForm<0x08,0x8, (ins GPRC:$RA, s64imm:$disp, GPRC:$RB), (outs), + "lstw","$RA,${disp}(${RB})">; +} + +let RB = 31, hasNoSchedulingInfo = 1 in { +def WR_F : MfcForm<0x06, 0x1020, (ins GPRC:$RA) , (outs), "wr_f", "$RA">; + +def RD_F : MfcForm<0x06, 0x1000, (ins GPRC:$RA) , (outs), "rd_f", "$RA">; +} + +//4.8 cache control instruction +// Prefetch +def SDT_ZPrefetch : SDTypeProfile<0, 2, [ + SDTCisPtrTy<0>, SDTCisInt<1>, +]>; + +def z_s_fillcs : SDNode<"Sw64ISD::Z_S_FILLCS", SDT_ZPrefetch, [SDNPHasChain]>; +def z_s_fillde : SDNode<"Sw64ISD::Z_S_FILLDE", SDT_ZPrefetch, [SDNPHasChain]>; +def z_fillde : SDNode<"Sw64ISD::Z_FILLDE", SDT_ZPrefetch, [SDNPHasChain]>; +def z_fillde_e : SDNode<"Sw64ISD::Z_FILLDE_E", SDT_ZPrefetch, [SDNPHasChain]>; +def z_fillcs : SDNode<"Sw64ISD::Z_FILLCS", SDT_ZPrefetch, [SDNPHasChain]>; +def z_fillcs_e : SDNode<"Sw64ISD::Z_FILLCS_E", SDT_ZPrefetch, [SDNPHasChain]>; +def z_e_fillcs : SDNode<"Sw64ISD::Z_E_FILLCS", SDT_ZPrefetch, [SDNPHasChain]>; +def z_e_fillde : SDNode<"Sw64ISD::Z_E_FILLDE", SDT_ZPrefetch, [SDNPHasChain]>; + +def z_flushd : SDNode<"Sw64ISD::Z_FLUSHD", SDT_ZPrefetch, [SDNPHasChain]>; + +let usesCustomInserter = 1, mayLoad = 1, mayStore = 0, RA = 31 in +class Prefetch opcode, SDPatternOperator loadop> + : MForm; + +def FILLCS : Prefetch<"fillcs", 0x09, z_fillcs>; +def S_FILLDE : Prefetch<"s_fillde", 0x23, z_s_fillde>; +def S_FILLCS : Prefetch<"s_fillcs", 0x22, z_s_fillcs>; +def FILLDE : Prefetch<"fillde", 0x26, z_fillde>; +def FILLDE_E : Prefetch<"fillde_e", 0x27, z_fillde_e>; +def FILLCS_E : Prefetch<"fillcs_e", 0x0B, z_fillcs_e>; +def E_FILLCS : Prefetch<"e_fillcs", 0x0A, z_e_fillcs>; +def E_FILLDE : Prefetch<"e_fillde", 0x0C, z_e_fillde>; + +// END Prefetch + + +//4.9 privilege instruction +def DPFHR : MPrvlForm<0x1e,0xe,(ins u5imm:$TH, s64imm:$disp, GPRC:$RB), (outs), + "dpfhr", "$TH,${disp}(${RB})">; +def DPFHW : MPrvlForm<0x1e,0xf,(ins u5imm:$TH, s64imm:$disp, GPRC:$RB), (outs), + "dpfhw", "$TH,${disp}(${RB})">; + +//4.9.1 csrr and csrw +// ---------------------------------------------------------- +def CSRR : CSRForm<0x06, 0xfe, (ins GPRC:$RA, u5imm:$L), (outs), "csrr", "$RA,$L">; +def CSRW : CSRForm<0x06, 0xff, (ins GPRC:$RA, u5imm:$L), (outs), "csrw", "$RA,$L">; +// ---------------------------------------------------------- + +//4.9.2 csrws and csrwc +// ---------------------------------------------------------- +def CSRWS : CSRForm<0x06, 0xfc, (ins GPRC:$RA, u5imm:$L), (outs), "csrws", "$RA,$L">; +def CSRWC : CSRForm<0x06, 0xfd, (ins GPRC:$RA, u5imm:$L), (outs), "csrwc", "$RA,$L">; +// ---------------------------------------------------------- + +class BrPat + : Pat<(brcond GPRC:$RA, bb:$DISP), + (Inst GPRC:$RA, bb:$DISP)>; + +class BrPat_const + : Pat<(brcond (i64 (CondOp GPRC:$RA, 0)), bb:$DISP), + (Inst GPRC:$RA, bb:$DISP)>; + +class BrPat_cond + : 
Pat<(brcond (i64 (CondOp regtype:$RB, regtype:$RA)), bb:$DISP), + (InstBr (InstCmp regtype:$RA, regtype:$RB), bb:$DISP)>; +class BrPat_cond_i + : Pat<(brcond (i64 (CondOp regtype:$RA, regtype:$RB)), bb:$DISP), + (InstBr (InstCmp regtype:$RA, regtype:$RB), bb:$DISP)>; + +class BrSwapPat_cond + : Pat<(brcond (i64 (CondOp regtype:$RB, regtype:$RA)), bb:$DISP), + (InstBr (InstCmp regtype:$RB, regtype:$RA), bb:$DISP)>; + +class BrPat_f + : Pat<(brcond (i64 (CondOp F8RC:$RA, immFPZ)), bb:$DISP), + (InstBr F8RC:$RA, bb:$DISP)>; + + +def : BrPat_const; +def : BrPat_const; +def : BrPat_const; +def : BrPat_const; +def : BrPat_const; +def : BrPat_const; + +def : BrPat; +def : BrPat_cond_i; +def : BrPat_cond_i; +def : BrPat_cond_i; +def : BrPat_cond_i; +def : BrPat_cond_i; + +def : BrPat_cond_i; +def : BrPat_cond_i; + +def : BrPat_f; +def : BrPat_f; +def : BrPat_f; +def : BrPat_f; + +def : BrPat_cond; +def : BrPat_cond; +def : BrPat_cond; +def : BrPat_cond; + +def : BrSwapPat_cond; +def : BrSwapPat_cond; +def : BrSwapPat_cond; +def : BrSwapPat_cond; + +def : BrPat_cond_i; +def : BrPat_cond_i; + +def : BrPat_cond_i; +def : BrPat_cond_i; + + + class IBrPat_cond_i + : Pat<(brcond (i64(CondOp regtype:$RA, immUExt8:$RB)), bb:$DISP), + (InstBr (InstCmp regtype:$RA, immUExt8:$RB), bb:$DISP)>; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + + +//Int cond patterns +def : Pat<(brcond (i64 (and GPRC:$RA, 1)), bb:$DISP), + (BLBS GPRC:$RA, bb:$DISP)>; + +def : Pat<(brcond (i64 (seteq (and GPRC:$RA, 1), 0)), bb:$DISP), + (BLBC GPRC:$RA, bb:$DISP)>; + +//Constant handling +def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field + return ((int64_t)N->getZExtValue() << 48) >> 48 == + ((int64_t)N->getZExtValue() << 32) >> 32; +}], SExt16>; + +def immConst2PartInt : PatLeaf<(imm), [{ + //true if imm fits in a LDAH LDA pair with zeroext + uint64_t uval = N->getZExtValue(); + int32_t val32 = (int32_t)uval; + return ((uval >> 32) == 0 && //empty upper bits + val32 <= IMM_FULLHIGH); +}]>; + +def immConst2Part : PatLeaf<(imm), [{ + //true if imm fits in a LDAH LDA pair + int64_t val = (int64_t)N->getZExtValue(); + return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW); +}]>; + + +def : Pat<(i64 immConst2Part:$imm), + (LDA (LL16 immConst2Part:$imm), + (LDAH (LH16 immConst2Part:$imm), R31))>; + +def : Pat<(i64 immSExt16:$imm), + (LDA immSExt16:$imm, R31)>; + +def : Pat<(i64 immSExt16int:$imm), + (ZAPNOTi (LDA immSExt16int:$imm, R31), 15)>; + +def : Pat<(i64 immConst2PartInt:$imm), + (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))), + (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), + R31)), 15)>; + +//===----------------------------------------------------------------------===// +// Instruction aliases +//===----------------------------------------------------------------------===// +// 4.5.1 integer caculate +def : InstAlias<"addw $RA,$L,$RC", + (ADDLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"subw $RA,$L,$RC", + (SUBLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s4addw $RA,$L,$RC", + (S4ADDLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s4subw $RA,$L,$RC", + (S4SUBLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s8addw $RA,$L,$RC", + (S8ADDLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s8subw $RA,$L,$RC", + (S8SUBLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : 
InstAlias<"addl $RA,$L,$RC", + (ADDQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"subl $RA,$L,$RC", + (SUBQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s4addl $RA,$L,$RC", + (S4ADDQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s4subl $RA,$L,$RC", + (S4SUBQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s8addl $RA,$L,$RC", + (S8ADDQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s8subl $RA,$L,$RC", + (S8SUBQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"mulw $RA,$L,$RC", + (MULLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"mull $RA,$L,$RC", + (MULQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"umulh $RA,$L,$RC", + (UMULHi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"zap $RA,$L,$RC", + (ZAPi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"zapnot $RA,$L,$RC", + (ZAPNOTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"addpi $L,$RC", + (ADDPI GPRC:$RC, s13imm:$L), 0>; +def : InstAlias<"addpis $L,$RC", + (ADDPIS GPRC:$RC, s13imm:$L), 0>; +def : InstAlias<"sbt $RA,$L,$RC", + (SBTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cbt $RA,$L,$RC", + (CBTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; + +// 4.5.2 integer cmp +def : InstAlias<"cmpeq $RA,$L,$RC", + (CMPEQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cmple $RA,$L,$RC", + (CMPLEi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cmplt $RA,$L,$RC", + (CMPLTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cmpule $RA,$L,$RC", + (CMPULEi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cmpult $RA,$L,$RC", + (CMPULTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; + +// 4.5.3 integer order +def : InstAlias<"and $RA,$L,$RC", + (ANDi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"bic $RA,$L,$RC", + (BICi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"bis $RA,$L,$RC", + (BISi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"ornot $RA,$L,$RC", + (ORNOTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"xor $RA,$L,$RC", + (XORi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"eqv $RA,$L,$RC", + (EQVi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; + +// 4.5.4 integer move position +def : InstAlias<"sll $RA,$L,$RC", + (SLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"srl $RA,$L,$RC", + (SRLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"sra $RA,$L,$RC", + (SRAi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; + +// 4.5.5 integer cond select +def : InstAlias<"seleq $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELEQi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"seleq $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELEQr GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE, GPRC:$RDEST), + 0>; +def : InstAlias<"selge $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELGEi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"selgt $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELGTi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"selle $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELLEi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"sellt $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELLTi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"selne $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELNEi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"sellbc $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELLBCi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"sellbs $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELLBSi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 
0>; + +// 4.5.6 byte operate +def : InstAlias<"nop", (BISr R31, R31, R31), 0>; + +def : InstAlias<"ldi $RA, $imm", + (LDA GPRC:$RA, s64imm:$imm, R31), 0>; +def : InstAlias<"br $disp", + (BR R31, target:$disp), 0>; +def : InstAlias<"mov $imm,$RA", + (BISi GPRC:$RA, R31, u8imm:$imm), 0>; +def : InstAlias<"mov $RB,$RA", + (BISr GPRC:$RA, R31, GPRC:$RB), 0>; +def : InstAlias<"ret $31,($26), $imm", + (RET R31, R26, s16imm:$imm), 0>; +def : InstAlias<"ret", + (RET R31, R26, 1), 0>; +def : InstAlias<"ldgp $29,0(${RA})", + (MOVaddrPCGp 0, 0, GPRC:$RA), 0>; +def : InstAlias<"clr $RA", + (BISr R31, R31, GPRC:$RA), 0>; + +//===----------------------------------------------------------------------===// +// Sw64 vector Definitions. +//===----------------------------------------------------------------------===// + +include "Sw64VectorVarDefine.td" +include "Sw64InstrFormatsV.td" +include "Sw64InstrVector.td" diff --git a/llvm/lib/Target/Sw64/Sw64InstrVector.td b/llvm/lib/Target/Sw64/Sw64InstrVector.td new file mode 100644 index 000000000000..da17a14ffaae --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrVector.td @@ -0,0 +1,1767 @@ +//===- Sw64InstrVector.td - SIMD instructions -*- tablegen ----------------*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes Sw64 SIMD instructions. +// +//===----------------------------------------------------------------------===// + +class IsCommutable { + bit isCommutable = 1; +} + +def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>]>; +def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>]>; + +def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3>, + SDTCisSameAs<0, 1>]>; + +def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; +def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; +def SDT_INSVE : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<0, 3>, + SDTCisVT<4, i32>]>; +def SDT_VINSECTL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; + +def SDT_VecReduce : SDTypeProfile<1, 1, [ // vector reduction + SDTCisFP<0>, SDTCisVec<1> +]>; + +def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; + +def SDT_ZVecBinaryFp : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, f32>]>; + +def SDT_ZVecBinaryInt : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, i64>]>; + +def SDT_ZVecCT : SDTypeProfile<1, 1, [ // vector number of head 0/1. + SDTCisInt<0>, SDTCisVec<1> +]>; +def SDT_ZVecFREC : SDTypeProfile<1, 1, [ // vector number of head 0/1. 
+ SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1> +]>; + +def SDT_Vlog : SDTypeProfile<1, 4, [ // vlogzz + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>, SDTCisInt<4> +]>; + +def SDT_ZVecFCMP : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +]>; +def SDT_ZVecFCVT : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1> +]>; +def SDT_ZVecFCVTDL : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisFP<1> +]>; +def SDT_ZVecFCVTSH : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisVec<1> +]>; +def SDT_ZVecFCVTHS : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisVec<1> +]>; + +def SDT_ZVecFRI : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1> +]>; +def SDT_ZVecCPY : SDTypeProfile<1, 1, [ + SDTCisVec<1> +]>; + +def SDT_VSELECT : SDTypeProfile<1, 3, [ + SDTCisInt<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +]>; +def SDT_VSQRT : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1> +]>; +def SDT_VSUMF : SDTypeProfile<1, 1, [ + SDTCisFP<0>, SDTCisVec<1>, SDTCisFP<1> +]>; +def SDT_Sw64VTruncStore : SDTypeProfile<0, 2, [SDTCisPtrTy<1>]>; + +def Sw64VBroadCastLd : SDNode<"Sw64ISD::VBROADCAST_LD", SDTLoad>; +def Sw64VBroadCast : SDNode<"Sw64ISD::VBROADCAST", SDTVBroadcast>; + +def Sw64VBroadCasti32: PatFrag<(ops node:$src), + (Sw64VBroadCastLd node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 4; +}]>; + +def Sw64VBroadCastf32: PatFrag<(ops node:$src), + (Sw64VBroadCastLd node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 4 + && cast(N)->getMemoryVT() == MVT::f32; +}]>; + +def Sw64VBroadCastf64: PatFrag<(ops node:$src), + (Sw64VBroadCastLd node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 8; +}]>; + +def Sw64VTruncStore : SDNode<"Sw64ISD::VTRUNCST", SDT_Sw64VTruncStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def z_ldwe : SDNode<"Sw64ISD::VLDWE", SDTLoad>; +def z_ldse : SDNode<"Sw64ISD::VLDSE", SDTLoad>; +def z_ldde : SDNode<"Sw64ISD::VLDDE", SDTLoad>; + +def z_vlog : SDNode<"Sw64ISD::VLOG", SDT_Vlog>; + +def z_ctpop : SDNode<"Sw64ISD::VCTPOP", SDT_ZVecCT>; +def z_ctlz : SDNode<"Sw64ISD::VCTLZ", SDT_ZVecCT>; + +def Sw64VNOR : SDNode<"Sw64ISD::VNOR", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def Sw64VEQV : SDNode<"Sw64ISD::VEQV", SDTIntBinOp>; +def Sw64VORNOT : SDNode<"Sw64ISD::VORNOT", SDTIntBinOp>; + + +def Sw64VSHF : SDNode<"Sw64ISD::VSHF", SDT_VSHF>; + +def Sw64SHF : SDNode<"Sw64ISD::SHF", SDT_SHF>; +def Sw64ILVEV : SDNode<"Sw64ISD::ILVEV", SDT_ILV>; +def Sw64ILVOD : SDNode<"Sw64ISD::ILVOD", SDT_ILV>; +def Sw64ILVL : SDNode<"Sw64ISD::ILVL", SDT_ILV>; +def Sw64ILVR : SDNode<"Sw64ISD::ILVR", SDT_ILV>; +def Sw64PCKEV : SDNode<"Sw64ISD::PCKEV", SDT_ILV>; +def Sw64PCKOD : SDNode<"Sw64ISD::PCKOD", SDT_ILV>; +def Sw64INSVE : SDNode<"Sw64ISD::INSVE", SDT_INSVE>; + +def Sw64VFCMPEQ : SDNode<"Sw64ISD::VFCMPEQ", SDT_ZVecFCMP>; +def Sw64VFCMPLE : SDNode<"Sw64ISD::VFCMPLE", SDT_ZVecFCMP>; +def Sw64VFCMPLT : SDNode<"Sw64ISD::VFCMPLT", SDT_ZVecFCMP>; +def Sw64VFCMPUN : SDNode<"Sw64ISD::VFCMPUN", SDT_ZVecFCMP>; + +def Sw64VFCVTSD : SDNode<"Sw64ISD::VFCVTSD", SDT_ZVecFCVT>; +def Sw64VFCVTDS : SDNode<"Sw64ISD::VFCVTDS", SDT_ZVecFCVT>; +def Sw64VFCVTLS : SDNode<"Sw64ISD::VFCVTLS", SDT_ZVecFCVT>; +def Sw64VFCVTLD : SDNode<"Sw64ISD::VFCVTLD", SDT_ZVecFCVT>; +def Sw64VFCVTSH : SDNode<"Sw64ISD::VFCVTSH", SDT_ZVecFCVTSH>; +def Sw64VFCVTHS : SDNode<"Sw64ISD::VFCVTHS", SDT_ZVecFCVTHS>; + +def Sw64VFCVTDL : SDNode<"Sw64ISD::VFCVTDL", 
SDT_ZVecFCVTDL>; +def Sw64VFCVTDLG : SDNode<"Sw64ISD::VFCVTDLG", SDT_ZVecFCVTDL>; +def Sw64VFCVTDLP : SDNode<"Sw64ISD::VFCVTDLP", SDT_ZVecFCVTDL>; +def Sw64VFCVTDLZ : SDNode<"Sw64ISD::VFCVTDLZ", SDT_ZVecFCVTDL>; +def Sw64VFCVTDLN : SDNode<"Sw64ISD::VFCVTDLN", SDT_ZVecFCVTDL>; + +def Sw64VFRIS : SDNode<"Sw64ISD::VFRIS", SDT_ZVecFRI>; +def Sw64VFRISG : SDNode<"Sw64ISD::VFRISG", SDT_ZVecFRI>; +def Sw64VFRISP : SDNode<"Sw64ISD::VFRISP", SDT_ZVecFRI>; +def Sw64VFRISZ : SDNode<"Sw64ISD::VFRISZ", SDT_ZVecFRI>; +def Sw64VFRISN : SDNode<"Sw64ISD::VFRISN", SDT_ZVecFRI>; +def Sw64VFRID : SDNode<"Sw64ISD::VFRID", SDT_ZVecFRI>; +def Sw64VFRIDG : SDNode<"Sw64ISD::VFRIDG", SDT_ZVecFRI>; +def Sw64VFRIDP : SDNode<"Sw64ISD::VFRIDP", SDT_ZVecFRI>; +def Sw64VFRIDZ : SDNode<"Sw64ISD::VFRIDZ", SDT_ZVecFRI>; +def Sw64VFRIDN : SDNode<"Sw64ISD::VFRIDN", SDT_ZVecFRI>; + +def vseleqw : SDNode<"Sw64ISD::VSELEQW", SDT_VSELECT>; +def vselltw : SDNode<"Sw64ISD::VSELLTW", SDT_VSELECT>; +def vsellew : SDNode<"Sw64ISD::VSELLEW", SDT_VSELECT>; +def vsellbcw : SDNode<"Sw64ISD::VSELLBCW", SDT_VSELECT>; + +def vfcmovlt : SDNode<"Sw64ISD::VFCMOVLT", SDTFPTernaryOp>; +def vfcmovle : SDNode<"Sw64ISD::VFCMOVLE", SDTFPTernaryOp>; +def vfcmoveq : SDNode<"Sw64ISD::VFCMOVEQ", SDTFPTernaryOp>; + +def vect_vucaddw : SDNode<"Sw64ISD::VECT_VUCADDW", SDTIntBinOp>; +def vect_vucaddh : SDNode<"Sw64ISD::VECT_VUCADDH", SDTIntBinOp>; +def vect_vucaddb : SDNode<"Sw64ISD::VECT_VUCADDB", SDTIntBinOp>; +def vect_vucsubw : SDNode<"Sw64ISD::VECT_VUCSUBW", SDTIntBinOp>; +def vect_vucsubh : SDNode<"Sw64ISD::VECT_VUCSUBH", SDTIntBinOp>; +def vect_vucsubb : SDNode<"Sw64ISD::VECT_VUCSUBB", SDTIntBinOp>; + +def z_vshl_by_scalar : SDNode<"Sw64ISD::VSHL_BY_SCALAR", + SDT_ZVecBinaryFp>; +def z_vsrl_by_scalar : SDNode<"Sw64ISD::VSRL_BY_SCALAR", + SDT_ZVecBinaryFp>; +def z_vsra_by_scalar : SDNode<"Sw64ISD::VSRA_BY_SCALAR", + SDT_ZVecBinaryFp>; + +def z_vcopyf : SDNode<"Sw64ISD::VCOPYF", + SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>, []>; + +def z_v8sll : SDNode<"Sw64ISD::V8SLL", + SDT_ZVecBinaryInt>; + +def z_v8srl : SDNode<"Sw64ISD::V8SRL", + SDT_ZVecBinaryInt>; + +def z_v8sra : SDNode<"Sw64ISD::V8SRA", + SDT_ZVecBinaryInt>; + +def z_vrotr : SDNode<"Sw64ISD::VROTR", + SDT_ZVecBinaryInt>; + +def Sw64VINSECTL : SDNode<"Sw64ISD::VINSECTL", SDT_VINSECTL>; + +// ---- For immediate format. 
+ +def SDT_ZV8X : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; + +def Sw64V8SLL : SDNode<"Sw64ISD::V8SLLi", SDT_ZV8X>; +def Sw64V8SRL : SDNode<"Sw64ISD::V8SRLi", SDT_ZV8X>; +def Sw64V8SRA : SDNode<"Sw64ISD::V8SRAi", SDT_ZV8X>; +def Sw64VROTR : SDNode<"Sw64ISD::VROTRi", SDT_ZV8X>; +def Sw64VROLB : SDNode<"Sw64ISD::VROLBi", SDT_ZV8X>; +def Sw64VROLH : SDNode<"Sw64ISD::VROLHi", SDT_ZV8X>; +def Sw64VROLL : SDNode<"Sw64ISD::VROLLi", SDT_ZV8X>; + +def z_v8slli : PatFrag<(ops node:$vec, node:$val), + (v8i32 (Sw64V8SLL node:$vec, node:$val))>; + +def z_v8srli : PatFrag<(ops node:$vec, node:$val), + (v8i32 (Sw64V8SRL node:$vec, node:$val))>; + +def z_v8srai : PatFrag<(ops node:$vec, node:$val), + (v8i32 (Sw64V8SRA node:$vec, node:$val))>; + +def z_vrotri : PatFrag<(ops node:$vec, node:$val), + (v8i32 (Sw64VROTR node:$vec, node:$val))>; + +def z_vrolbi : PatFrag<(ops node:$vec, node:$val), + (v32i8 (Sw64VROLB node:$vec, node:$val))>; +def z_vrolhi : PatFrag<(ops node:$vec, node:$val), + (v16i16 (Sw64VROLH node:$vec, node:$val))>; +def z_vrolli : PatFrag<(ops node:$vec, node:$val), + (v4i64 (Sw64VROLL node:$vec, node:$val))>; + +def z_vslls : PatFrag<(ops node:$vec, node:$val), + (v4f32 (Sw64V8SLL node:$vec, node:$val))>; + +def z_vslld : PatFrag<(ops node:$vec, node:$val), + (v4f64 (Sw64V8SLL node:$vec, node:$val))>; + +def z_vsrls : PatFrag<(ops node:$vec, node:$val), + (v4f32 (Sw64V8SRL node:$vec, node:$val))>; + +def z_vsrld : PatFrag<(ops node:$vec, node:$val), + (v4f64 (Sw64V8SRL node:$vec, node:$val))>; + +// ---- + +def Sw64VExtractSExt : SDNode<"Sw64ISD::VEXTRACT_SEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +def Sw64VExtractZExt : SDNode<"Sw64ISD::VEXTRACT_ZEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; + +// Pattern fragments +def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, i8)>; +def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, i16)>; +def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, i32)>; +def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, i64)>; + +def vextract_sext_f32 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, f32)>; + +def vextract_sext_f64 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, f64)>; + +def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractZExt node:$vec, node:$idx, i8)>; +def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractZExt node:$vec, node:$idx, i16)>; +def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractZExt node:$vec, node:$idx, i32)>; +def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractZExt node:$vec, node:$idx, i64)>; + +def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; +def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; + +class vsetcc_type : + PatFrag<(ops node:$lhs, node:$rhs), + (vsetcc (ResTy node:$lhs), (ResTy node:$rhs), CC)>; + +def SDT_VSetGE : SDTypeProfile<1, 2, [SDTCisInt<0>, + SDTCisVec<1>, + SDTCisSameAs<1, 2>]>; +def vsetge_v8i32 : SDNode<"Sw64ISD::VSETGE", SDT_VSetGE>; + +def z_vsetge : PatFrag<(ops node:$vec, node:$val), + (vsetge_v8i32 node:$vec, node:$val)>; + +class Vector_2Op_Pat : + PatFrag<(ops node:$vec, node:$val), + (OpNode (Ty node:$vec), (Ty node:$val))>; + +class Vector_1Op_Pat : + PatFrag<(ops node:$src), + (OpNode (Ty node:$src))>; + 
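For context on the pattern-fragment helper classes above: a minimal TableGen sketch of how a typed two-operand fragment of this shape is typically parameterized and instantiated. The `_ex` names and the exact parameter list are illustrative assumptions, not records defined by this patch; the patch's real instantiations follow in the MultiVec2OpPat multiclass below.

    // Illustrative sketch only -- parameter names are assumed, not taken from the patch.
    class Vector_2Op_Pat_ex<SDPatternOperator OpNode, ValueType Ty>
        : PatFrag<(ops node:$vec, node:$val),
                  (OpNode (Ty node:$vec), (Ty node:$val))>;

    // One such fragment per lane type, mirroring how MultiVec2OpPat is used for
    // add/sub/and/xor/or below.
    def add_v8i32_ex : Vector_2Op_Pat_ex<add, v8i32>;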
+multiclass MultiVec2OpPat { + def v8i32 : Vector_2Op_Pat; + def v4i64 : Vector_2Op_Pat; + def v4f32 : Vector_2Op_Pat; + def v4f64 : Vector_2Op_Pat; + def v16i16 : Vector_2Op_Pat; + def v32i8 : Vector_2Op_Pat; +} + +defm add : MultiVec2OpPat; +defm sub : MultiVec2OpPat; +defm and : MultiVec2OpPat; +defm xor : MultiVec2OpPat; +defm or : MultiVec2OpPat; + +def vbic : BinOpFrag<(and node:$LHS, (vnot node:$RHS))>; +def vornot : BinOpFrag<(or node:$LHS, (vnot node:$RHS))>; +def veqv : BinOpFrag<(vnot (xor node:$LHS, node:$RHS))>; + +def vseteq_v8i32 : vsetcc_type; +def vsetle_v8i32 : vsetcc_type; +def vsetlt_v8i32 : vsetcc_type; +def vsetule_v8i32 : vsetcc_type; +def vsetult_v8i32 : vsetcc_type; +def vsetueq_v32i8 : vsetcc_type; +def vsetugt_v32i8 : vsetcc_type; + +def SDT_VMAX : SDTypeProfile<1, 2, [SDTCisInt<0>,SDTCisVec<0>, + SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; +def SDT_VFMAX : SDTypeProfile<1, 2, [SDTCisFP<0>,SDTCisVec<0>, + SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; +def vmax : SDNode<"Sw64ISD::VMAX", SDT_VMAX>; +def vmin : SDNode<"Sw64ISD::VMIN", SDT_VMAX>; +def vumax : SDNode<"Sw64ISD::VUMAX", SDT_VMAX>; +def vumin : SDNode<"Sw64ISD::VUMIN", SDT_VMAX>; + +def vmaxf : SDNode<"Sw64ISD::VMAXF", SDT_VFMAX>; +def vminf : SDNode<"Sw64ISD::VMINF", SDT_VFMAX>; + +class vfsetcc_type : + PatFrag<(ops node:$lhs, node:$rhs), + (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>; + +// ISD::SETFALSE cannot occur +def vfseteq_v4f32 : vfsetcc_type; +def vfseteq_v4f64 : vfsetcc_type; +def vfsetge_v4f32 : vfsetcc_type; +def vfsetge_v4f64 : vfsetcc_type; +def vfsetgt_v4f32 : vfsetcc_type; +def vfsetgt_v4f64 : vfsetcc_type; +def vfsetle_v4f32 : vfsetcc_type; +def vfsetle_v4f64 : vfsetcc_type; +def vfsetlt_v4f32 : vfsetcc_type; +def vfsetlt_v4f64 : vfsetcc_type; +def vfsetne_v4f32 : vfsetcc_type; +def vfsetne_v4f64 : vfsetcc_type; + +def vfsetoeq_v4f32 : vfsetcc_type; +def vfsetoeq_v4f64 : vfsetcc_type; +def vfsetoge_v4f32 : vfsetcc_type; +def vfsetoge_v4f64 : vfsetcc_type; +def vfsetogt_v4f32 : vfsetcc_type; +def vfsetogt_v4f64 : vfsetcc_type; +def vfsetole_v4f32 : vfsetcc_type; +def vfsetole_v4f64 : vfsetcc_type; +def vfsetolt_v4f32 : vfsetcc_type; +def vfsetolt_v4f64 : vfsetcc_type; +def vfsetone_v4f32 : vfsetcc_type; +def vfsetone_v4f64 : vfsetcc_type; +def vfsetord_v4f32 : vfsetcc_type; +def vfsetord_v4f64 : vfsetcc_type; +def vfsetun_v4f32 : vfsetcc_type; +def vfsetun_v4f64 : vfsetcc_type; +def vfsetueq_v4f32 : vfsetcc_type; +def vfsetueq_v4f64 : vfsetcc_type; +def vfsetuge_v4f32 : vfsetcc_type; +def vfsetuge_v4f64 : vfsetcc_type; +def vfsetugt_v4f32 : vfsetcc_type; +def vfsetugt_v4f64 : vfsetcc_type; +def vfsetule_v4f32 : vfsetcc_type; +def vfsetule_v4f64 : vfsetcc_type; +def vfsetult_v4f32 : vfsetcc_type; +def vfsetult_v4f64 : vfsetcc_type; +def vfsetune_v4f32 : vfsetcc_type; +def vfsetune_v4f64 : vfsetcc_type; +// ISD::SETTRUE cannot occur +// ISD::SETFALSE2 cannot occur +// ISD::SETTRUE2 cannot occur + +class SplatComplexPattern roots = [], + list props = []> : + ComplexPattern { + Operand OpClass = opclass; +} + +multiclass MultiVec1OpPat { + def v8i32 : Vector_1Op_Pat; + def v4i64 : Vector_1Op_Pat; + def v4f32 : Vector_1Op_Pat; + def v4f64 : Vector_1Op_Pat; + def v16i16 : Vector_1Op_Pat; + def v32i8 : Vector_1Op_Pat; +} + +defm vsplat : MultiVec1OpPat; + +def vsplati64_simm8 : SplatComplexPattern; + +def vsplati64_uimm8 : SplatComplexPattern; + +def vsplati32_simm8 : SplatComplexPattern; + +def vsplati32_uimm8 : SplatComplexPattern; + +def vsplati16_uimm8 : SplatComplexPattern; + +def 
vsplati8_uimm8 : SplatComplexPattern; + +def AddSubImm8Pat : ComplexPattern", []>; +def ComplexImmPat : ComplexPattern; + +def addrimm10 : ComplexPattern; + +def addrimm10lsl1 : ComplexPattern; + +def addrimm16 : ComplexPattern; +def addrimm12 : ComplexPattern; + +def immZExt1Ptr : ImmLeaf(Imm);}]>; +def immZExt2Ptr : ImmLeaf(Imm);}]>; +def immZExt3Ptr : ImmLeaf(Imm);}]>; +def immZExt4Ptr : ImmLeaf(Imm);}]>; +def immZExt5Ptr : ImmLeaf(Imm);}]>; +def immZExt8Ptr : ImmLeaf(Imm);}]>; + +def vinsert_v8i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v8i32 (vector_insert node:$vec, node:$val, node:$idx))>; + +def vinsert_v4f32 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v4f32 (vector_insert node:$vec, node:$val, node:$idx))>; + +def vinsert_v4f64 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v4f64 (vector_insert node:$vec, node:$val, node:$idx))>; + +def vinsert_v32i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v32i8 (vector_insert node:$vec, node:$val, node:$idx))>; + +def vinsert_v16i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v16i16 (vector_insert node:$vec, node:$val, node:$idx))>; +// Instruction desc. +// 存储器指令格式 +class VectorStoreBASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROWD:$RA, MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$RA, $addr"); + list Pattern = [(OpNode (vt ROWD:$RA), Addr:$addr)]; +} + +class VectorLoadBASE { + dag OutOperandList = (outs ROWD:$RA); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$RA, $addr"); + list Pattern = [(set ROWD:$RA, (vt (OpNode Addr:$addr)))]; +} + +let mayStore = 1 in +class VectorStore opcode, string instr_asm, RegisterOperand ROWD, + ValueType vt, SDPatternOperator OpNode=null_frag> + : MFormV, + VectorStoreBASE; + +let mayLoad = 1 in +class VectorLoad opcode, string instr_asm, RegisterOperand ROWD, + ValueType vt, SDPatternOperator OpNode=null_frag> + : MFormV, + VectorLoadBASE; + +let DecoderMethod = "DecodeFIXMEInstruction" in{ +def VSTS : VectorStore<0x0E, "vsts", V256LOpnd, v4f32, store>; +def VSTD : VectorStore<0x0F, "vstd", V256LOpnd, v4f64, store>; +def VLDS : VectorLoad <0x0C, "vlds", V256LOpnd, v4f32, load>; +def VLDD : VectorLoad <0x0D, "vldd", V256LOpnd, v4f64, load>; +def VLDWE : VectorLoad <0x09, "ldwe", V256LOpnd, v8i32, Sw64VBroadCasti32>; +def VLDSE : VectorLoad <0x0A, "ldse", V256LOpnd, v4f32, Sw64VBroadCastf32>; +def VLDDE : VectorLoad <0x0B, "ldde", V256LOpnd, v4f64, Sw64VBroadCastf64>; +} +multiclass V256Pat { + def v32i8 : PatFrag<(ops node:$src), (v32i8 (OpNode node:$src))>; + def v16i16 : PatFrag<(ops node:$src), (v16i16 (OpNode node:$src))>; + def v8i32 : PatFrag<(ops node:$src), (v8i32 (OpNode node:$src))>; + def v4i64 : PatFrag<(ops node:$src), (v4i64 (OpNode node:$src))>; + def v4f64 : PatFrag<(ops node:$src), (v4f64 (OpNode node:$src))>; +} + +//////////////////////////////////////////// +// Extern Vector Memory Operation +// ///////////////////////////////////////// +// 带功能域的存储器指令格式 +let mayStore = 1 in +class VectorStoreExt func, string instr_asm, ValueType vt, + SDPatternOperator OpNode=null_frag> + : MFuncFormV<0x1C, func>, + VectorStoreBASE; + +let mayLoad = 1 in +class VectorLoadExt func, string instr_asm, ValueType vt, + SDPatternOperator OpNode=null_frag> + : MFuncFormV<0x1C, func>, + VectorLoadBASE; + +let DecoderMethod = "DecodeFIXMEInstruction" in{ +def VLDWU : VectorLoadExt <0x00, "vldw_u" , v8i32>; +def VLDSU : VectorLoadExt <0x02, "vlds_u" , v4f32>; +def VLDDU : VectorLoadExt <0x04, 
"vldd_u" , v4f64>; +def VLDDNC : VectorLoadExt <0x0e, "vldd_nc", v4f64>; +def VSTWU : VectorStoreExt<0x01, "vstw_u" , v8i32>; +def VSTSU : VectorStoreExt<0x03, "vsts_u" , v4f32>; +def VSTDU : VectorStoreExt<0x05, "vstd_u" , v4f64>; +def VSTWUL : VectorStoreExt<0x08, "vstw_ul", v8i32>; +def VSTSUL : VectorStoreExt<0x0a, "vsts_ul", v4f32>; +def VSTDUL : VectorStoreExt<0x0c, "vstd_ul", v4f64>; +def VSTWUH : VectorStoreExt<0x09, "vstw_uh", v8i32>; +def VSTSUH : VectorStoreExt<0x0b, "vsts_uh", v4f32>; +def VSTDUH : VectorStoreExt<0x0d, "vstd_uh", v4f64>; +def VSTDNC : VectorStoreExt<0x0f, "vstd_nc", v4f64>; +} +class vload_pat + : Pat<(Vt (OpNode addrimm16:$src)), (Inst addrimm16:$src)>; + +class vstore_pat + : Pat<(OpNode (Vt V256L:$DST), addrimm16:$src), (Inst $DST, addrimm16:$src)>; + + +// commom pattern for load/store intrinsic +multiclass vector_mem_multipat { +def : vload_pat; +def : vstore_pat; +def : vload_pat; +def : vstore_pat; +} + +multiclass vector_mem_intrpat { +def : vload_pat; +def : vstore_pat; +} + +// extension pattern for load_u/loade/store_u/storeuh/.. +multiclass vector_mem_extension { +def : vload_pat (LoadI#U)>; +def : vload_pat (LoadI#E)>; +def : vstore_pat(StoreI#U)>; +def : vstore_pat(StoreI#UH)>; +def : vstore_pat(StoreI#UL)>; +} + +defm : vector_mem_multipat; +defm : vector_mem_multipat; +defm : vector_mem_multipat; +defm : vector_mem_multipat; + +defm : vector_mem_intrpat; +defm : vector_mem_intrpat; + +defm : vector_mem_extension; +defm : vector_mem_extension; +defm : vector_mem_extension; +defm : vector_mem_extension; + +multiclass vector_mem_nc { +def : vload_pat ; +def : vstore_pat; +} + +defm : vector_mem_nc; +defm : vector_mem_nc; +defm : vector_mem_nc; +defm : vector_mem_nc; +defm : vector_mem_nc; + +def : Pat<(v8i32 (Sw64VBroadCast (i64 (extloadi32 addrimm16:$src)))), + (VLDWE addrimm16:$src)>; +def : Pat<(v4f32 (Sw64VBroadCast (f32 (load addrimm16:$src)))), + (VLDSE addrimm16:$src)>; +def : Pat<(v4i64 (Sw64VBroadCast (i64 (load addrimm16:$src)))), + (VLDDE addrimm16:$src)>; +def : Pat<(v4f64 (Sw64VBroadCast (f64 (load addrimm16:$src)))), + (VLDDE addrimm16:$src)>; + +def : vstore_pat; + +class SIMD_3RR_SAME { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROA:$RA), (Vt ROB:$RB)))]; +} + +class SIMD_3RI_SAME { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, immtype:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RC"); + list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROA:$RA), immtype:$Imm))]; +} + +class SIMD_4RR_SAME { + dag OutOperandList = (outs ROC:$RD); + dag InOperandList = (ins ROC:$RA, ROC:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + list Pattern = [(set (Vt ROC:$RD), + (OpNode (Vt ROC:$RA), (Vt ROC:$RB), (Vt ROC:$RC)))]; +} + +class Vector_2OP_Reg_Pat + : Pat<(OpNode (Vt OR:$RA), (Yt OR:$RB)), + (Inst (Vt OR:$RA), (Yt OR:$RB))>; + +class Vector_1OP_Imm_Pat + : Pat<(Vt (OpNode (Vt OR:$RA), (i64 cpx:$Imm))), + (Inst (Vt OR:$RA), $Imm)>; + +class Vector_2OP_Imm_VB_Pat + : Pat<(Vt (OpNode (Vt OR:$RA), (it (immop (i64 cpx:$Imm))))), + (Inst (Vt OR:$RA), $Imm)>; + +class Vector_2OP_Reg_Scalar + : Pat<(Vt (OpNode (Vt ROA:$RA), (i64 GPRCOpnd:$RB))), + (Inst (Vt ROA:$RA), (i32 (COPY_TO_REGCLASS GPRCOpnd:$RB, ROB)))>; + +class Vector_2OP_Reg_S32 + : Pat<(Vt (OpNode (Vt ROA:$RA), (Vt (Sw64VBroadCast (i64 GPRCOpnd:$RB))))), + 
(Inst (Vt ROA:$RA), (i32 (COPY_TO_REGCLASS GPRCOpnd:$RB, FPRC_lo)))>; + +class Vector_3OP_SameReg_Pat + : Pat<(OpNode (Vt OR:$RA), (Vt OR:$RB), (Vt OR:$RC)), + (Inst OR:$RA, OR:$RB, OR:$RC)>; + +multiclass SIMD_ARITH Opcode, bits<8>func, + string instr_asm, SDPatternOperator OpNode, ValueType Vt, + Operand immtype, RegisterOperand RO, + SDPatternOperator IOp = null_frag, + ComplexPattern cpx = AddSubImm8Pat> { + def rr : FPFormV, SIMD_3RR_SAME; + + def ri : FPFormIV, + SIMD_3RI_SAME; + + def : Vector_2OP_Reg_Pat(NAME # rr)>; + + def : Vector_2OP_Imm_VB_Pat(NAME # ri)>; + + def : Vector_2OP_Imm_VB_Pat(NAME # ri)>; +} + +defm VUCADDv16i16 : SIMD_ARITH<0x1A, 0x42, "vucaddh", add, v16i16, + s8imm, V256LOpnd, int_sw64_vucaddh_v16hi>; +defm VUCSUBv16i16 : SIMD_ARITH<0x1A, 0x43, "vucsubh", sub, v16i16, + s8imm, V256LOpnd, int_sw64_vucsubh_v16hi>; +defm VUCADDv32i8 : SIMD_ARITH<0x1A, 0x44, "vucaddb", add, v32i8, + s8imm, V256LOpnd, int_sw64_vucaddb_v32qi>; +defm VUCSUBv32i8 : SIMD_ARITH<0x1A, 0x45, "vucsubb", sub, v32i8, + s8imm, V256LOpnd, int_sw64_vucsubb_v32qi>; +defm VADDv8i32 : SIMD_ARITH<0x1A, 0x00, "vaddw", add, v8i32, + s8imm, V256LOpnd>; +defm VSUBv8i32 : SIMD_ARITH<0x1A, 0x01, "vsubw", sub, v8i32, + s8imm, V256LOpnd>; +defm VUCADDv8i32 : SIMD_ARITH<0x1A, 0x40, "vucaddw", add, v8i32, + s8imm, V256LOpnd, int_sw64_vucaddw>; +defm VUCSUBv8i32 : SIMD_ARITH<0x1A, 0x41, "vucsubw", sub, v8i32, + s8imm, V256LOpnd, int_sw64_vucsubw>; +defm VADDv4i64 : SIMD_ARITH<0x1A, 0x0E, "vaddl", add, v4i64, + s8imm, V256LOpnd>; +defm VSUBv4i64 : SIMD_ARITH<0x1A, 0x0F, "vsubl", sub, v4i64, + s8imm, V256LOpnd>; + +def : Vector_2OP_Reg_Pat; +def : Vector_2OP_Reg_Pat; +def : Vector_2OP_Reg_Pat; +def : Vector_2OP_Reg_Pat; + +def : Vector_1OP_Imm_Pat; +def : Vector_1OP_Imm_Pat; +def : Vector_1OP_Imm_Pat; +def : Vector_1OP_Imm_Pat; + +class SIMD_3RR_VCMPGEW { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); +} + +class SIMD_3RI_VCMPGEW { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, immtype:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RC"); +} + +def VCMPGEWrr : FPFormV<0x1A, 0x02>, SIMD_3RR_VCMPGEW<"vcmpgew", null_frag, v8i32>; +def VCMPGEWri : FPFormIV<0x1A, 0x02>, SIMD_3RI_VCMPGEW<"vcmpgew", null_frag, v8i32, u8imm>; + +def : Pat<(int_sw64_vcmpgew (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB)), + (i64 (FTOIStmp (VCMPGEWrr (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB))))>; +def : Pat<(int_sw64_vcmpgew (v8i32 V256LOpnd:$RA), (v8i32 (Sw64VBroadCast (i64 AddSubImm8Pat:$Imm)))), + (i64 (FTOIStmp (VCMPGEWri (v8i32 V256LOpnd:$RA), $Imm)))>; + +defm VCMPEQW : SIMD_ARITH<0x1A, 0x03, "vcmpeqw", seteq, v8i32, + u8imm, V256LOpnd, int_sw64_vcmpeqw, AddSubImm8Pat>; +defm VCMPLEW : SIMD_ARITH<0x1A, 0x04, "vcmplew", setle, v8i32, + u8imm, V256LOpnd, int_sw64_vcmplew, AddSubImm8Pat>; +defm VCMPLTW : SIMD_ARITH<0x1A, 0x05, "vcmpltw", setlt, v8i32, + u8imm, V256LOpnd, int_sw64_vcmpltw, AddSubImm8Pat>; +defm VCMPULEW : SIMD_ARITH<0x1A, 0x06, "vcmpulew", setule, v8i32, + u8imm, V256LOpnd, int_sw64_vcmpulew, AddSubImm8Pat>; +defm VCMPULTW : SIMD_ARITH<0x1A, 0x07, "vcmpultw", setult, v8i32, + u8imm, V256LOpnd, int_sw64_vcmpultw, AddSubImm8Pat>; + +defm VCMPUEQB : SIMD_ARITH<0x1A, 0x4B, "vcmpueqb", null_frag, v32i8, + u8imm, V256LOpnd, int_sw64_vcmpueqb, AddSubImm8Pat>; +defm VCMPUGTB : SIMD_ARITH<0x1A, 0x4C, "vcmpugtb", null_frag, v32i8, + u8imm, V256LOpnd, int_sw64_vcmpugtb, AddSubImm8Pat>; + 
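For context on the SIMD_ARITH multiclass above: a minimal, hypothetical sketch of what a single instantiation expands into, following the argument order visible in the VADDv8i32/VSUBv8i32 defm lines. The record name VEXAMPLEv8i32 and the 0x7F function field are placeholders for illustration only, not encodings defined by this patch.

    // Hypothetical illustration -- VEXAMPLE and 0x7F are placeholders.
    defm VEXAMPLEv8i32 : SIMD_ARITH<0x1A, 0x7F, "vexample", add, v8i32,
                                    s8imm, V256LOpnd>;
    // Expands to a register-register record VEXAMPLEv8i32rr (FPFormV encoding with
    // SIMD_3RR_SAME operands and pattern) and a register-immediate record
    // VEXAMPLEv8i32ri (FPFormIV with SIMD_3RI_SAME), plus the accompanying
    // Vector_2OP_Reg_Pat / Vector_2OP_Imm_VB_Pat selection patterns.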
+class SIMD_2RR_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RC"); +} + +def CTPOPOW : FPFormV_CT<0x1A, 0x18>, + SIMD_2RR_BASE<"ctpopow", FPRCOpnd, V256LOpnd>; +def CTLZOW : FPFormV_CT<0x1A, 0x19>, + SIMD_2RR_BASE<"ctlzow", FPRCOpnd, V256LOpnd>; + +def VSUMv8i32 : FPFormV_CT<0x1A, 0x47>, + SIMD_2RR_BASE<"vsumw", FPRCOpnd, V256LOpnd>; +def VSUMv4i64 : FPFormV_CT<0x1A, 0x48>, + SIMD_2RR_BASE<"vsuml", FPRCOpnd, V256LOpnd>; + +def : Pat<(int_sw64_vsumw (v8i32 V256LOpnd:$RA)), + (i64 (FTOIStmp (i64 (VSUMv8i32 (v8i32 V256LOpnd:$RA)))))>; + +def : Pat<(int_sw64_vsuml (v4i64 V256LOpnd:$RA)), + (i64 (FTOITtmp (i64 (VSUMv4i64 (v4i64 V256LOpnd:$RA)))))>; + +def : Pat<(int_sw64_ctpopow (v4i64 V256LOpnd:$RA)), + (i64 (FTOIStmp (i64 (CTPOPOW (v4i64 V256LOpnd:$RA)))))>; + +def : Pat<(int_sw64_ctlzow (v4i64 V256LOpnd:$RA)), + (i64 (FTOIStmp (i64 (CTLZOW (v4i64 V256LOpnd:$RA)))))>; + +class SIMD_3RR_SCALER { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROA:$RA), ROB:$RB))]; +} + +multiclass SIMD_SHIFT Opcode, bits<8>func, + string instr_asm, SDPatternOperator OpNode, ValueType Vt, + Operand immtype, RegisterOperand ROA, RegisterOperand ROB, + SDPatternOperator IOp = null_frag> { + def rr : FPFormV, + SIMD_3RR_SCALER; + + def ri : FPFormIV, + SIMD_3RI_SAME; + + def : Vector_2OP_Reg_Scalar(NAME # rr)>; + + def : Vector_1OP_Imm_Pat(NAME # ri)>; +} + +multiclass SIMD_Shift_Multi funcW, bits<8> funcB,bits<8> funcH, + bits<8> funcL, string instr_asm, RegisterOperand RO, + SDPatternOperator OpNode> { +defm v8i32 : SIMD_SHIFT<0x1A, funcW, instr_asm#w, OpNode, v8i32, + s8imm, V256LOpnd, RO>; +defm v16i16 : SIMD_SHIFT<0x1A, funcH, instr_asm#h, OpNode, v16i16, + s8imm, V256LOpnd, RO>; +defm v32i8 : SIMD_SHIFT<0x1A, funcB, instr_asm#b, OpNode, v32i8, + s8imm, V256LOpnd, RO>; +defm v4i64 : SIMD_SHIFT<0x1A, funcL, instr_asm#l, OpNode, v4i64, + s8imm, V256LOpnd, RO>; + +def : Vector_2OP_Imm_VB_Pat(NAME # v8i32 #ri)>; + +def : Vector_2OP_Imm_VB_Pat(NAME # v16i16 #ri)>; + +def : Vector_2OP_Imm_VB_Pat(NAME # v32i8 #ri)>; + +def : Vector_2OP_Imm_VB_Pat(NAME # v4i64 #ri)>; +} + +defm VSLL : SIMD_Shift_Multi<0x08, 0x10, 0x14, 0x1A, "vsll", + FPRCloOpnd, int_sw64_vsll>; +defm VSRL : SIMD_Shift_Multi<0x09, 0x11, 0x15, 0x1B, "vsrl", + FPRCloOpnd, int_sw64_vsrl>; +defm VSRA : SIMD_Shift_Multi<0x0A, 0x12, 0x16, 0x1C, "vsra", + FPRCloOpnd, int_sw64_vsra>; +defm VROL : SIMD_Shift_Multi<0x0B, 0x13, 0x17, 0x1D, "vrol", + FPRCloOpnd, int_sw64_vrol>; + +multiclass Vector_Shift_VB { +def : Vector_1OP_Imm_Pat(InstName # ri)>; +def : Vector_2OP_Reg_S32(InstName # rr)>; +} + +multiclass Vector_Shift { +defm : Vector_Shift_VB; +defm : Vector_Shift_VB; +defm : Vector_Shift_VB; +defm : Vector_Shift_VB; +} + +defm : Vector_Shift; +defm : Vector_Shift; +defm : Vector_Shift; +defm : Vector_Shift; + +defm VSLLOW : SIMD_SHIFT<0x1A, 0x0C, "sllow", int_sw64_sllow, v4i64, + s8imm, V256LOpnd, FPRCloOpnd>; +defm VSRLOW : SIMD_SHIFT<0x1A, 0x0D, "srlow", int_sw64_srlow, v4i64, + s8imm, V256LOpnd, FPRCloOpnd>; +defm VSRAOW : SIMD_SHIFT<0x1A, 0x46, "sraow", int_sw64_sraow, v4i64, + s8imm, V256LOpnd, FPRCloOpnd>; + +def : Pat<(int_sw64_vslls (v4f32 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), + (VSLLOWri V256LOpnd:$RA, $Imm)>; +def : Pat<(int_sw64_vslld (v4f64 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), + (VSLLOWri 
V256LOpnd:$RA, $Imm)>; + +def : Pat<(int_sw64_vsrls (v4f32 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), + (VSRLOWri V256LOpnd:$RA, $Imm)>; +def : Pat<(int_sw64_vsrld (v4f64 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), + (VSRLOWri V256LOpnd:$RA, $Imm)>; + +multiclass SIMD_LOGIC OpFunc,string instr_asm, RegisterOperand RO, + SDPatternOperator OpNode> { +def "" : FForm4LVLog<0x5, OpFunc>, + SIMD_3RR_SAME; + +def : Vector_2OP_Reg_Pat(NAME)>; +def : Vector_2OP_Reg_Pat(NAME)>; +def : Vector_2OP_Reg_Pat(NAME)>; +} + +defm VOR : SIMD_LOGIC<0x54, "vbisw", V256LOpnd, or>; +defm VAND : SIMD_LOGIC<0x40, "vandw", V256LOpnd, and>; +defm VXOR : SIMD_LOGIC<0x1c, "vxorw", V256LOpnd, xor>; + +defm VORNOT : SIMD_LOGIC<0x51, "vornotw", V256LOpnd, vornot>; +defm VBIC : SIMD_LOGIC<0x10, "vbicw", V256LOpnd, vbic>; +defm VEQV : SIMD_LOGIC<0x41, "veqvw", V256LOpnd, veqv>; + +def : Pat<(v8i32 immAllZerosV), (VOR (v8i32 V31) , (v8i32 V31))>; +def : Pat<(v32i8 immAllZerosV), (VOR (v32i8 V31) , (v32i8 V31))>; +def : Pat<(v16i16 immAllZerosV), (VOR (v16i16 V31), (v16i16 V31))>; +def : Pat<(v4i64 immAllZerosV), (VOR (v4i64 V31) , (v4i64 V31))>; + +def : Pat<(v8i32 immAllOnesV), (VEQV (v8i32 V31) , (v8i32 V31))>; +def : Pat<(v32i8 immAllOnesV), (VEQV (v32i8 V31) , (v32i8 V31))>; +def : Pat<(v16i16 immAllOnesV), (VEQV (v16i16 V31), (v16i16 V31))>; +def : Pat<(v4i64 immAllOnesV), (VEQV (v4i64 V31) , (v4i64 V31))>; + +class SIMD_INSERT_BASE { + dag OutOperandList = (outs V256LOpnd:$RD); + dag InOperandList = (ins FPO:$RA, V256LOpnd:$RB, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); + + list Pattern = [(set V256LOpnd:$RD, + (vector_insert (vectype V256LOpnd:$RB), + (eltVt FPO:$RA), ImmOp:$Imm))]; + +} + +multiclass SIMD_INSERT_Multi funcB, bits<6> funcH,bits<6> funcW, + bits<6> funcL, string instr_asm> { +def E8 : FForm4LV<0x1B, funcB>, + SIMD_INSERT_BASE; + +def E16 : FForm4LV<0x1B, funcH>, + SIMD_INSERT_BASE; + +def E32 : FForm4LV<0x1B, funcW>, + SIMD_INSERT_BASE; + +def E64 : FForm4LV<0x1B, funcL>, + SIMD_INSERT_BASE; +} + +defm VINS : SIMD_INSERT_Multi<0x2A, 0x2B, 0x20, 0x21, "vins">; + +def : Pat<(vector_insert (v4f32 V256LOpnd:$RB), (f32 FPRCloOpnd:$RA), VectorIndexD:$idx), + (VINSE64 (f64 (COPY_TO_REGCLASS FPRCloOpnd:$RA, FPRC)), (v4f32 V256LOpnd:$RB), VectorIndexD:$idx)>; + +def : Pat<(vector_insert (v4i64 V256LOpnd:$RB), (i64 FPRCOpnd:$RA), VectorIndexD:$idx), + (VINSE64 (i64 FPRCOpnd:$RA), (v4i64 V256LOpnd:$RB), VectorIndexD:$idx)>; + +class vins_pat + : Pat<(OpNode GPRCOpnd:$RA, (vectype V256LOpnd:$RB), ImmOp:$idx), + (vectype (Inst (eltvt (COPY_TO_REGCLASS GPRCOpnd:$RA, RC)), (vectype V256LOpnd:$RB), ImmOp:$idx))>; + +class vinselt + : Pat<(OpNode (vectype V256LOpnd:$RA), GPRCOpnd:$RB, ImmOp:$idx), + (vectype (Inst (eltvt (COPY_TO_REGCLASS GPRCOpnd:$RB, RC)), (vectype V256LOpnd:$RA), ImmOp:$idx))>; + +def : vins_pat; +def : vins_pat; +def : vins_pat; +def : vins_pat; + +def : vinselt; +def : vinselt; +def : vinselt; +def : vinselt; + +def : Pat<(int_sw64_vinsfs (f32 FPRCloOpnd:$RA), + (v4f32 V256LOpnd:$RB), VectorIndexD:$idx), + (v4f32 (VINSE64 (f64 (COPY_TO_REGCLASS FPRCloOpnd:$RA, FPRC)), (v4f32 V256LOpnd:$RB), VectorIndexD:$idx))>; +def : Pat<(int_sw64_vinsfd (f64 FPRCOpnd:$RA), + (v4f64 V256LOpnd:$RB), VectorIndexD:$idx), + (v4f64 (VINSE64 (f64 FPRCOpnd:$RA), (v4f64 V256LOpnd:$RB), VectorIndexD:$idx))>; + +multiclass SIMD_COPY_Multi funcB, bits<6> funcH,bits<6> funcW, + bits<6> funcL, string instr_asm> { +def E8 : FForm2V<0x1B, funcB>, + SIMD_2RR_BASE; + +def E16 : 
FForm2V<0x1B, funcH>, + SIMD_2RR_BASE; + +def E32 : FForm2V<0x1B, funcW>, + SIMD_2RR_BASE; + +def E64 : FForm2V<0x1B, funcL>, + SIMD_2RR_BASE; + +def : Pat <(v32i8 (Sw64VBroadCast GPRCOpnd:$RA)), + (v32i8 (!cast(NAME # E8) + (i64 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC))))>; + +def : Pat <(v16i16 (Sw64VBroadCast GPRCOpnd:$RA)), + (v16i16 (!cast(NAME # E16) + (i64 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC))))>; + +def : Pat <(v8i32 (Sw64VBroadCast GPRCOpnd:$RA)), + (v8i32 (!cast(NAME # E32) + (i32 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC_lo))))>; + +def : Pat <(v4i64 (Sw64VBroadCast GPRCOpnd:$RA)), + (v4i64 (!cast(NAME # E64) + (i64 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC))))>; + +def : Pat <(v4f64 (Sw64VBroadCast (f64 FPRCOpnd:$RA))), + (v4f64 (!cast(NAME # E64) (f64 FPRCOpnd:$RA)))>; + +def : Pat <(v4f32 (Sw64VBroadCast (f32 FPRCloOpnd:$RA))), + (v4f32 (!cast(NAME # E64) (f64 (COPY_TO_REGCLASS FPRCloOpnd:$RA, FPRC))))>; +} + +defm VCPY : SIMD_COPY_Multi<0x32, 0x33, 0x24, 0x25, "vcpy">; + +multiclass SIMD_VINSECT_Multi { +def H : FForm4VINSECTL<0x1B, 0x2C>, + SIMD_3RR_SAME; + +def W : FForm4VINSECTL<0x1B, 0x2D>, + SIMD_3RR_SAME; + +def L : FForm4VINSECTL<0x1B, 0x2E>, + SIMD_3RR_SAME; + +def B : FForm4VINSECTL<0x1B, 0x2F>, + SIMD_3RR_SAME; +} + +defm VINSECTL : SIMD_VINSECT_Multi<"vinsectl">; + +def VSHFQB : FForm4VINSECTL<0x1B, 0x31>, + SIMD_3RR_SAME<"vshfqb", int_sw64_vshfqb, v32i8, V256LOpnd>; + +class SIMD_4RI_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); + + list Pattern = [(set ROD:$RD, + (OpNode (Vt ROA:$RA), (Vt ROB:$RB), Imm:$Imm))]; +} + +class SIMD_4RR_BASE { + + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + +} + +def VSHFQ : FForm4LV<0x1B, 0x30>, + SIMD_4RI_BASE<"vshfq", int_sw64_vshfq, v8i32, u5imm, immZExt4Ptr, V256LOpnd>; + +def VCONW : FForm4LV2<0x1B, 0x26>, + SIMD_4RR_BASE<"vconw", FPRCOpnd, V256LOpnd>; +def VCONS : FForm4LV2<0x1B, 0x28>, + SIMD_4RR_BASE<"vcons", FPRCOpnd, V256LOpnd>; +def VCOND : FForm4LV2<0x1B, 0x29>, + SIMD_4RR_BASE<"vcond", FPRCOpnd, V256LOpnd>; +def VSHFW : FForm4LV2<0x1B, 0x27>, + SIMD_4RR_BASE<"vshfw", FPRCOpnd, V256LOpnd>; + +def : Pat<(int_sw64_vshfq (v8i32 V256LOpnd:$RA), + (v8i32 V256LOpnd:$RB), (i64 ComplexImmPat:$imm)), + (VSHFQ V256LOpnd:$RA, V256LOpnd:$RB, $imm)>; + +def : Pat<(int_sw64_vconw (v8i32 V256LOpnd:$RA), + (v8i32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VCONW (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def : Pat<(int_sw64_vcons (v4f32 V256LOpnd:$RA), + (v4f32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VCONS (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def : Pat<(int_sw64_vcond (v4f64 V256LOpnd:$RA), + (v4f64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VCOND (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def : Pat<(int_sw64_vconl (v4i64 V256LOpnd:$RA), + (v4i64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VCOND (v4i64 V256LOpnd:$RA), (v4i64 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def : Pat<(Sw64VSHF (v8i32 V256LOpnd:$RA), + (v8i32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VSHFW (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; +def : Pat<(Sw64VSHF (v4i64 V256LOpnd:$RA), + (v4i64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VSHFW (v4i64 V256LOpnd:$RA), (v4i64 
V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; +def : Pat<(Sw64VSHF (v4f32 V256LOpnd:$RA), + (v4f32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VSHFW (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; +def : Pat<(Sw64VSHF (v4f64 V256LOpnd:$RA), + (v4f64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VSHFW (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def VEXTW : FForm4LVV<0x1B, 0x22>, + SIMD_3RI_SAME<"vextw", null_frag, v8i32, u5imm, FPRCOpnd, V256LOpnd>; +def VEXTF : FForm4LVV<0x1B, 0x23>, + SIMD_3RI_SAME<"vextf", null_frag, v4f32, u5imm, FPRCOpnd, V256LOpnd>; + +multiclass Vector_extract_pat { +def : Pat<(ext_vt (vector_extract (vecty V256LOpnd:$RA), Index:$Idx)), + (ext_vt (TransI (Inst (vecty V256LOpnd:$RA), Index:$Idx)))>; + +def : Pat<(ext_vt (Intr (vecty V256LOpnd:$RA), Index:$Idx)), + (ext_vt (TransI (Inst (vecty V256LOpnd:$RA), Index:$Idx)))>; +} + +defm : Vector_extract_pat; +defm : Vector_extract_pat; + +// TODO: How to Combine it with class pattern? +def : Pat<(f64 (vector_extract (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx)), + (f64 (VEXTF (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx))>; +def : Pat<(f64 (int_sw64_vextfd (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx)), + (f64 (VEXTF (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx))>; +def : Pat<(f32 (vector_extract (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx)), + (f32 (COPY_TO_REGCLASS (VEXTF (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx), FPRC_lo))>; +def : Pat<(f32 (int_sw64_vextfs (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx)), + (f32 (COPY_TO_REGCLASS (VEXTF (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx), FPRC_lo))>; + +class SIMD_VLOGZZ { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROD:$RA, ROD:$RB, ROD:$RC, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm # "$Imm", "\t$RA, $RB, $RC, $RD"); + + list Pattern = [(set ROD:$RD, + (z_vlog (TyNode ROD:$RA), (TyNode ROD:$RB), + (TyNode ROD:$RC), Imm:$Imm))]; +} + +def VLOGZZ : FForm4LVLogZZ<0x5>, + SIMD_VLOGZZ<"vlog", u8immHex, immZExt8Ptr, v4i64, V256LOpnd>; + +multiclass SIMD_Floating_3RR Opcode, bits<8>func, + string instr_asm, SDPatternOperator OpNode> { +def "" : FPFormV, + SIMD_3RR_SAME; + +def : Pat<(v4i64 (OpNode (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB))), + (v4i64 (!cast(NAME) V256LOpnd:$RA, V256LOpnd:$RB))>; + +def : Pat<(v4i64 (OpNode (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB))), + (v4i64 (!cast(NAME) V256LOpnd:$RA, V256LOpnd:$RB))>; +} + +defm VFCMPEQ : SIMD_Floating_3RR<0x1A, 0x8C, "vfcmpeq", setoeq>; +defm VFCMPLE : SIMD_Floating_3RR<0x1A, 0x8D, "vfcmple", setole>; +defm VFCMPLT : SIMD_Floating_3RR<0x1A, 0x8E, "vfcmplt", setolt>; +defm VFCMPUN : SIMD_Floating_3RR<0x1A, 0x8F, "vfcmpun", setuo>; + + +multiclass Vector_compare_pat { +def : Pat <(OpNode (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB)), + (Inst V256LOpnd:$RA, V256LOpnd:$RB)>; +def : Pat <(OpNode (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB)), + (Inst V256LOpnd:$RA, V256LOpnd:$RB)>; +} + +defm : Vector_compare_pat; +defm : Vector_compare_pat; +defm : Vector_compare_pat; +defm : Vector_compare_pat; + +def VCPYS : FPFormV<0x1A, 0x90>, + SIMD_3RR_SAME<"vcpys", int_sw64_vcpysd, v4f64, V256LOpnd>; +def VCPYSE : FPFormV<0x1A, 0x91>, + SIMD_3RR_SAME<"vcpyse", int_sw64_vcpysed, v4f64, V256LOpnd>; +def VCPYSN : FPFormV<0x1A, 0x92>, + SIMD_3RR_SAME<"vcpysn", int_sw64_vcpysnd, v4f64, V256LOpnd>; + +def : Pat<(int_sw64_vcpyss V256LOpnd:$RA, V256LOpnd:$RB), + (VCPYS V256LOpnd:$RA, V256LOpnd:$RB)>; +def : Pat<(int_sw64_vcpyses V256LOpnd:$RA, V256LOpnd:$RB), + 
(VCPYSE V256LOpnd:$RA, V256LOpnd:$RB)>; +def : Pat<(int_sw64_vcpysns V256LOpnd:$RA, V256LOpnd:$RB), + (VCPYSN V256LOpnd:$RA, V256LOpnd:$RB)>; + +multiclass SIMD_FMA funcS, bits<6> funcD, + string instr_asm, SDPatternOperator OpNode> { +def S : FForm4V<0x1B, funcS>, + SIMD_4RR_SAME; +def D : FForm4V<0x1B, funcD>, + SIMD_4RR_SAME; +} + +defm VMA : SIMD_FMA<0x00, 0x01, "vma", fma>; +defm VMS : SIMD_FMA<0x02, 0x03, "vms", + ThridOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; +defm VNMA : SIMD_FMA<0x04, 0x05, "vnma", + ThridOpFrag<(fma (fneg node:$LHS), node:$MHS, node:$RHS)> >; +defm VNMS : SIMD_FMA<0x06, 0x07, "vnms", + ThridOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; + +multiclass SIMD_FLOAT_SEL func, string instr_asm, + SDPatternOperator OpNode> { +def "" : FForm4V<0x1B, func>, + SIMD_4RR_SAME; + +def : Vector_3OP_SameReg_Pat(NAME)>; +} + +defm VFSELEQ : SIMD_FLOAT_SEL<0x10, "vfseleq", vfcmoveq>; +defm VFSELLT : SIMD_FLOAT_SEL<0x12, "vfsellt", vfcmovlt>; +defm VFSELLE : SIMD_FLOAT_SEL<0x13, "vfselle", vfcmovle>; + +// 简单运算指令格式: 寄存器格式 +class SIMD_3RV_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RC"; +} + +class SIMD_3RVV_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; +} + +class SIMD_3RV_TY_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RC"; +} + +class SIMD_VFCMPS_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RC"; +} + +class SIMD_2RV_R_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; +} + +class SIMD_VSETGE_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + + bit usesCustomInserter = 1; + InstrItinClass Itinerary = itin; +} + +class SIMD_VSQRT_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RB, $RC"); + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RC"; +} + +class SIMD_POPCNT_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RB, $RC"); + InstrItinClass Itinerary = itin; + + bit usesCustomInserter = 1; +} + +class SIMD_REDUCE_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RC"); + InstrItinClass Itinerary = itin; + + bit usesCustomInserter = 1; // 6A should be extend. 
+} + +// 简单运算指令格式: 立即数格式 +class SIMD_I8_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, SplatImm.OpClass:$imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $imm, $RC"); + InstrItinClass Itinerary = itin; +} + + +// 浮点复核运算指令格式 寄存器格式 +class SIMD_4RV_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RD"; +} + +class SIMD_4RV_DESC_SEL { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + + list Pattern = [(set (Vt ROD:$RD), (OpNode (Vt ROA:$RA), (Vt ROB:$RB), (Vt ROC:$RC)))]; +} + +class SIMD_4RV_DESC_VNMSS { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RD"; +} + + +class SIMD_SELECT_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROC:$RC, ROB:$RB, ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RC, $RB, $RA, $RD"); + + InstrItinClass Itinerary = itin; +} + + + +class SIMD_VSETGE_I_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ImmOp:$imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $imm, $RC"); + + bit usesCustomInserter = 1; + InstrItinClass Itinerary = itin; +} + +// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be +// of that type. +def vnot_sw64 : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v8i32 immAllOnesV)))>; + +class SIMD_VBIC_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + + InstrItinClass Itinerary = itin; +} + +class SIMD_VORNOT_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + + InstrItinClass Itinerary = itin; +} + +class SIMD_COPY_DESC_BASE { + dag OutOperandList = (outs ROB:$RB); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB"); + + bit usesCustomInserter = Num; // 6A should be extend. 
+ InstrItinClass Itinerary = itin; +} + +class SIMD_COPYF_DESC_BASE { + dag OutOperandList = (outs ROB:$RB); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB"); + list Pattern = []; + InstrItinClass Itinerary = itin; +} + +class SIMD_COPYF_PSEUDO_BASE : + SIMDPseudo<(outs RCWD:$wd), (ins RCWS:$fs), + [(set RCWD:$wd, (VT (OpNode RCWS:$fs)))]> { + let usesCustomInserter = 1; +} + +class SIMD_VSHIFT_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, F4RCOpnd:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; + +} + +class SIMD_VINSECTL_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RD"); + InstrItinClass Itinerary = itin; + +} + +class SIMD_INSERT_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROD:$RB, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); + InstrItinClass Itinerary = itin; + + bit usesCustomInserter = Num; +} + +class SIMD_EXTRACT_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RD"); + + bit usesCustomInserter = Num; + InstrItinClass Itinerary = itin; +} + +class SIMD_MIX_DESC_BASE { + + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + + bit usesCustomInserter = Num; + InstrItinClass Itinerary = itin; +} + +class VADDWC_DESC : SIMD_3RVV_DESC_BASE<"vaddw", addv8i32, V256LOpnd>, IsCommutable; + +class SIMD_2RV_SRi_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RC"); + InstrItinClass Itinerary = itin; +} +class VSUMW_DESC : SIMD_REDUCE_DESC_BASE<"vsumw", vecreduce_add, v8i32, GPRCOpnd, V256LOpnd>; +class VSUML_DESC : SIMD_REDUCE_DESC_BASE<"vsuml", vecreduce_add, v4i64, GPRCOpnd, V256LOpnd>; + +class VADDWC_ENC : FPFormV<0x1A, 0x00>; + +class VSUMW_ENC : FPFormV_2RV<0x1A, 0x47>; +class VSUML_ENC : FPFormV_2RV<0x1A, 0x48>; + +//--------------------------- Instruction defs ----------------------------------------// + +class SIMD_VMAX_VMINfunc, string instr_asm, SDPatternOperator OpNode, + ValueType vt, RegisterOperand RO> + : FPFormV<0x1A, func>, SIMD_3RR_SAME, IsCommutable; + +def VMAXB : SIMD_VMAX_VMIN<0x1E, "vmaxb", vmax, v32i8, V256LOpnd>; +def VMINB : SIMD_VMAX_VMIN<0x1F, "vminb", vmin, v32i8, V256LOpnd>; +def VMAXH : SIMD_VMAX_VMIN<0x50, "vmaxh", vmax, v16i16, V256LOpnd>; +def VMINH : SIMD_VMAX_VMIN<0x51, "vminh", vmin, v16i16, V256LOpnd>; +def VMAXW : SIMD_VMAX_VMIN<0x52, "vmaxw", vmax, v8i32, V256LOpnd>; +def VMINW : SIMD_VMAX_VMIN<0x53, "vminw", vmin, v8i32, V256LOpnd>; +def VMAXL : SIMD_VMAX_VMIN<0x54, "vmaxl", vmax, v4i64, V256LOpnd>; +def VMINL : SIMD_VMAX_VMIN<0x55, "vminl", vmin, v4i64, V256LOpnd>; + +def VUMAXB : SIMD_VMAX_VMIN<0x56, "vumaxb", vumax, v32i8, V256LOpnd>; +def VUMINB : SIMD_VMAX_VMIN<0x57, "vuminb", vumin, v32i8, V256LOpnd>; +def VUMAXH : SIMD_VMAX_VMIN<0x58, "vumaxh", vumax, v16i16, V256LOpnd>; +def VUMINH : SIMD_VMAX_VMIN<0x59, "vuminh", vumin, v16i16, V256LOpnd>; +def VUMAXW : SIMD_VMAX_VMIN<0x5A, "vumaxw", vumax, v8i32, V256LOpnd>; +def VUMINW : SIMD_VMAX_VMIN<0x5B, "vuminw", vumin, v8i32, V256LOpnd>; +def VUMAXL : 
SIMD_VMAX_VMIN<0x5C, "vumaxl", vumax, v4i64, V256LOpnd>; +def VUMINL : SIMD_VMAX_VMIN<0x5D, "vuminl", vumin, v4i64, V256LOpnd>; + +def VMAXS : SIMD_VMAX_VMIN<0xAC, "vmaxs", vmaxf, v4f32, V256LOpnd>; +def VMINS : SIMD_VMAX_VMIN<0xAD, "vmins", vminf, v4f32, V256LOpnd>; +def VMAXD : SIMD_VMAX_VMIN<0xAE, "vmaxd", vmaxf, v4f64, V256LOpnd>; +def VMIND : SIMD_VMAX_VMIN<0xAF, "vmind", vminf, v4f64, V256LOpnd>; + + +// For VSELXX pattern match with imm operand +multiclass SIMD_VSELXX Opcode, bits<6>func, + string instr_asm, SDPatternOperator OpNode, ValueType Vt, + Operand immtype, RegisterOperand RO, + SDPatternOperator IOp = null_frag, + ComplexPattern cpx = ComplexImmPat> { + + def rr : FForm4V, SIMD_4RV_DESC_SEL; + + def ri : FForm4_VSELi, SIMD_4RI_BASE; + + def : Pat<(Vt (OpNode (Vt RO:$RA), (Vt RO:$RB), (Vt (Sw64VBroadCast (i64 cpx:$Imm))))), + (!cast(NAME # ri) (Vt RO:$RA), (Vt RO:$RB), $Imm)>; +} + +defm VSELEQW : SIMD_VSELXX<0x1B, 0x18, "vseleqw", vseleqw, v8i32, u5imm, V256LOpnd>; +defm VSELLBCW : SIMD_VSELXX<0x1B, 0x19, "vsellbcw", vsellbcw, v8i32, u5imm, V256LOpnd>; +defm VSELLTW : SIMD_VSELXX<0x1B, 0x1A, "vselltw", vselltw, v8i32, u5imm, V256LOpnd>; +defm VSELLEW : SIMD_VSELXX<0x1B, 0x1B, "vsellew", vsellew, v8i32, u5imm, V256LOpnd>; + +class SIMD_ARITH_FLOAT Opcode, bits<8>func, + string instr_asm, SDPatternOperator OpNode, + ValueType Vt, RegisterOperand RO> : + FPFormV, SIMD_3RR_SAME; + +def VADDS : SIMD_ARITH_FLOAT<0x1A, 0x80, "vadds", fadd, v4f32, V256LOpnd>; +def VADDD : SIMD_ARITH_FLOAT<0x1A, 0x81, "vaddd", fadd, v4f64, V256LOpnd>; +def VSUBS : SIMD_ARITH_FLOAT<0x1A, 0x82, "vsubs", fsub, v4f32, V256LOpnd>; +def VSUBD : SIMD_ARITH_FLOAT<0x1A, 0x83, "vsubd", fsub, v4f64, V256LOpnd>; +def VMULS : SIMD_ARITH_FLOAT<0x1A, 0x84, "vmuls", fmul, v4f32, V256LOpnd>; +def VMULD : SIMD_ARITH_FLOAT<0x1A, 0x85, "vmuld", fmul, v4f64, V256LOpnd>; +def VDIVS : SIMD_ARITH_FLOAT<0x1A, 0x86, "vdivs", fdiv, v4f32, V256LOpnd>; +def VDIVD : SIMD_ARITH_FLOAT<0x1A, 0x87, "vdivd", fdiv, v4f64, V256LOpnd>; + + +def vsqrt_sw : SDNode<"Sw64ISD::VSQRT", SDT_VSQRT>; + +class SIMD_VSQRT { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RB, $RC"); + list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROB:$RB)))]; +} + +def VSQRTS : FPFormV_2RV1<0x1A, 0x88>, SIMD_VSQRT<"vsqrts", vsqrt_sw, v4f32, V256LOpnd>; +def VSQRTD : FPFormV_2RV1<0x1A, 0x89>, SIMD_VSQRT<"vsqrtd", vsqrt_sw, v4f64, V256LOpnd>; + +def Sw64VFREC : SDNode<"Sw64ISD::VFREC", SDT_ZVecFREC>; + +def VFRECS : FPFormV_2RV1<0x1A, 0xAA>, SIMD_VSQRT<"vfrecs", Sw64VFREC, v4f32, V256LOpnd>; +def VFRECD : FPFormV_2RV1<0x1A, 0xAB>, SIMD_VSQRT<"vfrecd", Sw64VFREC, v4f64, V256LOpnd>; + +class SIMD_VSUMF { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RC"); + list Pattern = [(set (TyC ROC:$RC), (OpNode (TyA ROA:$RA)))]; +} + +def VFCVTSD : FPFormV_2RV<0x1A, 0x95>, SIMD_VSUMF<"vfcvtsd", Sw64VFCVTSD, v4f64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDS : FPFormV_2RV<0x1A, 0x96>, SIMD_VSUMF<"vfcvtds", Sw64VFCVTDS, v4f32, v4f64, V256LOpnd, V256LOpnd>; +def VFCVTLS : FPFormV_2RV<0x1A, 0x99>, SIMD_VSUMF<"vfcvtls", Sw64VFCVTLS, v4f32, v4i64, V256LOpnd, V256LOpnd>; +def VFCVTLD : FPFormV_2RV<0x1A, 0x9A>, SIMD_VSUMF<"vfcvtld", Sw64VFCVTLD, v4f64, v4i64, V256LOpnd, V256LOpnd>; + +class SIMD_FCVTSH_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROD:$RA, ROD:$RB, ImmOp:$Imm); + string AsmString = 
!strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); + list Pattern = [(set (v4f64 ROD:$RD), (OpNode (v4f32 ROD:$RA), (v4f32 ROD:$RB), Imm:$Imm))]; +} + +class SIMD_FCVTHS_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROD:$RA, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RD"); + list Pattern = [(set (v4f32 ROD:$RD), (OpNode (v4f64 ROD:$RA), Imm:$Imm))]; +} + +def VFCVTSH : FForm4LV<0x1B, 0x35>, SIMD_FCVTSH_DESC_BASE<"vfcvtsh", Sw64VFCVTSH, uimm5, immZExt5Ptr, V256LOpnd>; +def VFCVTHS : FForm4LV1<0x1B, 0x36>, SIMD_FCVTHS_DESC_BASE<"vfcvths", Sw64VFCVTHS, uimm5, immZExt5Ptr, V256LOpnd>; + +def VFCVTDL : FPFormV_2RV<0x1A, 0x9B>, SIMD_VSUMF<"vfcvtdl", Sw64VFCVTDL, v4i64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDLG : FPFormV_2RV<0x1A, 0x9C>, SIMD_VSUMF<"vfcvtdl_g", Sw64VFCVTDLG, v4i64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDLP : FPFormV_2RV<0x1A, 0x9D>, SIMD_VSUMF<"vfcvtdl_p", Sw64VFCVTDLP, v4i64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDLZ : FPFormV_2RV<0x1A, 0x9E>, SIMD_VSUMF<"vfcvtdl_z", Sw64VFCVTDLZ, v4i64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDLN : FPFormV_2RV<0x1A, 0x9F>, SIMD_VSUMF<"vfcvtdl_n", Sw64VFCVTDLN, v4i64, v4f32, V256LOpnd, V256LOpnd>; + +def VFRIS : FPFormV_2RV1<0x1A, 0xA0>, SIMD_VSQRT<"vfris", Sw64VFRIS, v4f32, V256LOpnd>; +def VFRISG : FPFormV_2RV1<0x1A, 0xA1>, SIMD_VSQRT<"vfris_g", Sw64VFRISG, v4f32, V256LOpnd>; +def VFRISP : FPFormV_2RV1<0x1A, 0xA2>, SIMD_VSQRT<"vfris_p", Sw64VFRISP, v4f32, V256LOpnd>; +def VFRISZ : FPFormV_2RV1<0x1A, 0xA3>, SIMD_VSQRT<"vfris_z", Sw64VFRISZ, v4f32, V256LOpnd>; +def VFRISN : FPFormV_2RV1<0x1A, 0xA4>, SIMD_VSQRT<"vfris_n", Sw64VFRISN, v4f32, V256LOpnd>; +def VFRID : FPFormV_2RV1<0x1A, 0xA5>, SIMD_VSQRT<"vfrid", Sw64VFRID, v4f64, V256LOpnd>; +def VFRIDG : FPFormV_2RV1<0x1A, 0xA6>, SIMD_VSQRT<"vfrid_g", Sw64VFRIDG, v4f64, V256LOpnd>; +def VFRIDP : FPFormV_2RV1<0x1A, 0xA7>, SIMD_VSQRT<"vfrid_p", Sw64VFRIDP, v4f64, V256LOpnd>; +def VFRIDZ : FPFormV_2RV1<0x1A, 0xA8>, SIMD_VSQRT<"vfrid_z", Sw64VFRIDZ, v4f64, V256LOpnd>; +def VFRIDN : FPFormV_2RV1<0x1A, 0xA9>, SIMD_VSQRT<"vfrid_n", Sw64VFRIDN, v4f64, V256LOpnd>; + +def vsumf : SDNode<"Sw64ISD::VSUMF", SDT_VSUMF>; + +def VSUMS : FPFormV_2RV<0x1A, 0x93>, SIMD_VSUMF<"vsums", vsumf, f32, v4f32, F4RCOpnd, V256LOpnd>; +def VSUMD : FPFormV_2RV<0x1A, 0x94>, SIMD_VSUMF<"vsumd", vsumf, f64, v4f64, F8RCOpnd, V256LOpnd>; + +// Patterns. 
+class SIMDPat pred = [HasSIMD]> : + Pat, Requires; + +// ------------------------ +class Sw64Pat : Pat; + +// TODO: Add support for FPOpFusion::Standard +def AllowFPOpFusion : Predicate<"TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast">; + +class ASE_SIMD { + list ASEPredicate = [HasSIMD]; +} + + +class FPOP_FUSION_FAST { + list AdditionalPredicates = [AllowFPOpFusion]; +} + + +// Additional VNMSX patterns: -a*b + c == -(a*b - c) +multiclass Vecotr_fma_pat { +def : Vector_3OP_SameReg_Pat(Inst#S)>; + +def : Vector_3OP_SameReg_Pat(Inst#D)>; +} + +defm : Vecotr_fma_pat, "VNMA">; +defm : Vecotr_fma_pat, "VNMA">; + +def : Pat<(int_sw64_vnmsd V256LOpnd:$RA, V256LOpnd:$RB, V256LOpnd:$RC), + (VNMSD $RA, $RB, $RC)>; + +def : Pat<(fneg v4f64:$RA), (VCPYSN $RA, $RA)>; +def : Pat<(fneg v4f32:$RA), (VCPYSN $RA, $RA)>; + +def :Pat<(v4f32 (fadd (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB))), + (VADDD V256LOpnd:$RA, V256LOpnd:$RB)>; + +class bitconvert_pat + : Pat<(dstTy (bitconvert (srcTy V256LOpnd:$RA))), (dstTy V256LOpnd:$RA)>; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + diff --git a/llvm/lib/Target/Sw64/Sw64LLRP.cpp b/llvm/lib/Target/Sw64/Sw64LLRP.cpp new file mode 100644 index 000000000000..d7abbd8204a9 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64LLRP.cpp @@ -0,0 +1,475 @@ +//===-- Sw64LLRP.cpp - Sw64 Load Load Replay Trap elimination pass. -- --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Here we check for potential replay traps introduced by the spiller +// We also align some branch targets if we can do so for free. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sw_64-nops" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64FrameLowering.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; +cl::opt Sw64Mieee("mieee", cl::desc("Support the IEEE754"), + cl::init(true)); + +cl::opt Sw64DeleteNop("sw64-delete-nop", cl::desc("Delete NOP"), + cl::init(true)); + +STATISTIC(nopintro, "Number of nops inserted"); +STATISTIC(nopalign, "Number of nops inserted for alignment"); +namespace llvm { +cl::opt AlignAll("sw_64-align-all", cl::Hidden, + cl::desc("Align all blocks")); + +struct Sw64LLRPPass : public MachineFunctionPass { + // Target machine description which we query for reg. names, data + // layout, etc. 
+ // + Sw64TargetMachine &TM; + + static char ID; + Sw64LLRPPass(Sw64TargetMachine &tm) : MachineFunctionPass(ID), TM(tm) {} + + StringRef getPassName() const { return "Sw64 NOP inserter"; } + + bool runOnMachineFunction(MachineFunction &F) { + const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo(); + bool flag = false; // hasJSR ? + bool Changed = false; + MachineInstr *prev[3] = {0, 0, 0}; + unsigned count = 0; + + DebugLoc dl; + const Sw64Subtarget &Subtarget = F.getSubtarget(); + int curgpdist = Subtarget.getCurgpdist(); + + SmallVector Ops; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + ++MII; + if (MII == MIE) + break; + MachineInstr *MINext = &*MII; + if (MINext->getOpcode() == Sw64::FILLCS || + MINext->getOpcode() == Sw64::FILLDE) { + if (MI->getOpcode() == Sw64::LDA && + (MI->getOperand(1).getImm() == MINext->getOperand(0).getImm())) { + bool isRead = false; + for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { + MachineInstr *Mtest = &*M1; + if (Mtest->getOpcode() == Sw64::LDA || + Mtest->getOpcode() == Sw64::LDAH || + Mtest->getOpcode() == Sw64::LDL || + Mtest->getOpcode() == Sw64::LDW || + Mtest->getOpcode() == Sw64::LDHU || + Mtest->getOpcode() == Sw64::LDBU) { + if (Mtest->getOperand(0).getReg() == + MI->getOperand(0).getReg() && + !isRead) { + Ops.push_back(MI); + break; + } + } + if (Mtest->getOpcode() == Sw64::STL || + Mtest->getOpcode() == Sw64::STW || + Mtest->getOpcode() == Sw64::STH || + Mtest->getOpcode() == Sw64::STB) { + if (Mtest->getOperand(2).getReg() == + MI->getOperand(0).getReg() || + Mtest->getOperand(0).getReg() == + MI->getOperand(0).getReg()) { + isRead = true; + } + } + ++M1; + } + } + } + } + for (auto *PrefMI : Ops) + PrefMI->eraseFromParent(); + Ops.clear(); + } + + // Remove all duplicate prefetch instr + SmallVector FILL; + int Dul; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + ++MII; + Dul = 1; + if (MII == MIE) + break; + if (MI->getOpcode() == Sw64::FILLCS || + MI->getOpcode() == Sw64::FILLCS_E || + MI->getOpcode() == Sw64::FILLDE || + MI->getOpcode() == Sw64::FILLDE_E || + MI->getOpcode() == Sw64::S_FILLDE || + MI->getOpcode() == Sw64::S_FILLCS) { + if (!FILL.empty()) { + for (auto *PrefMI : FILL) { + if (PrefMI->getOperand(1).getReg() == + MI->getOperand(1).getReg()) { + Dul = 2; + break; + } + } + } + if (Dul == 1) { + for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { + MachineInstr *Mtest = &*M1; + if (Mtest->getOpcode() == Sw64::FILLCS || + Mtest->getOpcode() == Sw64::FILLCS_E || + Mtest->getOpcode() == Sw64::FILLDE || + Mtest->getOpcode() == Sw64::FILLDE_E || + Mtest->getOpcode() == Sw64::S_FILLCS || + Mtest->getOpcode() == Sw64::S_FILLDE) { + if (Mtest->getOperand(1).getReg() == + MI->getOperand(1).getReg()) { + FILL.push_back(Mtest); + } + } + ++M1; + } + } + } + } + if (!FILL.empty()) { + for (auto *PrefMI1 : FILL) + PrefMI1->eraseFromParent(); + } + FILL.clear(); + } + + // If read and write, use fillde + int N = 0; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE;) { + MachineBasicBlock &MBB = *FI; + ++FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + ++MII; + 
if (MII == MIE) + break; + if (MI->getOpcode() == Sw64::FILLCS || + MI->getOpcode() == Sw64::S_FILLCS) { + for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { + MachineInstr *Mtest = &*M1; + if (Mtest->getOpcode() == Sw64::LDA || + Mtest->getOpcode() == Sw64::LDAH || + Mtest->getOpcode() == Sw64::LDL || + Mtest->getOpcode() == Sw64::LDW || + Mtest->getOpcode() == Sw64::LDHU || + Mtest->getOpcode() == Sw64::LDBU) { + if (Mtest->getOperand(0).getReg() == MI->getOperand(1).getReg()) { + N = 1; + } + } + ++M1; + } + if (FI == FE) + break; + MachineBasicBlock &MBB1 = *FI; + for (MachineBasicBlock::iterator MII1 = MBB1.begin(), + MIE1 = MBB1.end(); + MII1 != MIE1;) { + MachineInstr *MI1 = &*MII1; + if (MI1->getOpcode() == Sw64::STL || + MI1->getOpcode() == Sw64::STW || + MI1->getOpcode() == Sw64::STB || + MI1->getOpcode() == Sw64::STH) { + if (MI1->getOperand(2).getReg() == MI->getOperand(1).getReg() && + N == 0) { + if (MI->getOpcode() == Sw64::FILLCS) + MI->setDesc(TII->get(Sw64::FILLDE)); + if (MI->getOpcode() == Sw64::S_FILLCS) + MI->setDesc(TII->get(Sw64::S_FILLDE)); + N = 0; + } + } + ++MII1; + } + } + } + } + + const TargetRegisterInfo *TRI = F.getSubtarget().getRegisterInfo(); + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + ++MII; + if (MII == MIE) + break; + if (MI->getOpcode() == Sw64::FILLCS || + MI->getOpcode() == Sw64::FILLDE) { + int N = 0; + int isDul = 0; + for (MachineBasicBlock::iterator MIT = MII; MIT != MIE;) { + MachineInstr *MITT = &*MIT; + if (MITT->readsRegister(MI->getOperand(1).getReg(), TRI)) { + N++; + } + if (MITT->getOpcode() == Sw64::FILLCS || + MITT->getOpcode() == Sw64::FILLDE || + MITT->getOpcode() == Sw64::FILLCS_E || + MITT->getOpcode() == Sw64::FILLDE_E) + isDul++; + ++MIT; + } + if (N == 1 && isDul > 0) { + if (MI->getOpcode() == Sw64::FILLCS) + MI->setDesc(TII->get(Sw64::FILLCS_E)); + if (MI->getOpcode() == Sw64::FILLDE) { + MI->setDesc(TII->get(Sw64::FILLDE_E)); + } + } + } + } + } + + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + if (MI->getOpcode() == Sw64::FILLCS || + MI->getOpcode() == Sw64::S_FILLCS) { + for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { + MachineInstr *Mtest = &*M1; + if (Mtest->getOpcode() == Sw64::STL || + Mtest->getOpcode() == Sw64::STW || + Mtest->getOpcode() == Sw64::STH || + Mtest->getOpcode() == Sw64::STB) { + if (Mtest->getOperand(2).getReg() == MI->getOperand(1).getReg()) { + if (MI->getOpcode() == Sw64::FILLCS) + MI->setDesc(TII->get(Sw64::FILLDE)); + if (MI->getOpcode() == Sw64::S_FILLCS) + MI->setDesc(TII->get(Sw64::S_FILLDE)); + } + } + ++M1; + } + } + ++MII; + } + } + + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + + int count = 0; + bool isLable = 0; + if (MBB.getBasicBlock() && MBB.getBasicBlock()->isLandingPad()) { + MachineBasicBlock::iterator MBBI = MBB.begin(); + for (MBBI = MBB.begin(); MBBI != MBB.end(); ++MBBI, ++count) { + if (count == 0 && MBBI->isLabel()) + isLable = true; + if (count == 1 && isLable) { + BuildMI(MBB, MBBI, dl, TII->get(Sw64::MOVaddrPCGp)) + .addGlobalAddress(&(F.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R26); + isLable = false; + } + } 
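// Both the landing-pad case above and the JSR case below deal with the same
// problem: $gp ($29) has no guaranteed value after a call or on an exception
// edge, so it has to be rematerialized from the return address in $26.
// MOVaddrPCGp is the pseudo that does this; each site gets a fresh
// ++curgpdist sequence number so the assembler can pair the high/low !gpdisp
// relocations of one reload without mixing up neighbouring sites. The
// expansion is assumed to look roughly like (not verbatim from this patch):
//   ldah $29, 0($26)   !gpdisp!N
//   lda  $29, 0($29)   !gpdisp!N
// With Sw64Mieee set, a NOP is also kept next to each JSR unless
// Sw64DeleteNop is enabled.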
+ if (count == 1 && isLable) { + BuildMI(MBB, MBBI, dl, TII->get(Sw64::MOVaddrPCGp)) + .addGlobalAddress(&(F.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R26); + isLable = false; + } + } + + MachineBasicBlock::iterator I; + for (I = MBB.begin(); I != MBB.end(); ++I) { + if (flag) { + BuildMI(MBB, I, dl, TII->get(Sw64::MOVaddrPCGp)) + .addGlobalAddress(&(F.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R26); + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, I, dl, TII->get(Sw64::NOP)); + } + flag = false; + } + if (I->getOpcode() == Sw64::JSR || + I->getOpcode() == Sw64::PseudoCallIndirect) { + dl = MBB.findDebugLoc(I); + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, I, dl, TII->get(Sw64::NOP)); + } + flag = true; + } + } + if (flag) { + BuildMI(MBB, I, dl, TII->get(Sw64::MOVaddrPCGp)) + .addGlobalAddress(&(F.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R26); + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, I, dl, TII->get(Sw64::NOP)); + } + flag = false; + } + } + + if (!Sw64DeleteNop) { + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + bool ub = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { + if (count % 4 == 0) + prev[0] = prev[1] = prev[2] = 0; // Slots cleared at fetch boundary + ++count; + MachineInstr *MI = &(*I); + I++; + switch (MI->getOpcode()) { + case Sw64::LDL: + case Sw64::LDW: + case Sw64::LDHU: + case Sw64::LDBU: + case Sw64::LDD: + case Sw64::LDS: + case Sw64::STL: + case Sw64::STW: + case Sw64::STH: + case Sw64::STB: + case Sw64::STD: + case Sw64::STS: + dl = MBB.findDebugLoc(MI); + if (MI->getOperand(2).getReg() == Sw64::R30) { + if (prev[0] && + prev[0]->getOperand(2).getReg() == + MI->getOperand(2).getReg() && + prev[0]->getOperand(1).getImm() == + MI->getOperand(1).getImm()) { + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + Changed = true; + nopintro += 1; + count += 1; + } else if (prev[1] && + prev[1]->getOperand(2).getReg() == + MI->getOperand(2).getReg() && + prev[1]->getOperand(1).getImm() == + MI->getOperand(1).getImm()) { + prev[0] = prev[2]; + prev[1] = prev[2] = 0; + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + Changed = true; + nopintro += 2; + count += 2; + } else if (prev[2] && + prev[2]->getOperand(2).getReg() == + MI->getOperand(2).getReg() && + prev[2]->getOperand(1).getImm() == + MI->getOperand(1).getImm()) { + prev[0] = prev[1] = prev[2] = 0; + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + Changed = true; + nopintro += 3; + count += 3; + } + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = MI; + break; + } + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + break; + case Sw64::ALTENT: + case Sw64::MEMLABEL: + case Sw64::PCLABEL: + --count; + break; + case Sw64::BR: + case Sw64::PseudoBR: + case Sw64::JMP: + ub = true; + // fall through + default: + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + break; + } + } + if (ub || AlignAll) { + // we can align stuff for free at this point + 
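// count tracks how many instructions have been emitted since the last
// 4-instruction fetch boundary (the prev[] conflict slots are likewise
// cleared whenever count % 4 == 0). If the block ends in an unconditional
// branch, or AlignAll is set, the tail is padded with BISr $31,$31,$31 (the
// canonical integer NOP) until count is a multiple of 4 again, so the next
// block starts fetch-aligned. A block whose count ends at 6, for example,
// receives two padding NOPs.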
while (count % 4) { + BuildMI(MBB, MBB.end(), dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + ++count; + ++nopalign; + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + } + } + } + } + return Changed; + } +}; +char Sw64LLRPPass::ID = 0; +} // namespace llvm + +FunctionPass *llvm::createSw64LLRPPass(Sw64TargetMachine &tm) { + return new Sw64LLRPPass(tm); +} diff --git a/llvm/lib/Target/Sw64/Sw64MCInstLower.cpp b/llvm/lib/Target/Sw64/Sw64MCInstLower.cpp new file mode 100644 index 000000000000..8a839ad57bb9 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64MCInstLower.cpp @@ -0,0 +1,281 @@ +//===-- Sw64MCInstLower.cpp - Convert Sw64 MachineInstr to MCInst -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Sw64 MachineInstrs to their +// corresponding MCInst records. +// +//===----------------------------------------------------------------------===// +#include "Sw64MCInstLower.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "Sw64.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" + +using namespace llvm; + +#include "Sw64GenInstrInfo.inc" + +namespace llvm { +struct Sw64InstrTable { + MCInstrDesc Insts[1000]; +}; +extern const Sw64InstrTable Sw64Descs; +} // namespace llvm + +Sw64MCInstLower::Sw64MCInstLower(class AsmPrinter &asmprinter) + : Printer(asmprinter) {} + +void Sw64MCInstLower::Initialize(MCContext *C) { Ctx = C; } + +static bool lowerLitUseMOp(const MachineOperand &MO, + Sw64MCExpr::Sw64ExprKind &Kind) { + Sw64MCExpr::Sw64ExprKind TargetKind = Sw64MCExpr::MEK_None; + unsigned flags = MO.getTargetFlags(); + if (flags & Sw64II::MO_LITERAL && flags & Sw64II::MO_LITERAL_BASE) { + TargetKind = Sw64MCExpr::MEK_LITUSE_BASE; + } else if (flags & Sw64II::MO_HINT && flags & Sw64II::MO_LITUSE) { + TargetKind = Sw64MCExpr::MEK_LITUSE_JSRDIRECT; + } else + return false; + + Kind = TargetKind; + return true; +} + +MCOperand Sw64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, + unsigned Offset) const { + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + Sw64MCExpr::Sw64ExprKind TargetKind = Sw64MCExpr::MEK_None; + const MCSymbol *Symbol; + + switch (MO.getTargetFlags()) { + default: + if (lowerLitUseMOp(MO, TargetKind)) + break; + llvm_unreachable("Invalid target flag!"); + case Sw64II::MO_NO_FLAG: + TargetKind = Sw64MCExpr::MEK_None; + break; + case Sw64II::MO_GPDISP_HI: + TargetKind = Sw64MCExpr::MEK_GPDISP_HI16; + break; + case Sw64II::MO_GPDISP_LO: + TargetKind = Sw64MCExpr::MEK_GPDISP_LO16; + break; + case Sw64II::MO_GPREL_HI: + TargetKind = Sw64MCExpr::MEK_GPREL_HI16; + break; + case Sw64II::MO_GPREL_LO: + TargetKind = Sw64MCExpr::MEK_GPREL_LO16; + break; + case Sw64II::MO_ABS_LO: + case Sw64II::MO_LITERAL: + TargetKind = Sw64MCExpr::MEK_ELF_LITERAL; + break; + case Sw64II::MO_LITERAL_GOT: + TargetKind = Sw64MCExpr::MEK_ELF_LITERAL_GOT; + break; + case Sw64II::MO_TPREL_HI: + TargetKind = Sw64MCExpr::MEK_TPREL_HI16; + break; + case Sw64II::MO_TPREL_LO: + 
TargetKind = Sw64MCExpr::MEK_TPREL_LO16; + break; + case Sw64II::MO_TLSGD: + TargetKind = Sw64MCExpr::MEK_TLSGD; + break; + case Sw64II::MO_TLSLDM: + TargetKind = Sw64MCExpr::MEK_TLSLDM; + break; + case Sw64II::MO_GOTTPREL: + TargetKind = Sw64MCExpr::MEK_GOTTPREL16; + break; + case Sw64II::MO_DTPREL_HI: + TargetKind = Sw64MCExpr::MEK_DTPREL_HI16; + break; + case Sw64II::MO_DTPREL_LO: + TargetKind = Sw64MCExpr::MEK_DTPREL_LO16; + break; + case Sw64II::MO_HINT: + TargetKind = Sw64MCExpr::MEK_HINT; + } + + switch (MOTy) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + case MachineOperand::MO_GlobalAddress: + Symbol = Printer.getSymbol(MO.getGlobal()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_BlockAddress: + Symbol = Printer.GetBlockAddressSymbol(MO.getBlockAddress()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_ExternalSymbol: + Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_JumpTableIndex: + Symbol = Printer.GetJTISymbol(MO.getIndex()); + break; + case MachineOperand::MO_ConstantPoolIndex: + Symbol = Printer.GetCPISymbol(MO.getIndex()); + Offset += MO.getOffset(); + break; + default: + llvm_unreachable(""); + } + + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); + + if (Offset) { + // Assume offset is never negative. + assert(Offset > 0); + + Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx), + *Ctx); + } + + if (TargetKind != Sw64MCExpr::MEK_None) + Expr = Sw64MCExpr::create(TargetKind, Expr, *Ctx); + + return MCOperand::createExpr(Expr); +} + +MCOperand Sw64MCInstLower::LowerOperand(const MachineOperand &MO, + unsigned offset) const { + MachineOperandType MOTy = MO.getType(); + + switch (MOTy) { + default: + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. 
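// Implicit register operands only describe dataflow for the register
// allocator and the scheduler (for example a call's implicit def of the
// return-address register $26); they are not part of the printed or encoded
// instruction, so copying them into the MCInst would produce malformed
// output.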
+ if (MO.isImplicit()) + break; + return MCOperand::createReg(MO.getReg()); + case MachineOperand::MO_Immediate: + return MCOperand::createImm(MO.getImm() + offset); + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand(MO, MOTy, offset); + case MachineOperand::MO_RegisterMask: + break; + } + + return MCOperand(); +} + +void Sw64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp = LowerOperand(MO); + + if (MCOp.isValid()) + OutMI.addOperand(MCOp); + } +} + +static MCOperand lowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, unsigned Offset, + const AsmPrinter &AP) { + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + Sw64MCExpr::Sw64ExprKind TargetKind = Sw64MCExpr::MEK_None; + const MCSymbol *Symbol; + MCContext &Ctx = AP.OutContext; + + switch (MOTy) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + case MachineOperand::MO_GlobalAddress: + Symbol = AP.getSymbol(MO.getGlobal()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_BlockAddress: + Symbol = AP.GetBlockAddressSymbol(MO.getBlockAddress()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_ExternalSymbol: + Symbol = AP.GetExternalSymbolSymbol(MO.getSymbolName()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_JumpTableIndex: + Symbol = AP.GetJTISymbol(MO.getIndex()); + break; + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AP.GetCPISymbol(MO.getIndex()); + Offset += MO.getOffset(); + break; + default: + llvm_unreachable(""); + } + + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, Ctx); + + if (Offset) { + // Assume offset is never negative. + assert(Offset > 0); + + Expr = + MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); + } + + if (TargetKind != Sw64MCExpr::MEK_None) + Expr = Sw64MCExpr::create(TargetKind, Expr, Ctx); + + return MCOperand::createExpr(Expr); +} + +bool llvm::LowerSw64MachineOperandToMCOperand(const MachineOperand &MO, + MCOperand &MCOp, + const AsmPrinter &AP) { + switch (MO.getType()) { + default: + report_fatal_error("LowerSw64MachineInstrToMCInst: unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + return false; + MCOp = MCOperand::createReg(MO.getReg()); + break; + case MachineOperand::MO_RegisterMask: + // Regmasks are like implicit defs. 
+ return false; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + return false; + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + MCOp = lowerSymbolOperand(MO, MO.getType(), 0, AP); + return false; + } + return true; +} diff --git a/llvm/lib/Target/Sw64/Sw64MCInstLower.h b/llvm/lib/Target/Sw64/Sw64MCInstLower.h new file mode 100644 index 000000000000..7a8dfee7bb0b --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64MCInstLower.h @@ -0,0 +1,44 @@ +//===-- Sw64MCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64MCINSTLOWER_H +#define LLVM_LIB_TARGET_SW64_SW64MCINSTLOWER_H +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCContext; +class MCInst; +class MCOperand; +class MachineInstr; +class MachineFunction; +class Mangler; +class AsmPrinter; + +typedef MachineOperand::MachineOperandType MachineOperandType; +// This class is used to lower an MachineInstr into an MCInst. +class LLVM_LIBRARY_VISIBILITY Sw64MCInstLower { + MCContext *Ctx; + AsmPrinter &Printer; + +public: + Sw64MCInstLower(class AsmPrinter &asmprinter); + void Initialize(MCContext *C); + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + MCOperand LowerOperand(const MachineOperand &MO, unsigned offset = 0) const; + + void lowerMemory(const MachineInstr *MI, MCInst &OutMI) const; + +private: + MCOperand LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, unsigned Offset) const; +}; +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp new file mode 100644 index 000000000000..54a53e2bc589 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp @@ -0,0 +1,33 @@ +//===-- Sw64MachineFunctionInfo.cpp - Sw64 machine function info --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64MachineFunctionInfo.h" +#include "Sw64InstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +void Sw64MachineFunctionInfo::anchor() {} + +bool Sw64MachineFunctionInfo::isLargeFrame(const MachineFunction &MF) const { + if (CachedEStackSize == -1) { + CachedEStackSize = MF.getFrameInfo().estimateStackSize(MF); + } + // isLargeFrame() is used when deciding if spill slots should be added to + // allow eliminateFrameIndex() to scavenge registers. + // This is only required when there is no FP and offsets are greater than + // ~256KB (~64Kwords). Thus only for code run on the emulator! + // + // The arbitrary value of 0xf000 allows frames of up to ~240KB before spill + // slots are added for the use of eliminateFrameIndex() register scavenging. + // For frames less than 240KB, it is assumed that there will be less than + // 16KB of function arguments. 
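// For concreteness: 0xf000 is 61440, and the ~240KB figure above assumes
// 4-byte words (61440 words is about 240KB). MachineFrameInfo::
// estimateStackSize() reports bytes, so on Sw64 this threshold amounts to a
// conservative cutoff of roughly 60KB before the extra scavenging spill
// slots are requested.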
+  return CachedEStackSize > 0xf000;
+}
diff --git a/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h
new file mode 100644
index 000000000000..714ecef2f50d
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h
@@ -0,0 +1,69 @@
+//===- Sw64MachineFunctionInfo.h - Sw64 machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Sw64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SW64_SW64MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_SW64_SW64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include
+#include
+#include
+
+namespace llvm {
+
+// Sw64MachineFunctionInfo - This class is derived from MachineFunctionInfo and
+// contains private Sw64 target-specific information for each MachineFunction.
+class Sw64MachineFunctionInfo : public MachineFunctionInfo {
+private:
+  // GlobalBaseReg - keeps track of the virtual register initialized for
+  // use as the global base register. This is used for PIC in some PIC
+  // relocation models.
+  unsigned GlobalBaseReg;
+
+  // GlobalRetAddr - keeps track of the virtual register initialized for
+  // the return address value.
+  unsigned GlobalRetAddr;
+
+  // VarArgsOffset - What is the offset to the first vaarg
+  int VarArgsOffset;
+  // VarArgsBase - What is the base FrameIndex
+  int VarArgsBase;
+
+  virtual void anchor();
+  mutable int CachedEStackSize = -1;
+
+public:
+  Sw64MachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI)
+      : GlobalBaseReg(0), GlobalRetAddr(0), VarArgsOffset(0), VarArgsBase(0) {}
+
+  //~Sw64MachineFunctionInfo() override;
+
+  bool globalBaseRegSet() const;
+  unsigned getGlobalBaseReg(MachineFunction &MF) const { return GlobalBaseReg; }
+  void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+  bool globalRetAddrSet() const;
+  void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; }
+  unsigned getGlobalRetAddr(MachineFunction &MF) const { return GlobalRetAddr; }
+
+  int getVarArgsOffset() const { return VarArgsOffset; }
+  void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; }
+
+  int getVarArgsBase() const { return VarArgsBase; }
+  void setVarArgsBase(int Base) { VarArgsBase = Base; }
+  bool isLargeFrame(const MachineFunction &MF) const;
+};
+} // end namespace llvm
+#endif // LLVM_LIB_TARGET_SW64_SW64MACHINEFUNCTIONINFO_H
diff --git a/llvm/lib/Target/Sw64/Sw64MacroFusion.cpp b/llvm/lib/Target/Sw64/Sw64MacroFusion.cpp
new file mode 100644
index 000000000000..d349665abccd
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64MacroFusion.cpp
@@ -0,0 +1,65 @@
+//===- Sw64MacroFusion.cpp - Sw64 Macro Fusion ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sw64 implementation of the DAG scheduling
+// mutation to pair instructions back to back.
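// Concretely, the mutation keeps a compare and the conditional branch that
// consumes it adjacent in the schedule so the core can fuse them, e.g.
// (operand order sketched, not verbatim):
//   cmpeq $1, $2, $3
//   beq   $3, .LBB0_2
// It only fires on core4 parts (see hasCore4() below) and, as the companion
// header notes, it takes effect once Sw64PassConfig::createMachineScheduler()
// registers it:
//   DAG.addMutation(createSw64MacroFusionDAGMutation());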
+// +//===----------------------------------------------------------------------===// + +#include "Sw64MacroFusion.h" +#include "Sw64Subtarget.h" +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +using namespace llvm; + +// CMPxx followed by BEQ/BNE +static bool isCmpBqPair(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != Sw64::BEQ && SecondMI.getOpcode() != Sw64::BNE) + return false; + + // Assume the 1st instr to be a wildcard if it is unspecified. + if (FirstMI == nullptr) + return true; + + switch (FirstMI->getOpcode()) { + case Sw64::CMPEQr: + case Sw64::CMPEQi: + case Sw64::CMPLTr: + case Sw64::CMPLTi: + case Sw64::CMPLEr: + case Sw64::CMPLEi: + case Sw64::CMPULTr: + case Sw64::CMPULTi: + case Sw64::CMPULEr: + case Sw64::CMPULEi: + return true; + } + + return false; +} + +// Check if the instr pair, FirstMI and SecondMI, should be fused +// together. Given SecondMI, when FirstMI is unspecified, then check if +// SecondMI may be part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const Sw64Subtarget &ST = static_cast(TSI); + + if (ST.hasCore4() && isCmpBqPair(FirstMI, SecondMI)) + return true; + + return false; +} + +std::unique_ptr llvm::createSw64MacroFusionDAGMutation() { + return createMacroFusionDAGMutation(shouldScheduleAdjacent); +} diff --git a/llvm/lib/Target/Sw64/Sw64MacroFusion.h b/llvm/lib/Target/Sw64/Sw64MacroFusion.h new file mode 100644 index 000000000000..92a6faf1f5bd --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64MacroFusion.h @@ -0,0 +1,28 @@ +//===- Sw64MacroFusion.h - Sw64 Macro Fusion ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 definition of the DAG scheduling +// mutation to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64MACROFUSION_H +#define LLVM_LIB_TARGET_SW64_SW64MACROFUSION_H + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +// Note that you have to add: +// DAG.addMutation(createSw64MacroFusionDAGMutation()); +// to Sw64PassConfig::createMachineScheduler() to have an effect. +std::unique_ptr createSw64MacroFusionDAGMutation(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_SW64MACROFUSION_H diff --git a/llvm/lib/Target/Sw64/Sw64OptionRecord.h b/llvm/lib/Target/Sw64/Sw64OptionRecord.h new file mode 100644 index 000000000000..81a4c4d63c82 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64OptionRecord.h @@ -0,0 +1,67 @@ +//===- Sw64OptionRecord.h - Abstraction for storing information -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Sw64OptionRecord - Abstraction for storing arbitrary information in +// ELF files. Arbitrary information (e.g. register usage) can be stored in Sw64 +// specific ELF sections like .Sw64.options. 
Specific records should subclass +// Sw64OptionRecord and provide an implementation to EmitSw64OptionRecord which +// basically just dumps the information into an ELF section. More information +// about .Sw64.option can be found in the SysV ABI and the 64-bit ELF Object +// specification. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64OPTIONRECORD_H +#define LLVM_LIB_TARGET_SW64_SW64OPTIONRECORD_H + +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include + +namespace llvm { + +class Sw64ELFStreamer; + +class Sw64OptionRecord { +public: + virtual ~Sw64OptionRecord() = default; + + virtual void EmitSw64OptionRecord() = 0; +}; + +class Sw64RegInfoRecord : public Sw64OptionRecord { +public: + Sw64RegInfoRecord(Sw64ELFStreamer *S, MCContext &Context) + : Streamer(S), Context(Context) { + + const MCRegisterInfo *TRI = Context.getRegisterInfo(); + GPRCRegClass = &(TRI->getRegClass(Sw64::GPRCRegClassID)); + F4RCRegClass = &(TRI->getRegClass(Sw64::F4RCRegClassID)); + F8RCRegClass = &(TRI->getRegClass(Sw64::F8RCRegClassID)); + V256LRegClass = &(TRI->getRegClass(Sw64::V256LRegClassID)); + } + + ~Sw64RegInfoRecord() override = default; + + void EmitSw64OptionRecord() override; + void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo); + +private: + Sw64ELFStreamer *Streamer; + MCContext &Context; + const MCRegisterClass *GPRCRegClass; + const MCRegisterClass *F4RCRegClass; + const MCRegisterClass *F8RCRegClass; + const MCRegisterClass *V256LRegClass; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_SW64OPTIONRECORD_H diff --git a/llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp b/llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp new file mode 100644 index 000000000000..5790ce81fc04 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp @@ -0,0 +1,96 @@ +//=== lib/CodeGen/GlobalISel/Sw64PreLegalizerCombiner.cpp --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass does combining of machine instructions at the generic MI level, +// before the legalizer. 
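// At the moment combine() performs no rewrites (it always returns false), so
// this file is scaffolding that wires a GlobalISel combiner into the Sw64
// pipeline; target-specific combine rules can be added to it later. The
// hookup on the target side is assumed to look like this sketch (not part of
// this file):
//   void Sw64PassConfig::addPreLegalizeMachineIR() {
//     addPass(createSw64PreLegalizeCombiner());
//   }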
+// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetMachine.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "sw_64-prelegalizer-combiner" + +using namespace llvm; + +namespace { +class Sw64PreLegalizerCombinerInfo : public CombinerInfo { +public: + Sw64PreLegalizerCombinerInfo() + : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false, + /*EnableOptSize*/ false, /*EnableMinSize*/ false) {} + + virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, + MachineIRBuilder &B) const override; +}; + +bool Sw64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, + MachineInstr &MI, + MachineIRBuilder &B) const { + return false; +} + +// Pass boilerplate +// ================ + +class Sw64PreLegalizerCombiner : public MachineFunctionPass { +public: + static char ID; + + Sw64PreLegalizerCombiner(); + + StringRef getPassName() const override { return "Sw64PreLegalizerCombiner"; } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; +} // end anonymous namespace + +void Sw64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +Sw64PreLegalizerCombiner::Sw64PreLegalizerCombiner() : MachineFunctionPass(ID) { + initializeSw64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); +} + +bool Sw64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + auto *TPC = &getAnalysis(); + Sw64PreLegalizerCombinerInfo PCInfo; + Combiner C(PCInfo, TPC); + return C.combineMachineInstrs(MF, nullptr); +} + +char Sw64PreLegalizerCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(Sw64PreLegalizerCombiner, DEBUG_TYPE, + "Combine Sw64 machine instrs before legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(Sw64PreLegalizerCombiner, DEBUG_TYPE, + "Combine Sw64 machine instrs before legalization", false, + false) + +namespace llvm { +FunctionPass *createSw64PreLegalizeCombiner() { + return new Sw64PreLegalizerCombiner(); +} +} // end namespace llvm diff --git a/llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp b/llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp new file mode 100644 index 000000000000..ce4be089ba09 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp @@ -0,0 +1,296 @@ +//===-- Sw64RegisterInfo.cpp - Sw64 Register Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of the MRegisterInfo class. 
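// Register conventions relied on below: $30 is the stack pointer, $15 the
// frame pointer, $29 the global pointer, $26 the return address, and $31,
// $f31 and $v31 read as zero. getReservedRegs() reserves $29-$31 and the
// zero registers unconditionally, reserves $15 only while a frame pointer is
// needed (unless EnableOptReg is off, in which case $15 and $28 are always
// reserved), and reserves $14 as a base pointer when the stack has to be
// realigned around variable-sized objects. A typical query (sketch):
//   BitVector Reserved = TRI.getReservedRegs(MF);
//   bool FPReserved = Reserved.test(Sw64::R15);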
+// +//===----------------------------------------------------------------------===// + +#include "Sw64RegisterInfo.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "Sw64.h" +#include "Sw64InstrInfo.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/TargetParser/Triple.h" + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-reg-info" + +#define GET_REGINFO_TARGET_DESC +#include "Sw64GenRegisterInfo.inc" + +static cl::opt EnableOptReg("enable-sw64-opt-reg", + cl::desc("Enalbe R15/R28 reg alloc on SW64"), + cl::init(true), cl::Hidden); + +Sw64RegisterInfo::Sw64RegisterInfo() : Sw64GenRegisterInfo(Sw64::R26) {} + +// helper functions +static long getUpper16(long l) { + long y = l / Sw64::IMM_MULT; + if (l % Sw64::IMM_MULT > Sw64::IMM_HIGH) + ++y; + return y; +} + +static long getLower16(long l) { + long h = getUpper16(l); + return l - h * Sw64::IMM_MULT; +} + +const uint16_t * +Sw64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + + return CSR_F64_SaveList; +} + +BitVector Sw64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const Sw64FrameLowering *TFI = getFrameLowering(MF); + if (EnableOptReg) { + if (TFI->hasFP(MF)) + Reserved.set(Sw64::R15); + } else { + Reserved.set(Sw64::R15); + Reserved.set(Sw64::R28); + } + Reserved.set(Sw64::R29); + Reserved.set(Sw64::R30); + Reserved.set(Sw64::R31); + Reserved.set(Sw64::F31); + Reserved.set(Sw64::V31); + for (size_t i = 0; i < Sw64::GPRCRegClass.getNumRegs(); ++i) { + if (MF.getSubtarget().isRegisterReserved(i)) { + StringRef RegName("$" + std::to_string(i)); + Reserved.set( + MF.getSubtarget().getTargetLowering()->MatchRegName( + RegName)); + } + } + + // hasBP + if (hasStackRealignment(MF) && MF.getFrameInfo().hasVarSizedObjects()) + Reserved.set(Sw64::R14); + + return Reserved; +} + +const u_int32_t * +Sw64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const { + return CSR_F64_RegMask; +} + +const TargetRegisterClass * +Sw64RegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + Sw64PtrClass PtrClassKind = static_cast(Kind); + + switch (PtrClassKind) { + case Sw64PtrClass::Default: + return &Sw64::GPRCRegClass; + case Sw64PtrClass::StackPointer: + return &Sw64::SP64RegClass; + case Sw64PtrClass::GlobalPointer: + return &Sw64::GP64RegClass; + } + + llvm_unreachable("Unknown pointer kind"); +} + +bool Sw64RegisterInfo::requiresRegisterScavenging( + const MachineFunction &MF) const { + return true; +} +bool Sw64RegisterInfo::requiresFrameIndexScavenging( + const MachineFunction &MF) const { + return true; +} +bool Sw64RegisterInfo::trackLivenessAfterRegAlloc( + const MachineFunction &MF) const { + return true; +} + +bool Sw64RegisterInfo::useFPForScavengingIndex( + const MachineFunction &MF) const { + return 
false; +} + +void Sw64RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, + unsigned OpNo, int FrameIndex, + uint64_t StackSize, int64_t SPOffset) const { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + const Sw64InstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + const Sw64RegisterInfo *RegInfo = static_cast( + MF.getSubtarget().getRegisterInfo()); + + unsigned i = OpNo; + int MinCSFI = 0; + int MaxCSFI = -1; + + const std::vector &CSI = MFI.getCalleeSavedInfo(); + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) + FrameReg = Sw64::R30; + else if (RegInfo->hasStackRealignment(MF)) { + if (MFI.hasVarSizedObjects() && !MFI.isFixedObjectIndex(FrameIndex)) + FrameReg = Sw64::R14; + else if (MFI.isFixedObjectIndex(FrameIndex)) + FrameReg = getFrameRegister(MF); + else + FrameReg = Sw64::R30; + } else + FrameReg = getFrameRegister(MF); + + // Calculate final offset. + // - There is no need to change the offset if the frame object is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. + int64_t Offset = SPOffset + (int64_t)StackSize; + const MCInstrDesc &MCID = TII.get(MI.getOpcode()); + if (MI.getNumOperands() > 2 && MI.getOperand(2).isImm()) { + if (MCID.mayLoad() || MCID.mayStore()) + Offset += MI.getOperand(2).getImm(); + } + + if (MI.getOperand(1).isImm()) + Offset += MI.getOperand(1).getImm(); + + if (MI.isDebugValue()) + MI.getOperand(i + 1).ChangeToRegister(FrameReg, false); + else + MI.getOperand(2).ChangeToRegister(FrameReg, false); + + LLVM_DEBUG(errs() << "Offset : " << Offset << "\n" + << "<--------->\n"); + + // Now add the frame object offset to the offset from the virtual frame index. 
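// When the folded offset does not fit the signed 16-bit displacement range
// (IMM_LOW .. IMM_HIGH), it is split so that
//   Offset == getUpper16(Offset) * IMM_MULT + getLower16(Offset)
// with the low half kept inside the displacement range. Assuming
// IMM_MULT == 65536 and IMM_HIGH == 32767, an Offset of 0x12345 splits into
// an upper part of 1 and a lower part of 0x2345, i.e. roughly:
//   ldah $tmp, 1($fp)      // $tmp = $fp + 1 * 65536
//   ldl  $dst, 0x2345($tmp)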
+ if (Offset > Sw64::IMM_HIGH || Offset < Sw64::IMM_LOW) { + LLVM_DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: " + << Offset << "\n"); + // so in this case, we need to use a temporary register, and move the + // original inst off the SP/FP + // fix up the old: + MachineInstr *nMI; + bool FrameRegIsKilled = false; + // insert the new + Register vreg = MF.getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + if (MI.getOperand(1).getTargetFlags() == 15) { + nMI = BuildMI(MF, MI.getDebugLoc(), TII.get(Sw64::LDAH), vreg) + .addImm(getUpper16(Offset)) + .addReg(FrameReg); + FrameRegIsKilled = true; + } else { + nMI = BuildMI(MF, MI.getDebugLoc(), TII.get(Sw64::LDAH), vreg) + .addImm(getUpper16(Offset)) + .addReg(FrameReg); + FrameRegIsKilled = true; + } + + MBB.insert(II, nMI); + MI.getOperand(2).ChangeToRegister(vreg, false, false, FrameRegIsKilled); + MI.getOperand(1).ChangeToImmediate(getLower16(Offset)); + } else { + if (MI.isDebugValue()) + MI.getOperand(i + 1).ChangeToImmediate(Offset); + else + MI.getOperand(1).ChangeToImmediate(Offset); + } +} + +// FrameIndex represent objects inside a abstract stack. +// We must replace FrameIndex with an stack/frame pointer +// direct reference. +bool Sw64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + + LLVM_DEBUG(errs() << "\nFunction : " << MF.getName() << "\n"; + errs() << "<--------->\n" + << MI); + + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + uint64_t stackSize = MF.getFrameInfo().getStackSize(); + int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex); + + LLVM_DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" + << "spOffset : " << spOffset << "\n" + << "stackSize : " << stackSize << "\n" + << "alignment : " + << DebugStr(MF.getFrameInfo().getObjectAlign(FrameIndex)) + << "\n"); + + eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); + return false; +} + +Register Sw64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const Sw64FrameLowering *TFI = getFrameLowering(MF); + + return TFI->hasFP(MF) ? Sw64::R15 : Sw64::R30; +} + +unsigned Sw64RegisterInfo::getEHExceptionRegister() const { + llvm_unreachable("What is the exception register"); + return 0; +} + +unsigned Sw64RegisterInfo::getEHHandlerRegister() const { + llvm_unreachable("What is the exception handler register"); + return 0; +} + +std::string Sw64RegisterInfo::getPrettyName(unsigned reg) { + std::string s("#reg_#-#"); + return s; +} + +bool Sw64RegisterInfo::needsFrameMoves(const MachineFunction &MF) { + return MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry(); +} diff --git a/llvm/lib/Target/Sw64/Sw64RegisterInfo.h b/llvm/lib/Target/Sw64/Sw64RegisterInfo.h new file mode 100644 index 000000000000..0f0e74f0bbd9 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64RegisterInfo.h @@ -0,0 +1,79 @@ +//===-- Sw64RegisterInfo.h - Sw64 Register Information Impl ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of the MRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64REGISTERINFO_H +#define LLVM_LIB_TARGET_SW64_SW64REGISTERINFO_H + +#include "Sw64.h" +#include "llvm/CodeGen/MachineBasicBlock.h" + +#define GET_REGINFO_HEADER +#include "Sw64GenRegisterInfo.inc" + +namespace llvm { + +class TargetInstrInfo; +class TargetRegisterClass; + +class Sw64RegisterInfo : public Sw64GenRegisterInfo { +public: + Sw64RegisterInfo(); + enum class Sw64PtrClass { + // The default register class for integer values. + Default = 0, + // The stack pointer only. + StackPointer = 1, + // The global pointer only. + GlobalPointer = 2, + }; + + // Code Generation virtual methods... + + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + + BitVector getReservedRegs(const MachineFunction &MF) const override; + + // Eliminate virtual register which Prologue/Epilogue generate. + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; + bool useFPForScavengingIndex(const MachineFunction &MF) const override; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; + + // Code Generation virtual methods... + const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const override; + + bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + // Debug information queries. + Register getFrameRegister(const MachineFunction &MF) const override; + + const u_int32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; + + // Return whether to emit frame moves + static bool needsFrameMoves(const MachineFunction &MF); + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + static std::string getPrettyName(unsigned reg); + +private: + void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, int64_t SPOffset) const; +}; + +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64RegisterInfo.td b/llvm/lib/Target/Sw64/Sw64RegisterInfo.td new file mode 100644 index 000000000000..2b164147ebfa --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64RegisterInfo.td @@ -0,0 +1,306 @@ +//===- Sw64RegisterInfo.td - The Sw64 Register File ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Sw64 register set. 
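// Layout in brief: 32 general-purpose registers $0-$31 with $31 wired to
// zero, and 32 floating-point registers exposed three ways: F0-F31 as the
// scalar f32/f64 views, Q0-Q31 as the 64-bit views used by FPRC, and V0-V31
// as the 256-bit SIMD views, all tied together through the sub_32
// sub-register index (so, on the C++ side, TRI.getSubReg(Sw64::V0,
// Sw64::sub_32) yields Sw64::Q0). DWARF numbers 0-31 cover the GPRs and
// 32-63 the floating-point/vector registers.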
+// +//===----------------------------------------------------------------------===// +let Namespace = "Sw64" in { +def sub_32: SubRegIndex<32>; +} //Namespace Sw64 + +// For register encoding +class Sw64Reg Enc, string n, list alt= []> : Register { + let HWEncoding = Enc; + let Namespace = "Sw64"; + let AltNames = alt; +} + +class Sw64RegWithSubRegs Enc, string n, list subregs> + : RegisterWithSubRegs { + let HWEncoding = Enc; + let Namespace = "Sw64"; +} + +// GPR - One of the 32 32-bit general-purpose registers +class Sw64GPR Enc, string n, list alt= []> : Sw64Reg; +// FPR - One of the 32 64-bit floating-point registers +class Sw64FPR Enc, string n, list subregs = []> + : Sw64RegWithSubRegs; + +// VEC - One of the 32 256-bit vector registers +class Sw64VEC Enc, string n, list subregs> + : Sw64RegWithSubRegs { + let SubRegIndices = [sub_32]; +} + +class Unallocatable { + bit isAllocatable = 0; +} + +let Namespace = "Sw64" in { + +// General-purpose registers +def R0 : Sw64GPR< 0, "$0">, DwarfRegNum<[0]>; +def R1 : Sw64GPR< 1, "$1">, DwarfRegNum<[1]>; +def R2 : Sw64GPR< 2, "$2">, DwarfRegNum<[2]>; +def R3 : Sw64GPR< 3, "$3">, DwarfRegNum<[3]>; +def R4 : Sw64GPR< 4, "$4">, DwarfRegNum<[4]>; +def R5 : Sw64GPR< 5, "$5">, DwarfRegNum<[5]>; +def R6 : Sw64GPR< 6, "$6">, DwarfRegNum<[6]>; +def R7 : Sw64GPR< 7, "$7">, DwarfRegNum<[7]>; +def R8 : Sw64GPR< 8, "$8">, DwarfRegNum<[8]>; +def R9 : Sw64GPR< 9, "$9">, DwarfRegNum<[9]>; +def R10 : Sw64GPR< 10, "$10">, DwarfRegNum<[10]>; +def R11 : Sw64GPR< 11, "$11">, DwarfRegNum<[11]>; +def R12 : Sw64GPR< 12, "$12">, DwarfRegNum<[12]>; +def R13 : Sw64GPR< 13, "$13">, DwarfRegNum<[13]>; +def R14 : Sw64GPR< 14, "$14">, DwarfRegNum<[14]>; +def R15 : Sw64GPR< 15, "$15", ["$fp"]>, DwarfRegNum<[15]>; +def R16 : Sw64GPR< 16, "$16">, DwarfRegNum<[16]>; +def R17 : Sw64GPR< 17, "$17">, DwarfRegNum<[17]>; +def R18 : Sw64GPR< 18, "$18">, DwarfRegNum<[18]>; +def R19 : Sw64GPR< 19, "$19">, DwarfRegNum<[19]>; +def R20 : Sw64GPR< 20, "$20">, DwarfRegNum<[20]>; +def R21 : Sw64GPR< 21, "$21">, DwarfRegNum<[21]>; +def R22 : Sw64GPR< 22, "$22">, DwarfRegNum<[22]>; +def R23 : Sw64GPR< 23, "$23">, DwarfRegNum<[23]>; +def R24 : Sw64GPR< 24, "$24">, DwarfRegNum<[24]>; +def R25 : Sw64GPR< 25, "$25">, DwarfRegNum<[25]>; +def R26 : Sw64GPR< 26, "$26", ["$ra"]>, DwarfRegNum<[26]>; +def R27 : Sw64GPR< 27, "$27", ["$pv"]>, DwarfRegNum<[27]>; +def R28 : Sw64GPR< 28, "$28", ["$at"]>, DwarfRegNum<[28]>; +def R29 : Sw64GPR< 29, "$29", ["$gp"]>, DwarfRegNum<[29]>; +def R30 : Sw64GPR< 30, "$30", ["$sp"]>, DwarfRegNum<[30]>; +def R31 : Sw64GPR< 31, "$31", ["$zero"]>, DwarfRegNum<[31]>; + +// Floating-point registers +def F0 : Sw64FPR< 0, "$f0">, DwarfRegNum<[32]>; +def F1 : Sw64FPR< 1, "$f1">, DwarfRegNum<[33]>; +def F2 : Sw64FPR< 2, "$f2">, DwarfRegNum<[34]>; +def F3 : Sw64FPR< 3, "$f3">, DwarfRegNum<[35]>; +def F4 : Sw64FPR< 4, "$f4">, DwarfRegNum<[36]>; +def F5 : Sw64FPR< 5, "$f5">, DwarfRegNum<[37]>; +def F6 : Sw64FPR< 6, "$f6">, DwarfRegNum<[38]>; +def F7 : Sw64FPR< 7, "$f7">, DwarfRegNum<[39]>; +def F8 : Sw64FPR< 8, "$f8">, DwarfRegNum<[40]>; +def F9 : Sw64FPR< 9, "$f9">, DwarfRegNum<[41]>; +def F10 : Sw64FPR< 10, "$f10">, DwarfRegNum<[42]>; +def F11 : Sw64FPR< 11, "$f11">, DwarfRegNum<[43]>; +def F12 : Sw64FPR< 12, "$f12">, DwarfRegNum<[44]>; +def F13 : Sw64FPR< 13, "$f13">, DwarfRegNum<[45]>; +def F14 : Sw64FPR< 14, "$f14">, DwarfRegNum<[46]>; +def F15 : Sw64FPR< 15, "$f15">, DwarfRegNum<[47]>; +def F16 : Sw64FPR< 16, "$f16">, DwarfRegNum<[48]>; +def F17 : Sw64FPR< 17, "$f17">, 
DwarfRegNum<[49]>; +def F18 : Sw64FPR< 18, "$f18">, DwarfRegNum<[50]>; +def F19 : Sw64FPR< 19, "$f19">, DwarfRegNum<[51]>; +def F20 : Sw64FPR< 20, "$f20">, DwarfRegNum<[52]>; +def F21 : Sw64FPR< 21, "$f21">, DwarfRegNum<[53]>; +def F22 : Sw64FPR< 22, "$f22">, DwarfRegNum<[54]>; +def F23 : Sw64FPR< 23, "$f23">, DwarfRegNum<[55]>; +def F24 : Sw64FPR< 24, "$f24">, DwarfRegNum<[56]>; +def F25 : Sw64FPR< 25, "$f25">, DwarfRegNum<[57]>; +def F26 : Sw64FPR< 26, "$f26">, DwarfRegNum<[58]>; +def F27 : Sw64FPR< 27, "$f27">, DwarfRegNum<[59]>; +def F28 : Sw64FPR< 28, "$f28">, DwarfRegNum<[60]>; +def F29 : Sw64FPR< 29, "$f29">, DwarfRegNum<[61]>; +def F30 : Sw64FPR< 30, "$f30">, DwarfRegNum<[62]>; +def F31 : Sw64FPR< 31, "$f31">, DwarfRegNum<[63]>; + +// Floating-point registers +let SubRegIndices = [sub_32] in { +def Q0 : Sw64FPR< 0, "$f0", [F0]>, DwarfRegNum<[32]>; +def Q1 : Sw64FPR< 1, "$f1", [F1]>, DwarfRegNum<[33]>; +def Q2 : Sw64FPR< 2, "$f2", [F2]>, DwarfRegNum<[34]>; +def Q3 : Sw64FPR< 3, "$f3", [F3]>, DwarfRegNum<[35]>; +def Q4 : Sw64FPR< 4, "$f4", [F4]>, DwarfRegNum<[36]>; +def Q5 : Sw64FPR< 5, "$f5", [F5]>, DwarfRegNum<[37]>; +def Q6 : Sw64FPR< 6, "$f6", [F6]>, DwarfRegNum<[38]>; +def Q7 : Sw64FPR< 7, "$f7", [F7]>, DwarfRegNum<[39]>; +def Q8 : Sw64FPR< 8, "$f8", [F8]>, DwarfRegNum<[40]>; +def Q9 : Sw64FPR< 9, "$f9", [F9]>, DwarfRegNum<[41]>; +def Q10 : Sw64FPR< 10, "$f10", [F10]>, DwarfRegNum<[42]>; +def Q11 : Sw64FPR< 11, "$f11", [F11]>, DwarfRegNum<[43]>; +def Q12 : Sw64FPR< 12, "$f12", [F12]>, DwarfRegNum<[44]>; +def Q13 : Sw64FPR< 13, "$f13", [F13]>, DwarfRegNum<[45]>; +def Q14 : Sw64FPR< 14, "$f14", [F14]>, DwarfRegNum<[46]>; +def Q15 : Sw64FPR< 15, "$f15", [F15]>, DwarfRegNum<[47]>; +def Q16 : Sw64FPR< 16, "$f16", [F16]>, DwarfRegNum<[48]>; +def Q17 : Sw64FPR< 17, "$f17", [F17]>, DwarfRegNum<[49]>; +def Q18 : Sw64FPR< 18, "$f18", [F18]>, DwarfRegNum<[50]>; +def Q19 : Sw64FPR< 19, "$f19", [F19]>, DwarfRegNum<[51]>; +def Q20 : Sw64FPR< 20, "$f20", [F20]>, DwarfRegNum<[52]>; +def Q21 : Sw64FPR< 21, "$f21", [F21]>, DwarfRegNum<[53]>; +def Q22 : Sw64FPR< 22, "$f22", [F22]>, DwarfRegNum<[54]>; +def Q23 : Sw64FPR< 23, "$f23", [F23]>, DwarfRegNum<[55]>; +def Q24 : Sw64FPR< 24, "$f24", [F24]>, DwarfRegNum<[56]>; +def Q25 : Sw64FPR< 25, "$f25", [F25]>, DwarfRegNum<[57]>; +def Q26 : Sw64FPR< 26, "$f26", [F26]>, DwarfRegNum<[58]>; +def Q27 : Sw64FPR< 27, "$f27", [F27]>, DwarfRegNum<[59]>; +def Q28 : Sw64FPR< 28, "$f28", [F28]>, DwarfRegNum<[60]>; +def Q29 : Sw64FPR< 29, "$f29", [F29]>, DwarfRegNum<[61]>; +def Q30 : Sw64FPR< 30, "$f30", [F30]>, DwarfRegNum<[62]>; +def Q31 : Sw64FPR< 31, "$f31", [F31]>, DwarfRegNum<[63]>; +} + +// Vector registers +def V0 : Sw64VEC< 0, "$f0", [Q0]>, DwarfRegNum<[32]>; +def V1 : Sw64VEC< 1, "$f1", [Q1]>, DwarfRegNum<[33]>; +def V2 : Sw64VEC< 2, "$f2", [Q2]>, DwarfRegNum<[34]>; +def V3 : Sw64VEC< 3, "$f3", [Q3]>, DwarfRegNum<[35]>; +def V4 : Sw64VEC< 4, "$f4", [Q4]>, DwarfRegNum<[36]>; +def V5 : Sw64VEC< 5, "$f5", [Q5]>, DwarfRegNum<[37]>; +def V6 : Sw64VEC< 6, "$f6", [Q6]>, DwarfRegNum<[38]>; +def V7 : Sw64VEC< 7, "$f7", [Q7]>, DwarfRegNum<[39]>; +def V8 : Sw64VEC< 8, "$f8", [Q8]>, DwarfRegNum<[40]>; +def V9 : Sw64VEC< 9, "$f9", [Q9]>, DwarfRegNum<[41]>; +def V10 : Sw64VEC< 10, "$f10", [Q10]>, DwarfRegNum<[42]>; +def V11 : Sw64VEC< 11, "$f11", [Q11]>, DwarfRegNum<[43]>; +def V12 : Sw64VEC< 12, "$f12", [Q12]>, DwarfRegNum<[44]>; +def V13 : Sw64VEC< 13, "$f13", [Q13]>, DwarfRegNum<[45]>; +def V14 : Sw64VEC< 14, "$f14", [Q14]>, DwarfRegNum<[46]>; +def V15 : Sw64VEC< 
15, "$f15", [Q15]>, DwarfRegNum<[47]>; +def V16 : Sw64VEC< 16, "$f16", [Q16]>, DwarfRegNum<[48]>; +def V17 : Sw64VEC< 17, "$f17", [Q17]>, DwarfRegNum<[49]>; +def V18 : Sw64VEC< 18, "$f18", [Q18]>, DwarfRegNum<[50]>; +def V19 : Sw64VEC< 19, "$f19", [Q19]>, DwarfRegNum<[51]>; +def V20 : Sw64VEC< 20, "$f20", [Q20]>, DwarfRegNum<[52]>; +def V21 : Sw64VEC< 21, "$f21", [Q21]>, DwarfRegNum<[53]>; +def V22 : Sw64VEC< 22, "$f22", [Q22]>, DwarfRegNum<[54]>; +def V23 : Sw64VEC< 23, "$f23", [Q23]>, DwarfRegNum<[55]>; +def V24 : Sw64VEC< 24, "$f24", [Q24]>, DwarfRegNum<[56]>; +def V25 : Sw64VEC< 25, "$f25", [Q25]>, DwarfRegNum<[57]>; +def V26 : Sw64VEC< 26, "$f26", [Q26]>, DwarfRegNum<[58]>; +def V27 : Sw64VEC< 27, "$f27", [Q27]>, DwarfRegNum<[59]>; +def V28 : Sw64VEC< 28, "$f28", [Q28]>, DwarfRegNum<[60]>; +def V29 : Sw64VEC< 29, "$f29", [Q29]>, DwarfRegNum<[61]>; +def V30 : Sw64VEC< 30, "$f30", [Q30]>, DwarfRegNum<[62]>; +def V31 : Sw64VEC< 31, "$f31", [Q31]>, DwarfRegNum<[63]>; + +} // Namespace Sw64 + +/// Register classes +def GPRC : RegisterClass<"Sw64", [i64], 64, (add + // Volatile + R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22, + R23, R24, R25, R28, + //Special meaning, but volatile + R27, //procedure address + R26, //return address + R29, //global offset table address + // Non-volatile + R9, R10, R11, R12, R13, R14, + // Don't allocate 15, 30, 31 + R15, R30, R31)>; + +def F4RC : RegisterClass<"Sw64", [f32], 64, (add F0, F1, + F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, + // Saved: + F2, F3, F4, F5, F6, F7, F8, F9, + // zero: + F31)>; + +def F8RC : RegisterClass<"Sw64", [f64], 64, (add F4RC)>; + +// lowest 64bits part for simd vector +def FPRC : RegisterClass<"Sw64", [i64, f64], 64, (sequence "Q%u", 0, 31)>; + +def FPRC_lo : RegisterClass<"Sw64", [i32, f32], 64, (trunc F4RC, 32)>; + +// Stack pointer and global pointer classes for instructions that are limited +// to a single register. +def SP64 : RegisterClass<"Sw64", [i64], 64, (add R30)>, Unallocatable; +def GP64 : RegisterClass<"Sw64", [i64], 64, (add R29)>, Unallocatable; + +def FP30 : RegisterClass<"Sw64", [f32], 64, (add F30)>, Unallocatable; +def FD30 : RegisterClass<"Sw64", [f64], 64, (add F30)>, Unallocatable; +// Register Operands. 
+ +class Sw64AsmRegOperand : AsmOperandClass { + let ParserMethod = "parseAnyRegister"; +} + +def GPRCAsmOperand : Sw64AsmRegOperand { + let Name = "Reg"; +} + +def F4RCAsmOperand : Sw64AsmRegOperand { + let Name = "F4RCAsmReg"; + let PredicateMethod = "isFGRAsmReg"; +} + +def F8RCAsmOperand : Sw64AsmRegOperand { + let Name = "F8RCAsmReg"; + let PredicateMethod = "isFGRAsmReg"; +} + +def V256AsmOperand : Sw64AsmRegOperand { + let Name = "V256AsmReg"; +} + +def V256B : RegisterClass<"Sw64", [v32i8], 256, (add (sequence "V%u", 0, 31))>; +def V256H : RegisterClass<"Sw64", [v16i16], 256, (add (sequence "V%u", 0, 31))>; +def V256W : RegisterClass<"Sw64", [v4f32], 256, (add (sequence "V%u", 0, 31))>; + +def V256L : RegisterClass<"Sw64", [v32i8, v16i16, v8i32, v4i64, v4f64, v4f32], 256, + (add (sequence "V%u", 0, 31))>; + +def V256all : RegisterClass<"Sw64", [v32i8, v16i16, v8i32, v4i64, v4f32 ,v4f64], + 256, (add (sequence "V%u", 0, 31))>; + +// adding a special class for floating selection +def V256Floating : RegisterClass<"Sw64", [v4f32, v4f64], + 256, (add (sequence "V%u", 0, 31))>; +def V256E64 : RegisterClass<"Sw64", [v4i64, v4f32, v4f64], + 256, (add (sequence "V%u", 0, 31))>; + +def GPRCOpnd : RegisterOperand { + let ParserMatchClass = GPRCAsmOperand; +} + +def F4RCOpnd : RegisterOperand { + let ParserMatchClass = F4RCAsmOperand; +} + +def F8RCOpnd : RegisterOperand { + let ParserMatchClass = F8RCAsmOperand; +} + +def FPRCOpnd : RegisterOperand { + let ParserMatchClass = F8RCAsmOperand; +} + +def FPRCloOpnd : RegisterOperand { + let ParserMatchClass = F8RCAsmOperand; +} + +def V256BOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256HOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256WOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256LOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256ALOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256FOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} diff --git a/llvm/lib/Target/Sw64/Sw64Relocations.h b/llvm/lib/Target/Sw64/Sw64Relocations.h new file mode 100644 index 000000000000..b32f148d7482 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64Relocations.h @@ -0,0 +1,30 @@ +//===- Sw64Relocations.h - Sw64 Code Relocations --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64 target-specific relocation types. +// +//===----------------------------------------------------------------------===// + +#ifndef Sw64RELOCATIONS_H +#define Sw64RELOCATIONS_H + +#include "llvm/CodeGen/MachineRelocation.h" + +namespace llvm { +namespace Sw64 { +enum RelocationType { + reloc_literal, + reloc_gprellow, + reloc_gprelhigh, + reloc_gpdist, + reloc_bsr +}; +} +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64SchedCore3.td b/llvm/lib/Target/Sw64/Sw64SchedCore3.td new file mode 100644 index 000000000000..f8e424d1639d --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64SchedCore3.td @@ -0,0 +1,213 @@ +//===- Sw64SchedCore3.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
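// Reading the model that follows: IssueWidth caps how many micro-ops the
// MachineScheduler assumes can be dispatched per cycle, LoadLatency is the
// default load-to-use latency applied where no InstRW override exists,
// MicroOpBufferSize models the 72-entry reorder buffer (a non-zero value
// makes the scheduler treat the core as out of order), MispredictPenalty
// feeds the branch heuristics, CompleteModel = false keeps unlisted
// instructions from being flagged as errors, and PostRAScheduler turns on
// the post-RA list scheduler for this subtarget.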
+// +//===----------------------------------------------------------------------===// + +// Core3 processor architecture mannual +def GenericSw64Model : SchedMachineModel { +// Core 3 has 4-way decode and 7-way dispatch, +// in a cycle, can maxinum dispatch 3-way to ALU, 2-way to AGU, 2-way to FPU, +// so set the dispatch width to 4 is optitional. + let IssueWidth = 4; + let LoadLatency = 4; // Optimistic load latency + let LoopMicroOpBufferSize = 16; //InsnQueue has 16 entry + let MispredictPenalty = 13; // Fetch + Decode/Rename/Dispatch + Branch + let CompleteModel = false; + let MicroOpBufferSize = 72; // ROB size + + let PostRAScheduler = 1; +} + +let SchedModel = GenericSw64Model in { + // chapter 2.2.1 + // 3 pipeline ALU + def C3PortALU0 : ProcResource<1>; + def C3PortALU1 : ProcResource<1>; + def C3PortALU2 : ProcResource<1>; + + // 2.2.1 + // alu0 has ADD MUL, alu1 has BR/CSR BOP/SHT + // alu2 has CNT BOP/SHT ADD/SEL + def C3PortALU : ProcResGroup<[C3PortALU0, C3PortALU1, C3PortALU2]> { + let BufferSize = 32; + } + def C3PortALU01 : ProcResGroup<[C3PortALU0, C3PortALU1]>; + def C3PortALU12 : ProcResGroup<[C3PortALU1, C3PortALU2]>; + + // 2 pipeline Alu Mem + // 2.2.3 + // Core3a interger has two AGU Unit + // 2 LSU Unit deel with all load/store + def C3LSU : ProcResource<2>; + + def C3PortAGU0 : ProcResource<1>; + def C3PortAGU1 : ProcResource<1>; + + def C3PortAGU01 : ProcResGroup<[C3PortAGU0, C3PortAGU1]>; + + let Super = C3LSU in + def C3Load : ProcResource<2> { + let BufferSize = 32; + } + + def C3LoadQueue : LoadQueue; + + let Super = C3LSU in + def C3Store : ProcResource<1> { + let BufferSize = 16; + } + + def C3StoreQueue : StoreQueue; + + // 2 pipeline FPU-SIMD + def C3PortFPU0 : ProcResource<1>; + def C3PortFPU1 : ProcResource<1>; + + + def C3PortFPU : ProcResGroup<[C3PortFPU0, C3PortFPU1]>; + + def C3GprRF: RegisterFile<105, [GPRC], [1]>; + + def C3FpuRF: RegisterFile<95, [F4RC, F8RC], [1]>; + + def C3RCU : RetireControlUnit<72, 4>; + + class C3WriteRes ExePorts, + int Lat, list Res = [], int UOps = 1> : + WriteRes { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } + + class C3LSWriteRes ExePorts, + int Lat, list Res = [], int UOps = 1> : + WriteRes { + let Latency = !add(Lat, 1); + let ResourceCycles = !if(!empty(Res), [1, 1], !listconcat([1], Res)); + let NumMicroOps = UOps; + } + + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; //nop do not execute in backend + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + + def : C3WriteRes ; + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3LSWriteRes ; + + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3LSWriteRes; + def : C3LSWriteRes; + + def : C3LSWriteRes; + def : C3LSWriteRes; + + def : C3WriteRes; + def : C3WriteRes; + + def : C3WriteRes; + def : C3WriteRes; + + def : C3WriteRes; + def : C3WriteRes; + + def : InstRW<[WriteIALU], (instrs COPY)>; + + def : InstRW<[WriteBR], (instrs BR, BEQ, BGE, + BGT, BLBC, BLBS, BLE, BLT, BNE, BSR)>; + + def : InstRW<[WriteBR], (instrs SYS_CALL)>; + def : InstRW<[WriteBR], (instrs JMP, JSR, RET)>; + def : InstRW<[WriteFBR], (instregex "^FB(EQ|GE|GT|LE|LT|NE)$")>; + + def : InstRW<[WriteLD], (instregex "^(S_FILL|E_FILL)(CS|DE)$")>; + def : InstRW<[WriteLD], (instregex "^FILL(CS|DE|CS_E|DE_E)$")>; + + def : InstRW<[WriteLD], 
(instregex "^LD(L|W|HU|BU)$")>; + def : InstRW<[WriteFLD], (instregex "^LD(S|D)$")>; + + def : InstRW<[WriteST], (instregex "^ST(L|W|H|B)$")>; + def : InstRW<[WriteFST], (instregex "^ST(S|D)$")>; + + def : InstRW<[WriteImm], (instregex "^LDAH*$")>; + + def : InstRW<[WriteIALU], (instregex "^(ADD|SUB|S(4|8)(ADD|SUB))(L|Q)(r|i)$")>; + def : InstRW<[WriteIMul], (instregex "^(MUL)(L|Q)(r|i)$")>; + + def : InstRW<[WriteCNT], (instrs CTLZ, CTPOP, CTTZ)>; + + def : InstRW<[WriteBOP], (instrs ZAPr, ZAPi, ZAPNOTr, ZAPNOTi, SEXTB, SEXTH)>; + + def : InstRW<[WriteIALU], (instregex "^CMP(EQ|LE|LT|ULE|ULT|BGE)(r|i)*$")>; + def : InstRW<[WriteFPU64], (instregex "^CMP(TEQ|TLE|TLT|TUN)$")>; + + def : InstRW<[WriteIALU], (instregex "^(AND|BIC|BIS|ORNOT|XOR|EQV)(r|i)*$")>; + + def : InstRW<[WriteSHT], (instregex "^(SL|SRA|SRL)(r|i)*$")>; + def : InstRW<[WriteIMul], (instrs UMULHi, UMULHr)>; + + def : InstRW<[WriteSEL], (instregex "^SEL(EQ|NE|LE|LT|GT|GE|LBC|LBS)(r|i)*$")>; + + def : InstRW<[WriteBOP], (instregex "^EXT(BL|WL|LL|LW|HB|HH|HW|HL)(r|i)*$")>; + + def : InstRW<[WriteBOP], (instregex "^MASKL[BHLW](r|i)*$")>; + def : InstRW<[WriteBOP], (instregex "^MASKH[BHLW](r|i)*$")>; + def : InstRW<[WriteBOP], (instregex "^INSL[BHLW](r|i)*$")>; + def : InstRW<[WriteBOP], (instregex "^INSH[BHLW](r|i)*$")>; + + def : InstRW<[WriteFPU32], (instregex "^(ADD|SUB|MUL)(S|D)*$")>; + def : InstRW<[WriteFPU32], (instregex "^CPY(S|SE|SN)(S|D)*$")>; + def : InstRW<[WriteFPU64], (instregex "^SETFPEC(0|1|2|3)*$")>; + def : InstRW<[WriteImm], (instrs NOP)>; + + def : InstRW<[WriteFCvtF64ToF32], (instrs FCVTLW, FCVTWL)>; + def : InstRW<[WriteFCvtF64ToI64], (instrs CVTQS, CVTQT)>; + def : InstRW<[WriteFCvtF64ToI64], (instrs CVTTQ, FCTTDL, FCTTDL_G, FCTTDL_P, FCTTDL_N)>; + def : InstRW<[WriteFCvtF64ToF32], (instrs CVTST, CVTTS)>; + + def : InstRW<[WriteFPU32], (instregex "^(F|FN)M(A|S)S$")>; + def : InstRW<[WriteFPU64], (instregex "^(F|FN)M(A|S)D$")>; + + def : InstRW<[WriteFSEL], (instregex "^FSEL(EQ|GE|GT|LE|LT|NE)S$")>; + def : InstRW<[WriteFSEL], (instregex "^FSEL(EQ|GE|GT|LE|LT|NE)D$")>; + + def : InstRW<[WriteFSqrt32], (instrs SQRTSS)>; + def : InstRW<[WriteFSqrt64], (instrs SQRTSD)>; + + def : InstRW<[WriteFDiv32], (instrs DIVS)>; + def : InstRW<[WriteFDiv64], (instrs DIVD)>; + + def : InstRW<[WriteFPS], (instrs FTOIS, FTOIT, ITOFS, ITOFT)>; + + def : InstRW<[WriteLD], (instrs LDL_L, LDQ_L)>; + def : InstRW<[WriteST], (instrs STL_C, STQ_C)>; + + def : InstRW<[WriteIALU], (instrs RCID, RPCC)>; + def : InstRW<[WriteFPS], (instrs WFPCR, RFPCR)>; +} diff --git a/llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td b/llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td new file mode 100644 index 000000000000..bf34ba940ac8 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td @@ -0,0 +1,57 @@ +//===- Sw64SchedCore3SIMD.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +let SchedModel = GenericSw64Model in { + +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 17; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 3; } + +def : InstRW<[WriteFLDS], (instregex "^(VLD)(W|S|D)(E)$")>; +def : InstRW<[WriteFLDS], (instregex "^(VLD)(S|D)$")>; + +def : InstRW<[WriteFLDS], (instregex "^(VLD)(W|S|D)(U)$")>; +def : InstRW<[WriteFSTDS], (instregex "^(VST)(W|S|D)(U)$")>; +def : InstRW<[WriteFSTDS], (instregex "^(VST)(WU|SU|DU)(L|H)$")>; + +def : InstRW<[WriteFLDS], (instrs VLDDNC)>; +def : InstRW<[WriteFSTDS], (instrs VSTDNC)>; + +def : InstRW<[WriteFMA6], (instregex "^(V)(ADD|SUB|MUL)(S|D)$")>; + +def : InstRW<[WriteFMA6], (instregex "^(VFCMP)(EQ|LE|LT|UN)$")>; +def : InstRW<[WriteFMA2], (instregex "^(VCPY)(S|SE|SN)$")>; +def : InstRW<[WriteFMA2], (instregex "^(V)(M|NM)(A|S)(S|D)$")>; + +def : InstRW<[WriteFMA2], (instregex "^(VFSEL)(EQ|LT|LE)$")>; +def : InstRW<[WriteVPM1], (instregex "^(V)(INS|EXT|CPY)(W|FS|FD)$")>; + +def : InstRW<[WriteVPM1], (instregex "^(VINSECTL)(H|W|L|B)$")>; + +def : InstRW<[WriteVCON1], (instregex "^(VCON)(W|S|D)$")>; +def : InstRW<[WriteVCON2], (instrs VSHFW)>; + +def : InstRW<[WriteVPM2], (instrs VLOGZZ)>; +} diff --git a/llvm/lib/Target/Sw64/Sw64SchedCore4.td b/llvm/lib/Target/Sw64/Sw64SchedCore4.td new file mode 100644 index 000000000000..9972c95699a3 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64SchedCore4.td @@ -0,0 +1,75 @@ +//===- Sw64SchedCore4.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
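+// These definitions extend the same GenericSw64Model declared in
+// Sw64SchedCore3.td; they mainly add write resources and InstRW mappings for
+// the Core4-specific instructions (integer divide/remainder, CRC32, CAS,
+// byte-reverse, post-increment loads/stores, CSR access), while the common
+// instructions keep the Core3 entries.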
+// +//===----------------------------------------------------------------------===// + +let SchedModel = GenericSw64Model in { + // 3 pipeline ALU + +def : WriteRes { let Latency = 1; } + +// FIXME: the latency of div and rem +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } + +def : WriteRes{ let Latency = 3; } + +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } + +def : InstRW<[WriteLD], (instrs LDL_A, LDW_A, LDHU_A, LDBU_A)>; +def : InstRW<[WriteFLD], (instrs LDS_A, LDD_A)>; + +def : InstRW<[WriteST], (instrs STL_A, STW_A, STH_A, STB_A)>; +def : InstRW<[WriteFST], (instrs STS_A, STD_A)>; + +def : InstRW<[WriteIDiv], (instregex "^(DIV)(L|Q)$")>; +def : InstRW<[WriteIDiv], (instregex "^(UDIV)(L|Q)$")>; +def : InstRW<[WriteIRem], (instregex "^(REM)(L|Q)$")>; +def : InstRW<[WriteIRem], (instregex "^(UREM)(L|Q)$")>; +def : InstRW<[WriteJmp], (instrs ADDPI, ADDPIS)>; +def : InstRW<[WriteImm], (instregex "^(C|S)(BT)(r|i)$")>; + +def : InstRW<[WriteIALU], (instrs REVBH, REVBW, REVBL)>; + +def : InstRW<[WriteIALU], (instregex "^(SLLW|SRAW|SRLW|ROLW|ROLL)(r|i)*$")>; + +def : InstRW<[WriteCrc], (instregex "^(CRC32C)(B|H|W|L)*$")>; +def : InstRW<[WriteCrc], (instregex "^(CRC32)(B|H|W|L)*$")>; + +def : InstRW<[WriteFCvtF64ToI64], (instrs CMOVDL, CMOVDL_G, CMOVDL_P, CMOVDL_Z, CMOVDL_N)>; +def : InstRW<[WriteFCvtF64ToI64], (instrs CMOVDLU, CMOVDLU_G, CMOVDLU_P, CMOVDLU_Z, CMOVDLU_N)>; +def : InstRW<[WriteFCvtF64ToI32], (instrs CMOVDWU, CMOVDWU_G, CMOVDWU_P, CMOVDWU_Z, CMOVDWU_N)>; +def : InstRW<[WriteFCvtF64ToI32], (instrs CMOVDW, CMOVDW_G, CMOVDW_P, CMOVDW_Z, CMOVDW_N)>; +def : InstRW<[WriteFCvtF64ToI32], (instrs FCVTHS, FCVTSH)>; + +def : InstRW<[WriteFCvtI64ToF32], (instrs CMOVLS, CMOVULS)>; +def : InstRW<[WriteFCvtI32ToF32], (instrs CMOVWS, CMOVUWS)>; +def : InstRW<[WriteFCvtI64ToF64], (instrs CMOVLD, CMOVULD)>; +def : InstRW<[WriteFCvtI32ToF64], (instrs CMOVWD, CMOVUWD)>; + +def : InstRW<[WriteFCvtF64ToF64], (instrs FRID, FRID_G, FRID_P, FRID_Z, FRID_N)>; +def : InstRW<[WriteFCvtF32ToF32], (instrs FRIS, FRIS_G, FRIS_P, FRIS_Z, FRIS_N)>; + +def : InstRW<[WriteFREC], (instrs FRECS)>; +def : InstRW<[WriteFREC], (instrs FRECD)>; + +def : InstRW<[WriteST], (instrs CASW, CASL)>; + +def : InstRW<[WriteLD], (instrs DPFHR, DPFHW)>; + +def : InstRW<[WriteCSR], (instrs CSRR, CSRW)>; +def : InstRW<[WriteCSR], (instrs CSRWS, CSRWC)>; + +def : InstRW<[WriteJmp], (instrs LBR)>; +} diff --git a/llvm/lib/Target/Sw64/Sw64Schedule.td b/llvm/lib/Target/Sw64/Sw64Schedule.td new file mode 100644 index 000000000000..c4331abd2058 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64Schedule.td @@ -0,0 +1,86 @@ +//===- Sw64Schedule.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Define scheduler resources associated with def operands. 
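+// Each SchedWrite declared below is only a named resource class: the per-core
+// models (Sw64SchedCore3.td, Sw64SchedCore3SIMD.td, Sw64SchedCore4.td) bind
+// the actual latencies and pipeline ports to these names via WriteRes /
+// C3WriteRes and map instructions onto them with InstRW.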
+def WriteIALU : SchedWrite; // 32 or 64-bit integer ALU operations +def WriteSHT : SchedWrite; // 32 or 64-bit integer ALU operations +def WriteIMul : SchedWrite; // 32-bit or 64-bit multiply +def WriteIDiv : SchedWrite; // 32-bit or 64-bit divided +def WriteIRem : SchedWrite; // 32-bit or 64-bit remainder +def WriteImm : SchedWrite; // 32-bit multiply on RV64I +def WriteCrc : SchedWrite; +def WriteJmp : SchedWrite; // Jump +def WriteBR : SchedWrite; // Jump +def WriteFBR : SchedWrite; // float Jump +def WriteNop : SchedWrite; +def WriteLD : SchedWrite; // Load double-word +def WriteFLD : SchedWrite; // Load double-word +def WriteFREC : SchedWrite; +def WriteCSR : SchedWrite; // CSR instructions +def WriteST : SchedWrite; // Store byte +def WriteFST : SchedWrite; // Store byte +def WriteCNT : SchedWrite; //Atomic memory operation word size +def WriteSEL : SchedWrite; // bytes operate and selection operate +def WriteFPS : SchedWrite; //Atomic memory operation double word size +def WriteBOP : SchedWrite; //Atomic memory operation double word size +def WriteAtomicSTW : SchedWrite; // Atomic store word +def WriteAtomicSTD : SchedWrite; // Atomic store double word +def WriteFPU32 : SchedWrite; // FP 32-bit computation +def WriteFPU64 : SchedWrite; // FP 64-bit computation +def WriteFMul32 : SchedWrite; // 32-bit floating point multiply +def WriteFMulAdd32 : SchedWrite; // 32-bit floating point multiply add +def WriteFMulSub32 : SchedWrite; // 32-bit floating point multiply sub +def WriteFMul64 : SchedWrite; // 64-bit floating point multiply +def WriteFMulAdd64 : SchedWrite; // 64-bit floating point multiply add +def WriteFMulSub64 : SchedWrite; // 64-bit floating point multiply sub +def WriteFDiv32 : SchedWrite; // 32-bit floating point divide +def WriteFDiv64 : SchedWrite; // 64-bit floating point divide +def WriteFSqrt32 : SchedWrite; // 32-bit floating point sqrt +def WriteFSqrt64 : SchedWrite; // 64-bit floating point sqrt +def WriteFSEL : SchedWrite; // float selection operate +def WriteNOP : SchedWrite; // float selection operate +def WriteFCvtF32ToI32 : SchedWrite; +def WriteFCvtF32ToI64 : SchedWrite; +def WriteFCvtF64ToI32 : SchedWrite; +def WriteFCvtF64ToI64 : SchedWrite; +def WriteFCvtI32ToF32 : SchedWrite; +def WriteFCvtI32ToF64 : SchedWrite; +def WriteFCvtI64ToF32 : SchedWrite; +def WriteFCvtI64ToF64 : SchedWrite; +def WriteFMovF32ToI32 : SchedWrite; +def WriteFMovI32ToF32 : SchedWrite; +def WriteFMovF64ToI64 : SchedWrite; +def WriteFMovI64ToF64 : SchedWrite; +def WriteFCvtF32ToF64 : SchedWrite; +def WriteFCvtF64ToF32 : SchedWrite; +def WriteFCvtF64ToF64 : SchedWrite; +def WriteFCvtF32ToF32 : SchedWrite; + +def WriteAdrLD : WriteSequence<[WriteImm, WriteLD]>; +def WriteAdrAdr : WriteSequence<[WriteImm, WriteImm]>; + +def WriteFLDS : SchedWrite; +def WriteFSTDS : SchedWrite; +def WriteVEADD : SchedWrite; +def WriteVESHT2 : SchedWrite; +def WriteVESHT3 : SchedWrite; +def WriteVECNT2 : SchedWrite; +def WriteVECNT3 : SchedWrite; +def WriteVESEL : SchedWrite; +def WriteFMA2 : SchedWrite; +def WriteFMA6 : SchedWrite; +def WriteFMA17 : SchedWrite; +def WriteVPM1 : SchedWrite; +def WriteVPM2 : SchedWrite; +def WriteVCON1 : SchedWrite; +def WriteVCON2 : SchedWrite; +def WriteVSUM : SchedWrite; +def WriteVFREC : SchedWrite; +def WriteVFCT : SchedWrite; +def WriteVFRIS : SchedWrite; diff --git a/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp new file mode 100644 index 000000000000..aefaadeb9777 --- /dev/null +++ 
b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp @@ -0,0 +1,54 @@ +//===-- Sw64SelectionDAGInfo.cpp - Sw64 SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64SelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "sw_64-selectiondag-info" + +SDValue Sw64SelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + unsigned SizeBitWidth = Size.getValueSizeInBits(); + // Call __memcpy_4 if the src, dst and size are all 4 byte aligned. + if (!AlwaysInline && Alignment >= Align(4) && + DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) { + const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + Entry.Node = Src; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol( + "memcpy", TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)) + .setDiscardResult(); + + std::pair CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; + } + + // Otherwise have the target-independent code call memcpy. + return SDValue(); +} diff --git a/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h new file mode 100644 index 000000000000..1d242766f5f8 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h @@ -0,0 +1,34 @@ +//===-- Sw64SelectionDAGInfo.h - Sw64 SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64 subclass for SelectionDAGTargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64SELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_SW64_SW64SELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +class Sw64TargetMachine; + +class Sw64SelectionDAGInfo : public SelectionDAGTargetInfo { +public: + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, Align Alignment, bool isVolatile, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; +}; +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/Sw64Subtarget.cpp b/llvm/lib/Target/Sw64/Sw64Subtarget.cpp new file mode 100644 index 000000000000..96f81e041f47 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64Subtarget.cpp @@ -0,0 +1,117 @@ +//===-- Sw64Subtarget.cpp - Sw64 Subtarget Information ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64 specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#include "Sw64Subtarget.h" +#include "Sw64.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/MC/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "sw_64-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "Sw64GenSubtargetInfo.inc" + +static cl::opt Sw64IntArith("sw-int-divmod", cl::init(true), + cl::desc("Enable sw64 core4 integer" + "arithmetic instructions")); + +static cl::opt Sw64IntShift("sw-shift-word", cl::init(false), + cl::desc("Enable sw64 core4 integer" + "shift instructions")); + +static cl::opt Sw64ByteInst("sw-rev", cl::init(false), + cl::desc("Enable sw64 core4 byte" + "manipulation instructions")); + +static cl::opt Sw64FloatArith("sw-recip", cl::init(true), + cl::desc("Enable sw64 core4 float" + "arithmetic instructions")); + +static cl::opt Sw64FloatRound("sw-fprnd", cl::init(false), + cl::desc("Enable sw64 core4 float" + "round instructions")); + +static cl::opt Sw64FloatCmov("sw-cmov", cl::init(true), + cl::desc("Enable sw64 core4 float" + "cmov instructions")); + +static cl::opt Sw64PostInc("sw-auto-inc-dec", cl::init(false), + cl::desc("Enable sw64 core4 post-inc" + "load and store instructions")); + +static cl::opt + Sw64CasInst("sw-use-cas", cl::init(true), + cl::desc("Enable sw64 core4 cas instructions")); + +static cl::opt + Sw64CrcInst("sw-crc32", cl::init(false), + cl::desc("Enable sw64 core4 crc32 instructions")); + +static cl::opt Sw64SCbtInst("sw-sbt-cbt", cl::init(false), + cl::desc("Enable sw64 core4 integer" + "sbt and cbt instructions")); + +static cl::opt + Sw64WmembInst("sw-wmemb", cl::init(false), + cl::desc("Enable sw64 core4 wmemb instructions")); + +static cl::opt Sw64InstMullShiftAddSub("sw64-inst-mull-shiftaddsub", + cl::init(true), + cl::desc("Inst mull optmize to" + "shift with add or sub")); + +static cl::opt Sw64InstExt("sw64-ext-opt", cl::init(false), + cl::desc("Optimize zext and sext")); + +static cl::opt Sw64InstMemset("sw64-inst-memset", cl::init(true), + cl::desc("Delete part of call memset")); + +cl::opt HasSIMD("msimd", cl::desc("Support the SIMD"), cl::init(false)); + +void 
Sw64Subtarget::anchor() {} + +Sw64Subtarget &Sw64Subtarget::initializeSubtargetDependencies(const Triple &TT, + StringRef CPU, + StringRef FS) { + std::string CPUName = std::string(CPU); + std::string TuneCPUName = std::string(CPU); + ParseSubtargetFeatures(CPUName, /*TuneCPU*/ TuneCPUName, FS); + return *this; +} + +Sw64Subtarget::Sw64Subtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM) + : Sw64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(), + Sw64OptMul(Sw64InstMullShiftAddSub), Sw64OptMemset(Sw64InstMemset), + Sw64OptExt(Sw64InstExt), + ReserveRegister(Sw64::GPRCRegClass.getNumRegs() + + Sw64::F4RCRegClass.getNumRegs() + 1), + Sw64EnableIntAri(Sw64IntArith), Sw64EnableIntShift(Sw64IntShift), + Sw64EnableByteInst(Sw64ByteInst), Sw64EnableFloatAri(Sw64FloatArith), + Sw64EnableFloatRound(Sw64FloatRound), Sw64EnableFloatCmov(Sw64FloatCmov), + Sw64EnablePostInc(Sw64PostInc), Sw64EnableCasInst(Sw64CasInst), + Sw64EnableCrcInst(Sw64CrcInst), Sw64EnableSCbtInst(Sw64SCbtInst), + Sw64EnableWmembInst(Sw64WmembInst), + FrameLowering(initializeSubtargetDependencies(TT, CPU, FS)), + TLInfo(TM, *this), TSInfo(), curgpdist(0) {} + +void Sw64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const { + Policy.OnlyBottomUp = false; + // Spilling is generally expensive on Sw64, so always enable + // register-pressure tracking. + Policy.ShouldTrackPressure = true; +} diff --git a/llvm/lib/Target/Sw64/Sw64Subtarget.h b/llvm/lib/Target/Sw64/Sw64Subtarget.h new file mode 100644 index 000000000000..fc181560f5ba --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64Subtarget.h @@ -0,0 +1,163 @@ +//===-- Sw64Subtarget.h - Define Subtarget for the Sw64 -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Sw64 specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64SUBTARGET_H +#define LLVM_LIB_TARGET_SW64_SW64SUBTARGET_H + +#include "Sw64FrameLowering.h" +#include "Sw64ISelLowering.h" +#include "Sw64InstrInfo.h" +#include "Sw64SelectionDAGInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +#include +using namespace llvm; +extern cl::opt Sw64Mieee; +extern cl::opt Sw64DeleteNop; + +extern cl::opt HasSIMD; + +#define GET_SUBTARGETINFO_HEADER +#include "Sw64GenSubtargetInfo.inc" + +namespace llvm { +class StringRef; + +class Sw64Subtarget : public Sw64GenSubtargetInfo { + virtual void anchor(); + + enum Sw64ArchEnum { sw64 = 0, swTarch, core3b, core4 }; + + bool isCore3b; + bool isCore4; + bool relax; + bool Ev; + + bool Sw64OptMul; + + bool Sw64OptMemset; + + bool Sw64OptExt; + + bool Sw64EnableIntAri; + bool Sw64EnableIntShift; + bool Sw64EnableByteInst; + bool Sw64EnableFloatAri; + bool Sw64EnableFloatRound; + bool Sw64EnableFloatCmov; + bool Sw64EnablePostInc; + bool Sw64EnableCasInst; + bool Sw64EnableCrcInst; + bool Sw64EnableSCbtInst; + bool Sw64EnableWmembInst; + bool Misaligned256StoreIsSlow = false; + uint8_t MaxInterleaveFactor = 2; + unsigned WideningBaseCost = 0; + + Sw64InstrInfo InstrInfo; + // ReserveRegister[i] - #i is not available as a general purpose register. + BitVector ReserveRegister; + Sw64FrameLowering FrameLowering; + Sw64TargetLowering TLInfo; + Sw64SelectionDAGInfo TSInfo; + + bool HasCT; + bool Is64Bit = true; + + Sw64ArchEnum Sw64ArchVersion; + +public: + mutable int curgpdist; + // This constructor initializes the data members to match that + // of the specified triple. + Sw64Subtarget &initializeSubtargetDependencies(const Triple &TT, + StringRef CPU, StringRef FS); + + Sw64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, + const TargetMachine &TM); + + bool hasMieee() const { return Sw64Mieee; } + bool hasDeleteNop() const { return Sw64DeleteNop; } + + int &getCurgpdist() const { return curgpdist; } + void setCurgpdist(int &count) { curgpdist = count; } + bool hasSIMD() const { return HasSIMD; } + + unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } + unsigned getWideningBaseCost() const { return WideningBaseCost; } + bool isMisaligned256StoreSlow() const { return Misaligned256StoreIsSlow; } + + // ParseSubtargetFeatures - Parses features string setting specified + // subtarget options. Definition of function is auto generated by tblgen. 
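+  // The generated body is pulled into Sw64Subtarget.cpp by including
+  // Sw64GenSubtargetInfo.inc with GET_SUBTARGETINFO_CTOR defined.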
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + + const Sw64InstrInfo *getInstrInfo() const override { return &InstrInfo; } + const Sw64FrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const Sw64TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const Sw64SelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + const Sw64RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + AntiDepBreakMode getAntiDepBreakMode() const override { + return TargetSubtargetInfo::ANTIDEP_CRITICAL; + } + + // TODO: enable PostRAscheduler for test + bool enablePostRAScheduler() const { return true; } + + bool enableMachineScheduler() const { return true; } + + bool is64Bit() const { return true; } + bool hasCore3b() const { return Sw64ArchVersion == core3b; } +#ifdef SW64_DEFAULT_ARCH_CORE3 + bool hasCore4() const { return Sw64ArchVersion == core4; } +#else + bool hasCore4() const { return true; } +#endif + bool enRelax() const { return relax; } + bool hasEv() const { return Ev; } + bool hasCT() const { return HasCT; } + bool isRegisterReserved(size_t i) const { return ReserveRegister[i]; } + + bool enOptMul() const { return Sw64OptMul; } + + bool enOptMemset() const { return Sw64OptMemset; } + + bool enOptExt() const { return Sw64OptExt; } + + bool enableIntAri() const { return Sw64EnableIntAri; } + bool enableIntShift() const { return Sw64EnableIntShift; } + bool enableByteInst() const { return Sw64EnableByteInst; } + bool enableFloatAri() const { return Sw64EnableFloatAri; } + bool enableFloatRound() const { return Sw64EnableFloatRound; } + bool enableFloatCmov() const { return Sw64EnableFloatCmov; } + bool enablePostInc() const { return Sw64EnablePostInc; } + bool enableCasInst() const { return Sw64EnableCasInst; } + bool enableCrcInst() const { return Sw64EnableCrcInst; } + bool enableSCbtInst() const { return Sw64EnableSCbtInst; } + bool enableWmembInst() const { return Sw64EnableWmembInst; } + + void overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const; +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64TargetMachine.cpp b/llvm/lib/Target/Sw64/Sw64TargetMachine.cpp new file mode 100644 index 000000000000..d11b61e7dbc4 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetMachine.cpp @@ -0,0 +1,193 @@ +//===-- Sw64TargetMachine.cpp - Define TargetMachine for Sw64 -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetMachine.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "Sw64.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64MacroFusion.h" +#include "Sw64TargetObjectFile.h" +#include "Sw64TargetTransformInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Transforms/Scalar.h" +#include + +using namespace llvm; + +static cl::opt EnableMCR("sw_64-enable-mcr", + cl::desc("Enable the machine combiner pass"), + cl::init(true), cl::Hidden); + +static cl::opt + EnablePrefetch("enable-sw64-prefetching", + cl::desc("Enable software prefetching on SW64"), + cl::init(true), cl::Hidden); + +cl::opt FS_LOAD("fastload", + cl::desc("Enable fast/load optimize(developing)"), + cl::init(false), cl::Hidden); + +static Reloc::Model getEffectiveRelocModel(const Triple &TT, + std::optional RM) { + if (!RM) + return Reloc::Static; + return *RM; +} + +static CodeModel::Model +getEffectiveSw64CodeModel(std::optional CM) { + if (CM) { + if (*CM != CodeModel::Small && *CM != CodeModel::Medium && + *CM != CodeModel::Large) + report_fatal_error( + "Target only supports CodeModel Small, Medium or Large"); + return *CM; + } + return CodeModel::Small; +} + +// Create an ILP32 architecture model +Sw64TargetMachine::Sw64TargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + std::optional RM, + std::optional CM, + CodeGenOpt::Level OL, bool JIT) + : LLVMTargetMachine( + T, + "e-m:e-p:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n64-S128-v256:256", + TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), + getEffectiveSw64CodeModel(CM), OL), + TLOF(std::make_unique()), + ABI(Sw64ABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)), + Subtarget(TT, std::string(CPU), std::string(FS), *this) { + initAsmInfo(); +} + +Sw64TargetMachine::~Sw64TargetMachine() = default; + +namespace { + +// Sw64 Code Generator Pass Configuration Options. +class Sw64PassConfig : public TargetPassConfig { +public: + Sw64PassConfig(Sw64TargetMachine &TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) { + if (TM.getOptLevel() != CodeGenOpt::None) + substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); + } + + Sw64TargetMachine &getSw64TargetMachine() const { + return getTM(); + } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + DAG->addMutation(createSw64MacroFusionDAGMutation()); + return DAG; + } + + ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMI *DAG = createGenericSchedPostRA(C); + DAG->addMutation(createSw64MacroFusionDAGMutation()); + return DAG; + } + + void addIRPasses() override; + bool addILPOpts() override; + bool addInstSelector() override; + void addPreSched2() override; + void addPreEmitPass() override; + void addPreRegAlloc() override; + void addPreLegalizeMachineIR() override; + // for Inst Selector. 
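+  // GlobalISel hook; the override below simply schedules the generic
+  // InstructionSelect pass.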
+ bool addGlobalInstructionSelect() override; +}; + +} // end anonymous namespace + +TargetPassConfig *Sw64TargetMachine::createPassConfig(PassManagerBase &PM) { + return new Sw64PassConfig(*this, PM); +} + +void Sw64PassConfig::addIRPasses() { + addPass(createAtomicExpandPass()); + + if (EnablePrefetch) + addPass(createLoopDataPrefetchPass()); + + TargetPassConfig::addIRPasses(); +} + +void Sw64PassConfig::addPreLegalizeMachineIR() { + addPass(createSw64PreLegalizeCombiner()); +} + +void Sw64PassConfig::addPreSched2() { addPass(createSw64ExpandPseudo2Pass()); } + +bool Sw64PassConfig::addInstSelector() { + addPass(createSw64ISelDag(getSw64TargetMachine(), getOptLevel())); + return false; +} + +void Sw64PassConfig::addPreRegAlloc() { + addPass(createSw64IEEEConstraintPass()); +} + +void Sw64PassConfig::addPreEmitPass() { + addPass(createSw64BranchSelection()); + addPass(createSw64LLRPPass(getSw64TargetMachine())); + addPass(createSw64ExpandPseudoPass()); +} + +bool Sw64PassConfig::addILPOpts() { + + if (EnableMCR) + addPass(&MachineCombinerID); + + return true; +} + +bool Sw64PassConfig::addGlobalInstructionSelect() { + addPass(new InstructionSelect()); + return false; +} + +// Force static initialization. +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64Target() { + RegisterTargetMachine X(getTheSw64Target()); + + PassRegistry *PR = PassRegistry::getPassRegistry(); + initializeSw64BranchSelectionPass(*PR); + initializeSw64PreLegalizerCombinerPass(*PR); + initializeSw64DAGToDAGISelPass(*PR); +} + +TargetTransformInfo +Sw64TargetMachine::getTargetTransformInfo(const Function &F) const { + return TargetTransformInfo(Sw64TTIImpl(this, F)); +} + +MachineFunctionInfo *Sw64TargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return Sw64MachineFunctionInfo::create(Allocator, F, + STI); +} diff --git a/llvm/lib/Target/Sw64/Sw64TargetMachine.h b/llvm/lib/Target/Sw64/Sw64TargetMachine.h new file mode 100644 index 000000000000..40e34b131a42 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetMachine.h @@ -0,0 +1,61 @@ +//===-- Sw64TargetMachine.h - Define TargetMachine for Sw64 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Sw64 specific subclass of TargetMachine. 
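+// Clients normally do not construct this class directly. Once
+// LLVMInitializeSw64Target() has registered the target, a tool or JIT reaches
+// it through the registry, roughly like the following sketch (illustrative
+// only; TripleStr and the empty CPU/feature strings are placeholders, not
+// values defined by this patch):
+//
+//   std::string Err;
+//   const Target *T = TargetRegistry::lookupTarget(TripleStr, Err);
+//   TargetMachine *TM =
+//       T->createTargetMachine(TripleStr, /*CPU=*/"", /*Features=*/"",
+//                              TargetOptions(), /*RM=*/std::nullopt);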
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETMACHINE_H +#define LLVM_LIB_TARGET_SW64_SW64TARGETMACHINE_H + +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" +#include +#include + +namespace llvm { + +class Sw64TargetMachine : public LLVMTargetMachine { + std::unique_ptr TLOF; + Sw64ABIInfo ABI; + Sw64Subtarget Subtarget; + +public: + Sw64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); + ~Sw64TargetMachine() override; + + const Sw64ABIInfo &getABI() const { return ABI; } + const Sw64Subtarget *getSubtargetImpl() const { return &Subtarget; } + const Sw64Subtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + + // Pass Pipeline Configuration + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; + + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } +}; + +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp new file mode 100644 index 000000000000..545eccc94202 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp @@ -0,0 +1,121 @@ +//===-- Sw64TargetObjectFile.cpp - Sw64 object files --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetObjectFile.h" +#include "Sw64Subtarget.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; +void Sw64TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); + + SmallDataSection = getContext().getELFSection( + ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC); + // TextSection - see MObjectFileInfo.cpp + // StaticCtorSection - see MObjectFileInfo.cpp + // StaticDtorSection - see MObjectFileInfo.cpp +} +// A address must be loaded from a small section if its size is less than the +// small section size threshold. Data in this section could be addressed by +// using gp_rel operator. +bool Sw64TargetObjectFile::isInSmallSection(uint64_t Size) const { + // gcc has traditionally not treated zero-sized objects as small data, so this + // is effectively part of the ABI. + return Size > 0 && Size <= SSThreshold; +} + +// Return true if this global address should be placed into small data/bss +// section. +bool Sw64TargetObjectFile::isGlobalInSmallSection( + const GlobalObject *GO, const TargetMachine &TM) const { + // Only global variables, not functions. 
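+  // Function symbols never qualify, so anything that is not a GlobalVariable
+  // is rejected before linkage, explicit sections or size are considered.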
+ const GlobalVariable *GVA = dyn_cast(GO); + if (!GVA) + return false; + + // If the variable has an explicit section, it is placed in that section. + if (GVA->hasSection()) { + StringRef Section = GVA->getSection(); + + // Explicitly placing any variable in the small data section overrides + // the global -G value. + if (Section == ".sdata" || Section == ".sbss") + return true; + + // Otherwise reject putting the variable to small section if it has an + // explicit section name. + return false; + } + + if (((GVA->hasExternalLinkage() && GVA->isDeclaration()) || + GVA->hasCommonLinkage())) + return false; + + Type *Ty = GVA->getValueType(); + // It is possible that the type of the global is unsized, i.e. a declaration + // of a extern struct. In this case don't presume it is in the small data + // section. This happens e.g. when building the FreeBSD kernel. + if (!Ty->isSized()) + return false; + + return isInSmallSection( + GVA->getParent()->getDataLayout().getTypeAllocSize(Ty)); +} + +MCSection *Sw64TargetObjectFile::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + // Handle Small Section classification here. + if (Kind.isBSS() && isGlobalInSmallSection(GO, TM)) + return SmallBSSSection; + if (Kind.isData() && isGlobalInSmallSection(GO, TM)) + return SmallDataSection; + if (Kind.isReadOnly()) + return GO->hasLocalLinkage() ? ReadOnlySection : DataRelROSection; + + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM); +} + +void Sw64TargetObjectFile::getModuleMetadata(Module &M) { + SmallVector ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + + for (const auto &MFE : ModuleFlags) { + StringRef Key = MFE.Key->getString(); + if (Key == "SmallDataLimit") { + SSThreshold = mdconst::extract(MFE.Val)->getZExtValue(); + break; + } + } +} + +// Return true if this constant should be placed into small data section. +bool Sw64TargetObjectFile::isConstantInSmallSection(const DataLayout &DL, + const Constant *CN) const { + return isInSmallSection(DL.getTypeAllocSize(CN->getType())); +} + +MCSection *Sw64TargetObjectFile::getSectionForConstant(const DataLayout &DL, + SectionKind Kind, + const Constant *C, + Align &Alignment) const { + if (isConstantInSmallSection(DL, C)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, + Alignment); +} diff --git a/llvm/lib/Target/Sw64/Sw64TargetObjectFile.h b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.h new file mode 100644 index 000000000000..0bae78a8106b --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.h @@ -0,0 +1,49 @@ +//===-- Sw64TargetObjectFile.h - Sw64 Object Info -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
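+// The small-data threshold defaults to 8 bytes (SSThreshold below) and can be
+// raised or lowered per module through the "SmallDataLimit" module flag read
+// by getModuleMetadata(); a front end would typically emit that flag with
+// Module::addModuleFlag (an assumption about the producer, not something this
+// file checks).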
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_SW64_SW64TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { + +static const unsigned CodeModelLargeSize = 256; + +class Sw64TargetObjectFile : public TargetLoweringObjectFileELF { + MCSection *BSSSectionLarge; + MCSection *DataSectionLarge; + MCSection *ReadOnlySectionLarge; + MCSection *DataRelROSectionLarge; + MCSection *SmallDataSection; + MCSection *SmallBSSSection; + unsigned SSThreshold = 8; + +public: + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + /// Return true if this global address should be placed into small data/bss + /// section. + bool isGlobalInSmallSection(const GlobalObject *GO, + const TargetMachine &TM) const; + + MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; + + /// Return true if this constant should be placed into small data section. + bool isConstantInSmallSection(const DataLayout &DL, const Constant *CN) const; + + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, + const Constant *C, + Align &Alignment) const override; + + void getModuleMetadata(Module &M) override; + + bool isInSmallSection(uint64_t Size) const; +}; +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64TargetStreamer.h b/llvm/lib/Target/Sw64/Sw64TargetStreamer.h new file mode 100644 index 000000000000..884c03d97eb5 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetStreamer.h @@ -0,0 +1,150 @@ +//===-- Sw64TargetStreamer.h - Sw64 Target Streamer ------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETSTREAMER_H +#define LLVM_LIB_TARGET_SW64_SW64TARGETSTREAMER_H + +#include "MCTargetDesc/Sw64ABIFlagsSection.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/FormattedStream.h" +#include + +namespace llvm { + +struct Sw64ABIFlagsSection; + +class Sw64TargetStreamer : public MCTargetStreamer { +public: + Sw64TargetStreamer(MCStreamer &S); + + virtual void setPic(bool Value) {} + + virtual void emitDirectiveSetReorder(); + virtual void emitDirectiveSetNoReorder(); + virtual void emitDirectiveSetMacro(); + virtual void emitDirectiveSetNoMacro(); + virtual void emitDirectiveSetAt(); + virtual void emitDirectiveSetNoAt(); + virtual void emitDirectiveEnd(StringRef Name); + + virtual void emitDirectiveEnt(const MCSymbol &Symbol); + virtual void emitDirectiveNaN2008(); + virtual void emitDirectiveNaNLegacy(); + virtual void emitDirectiveInsn(); + virtual void emitDirectiveSetCore3b(); + virtual void emitDirectiveSetCore4(); + virtual void emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg); + virtual void emitDirectiveSetArch(StringRef Arch); + + void prettyPrintAsm(MCInstPrinter &InstPrinter, uint64_t Address, + const MCInst &Inst, const MCSubtargetInfo &STI, + raw_ostream &OS) override; + + void emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI); + + void forbidModuleDirective() { ModuleDirectiveAllowed = false; } + void reallowModuleDirective() { ModuleDirectiveAllowed = true; } + bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; } + + // This method enables template classes to set internal abi flags + // structure values. 
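+  // PredicateLibrary is whatever object drives the streamer (for example the
+  // assembly parser); the only requirements are a getABI() accessor and the
+  // predicates consumed by Sw64ABIFlagsSection::setAllFromPredicates().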
+ template + void updateABIInfo(const PredicateLibrary &P) { + ABI = P.getABI(); + ABIFlagsSection.setAllFromPredicates(P); + } + + Sw64ABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; } + const Sw64ABIInfo &getABI() const { + assert(ABI && "ABI hasn't been set!"); + return *ABI; + } + +protected: + std::optional ABI; + Sw64ABIFlagsSection ABIFlagsSection; + + bool GPRInfoSet; + unsigned GPRBitMask; + int GPROffset; + + bool FPRInfoSet; + unsigned FPRBitMask; + int FPROffset; + + bool FrameInfoSet; + int FrameOffset; + unsigned FrameReg; + unsigned ReturnReg; + +private: + bool ModuleDirectiveAllowed; +}; + +// This part is for ascii assembly output +class Sw64TargetAsmStreamer : public Sw64TargetStreamer { + formatted_raw_ostream &OS; + +public: + Sw64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); + + void emitDirectiveSetReorder() override; + void emitDirectiveSetNoReorder() override; + void emitDirectiveSetMacro() override; + void emitDirectiveSetNoMacro() override; + void emitDirectiveSetAt() override; + void emitDirectiveSetNoAt() override; + void emitDirectiveEnd(StringRef Name) override; + + void emitDirectiveEnt(const MCSymbol &Symbol) override; + void emitDirectiveNaN2008() override; + void emitDirectiveNaNLegacy() override; + void emitDirectiveInsn() override; + void emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) override; + void emitDirectiveSetCore3b() override; + void emitDirectiveSetCore4() override; + + void emitDirectiveSetArch(StringRef Arch) override; +}; + +// This part is for ELF object output +class Sw64TargetELFStreamer : public Sw64TargetStreamer { + bool MicroSw64Enabled; + const MCSubtargetInfo &STI; + bool Pic; + +public: + MCELFStreamer &getStreamer(); + Sw64TargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); + + void setPic(bool Value) override { Pic = Value; } + + void emitLabel(MCSymbol *Symbol) override; + void finish() override; + + void emitDirectiveSetNoReorder() override; + + void emitDirectiveEnt(const MCSymbol &Symbol) override; + void emitDirectiveNaN2008() override; + void emitDirectiveNaNLegacy() override; + void emitDirectiveInsn() override; + void emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) override; + + void emitSw64AbiFlags(); +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp new file mode 100644 index 000000000000..44c98b98fa01 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp @@ -0,0 +1,787 @@ +//===-- Sw64TargetTransformInfo.cpp - Sw64-specific TTI -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a TargetTransformInfo analysis pass specific to the +// Sw64 target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. 
+// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/CodeGen/CostTable.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +#define DEBUG_TYPE "sw64tti" + +//===----------------------------------------------------------------------===// +// +// Sw64 cost model. +// +//===----------------------------------------------------------------------===// + +InstructionCost Sw64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented + // yet. + if (BitSize > 64) + return TTI::TCC_Free; + + if (Imm == 0) + return TTI::TCC_Free; + + if (Imm.getBitWidth() <= 64) { + // Constants loaded via lgfi. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llihf: + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Basic; + + return 2 * TTI::TCC_Basic; + } + + return 4 * TTI::TCC_Basic; +} + +InstructionCost Sw64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented + // yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (Opcode) { + default: + return TTI::TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. This prevents the + // creation of new constants for every base constant that gets constant + // folded with the offset. + if (Idx == 0) + return 2 * TTI::TCC_Basic; + return TTI::TCC_Free; + case Instruction::Store: + return TTI::TCC_Basic; + case Instruction::ICmp: + case Instruction::Add: + case Instruction::Sub: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // We use algfi/slgfi to add/subtract 32-bit unsigned immediates. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Or their negation, by swapping addition vs. subtraction. + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Mul: + case Instruction::Or: + case Instruction::Xor: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Masks supported by oilf/xilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Masks supported by oihf/xihf. + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Free; + } + break; + case Instruction::And: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Any 32-bit AND operation can by implemented via nilf. + if (BitSize <= 32) + return TTI::TCC_Free; + // 64-bit masks supported by nilf. + if (isUInt<32>(~Imm.getZExtValue())) + return TTI::TCC_Free; + // 64-bit masks supported by nilh. 
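+      // i.e. an immediate whose low 32 bits are all ones can only clear bits
+      // in the upper half (for example 0x00ff0000ffffffff).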
+ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff) + return TTI::TCC_Free; + } + break; + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + break; + } + + return Sw64TTIImpl::getIntImmCost(Imm, Ty, CostKind); +} + +InstructionCost Sw64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, + unsigned Idx, const APInt &Imm, + Type *Ty, + TTI::TargetCostKind CostKind) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented + // yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (IID) { + default: + return TTI::TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + // These get expanded to include a normal addition/subtraction. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // These get expanded to include a normal multiplication. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + } + return Sw64TTIImpl::getIntImmCost(Imm, Ty, CostKind); +} + +bool Sw64TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) { + // check instruction count (first), and don't care about + // ImmCost, since offsets are checked explicitly. + return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, + C1.NumBaseAdds, C1.ScaleCost, C1.SetupCost) < + std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, + C2.NumBaseAdds, C2.ScaleCost, C2.SetupCost); +} + +unsigned Sw64TTIImpl::getNumberOfRegisters(bool Vector) { + if (Vector) { + return 0; + } + return 12; +} + +bool Sw64TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { + EVT VT = TLI->getValueType(DL, DataType); + return (VT.isScalarInteger() && TLI->isTypeLegal(VT)); +} + +void Sw64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { + // Find out if L contains a call, what the machine instruction count + // estimate is, and how many stores there are. 
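+  // The store count matters because the unroll factor chosen below is capped
+  // at roughly 12 / NumStores so the processor does not run out of store
+  // tags; memcpy/memset intrinsics are counted as stores for the same reason.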
+ bool HasCall = false; + InstructionCost NumStores = 0; + for (auto &BB : L->blocks()) + for (auto &I : *BB) { + if (isa(&I) || isa(&I)) { + if (const Function *F = cast(I).getCalledFunction()) { + if (isLoweredToCall(F)) + HasCall = true; + if (F->getIntrinsicID() == Intrinsic::memcpy || + F->getIntrinsicID() == Intrinsic::memset) + NumStores++; + } else { // indirect call. + HasCall = true; + } + } + if (isa(&I)) { + Type *MemAccessTy = I.getOperand(0)->getType(); + NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, + std::nullopt, 0, TTI::TCK_RecipThroughput); + } + } + + // The processor will run out of store tags if too many stores + // are fed into it too quickly. Therefore make sure there are not + // too many stores in the resulting unrolled loop. + unsigned const NumStoresVal = *NumStores.getValue(); + unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX); + + if (HasCall) { + // Only allow full unrolling if loop has any calls. + UP.FullUnrollMaxCount = Max; + UP.MaxCount = 1; + return; + } + + UP.MaxCount = Max; + if (UP.MaxCount <= 1) + return; + + // Allow partial and runtime trip count unrolling. + UP.Partial = UP.Runtime = true; + + UP.PartialThreshold = 75; + if (L->getLoopDepth() > 1) + UP.PartialThreshold *= 2; + + UP.DefaultUnrollRuntimeCount = 4; + + // Allow expensive instructions in the pre-header of the loop. + UP.AllowExpensiveTripCount = true; + UP.UnrollAndJam = true; + + UP.Force = true; +} + +// Return the bit size for the scalar type or vector element +// type. getScalarSizeInBits() returns 0 for a pointer type. +static unsigned getScalarSizeInBits(Type *Ty) { + unsigned Size = (Ty->isPtrOrPtrVectorTy() ? 64U : Ty->getScalarSizeInBits()); + assert(Size > 0 && "Element must have non-zero size."); + return Size; +} + +// getNumberOfParts() calls getTypeLegalizationCost() which splits the vector +// type until it is legal. This would e.g. return 4 for <6 x i64>, instead of +// 3. +static unsigned getNumVectorRegs(Type *Ty) { return 0; } + +unsigned Sw64TTIImpl::getMaxInterleaveFactor(ElementCount VF) { + return ST->getMaxInterleaveFactor(); +} + +TypeSize Sw64TTIImpl::getRegisterBitWidth(bool Vector) const { + return TypeSize::getFixed(64); +} + +unsigned Sw64TTIImpl::getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind, + const Instruction *I) { + if (CostKind != TTI::TCK_RecipThroughput) + return Opcode == Instruction::PHI ? 0 : 1; + assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind"); + // Branches are assumed to be predicted. + return 0; +} + +bool Sw64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, + ArrayRef Args) { + + // A helper that returns a vector type from the given type. The number of + // elements in type Ty determine the vector width. + auto toVectorTy = [&](Type *ArgTy) { + return FixedVectorType::get(ArgTy->getScalarType(), + cast(DstTy)->getNumElements()); + }; + + // Exit early if DstTy is not a vector type whose elements are at least + // 16-bits wide. + if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16) + return false; + + // Determine if the operation has a widening variant. We consider both the + // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the + // instructions. + // + // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we + // verify that their extending operands are eliminated during code + // generation. + switch (Opcode) { + case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2). 
+ case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2). + break; + default: + return false; + } + + // To be a widening instruction (either the "wide" or "long" versions), the + // second operand must be a sign- or zero extend having a single user. We + // only consider extends having a single user because they may otherwise not + // be eliminated. + if (Args.size() != 2 || + (!isa(Args[1]) && !isa(Args[1])) || + !Args[1]->hasOneUse()) + return false; + auto *Extend = cast(Args[1]); + + // Legalize the destination type and ensure it can be used in a widening + // operation. + auto DstTyL = getTypeLegalizationCost(DstTy); + unsigned DstElTySize = DstTyL.second.getScalarSizeInBits(); + if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits()) + return false; + + // Legalize the source type and ensure it can be used in a widening + // operation. + auto *SrcTy = toVectorTy(Extend->getSrcTy()); + auto SrcTyL = getTypeLegalizationCost(SrcTy); + unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits(); + if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits()) + return false; + + // Get the total number of vector elements in the legalized types. + InstructionCost NumDstEls = + DstTyL.first * DstTyL.second.getVectorMinNumElements(); + InstructionCost NumSrcEls = + SrcTyL.first * SrcTyL.second.getVectorMinNumElements(); + + // Return true if the legalized types have the same number of vector elements + // and the destination element type size is twice that of the source type. + return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize; +} + +InstructionCost Sw64TTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, + ArrayRef Args, const Instruction *CxtI) { + // TODO: Handle more cost kinds. + if (CostKind != TTI::TCK_RecipThroughput) + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, + Args, CxtI); + + // Legalize the type. + std::pair LT = getTypeLegalizationCost(Ty); + + // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.), + // add in the widening overhead specified by the sub-target. Since the + // extends feeding widening instructions are performed automatically, they + // aren't present in the generated code and have a zero cost. By adding a + // widening overhead here, we attach the total cost of the combined operation + // to the widening instruction. + InstructionCost Cost = 0; + if (isWideningInstruction(Ty, Opcode, Args)) + Cost += ST->getWideningBaseCost(); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + + switch (ISD) { + default: + return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info); + case ISD::SDIV: + if (Op2Info.isConstant() && Op2Info.isUniform() && Op2Info.isPowerOf2()) { + // On Sw64, scalar signed division by constants power-of-two are + // normally expanded to the sequence ADD + CMP + SELECT + SRA. + // The OperandValue properties many not be same as that of previous + // operation; conservatively assume OP_None. 
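+      // Worked example (illustrative): x sdiv 8 becomes
+      //   x < 0 ? (x + 7) >> 3 : x >> 3
+      // i.e. compare the sign, select the biased value, then shift right
+      // arithmetically; the scalar operation costs summed below stand for
+      // that sequence.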
+ Cost += + getArithmeticInstrCost(Instruction::Add, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + Cost += + getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + Cost += + getArithmeticInstrCost(Instruction::Select, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + Cost += + getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + return Cost; + } + [[fallthrough]]; + case ISD::UDIV: + if (Op2Info.isConstant() && Op2Info.isUniform()) { + auto VT = TLI->getValueType(DL, Ty); + if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) { + // Vector signed division by constant are expanded to the + // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division + // to MULHS + SUB + SRL + ADD + SRL. + InstructionCost MulCost = + getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + InstructionCost AddCost = + getArithmeticInstrCost(Instruction::Add, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + InstructionCost ShrCost = + getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1; + } + } + + Cost += + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); + if (Ty->isVectorTy()) { + // On Sw64, vector divisions are not supported natively and are + // expanded into scalar divisions of each pair of elements. + Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind, + Op1Info, Op2Info); + Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind, + Op1Info, Op2Info); + // TODO: if one of the arguments is scalar, then it's not necessary to + // double the cost of handling the vector elements. + Cost += Cost; + } + return Cost; + + case ISD::ADD: + case ISD::MUL: + case ISD::XOR: + case ISD::OR: + case ISD::AND: + // These nodes are marked as 'custom' for combining purposes only. + // We know that they are legal. See LowerAdd in ISelLowering. + return (Cost + 1) * LT.first; + + case ISD::FADD: + // These nodes are marked as 'custom' just to lower them to SVE. + // We know said lowering will incur no additional cost. + if (isa(Ty) && !Ty->getScalarType()->isFP128Ty()) + return (Cost + 2) * LT.first; + + return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info); + } +} +InstructionCost Sw64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, + VectorType *Tp, ArrayRef Mask, + TTI::TargetCostKind CostKind, + int Index, VectorType *SubTp, + ArrayRef Args) { + Kind = improveShuffleKindFromMask(Kind, Mask); + return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); +} +// Return the log2 difference of the element sizes of the two vector types. +static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) { + unsigned Bits0 = Ty0->getScalarSizeInBits(); + unsigned Bits1 = Ty1->getScalarSizeInBits(); + + if (Bits1 > Bits0) + return (Log2_32(Bits1) - Log2_32(Bits0)); + + return (Log2_32(Bits0) - Log2_32(Bits1)); +} + +// Return the number of instructions needed to truncate SrcTy to DstTy. +unsigned Sw64TTIImpl::getVectorTruncCost(Type *SrcTy, Type *DstTy) { return 1; } + +// Return the cost of converting a vector bitmask produced by a compare +// (SrcTy), to the type of the select or extend instruction (DstTy). 
+unsigned Sw64TTIImpl::getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
+  assert(SrcTy->isVectorTy() && DstTy->isVectorTy() &&
+         "Should only be called with vector types.");
+
+  unsigned PackCost = 0;
+  unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
+  unsigned DstScalarBits = DstTy->getScalarSizeInBits();
+  unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+  if (SrcScalarBits > DstScalarBits)
+    // The bitmask will be truncated.
+    PackCost = getVectorTruncCost(SrcTy, DstTy);
+  else if (SrcScalarBits < DstScalarBits) {
+    unsigned DstNumParts = getNumVectorRegs(DstTy);
+    // Each vector select needs its part of the bitmask unpacked.
+    PackCost = Log2Diff * DstNumParts;
+    // Extra cost for moving part of mask before unpacking.
+    PackCost += DstNumParts - 1;
+  }
+
+  return PackCost;
+}
+
+// Return the type of the compared operands. This is needed to compute the
+// cost for a Select / ZExt or SExt instruction.
+static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
+  Type *OpTy = nullptr;
+  if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
+    OpTy = CI->getOperand(0)->getType();
+  else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
+    if (LogicI->getNumOperands() == 2)
+      if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
+        if (isa<CmpInst>(LogicI->getOperand(1)))
+          OpTy = CI0->getOperand(0)->getType();
+
+  return OpTy;
+}
+
+unsigned Sw64TTIImpl::getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+                                                    const Instruction *I) {
+  unsigned Cost = 0;
+  return Cost;
+}
+
+InstructionCost Sw64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+                                              Type *Src,
+                                              TTI::CastContextHint CCH,
+                                              TTI::TargetCostKind CostKind,
+                                              const Instruction *I) {
+  // FIXME: Can the logic below also be used for these cost kinds?
+  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
+    auto BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+    return BaseCost == 0 ? BaseCost : 1;
+  }
+
+  unsigned DstScalarBits = Dst->getScalarSizeInBits();
+  unsigned SrcScalarBits = Src->getScalarSizeInBits();
+
+  if (!Src->isVectorTy()) {
+    assert(!Dst->isVectorTy());
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+      if (SrcScalarBits >= 32 ||
+          (I != nullptr && isa<LoadInst>(I->getOperand(0))))
+        return 1;
+      return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
+    }
+
+    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+        Src->isIntegerTy(1)) {
+
+      // This should be extension of a compare i1 result, which is done with
+      // ipm and a varying sequence of instructions.
+      unsigned Cost = 0;
+      if (Opcode == Instruction::SExt)
+        Cost = (DstScalarBits < 64 ? 3 : 4);
+      if (Opcode == Instruction::ZExt)
+        Cost = 3;
+      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+        // If operands of an fp-type were compared, this costs +1.
+        Cost++;
+      return Cost;
+    }
+  }
+
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+}
+
+// Scalar i8 / i16 operations will typically be made after first extending
+// the operands to i32.
+static unsigned getOperandsExtensionCost(const Instruction *I) {
+  unsigned ExtCost = 0;
+  for (Value *Op : I->operands())
+    // A load of i8 or i16 sign/zero extends to i32.
+ if (!isa(Op) && !isa(Op)) + ExtCost++; + + return ExtCost; +} + +InstructionCost Sw64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) { + if (CostKind != TTI::TCK_RecipThroughput) + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); + + if (!ValTy->isVectorTy()) { + switch (Opcode) { + case Instruction::ICmp: { + // A loaded value compared with 0 with multiple users becomes Load and + // Test. The load is then not foldable, so return 0 cost for the ICmp. + unsigned ScalarBits = ValTy->getScalarSizeInBits(); + if (I != nullptr && ScalarBits >= 32) + if (LoadInst *Ld = dyn_cast(I->getOperand(0))) + if (const ConstantInt *C = dyn_cast(I->getOperand(1))) + if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() && + C->isZero()) + return 0; + + unsigned Cost = 1; + if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16) + Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2); + return Cost; + } + case Instruction::Select: + if (ValTy->isFloatingPointTy()) + return 4; // No load on condition for FP - costs a conditional jump. + return 1; // Load On Condition / Select Register. + } + } + + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); +} + +// Check if a load may be folded as a memory operand in its user. +bool Sw64TTIImpl::isFoldableLoad(const LoadInst *Ld, + const Instruction *&FoldedValue) { + if (!Ld->hasOneUse()) + return false; + FoldedValue = Ld; + const Instruction *UserI = cast(*Ld->user_begin()); + unsigned LoadedBits = getScalarSizeInBits(Ld->getType()); + unsigned TruncBits = 0; + unsigned SExtBits = 0; + unsigned ZExtBits = 0; + if (UserI->hasOneUse()) { + unsigned UserBits = UserI->getType()->getScalarSizeInBits(); + if (isa(UserI)) + TruncBits = UserBits; + else if (isa(UserI)) + SExtBits = UserBits; + else if (isa(UserI)) + ZExtBits = UserBits; + } + if (TruncBits || SExtBits || ZExtBits) { + FoldedValue = UserI; + UserI = cast(*UserI->user_begin()); + // Load (single use) -> trunc/extend (single use) -> UserI + } + if ((UserI->getOpcode() == Instruction::Sub || + UserI->getOpcode() == Instruction::SDiv || + UserI->getOpcode() == Instruction::UDiv) && + UserI->getOperand(1) != FoldedValue) + return false; // Not commutative, only RHS foldable. + // LoadOrTruncBits holds the number of effectively loaded bits, but 0 if an + // extension was made of the load. + unsigned LoadOrTruncBits = + ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits)); + switch (UserI->getOpcode()) { + case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64 + case Instruction::Sub: + case Instruction::ICmp: + if (LoadedBits == 32 && ZExtBits == 64) + return true; + LLVM_FALLTHROUGH; + case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64 + if (UserI->getOpcode() != Instruction::ICmp) { + if (LoadedBits == 16 && SExtBits == 32) + return true; + if (LoadOrTruncBits == 16) + return true; + } + LLVM_FALLTHROUGH; + case Instruction::SDiv: // SE: 32->64 + if (LoadedBits == 32 && SExtBits == 64) + return true; + LLVM_FALLTHROUGH; + case Instruction::UDiv: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // All possible extensions of memory checked above. + // Comparison between memory and immediate. 
+    if (UserI->getOpcode() == Instruction::ICmp)
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(UserI->getOperand(1)))
+        if (CI->getValue().isIntN(16))
+          return true;
+    return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
+    break;
+  }
+  return false;
+}
+
+static bool isBswapIntrinsicCall(const Value *V) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    if (auto *CI = dyn_cast<CallInst>(I))
+      if (auto *F = CI->getCalledFunction())
+        if (F->getIntrinsicID() == Intrinsic::bswap)
+          return true;
+  return false;
+}
+
+InstructionCost Sw64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
+                                             MaybeAlign Alignment,
+                                             unsigned AddressSpace,
+                                             TTI::TargetCostKind CostKind,
+                                             TTI::OperandValueInfo OpInfo,
+                                             const Instruction *I) {
+  assert(!Ty->isVoidTy() && "Invalid type");
+
+  // TODO: Handle other cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return 1;
+
+  // Type legalization can't handle structs.
+  if (TLI->getValueType(DL, Ty, true) == MVT::Other)
+    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
+                                  CostKind);
+
+  auto LT = getTypeLegalizationCost(Ty);
+
+  if (ST->isMisaligned256StoreSlow() && Opcode == Instruction::Store &&
+      LT.second.is256BitVector() && (!Alignment || *Alignment < Align(32))) {
+    // Unaligned stores are extremely inefficient. We don't split all
+    // unaligned 256-bit stores because of the negative impact that has been
+    // shown in practice on inlined block copy code.
+    // We make such stores expensive so that we will only vectorize if there
+    // are 6 other instructions getting vectorized.
+    const int AmortizationCost = 6;
+
+    return LT.first * 2 * AmortizationCost;
+  }
+
+  if (Ty->isVectorTy() &&
+      cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
+    unsigned ProfitableNumElements;
+    if (Opcode == Instruction::Store)
+      // We use a custom trunc store lowering so v.4b should be profitable.
+      ProfitableNumElements = 4;
+    else
+      // We scalarize the loads because there is no v.4b register and we
+      // have to promote the elements to v.2.
+      ProfitableNumElements = 8;
+
+    if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
+      unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
+      unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
+      // We generate 2 instructions per vector element.
+      return NumVectorizableInstsToAmortize * NumVecElts * 2;
+    }
+  }
+  return LT.first;
+}
+
+TargetTransformInfo::PopcntSupportKind
+Sw64TTIImpl::getPopcntSupport(unsigned TyWidth) {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  // Sw64 only supports a 64-bit popcount instruction.
+  if (TyWidth == 32 || TyWidth == 64)
+    return TTI::PSK_FastHardware;
+  return TTI::PSK_Software;
+}
diff --git a/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h
new file mode 100644
index 000000000000..cd1b8f2f2f3d
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h
@@ -0,0 +1,137 @@
+//===-- Sw64TargetTransformInfo.h - Sw64 specific TTI ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific
+/// to the Sw64 target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_SW64_SW64TARGETTRANSFORMINFO_H + +#include "Sw64.h" +#include "Sw64TargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/CodeGen/TargetLowering.h" + +namespace llvm { + +class Sw64TTIImpl : public BasicTTIImplBase { + typedef BasicTTIImplBase BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const Sw64Subtarget *ST; + const Sw64TargetLowering *TLI; + + const Sw64Subtarget *getST() const { return ST; } + const Sw64TargetLowering *getTLI() const { return TLI; } + + unsigned const LIBCALL_COST = 30; + + bool isWideningInstruction(Type *Ty, unsigned Opcode, + ArrayRef Args); + +public: + explicit Sw64TTIImpl(const Sw64TargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); + if (Vector) { + if (ST->hasSIMD()) + return 32; + return 0; + } + return 32; + } + + unsigned getMaxInterleaveFactor(ElementCount VF); + bool enableInterleavedAccessVectorization() { return true; } + TypeSize getRegisterBitWidth(bool Vector) const; + + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, + const Instruction *I); + + InstructionCost getMemoryOpCost( + unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); + + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); + + InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); + + InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); + InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); + + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2); + + unsigned getNumberOfRegisters(bool Vector); + + unsigned getCacheLineSize() const override { return 128; } + unsigned getPrefetchDistance() const override { return 524; } + unsigned getMinPrefetchStride(unsigned NumMemAccesses, + unsigned NumStridedMemAccesses, + unsigned NumPrefetches, + bool HasCall) const override { + return 1; + } + + bool hasDivRemOp(Type *DataType, bool IsSigned); + bool prefersVectorizedAddressing() { return false; } + bool LSRWithInstrQueries() { return true; } + bool supportsEfficientVectorElementLoadStore() { return true; } + + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, + ArrayRef Mask, + TTI::TargetCostKind CostKind, int Index, + VectorType *SubTp, + ArrayRef Args = std::nullopt); + unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); + unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); + unsigned 
getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, + const Instruction *I); + InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); + + TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/Sw64VectorVarDefine.td b/llvm/lib/Target/Sw64/Sw64VectorVarDefine.td new file mode 100644 index 000000000000..f6996237dbfe --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64VectorVarDefine.td @@ -0,0 +1,317 @@ +//===- Sw64InstrInfo.td - The Sw64 Instruction Set -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Sw64 Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +class ConstantSImmAsmOperandClass Supers = [], + int Offset = 0> : AsmOperandClass { + let Name = "ConstantSImm" # Bits # "_" # Offset; + let RenderMethod = "addConstantSImmOperands<" # Bits # ", " # Offset # ">"; + let PredicateMethod = "isConstantSImm<" # Bits # ", " # Offset # ">"; + let SuperClasses = Supers; + let DiagnosticType = "SImm" # Bits # "_" # Offset; +} + +class ConstantUImmAsmOperandClass Supers = [], + int Offset = 0> : AsmOperandClass { + let Name = "ConstantUImm" # Bits # "_" # Offset; + let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">"; + let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImm" # Bits # "_" # Offset; +} + +def ConstantUImm7Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm7Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<7, 2>"; +} + +def ConstantSImm8AsmOperandClass + : ConstantSImmAsmOperandClass<8, [ConstantUImm7Lsl2AsmOperandClass]>; + +def ConstantUImm8AsmOperandClass + : ConstantUImmAsmOperandClass<8, [ConstantUImm7Lsl2AsmOperandClass]>; + +foreach I = {8} in + def vsplat_simm # I : Operand { + let ParserMatchClass = + !cast("ConstantSImm" # I # "AsmOperandClass"); + } + +foreach I = {8} in + def vsplat_uimm # I : Operand { + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } + +// Generic case - only to support certain assembly pseudo instructions. 
+class UImmAnyAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "ImmAny"; + let RenderMethod = "addConstantUImmOperands<32>"; + let PredicateMethod = "isSImm<" # Bits # ">"; + let SuperClasses = Supers; + let DiagnosticType = "ImmAny"; +} + +class SImmAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "SImm" # Bits; + let RenderMethod = "addSImmOperands<" # Bits # ">"; + let PredicateMethod = "isSImm<" # Bits # ">"; + let SuperClasses = Supers; + let DiagnosticType = "SImm" # Bits; +} + +class UImmAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "UImm" # Bits; + let RenderMethod = "addUImmOperands<" # Bits # ">"; + let PredicateMethod = "isUImm<" # Bits # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImm" # Bits; +} + +def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> { + let Name = "UImm32_Coerced"; + let DiagnosticType = "UImm32_Coerced"; +} + +def SImm32RelaxedAsmOperandClass + : SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> { + let Name = "SImm32_Relaxed"; + let PredicateMethod = "isAnyImm<33>"; + let DiagnosticType = "SImm32_Relaxed"; +} + +def SImm32AsmOperandClass + : SImmAsmOperandClass<32, [SImm32RelaxedAsmOperandClass]>; +def ConstantUImm26AsmOperandClass + : ConstantUImmAsmOperandClass<26, [SImm32AsmOperandClass]>; +def ConstantUImm20AsmOperandClass + : ConstantUImmAsmOperandClass<20, [ConstantUImm26AsmOperandClass]>; + +def UImm16RelaxedAsmOperandClass + : UImmAsmOperandClass<16, [ConstantUImm20AsmOperandClass]> { + let Name = "UImm16_Relaxed"; + let PredicateMethod = "isAnyImm<16>"; + let DiagnosticType = "UImm16_Relaxed"; +} + +// FIXME: One of these should probably have UImm16AsmOperandClass as the +// superclass instead of UImm16RelaxedasmOPerandClass. +def UImm16AsmOperandClass + : UImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]>; +def SImm16RelaxedAsmOperandClass + : SImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]> { + let Name = "SImm16_Relaxed"; + let PredicateMethod = "isAnyImm<16>"; + let DiagnosticType = "SImm16_Relaxed"; +} + +def SImm16AsmOperandClass + : SImmAsmOperandClass<16, [SImm16RelaxedAsmOperandClass]>; + +def ConstantSImm10Lsl3AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl3"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 3>"; + let SuperClasses = [SImm16AsmOperandClass]; + let DiagnosticType = "SImm10_Lsl3"; +} + +def Sw64MemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let ParserMethod = "parseMemOperand"; +} + +foreach I = {16, 32} in + def simm # I : Operand { + let DecoderMethod = "DecodeSImmWithOffsetAndScale<" # I # ">"; + let ParserMatchClass = !cast("SImm" # I # "AsmOperandClass"); + } + +foreach I = {1, 2, 3} in + def Sw64MemSimm16Lsl # I # AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm16_" # I; + let SuperClasses = [Sw64MemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<10, " # I # ">"; + let DiagnosticType = "MemSImm10Lsl" # I; + } + +class mem_generic : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops ptr_rc, simm16); + let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = Sw64MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; +} + +def ConstantSImm10Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 2>"; + let SuperClasses = [ConstantSImm10Lsl3AsmOperandClass]; + 
let DiagnosticType = "SImm10_Lsl2"; +} + +foreach I = {2, 3} in + def simm16_ # I : Operand { + let DecoderMethod = "DecodeSImmWithOffsetAndScale<10, " # I # ">"; + let ParserMatchClass = + !cast("ConstantSImm10Lsl" # I # "AsmOperandClass"); + } + + def mem_simm16 : mem_generic { + let MIOperandInfo = (ops ptr_rc, !cast("simm16_2")); + let EncoderMethod = "getMemEncoding<2>"; + let ParserMatchClass = + !cast("Sw64MemSimm16Lsl2AsmOperand"); + } + + def mem_simm12 : mem_generic { + let MIOperandInfo = (ops ptr_rc, !cast("simm16_3")); + let EncoderMethod = "getMemEncoding<3>"; + let ParserMatchClass = + !cast("Sw64MemSimm16Lsl3AsmOperand"); + } + +class ConstantUImmRangeAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "ConstantUImmRange" # Bottom # "_" # Top; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isConstantUImmRange<" # Bottom # ", " # Top # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImmRange" # Bottom # "_" # Top; +} + +def ConstantSImm19Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm19Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<19, 2>"; + let SuperClasses = [ConstantUImm20AsmOperandClass]; + let DiagnosticType = "SImm19_Lsl2"; +} + +def ConstantSImm11AsmOperandClass + : ConstantSImmAsmOperandClass<11, [ConstantSImm10Lsl2AsmOperandClass]>; +def ConstantSImm10Lsl1AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl1"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 1>"; + let SuperClasses = [ConstantSImm11AsmOperandClass]; + let DiagnosticType = "SImm10_Lsl1"; +} + +def ConstantUImm10AsmOperandClass + : ConstantUImmAsmOperandClass<10, [ConstantSImm10Lsl1AsmOperandClass]>; +def ConstantSImm10AsmOperandClass + : ConstantSImmAsmOperandClass<10, [ConstantUImm10AsmOperandClass]>; +def ConstantSImm9AsmOperandClass + : ConstantSImmAsmOperandClass<9, [ConstantSImm10AsmOperandClass]>; +def ConstantSImm7Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm7Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<7, 2>"; + let SuperClasses = [ConstantSImm9AsmOperandClass]; + let DiagnosticType = "SImm7_Lsl2"; +} + +def ConstantUImm7Sub1AsmOperandClass + : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass], -1> { + // Specify the names since the -1 offset causes invalid identifiers otherwise. 
+ let Name = "UImm7_N1"; + let DiagnosticType = "UImm7_N1"; +} +def ConstantUImm7AsmOperandClass + : ConstantUImmAsmOperandClass<7, [ConstantUImm7Sub1AsmOperandClass]>; +def ConstantUImm6Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm6Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledUImm<6, 2>"; + let SuperClasses = [ConstantUImm7AsmOperandClass]; + let DiagnosticType = "UImm6_Lsl2"; +} + +def ConstantUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<6, [ConstantUImm6Lsl2AsmOperandClass]>; +def ConstantSImm6AsmOperandClass + : ConstantSImmAsmOperandClass<6, [ConstantUImm6AsmOperandClass]>; + +def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm5Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledUImm<5, 2>"; + let SuperClasses = [ConstantSImm6AsmOperandClass]; + let DiagnosticType = "UImm5_Lsl2"; +} +def ConstantUImm5_Range2_64AsmOperandClass + : ConstantUImmRangeAsmOperandClass<2, 64, [ConstantUImm5Lsl2AsmOperandClass]>; +def ConstantUImm5Plus33AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5_Range2_64AsmOperandClass], + 33>; +def ConstantUImm5ReportUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus33AsmOperandClass]> { + let Name = "ConstantUImm5_0_Report_UImm6"; + let DiagnosticType = "UImm5_0_Report_UImm6"; +} +def ConstantUImm5Plus32AsmOperandClass + : ConstantUImmAsmOperandClass< + 5, [ConstantUImm5ReportUImm6AsmOperandClass], 32>; +def ConstantUImm5Plus32NormalizeAsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus32AsmOperandClass], 32> { + let Name = "ConstantUImm5_32_Norm"; + // We must also subtract 32 when we render the operand. + let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; +} + +def ConstantUImm5Plus1ReportUImm6AsmOperandClass + : ConstantUImmAsmOperandClass< + 5, [ConstantUImm5Plus32NormalizeAsmOperandClass], 1>{ + let Name = "ConstantUImm5_Plus1_Report_UImm6"; +} + +def ConstantUImm5Plus1AsmOperandClass + : ConstantUImmAsmOperandClass< + 5, [ConstantUImm5Plus1ReportUImm6AsmOperandClass], 1>; +def ConstantUImm5AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus1AsmOperandClass]>; +def ConstantSImm5AsmOperandClass + : ConstantSImmAsmOperandClass<5, [ConstantUImm5AsmOperandClass]>; +def ConstantUImm4AsmOperandClass + : ConstantUImmAsmOperandClass<4, [ConstantSImm5AsmOperandClass]>; +def ConstantSImm4AsmOperandClass + : ConstantSImmAsmOperandClass<4, [ConstantUImm4AsmOperandClass]>; +def ConstantUImm3AsmOperandClass + : ConstantUImmAsmOperandClass<3, [ConstantSImm4AsmOperandClass]>; +def ConstantUImm2Plus1AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass], 1>; +def ConstantUImm2AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>; +def ConstantUImm1AsmOperandClass + : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>; + +// Unsigned Operands +foreach I = {1, 2, 3, 4, 5, 6, 7, 8, 10, 20, 26} in + def uimm # I : Operand { + let PrintMethod = "printUImm<" # I # ">"; + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } + +foreach I = {1, 2, 3, 4} in + def uimm # I # _ptr : Operand { + let PrintMethod = "printUImm<" # I # ">"; + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } diff --git a/llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt b/llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt new file mode 100644 index 000000000000..cf9ad922078a --- /dev/null +++ 
b/llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_component_library(LLVMSw64Info + Sw64TargetInfo.cpp + + LINK_COMPONENTS + Support + + ADD_TO_COMPONENT + Sw64 + + ) diff --git a/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp new file mode 100644 index 000000000000..02a741e72e15 --- /dev/null +++ b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp @@ -0,0 +1,24 @@ +//===-- Sw64TargetInfo.cpp - Sw64 Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "TargetInfo/Sw64TargetInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/TargetRegistry.h" +using namespace llvm; + +Target &llvm::getTheSw64Target() { + static Target TheSw64Target; + return TheSw64Target; +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64TargetInfo() { + RegisterTarget + X(getTheSw64Target(), "sw_64", "Sw64", "Sw64"); +} diff --git a/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h new file mode 100644 index 000000000000..723121a752b1 --- /dev/null +++ b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h @@ -0,0 +1,12 @@ +#ifndef LLVM_LIB_TARGET_SW_64_TARGETINFO_SW64TARGETINFO_H +#define LLVM_LIB_TARGET_SW_64_TARGETINFO_SW64TARGETINFO_H + +namespace llvm { + +class Target; + +Target &getTheSw64Target(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_RISCV_TARGETINFO_RISCVTARGETINFO_H diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 9358b1365958..1a65edd32065 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -486,6 +486,10 @@ StringRef sys::detail::getHostCPUNameForBPF() { #endif } +StringRef sys::detail::getHostCPUNameForSW64(StringRef ProcCpuinfoContent) { + return "sw_64"; +} + #if defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64__) || defined(_M_X64) @@ -1460,6 +1464,12 @@ StringRef sys::getHostCPUName() { return "generic"; } } +#elif defined(__linux__) && defined(__sw_64__) +StringRef sys::getHostCPUName() { + std::unique_ptr P = getProcCpuinfoContent(); + StringRef Content = P ? P->getBuffer() : ""; + return detail::getHostCPUNameForSW64(Content); +} #elif defined(__loongarch__) StringRef sys::getHostCPUName() { // Use processor id to detect cpu name. 
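A minimal sketch of how a client would locate the newly registered target through the
registry, assuming LLVMInitializeSw64TargetInfo() has run and the Sw64 components are
linked in (the triple string and helper name below are only illustrative, not part of
the patch):

    #include "llvm/MC/TargetRegistry.h"
    #include <string>

    // Returns the Sw64 target, or nullptr (with Error filled in) if it is
    // not available in this build.
    const llvm::Target *findSw64Target(std::string &Error) {
      return llvm::TargetRegistry::lookupTarget("sw_64-unknown-linux-gnu", Error);
    }
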
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 2d61113f32a8..f25b31f92da8 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -72,6 +72,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) { case spir: return "spir"; case spirv32: return "spirv32"; case spirv64: return "spirv64"; + case sw_64: return "sw_64"; case systemz: return "s390x"; case tce: return "tce"; case tcele: return "tcele"; @@ -131,6 +132,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { case sparcel: case sparc: return "sparc"; + case sw_64: return "sw64"; + case systemz: return "s390"; case x86: @@ -364,6 +367,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("sparc", sparc) .Case("sparcel", sparcel) .Case("sparcv9", sparcv9) + .Case("sw_64", sw_64) .Case("s390x", systemz) .Case("systemz", systemz) .Case("tce", tce) @@ -511,6 +515,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("sparc", Triple::sparc) .Case("sparcel", Triple::sparcel) .Cases("sparcv9", "sparc64", Triple::sparcv9) + .Cases("sw", "sw_64", "sw6a", "sw6b", "sw4d", "sw8a", Triple::sw_64) .Case("tce", Triple::tce) .Case("tcele", Triple::tcele) .Case("xcore", Triple::xcore) @@ -690,6 +695,17 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { (SubArchName.endswith("r6el") || SubArchName.endswith("r6"))) return Triple::MipsSubArch_r6; + if (SubArchName.startswith("sw")) { + if (SubArchName.endswith("6a")) + return Triple::Sw64SubArch_6a; + else if (SubArchName.endswith("6b")) + return Triple::Sw64SubArch_6b; + else if (SubArchName.endswith("4d")) + return Triple::Sw64SubArch_4d; + else if (SubArchName.endswith("8a")) + return Triple::Sw64SubArch_8a; + } + if (SubArchName == "powerpcspe") return Triple::PPCSubArch_spe; @@ -853,6 +869,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::sparcv9: case Triple::spir64: case Triple::spir: + case Triple::sw_64: case Triple::tce: case Triple::tcele: case Triple::thumbeb: @@ -1461,6 +1478,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::sparcv9: case llvm::Triple::spir64: case llvm::Triple::spirv64: + case llvm::Triple::sw_64: case llvm::Triple::systemz: case llvm::Triple::ve: case llvm::Triple::wasm64: @@ -1491,6 +1509,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::bpfeb: case Triple::bpfel: case Triple::msp430: + case Triple::sw_64: case Triple::systemz: case Triple::ve: T.setArch(UnknownArch); @@ -1604,6 +1623,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::sparcv9: case Triple::spir64: case Triple::spirv64: + case Triple::sw_64: case Triple::systemz: case Triple::ve: case Triple::wasm64: @@ -1675,6 +1695,7 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::spir: case Triple::spirv32: case Triple::spirv64: + case Triple::sw_64: case Triple::wasm32: case Triple::wasm64: case Triple::x86: @@ -1784,6 +1805,7 @@ bool Triple::isLittleEndian() const { case Triple::spir: case Triple::spirv32: case Triple::spirv64: + case Triple::sw_64: case Triple::tcele: case Triple::thumb: case Triple::ve: diff --git a/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll index 80c90cbf5be3..4aea4f945183 100644 --- a/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll +++ b/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll @@ -1,7 +1,7 @@ ; REQUIRES: cxx-shared-library ; RUN: %lli -jit-kind=mcjit -relocation-model=pic -code-model=large %s ; XFAIL: 
target={{.*-(cygwin|windows-msvc|windows-gnu)}} -; XFAIL: target={{(mips|mipsel)-.*}}, target={{(i686|i386).*}}, target={{(aarch64|arm).*}} +; XFAIL: target={{(mips|mipsel)-.*}}, target={{(i686|i386).*}}, target={{(aarch64|arm).*}}, target={{(sw_64).*}} declare ptr @__cxa_allocate_exception(i64) declare void @__cxa_throw(ptr, ptr, ptr) declare i32 @__gxx_personality_v0(...) diff --git a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg index b6874dd86ead..4af68aa23a5a 100644 --- a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg +++ b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg @@ -7,6 +7,7 @@ if ( | ("Mips" in targets) | ("PowerPC" in targets) | ("SystemZ" in targets) + | ("Sw64" in targets) ): config.unsupported = False else: @@ -25,6 +26,7 @@ if root.host_arch not in [ "PowerPC", "ppc64", "ppc64le", + "sw_64", "SystemZ", ]: config.unsupported = True diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg b/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg index 5095d98a5fd6..d584f3aa69c4 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg +++ b/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg @@ -1,6 +1,10 @@ if "armv4" in config.root.target_triple or "armv5" in config.root.target_triple: config.unsupported = True +# Remote MCJIT is not supported on sw_64 now. +if 'sw_64' in config.root.target_triple: + config.unsupported = True + # This is temporary, until Remote MCJIT works on ARM # See http://llvm.org/bugs/show_bug.cgi?id=18057 # if 'armv7' in config.root.target_triple: diff --git a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg index cbd7c544065d..4503e5fbf303 100644 --- a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg +++ b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg @@ -10,6 +10,7 @@ if config.root.host_arch not in [ "mips64", "mips64el", "loongarch64", + "sw_64", ]: config.unsupported = True diff --git a/llvm/test/tools/llvm-reduce/file-output-type.test b/llvm/test/tools/llvm-reduce/file-output-type.test index 93b9ca4acb08..b6a4347af425 100644 --- a/llvm/test/tools/llvm-reduce/file-output-type.test +++ b/llvm/test/tools/llvm-reduce/file-output-type.test @@ -1,4 +1,5 @@ # REQUIRES: default_triple +# XFAIL: target={{(sw_64).*}} # RUN: rm -rf %t.dir && mkdir %t.dir && cd %t.dir # RUN: llvm-as -o test-output-format.bc %p/Inputs/test-output-format.ll diff --git a/third-party/benchmark/src/cycleclock.h b/third-party/benchmark/src/cycleclock.h index d65d32a39d3d..e0eb7f3edbb9 100644 --- a/third-party/benchmark/src/cycleclock.h +++ b/third-party/benchmark/src/cycleclock.h @@ -173,6 +173,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__sw_64__) + // FIXME: SW CPU get cycle time + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(__loongarch__) struct timeval tv; gettimeofday(&tv, nullptr); -- Gitee From 75d00b6292113b3e513ca4a13d5a982eb997b8bc Mon Sep 17 00:00:00 2001 From: swcompiler Date: Thu, 23 Jan 2025 14:50:58 +0800 Subject: [PATCH 2/3] [Sw64] Add Sw64 target support for clang --- clang/include/clang/Basic/Attr.td | 12 + clang/include/clang/Basic/AttrDocs.td | 11 + clang/include/clang/Basic/BuiltinsSw64.def | 249 +++ .../clang/Basic/DiagnosticDriverKinds.td | 2 + .../clang/Basic/DiagnosticSemaKinds.td | 4 + clang/include/clang/Basic/TargetBuiltins.h | 13 +- 
clang/include/clang/Basic/TargetCXXABI.def | 3 + clang/include/clang/Basic/TargetCXXABI.h | 6 + clang/include/clang/Basic/TargetInfo.h | 3 + clang/include/clang/Driver/Options.td | 43 + clang/include/clang/Sema/Sema.h | 3 + clang/lib/AST/ASTContext.cpp | 56 + clang/lib/Basic/CMakeLists.txt | 1 + clang/lib/Basic/Targets.cpp | 4 + clang/lib/Basic/Targets/Sw64.cpp | 125 ++ clang/lib/Basic/Targets/Sw64.h | 141 ++ clang/lib/CodeGen/CGBuiltin.cpp | 108 ++ clang/lib/CodeGen/CMakeLists.txt | 1 + clang/lib/CodeGen/CodeGenFunction.h | 2 + clang/lib/CodeGen/CodeGenModule.cpp | 3 + clang/lib/CodeGen/ItaniumCXXABI.cpp | 3 + clang/lib/CodeGen/TargetInfo.h | 3 + clang/lib/CodeGen/Targets/Sw64.cpp | 545 ++++++ clang/lib/Driver/CMakeLists.txt | 2 + clang/lib/Driver/Driver.cpp | 4 + clang/lib/Driver/ToolChains/Arch/Sw64.cpp | 94 + clang/lib/Driver/ToolChains/Arch/Sw64.h | 34 + clang/lib/Driver/ToolChains/Clang.cpp | 89 + clang/lib/Driver/ToolChains/Clang.h | 2 + clang/lib/Driver/ToolChains/CommonArgs.cpp | 7 + clang/lib/Driver/ToolChains/Gnu.cpp | 23 + clang/lib/Driver/ToolChains/Linux.cpp | 16 + clang/lib/Driver/ToolChains/Sw64Toolchain.cpp | 184 ++ clang/lib/Driver/ToolChains/Sw64Toolchain.h | 79 + clang/lib/Driver/XRayArgs.cpp | 1 + clang/lib/Frontend/CompilerInvocation.cpp | 3 +- clang/lib/Headers/CMakeLists.txt | 12 + clang/lib/Headers/sw64intrin.h | 1590 +++++++++++++++++ clang/lib/Sema/SemaChecking.cpp | 136 ++ clang/lib/Sema/SemaDeclAttr.cpp | 16 + 40 files changed, 3631 insertions(+), 2 deletions(-) create mode 100644 clang/include/clang/Basic/BuiltinsSw64.def create mode 100644 clang/lib/Basic/Targets/Sw64.cpp create mode 100644 clang/lib/Basic/Targets/Sw64.h create mode 100644 clang/lib/CodeGen/Targets/Sw64.cpp create mode 100644 clang/lib/Driver/ToolChains/Arch/Sw64.cpp create mode 100644 clang/lib/Driver/ToolChains/Arch/Sw64.h create mode 100644 clang/lib/Driver/ToolChains/Sw64Toolchain.cpp create mode 100644 clang/lib/Driver/ToolChains/Sw64Toolchain.h create mode 100644 clang/lib/Headers/sw64intrin.h diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index d5204b286966..6ea5e5ee98b9 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -428,6 +428,7 @@ def TargetX86 : TargetArch<["x86"]>; def TargetAnyX86 : TargetArch<["x86", "x86_64"]>; def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>; def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>; +def TargetSw64 : TargetArch<["sw_64"]>; def TargetWindows : TargetSpec { let OSes = ["Win32"]; } @@ -891,6 +892,17 @@ def AVRSignal : InheritableAttr, TargetSpecificAttr { let Documentation = [AVRSignalDocs]; } +def Sw64Interrupt : InheritableAttr, TargetSpecificAttr { + let Spellings = [GCC<"interrupt">]; + let Subjects = SubjectList<[Function]>; + let Args = [EnumArgument<"Interrupt", "InterruptType", + ["user", "supervisor", "machine"], + ["user", "supervisor", "machine"], + 1>]; + let ParseKind = "Interrupt"; + let Documentation = [Sw64InterruptDocs]; +} + def AsmLabel : InheritableAttr { let Spellings = [CustomKeyword<"asm">, CustomKeyword<"__asm__">]; let Args = [ diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2c950231255d..c59c6efd1982 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2375,6 +2375,17 @@ of the type before passing to the attribute. 
}]; } +def Sw64InterruptDocs : Documentation { + let Category = DocCatFunction; + let Heading = "interrupt (SW64)"; + let Content = [{ +Clang supports the GNU style ``__attribute__((interrupt))`` attribute on SW64 +targets. This attribute may be attached to a function definition and instructs +the backend to generate appropriate function entry/exit code so that it can be +used directly as an interrupt service routine. + }]; +} + def AVRInterruptDocs : Documentation { let Category = DocCatFunction; let Heading = "interrupt (AVR)"; diff --git a/clang/include/clang/Basic/BuiltinsSw64.def b/clang/include/clang/Basic/BuiltinsSw64.def new file mode 100644 index 000000000000..d3e85bf6c876 --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsSw64.def @@ -0,0 +1,249 @@ +//===--- BuiltinsSw64.def - Sw64 Builtin function database ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64-specific builtin function database. Users of +// this file must define the BUILTIN macro to make use of this information. +// +//===----------------------------------------------------------------------===// + +// The format of this database matches clang/Basic/Builtins.def. + +BUILTIN(__builtin_bitrev, "UiUi", "nc") +BUILTIN(__builtin_getid, "Si", "nc") +BUILTIN(__builtin_getps, "UiUi", "n") +BUILTIN(__builtin_setps, "vUiUi", "n") + +BUILTIN(__builtin_sw64_crc32b, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32h, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32w, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32l, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32cb, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32ch, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32cw, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32cl, "LiLiLi", "n") + +BUILTIN(__builtin_sw64_sbt, "LiLiLi", "n") +BUILTIN(__builtin_sw64_cbt, "LiLiLi", "n") + +BUILTIN(__builtin_sw_vaddw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vsubw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucaddw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucsubw, "V8iV8iV8i", "n") + +BUILTIN(__builtin_sw_vaddl, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vsubl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vucaddh, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucsubh, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucaddb, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucsubb, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucaddhi, "V8iV8iLi", "n") +BUILTIN(__builtin_sw_vucsubhi, "V8iV8iLi", "n") +BUILTIN(__builtin_sw_vucaddbi, "V8iV8iLi", "n") +BUILTIN(__builtin_sw_vucsubbi, "V8iV8iLi", "n") + +BUILTIN(__builtin_sw_vucaddh_v16hi, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vucsubh_v16hi, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vucaddb_v32qi, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vucsubb_v32qi, "V32cV32cV32c", "n") + +BUILTIN(__builtin_sw_vsumw, "LiV8i", "n") +BUILTIN(__builtin_sw_vsuml, "LiV4Li", "n") +BUILTIN(__builtin_sw_ctpopow, "LiV8i", "n") +BUILTIN(__builtin_sw_ctlzow, "LiV8i", "n") + +BUILTIN(__builtin_sw_vsll, "v.", "t") +BUILTIN(__builtin_sw_vsrl, "v.", "t") +BUILTIN(__builtin_sw_vsra, "v.", "t") +BUILTIN(__builtin_sw_vrol, "v.", "t") + +BUILTIN(__builtin_sw_vsllw, "V8iV8iLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrlw, "V8iV8iLi", "ncV:256:") +BUILTIN(__builtin_sw_vsraw, "V8iV8iLi", "ncV:256:") +BUILTIN(__builtin_sw_vrolw, "V8iV8iLi", "ncV:256:") + +BUILTIN(__builtin_sw_vsllb, 
"V32cV32cLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrlb, "V32cV32cLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrab, "V32cV32cLi", "ncV:256:") +BUILTIN(__builtin_sw_vrolb, "V32cV32cLi", "ncV:256:") + +BUILTIN(__builtin_sw_vslll, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrll, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_vsral, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_vroll, "V4LiV4LiLi", "ncV:256:") + +BUILTIN(__builtin_sw_vsllh, "V16sV16sLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrlh, "V16sV16sLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrah, "V16sV16sLi", "ncV:256:") +BUILTIN(__builtin_sw_vrolh, "V16sV16sLi", "ncV:256:") + +BUILTIN(__builtin_sw_sllow, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_srlow, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_sraow, "V4LiV4LiLi", "ncV:256:") + +BUILTIN(__builtin_sw_vslls, "V4fV4fLi", "ncV:256:") +BUILTIN(__builtin_sw_vslld, "V4dV4dLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrls, "V4fV4fLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrld, "V4dV4dLi", "ncV:256:") + +BUILTIN(__builtin_sw_vcmpgew, "LiV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpeqw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmplew, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpltw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpulew, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpultw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpueqb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vcmpugtb, "V32cV32cV32c", "n") + +BUILTIN(__builtin_sw_vmaxb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vmaxh, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vmaxw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vmaxl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vumaxb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vumaxh, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vumaxw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vumaxl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vminb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vminh, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vminw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vminl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vuminb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vuminh, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vuminw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vuminl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vseleqw, "V8iV8iV8iV8i", "n") +BUILTIN(__builtin_sw_vsellew, "V8iV8iV8iV8i", "n") +BUILTIN(__builtin_sw_vselltw, "V8iV8iV8iV8i", "n") +BUILTIN(__builtin_sw_vsellbcw, "V8iV8iV8iV8i", "n") + +BUILTIN(__builtin_sw_vseleqwi, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vsellewi, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vselltwi, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vsellbcwi, "V8iV8iV8iLi", "n") + +BUILTIN(__builtin_sw_vxor, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vnot, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vorr, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vbic, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vornot, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_veqv, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vsqrts, "V4fV4f", "n") +BUILTIN(__builtin_sw_vsqrtd, "V4dV4d", "n") + +BUILTIN(__builtin_sw_vsums, "fV4f", "n") +BUILTIN(__builtin_sw_vsumd, "dV4d", "n") + +BUILTIN(__builtin_sw_vfrecs, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfrecd, "V4dV4d", "n") + +BUILTIN(__builtin_sw_vfcmpeqs, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfcmplts, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfcmples, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfcmpuns, "V4fV4fV4f", "n") + +BUILTIN(__builtin_sw_vfcmpeqd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfcmpltd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfcmpled, 
"V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfcmpund, "V4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vfcvtsd, "V4dV4f", "n") +BUILTIN(__builtin_sw_vfcvtds, "V4fV4d", "n") +BUILTIN(__builtin_sw_vfcvtld, "V4dV4Li", "n") +BUILTIN(__builtin_sw_vfcvtls, "V4fV4Li", "n") +BUILTIN(__builtin_sw_vfcvtsh, "V4dV4fV4fLi", "n") +BUILTIN(__builtin_sw_vfcvths, "V4fV4dLi", "n") + +BUILTIN(__builtin_sw_vfcvtdl, "V4LiV4d", "n") +BUILTIN(__builtin_sw_vfcvtdl_g, "V4LiV4d", "n") +BUILTIN(__builtin_sw_vfcvtdl_p, "V4LiV4d", "n") +BUILTIN(__builtin_sw_vfcvtdl_z, "V4LiV4d", "n") +BUILTIN(__builtin_sw_vfcvtdl_n, "V4LiV4d", "n") + +BUILTIN(__builtin_sw_vfris, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfris_g, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfris_p, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfris_z, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfris_n, "V4fV4f", "n") + +BUILTIN(__builtin_sw_vfrid, "V4dV4d", "n") +BUILTIN(__builtin_sw_vfrid_g, "V4dV4d", "n") +BUILTIN(__builtin_sw_vfrid_p, "V4dV4d", "n") +BUILTIN(__builtin_sw_vfrid_z, "V4dV4d", "n") +BUILTIN(__builtin_sw_vfrid_n, "V4dV4d", "n") + +BUILTIN(__builtin_sw_vmaxs, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vmaxd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vmins, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vmind, "V4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vcpyss, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vcpyses, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vcpysns, "V4fV4fV4f", "n") + +BUILTIN(__builtin_sw_vcpysd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vcpysed, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vcpysnd, "V4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vfseleqs, "V4fV4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfsellts, "V4fV4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfselles, "V4fV4fV4fV4f", "n") + +BUILTIN(__builtin_sw_vfseleqd, "V4dV4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfselltd, "V4dV4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfselled, "V4dV4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vmas, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vmss, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vnmas, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vnmss, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vmad, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vmsd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vnmad, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vnmsd, "V4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vinsb, "V32cLiV32cLi", "n") +BUILTIN(__builtin_sw_vinsh, "V16sLiV16sLi", "n") +BUILTIN(__builtin_sw_vinsw, "V8iLiV8iLi", "n") +BUILTIN(__builtin_sw_vinsl, "V4LiLiV4LiLi", "n") +BUILTIN(__builtin_sw_vinsfs, "V4ffV4fLi", "n") +BUILTIN(__builtin_sw_vinsfd, "V4ddV4dLi", "n") + +BUILTIN(__builtin_sw_vextw, "LiV8iLi", "n") +BUILTIN(__builtin_sw_vextl, "LiV4LiLi", "n") +BUILTIN(__builtin_sw_vextfs, "fV4fLi", "n") +BUILTIN(__builtin_sw_vextfd, "dV4dLi", "n") + +BUILTIN(__builtin_sw_vshfw, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vshfq, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vshfqb, "V32cV32cV32c", "n") + +BUILTIN(__builtin_sw_vconw, "V8iV8iV8iv*", "n") +BUILTIN(__builtin_sw_vconl, "V4LiV4LiV4Liv*", "n") +BUILTIN(__builtin_sw_vcons, "V4fV4fV4fv*", "n") +BUILTIN(__builtin_sw_vcond, "V4dV4dV4dv*", "n") + +BUILTIN(__builtin_sw_vlogzz, "V4LiV4LiV4LiV4LiLi", "n") +BUILTIN(__builtin_sw_vload, "v.", "t") +BUILTIN(__builtin_sw_vloadu, "v.", "t") +BUILTIN(__builtin_sw_vload_u, "v.", "t") +BUILTIN(__builtin_sw_vloade, "v.", "t") +BUILTIN(__builtin_sw_vloadnc, "v.", "t") +BUILTIN(__builtin_sw_vstore, "v.", "t") +BUILTIN(__builtin_sw_vstoreu, "v.", "t") +BUILTIN(__builtin_sw_vstore_u, "v.", "t") +BUILTIN(__builtin_sw_vstoreuh, "v.", "t") +BUILTIN(__builtin_sw_vstoreul, "v.", "t") 
+BUILTIN(__builtin_sw_vstorenc, "v.", "t") + +#undef BUILTIN diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 060f96118364..736260b17322 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -31,6 +31,8 @@ def err_drv_invalid_riscv_arch_name : Error< "invalid arch name '%0', %1">; def err_drv_invalid_riscv_cpu_name_for_target : Error< "cpu '%0' does not support rv%select{32|64}1">; +def err_drv_invalid_sw64_ext_arch_name : Error< + "invalid arch name '%0', %1 '%2'">; def warn_drv_invalid_arch_name_with_suggestion : Warning< "ignoring invalid /arch: argument '%0'; for %select{64|32}1-bit expected one of %2">, InGroup; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 0e97620945af..b74b381b374b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11909,4 +11909,8 @@ def err_wasm_builtin_arg_must_match_table_element_type : Error < "%ordinal0 argument must match the element type of the WebAssembly table in the %ordinal1 argument">; def err_wasm_builtin_arg_must_be_integer_type : Error < "%ordinal0 argument must be an integer">; + +// Sw64-specific Diagnostics +def err_invalid_sw64_type_code : Error< + "incompatible type for this __builtin_sw64 function">; } // end of sema component. diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 8f7881abf26f..59487eb04140 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -174,6 +174,16 @@ namespace clang { }; } // namespace LoongArch + /// Sw64 builtins + namespace Sw64 { + enum { + LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1, +#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#include "clang/Basic/BuiltinsSw64.def" + LastTSBuiltin + }; + } // namespace Sw64 + /// Flags to identify the types for overloaded Neon builtins. /// /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. @@ -369,7 +379,8 @@ namespace clang { PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, - SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); + SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, + Sw64::LastTSBuiltin}); } // end namespace clang. diff --git a/clang/include/clang/Basic/TargetCXXABI.def b/clang/include/clang/Basic/TargetCXXABI.def index 9501cca76094..70573e5864a0 100644 --- a/clang/include/clang/Basic/TargetCXXABI.def +++ b/clang/include/clang/Basic/TargetCXXABI.def @@ -88,6 +88,9 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") /// - representation of member function pointers adjusted as in ARM. ITANIUM_CXXABI(GenericMIPS, "mips") +/// The generic Sw64 ABI is a modified version of the Itanium ABI. +ITANIUM_CXXABI(GenericSW64, "sw_64") + /// The WebAssembly ABI is a modified version of the Itanium ABI. 
/// /// The changes from the Itanium ABI are: diff --git a/clang/include/clang/Basic/TargetCXXABI.h b/clang/include/clang/Basic/TargetCXXABI.h index c113a6a048ad..b62f97be512b 100644 --- a/clang/include/clang/Basic/TargetCXXABI.h +++ b/clang/include/clang/Basic/TargetCXXABI.h @@ -103,6 +103,9 @@ public: case GenericMIPS: return T.isMIPS(); + case GenericSW64: + return T.isSw64(); + case WebAssembly: return T.isWasm(); @@ -165,6 +168,7 @@ public: case GenericARM: case GenericAArch64: case GenericMIPS: + case GenericSW64: // TODO: ARM-style pointers to member functions put the discriminator in // the this adjustment, so they don't require functions to have any // special alignment and could therefore also return false. @@ -249,6 +253,7 @@ public: case iOS: // old iOS compilers did not follow this rule case Microsoft: case GenericMIPS: + case GenericSW64: case XL: return true; } @@ -287,6 +292,7 @@ public: case GenericARM: case iOS: case GenericMIPS: + case GenericSW64: case XL: return UseTailPaddingUnlessPOD03; diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index aeadb7273799..b2575eb6c334 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -332,6 +332,9 @@ public: // } va_list[1]; SystemZBuiltinVaList, + // __builtin_va_list as defined by the Sw64 ABI + Sw64ABIBuiltinVaList, + // typedef struct __va_list_tag { // void *__current_saved_reg_area_pointer; // void *__saved_reg_area_end_pointer; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c109d7a8fcab..b8971182ae76 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -194,6 +194,8 @@ def m_riscv_Features_Group : OptionGroup<"">, Group, DocName<"RISC-V">; def m_loongarch_Features_Group : OptionGroup<"">, Group, DocName<"LoongArch">; +def m_sw_64_Features_Group : OptionGroup<"">, + Group, DocName<"SW64">; def m_libc_Group : OptionGroup<"">, Group, Flags<[HelpHidden]>; @@ -4247,6 +4249,41 @@ def mno_lasx : Flag<["-"], "mno-lasx">, Group, def msimd_EQ : Joined<["-"], "msimd=">, Group, Flags<[TargetSpecific]>, HelpText<"Select the SIMD extension(s) to be enabled in LoongArch either 'none', 'lsx', 'lasx'.">; +def mieee : Flag<["-"], "mieee">, Group, + HelpText<"Use mieee to set setfpec (SW64 only)">; +foreach i = {0-31} in + def ffixed_sw_#i : Flag<["-"], "ffixed-sw-"#i>, Group, + HelpText<"Reserve the "#i#" register (SW64 only)">; +def FS_LOAD : Flag<["-"], "fastload">, Group, Flags<[CC1Option]>, + HelpText<"enable fast load/store instrs in sw_64 target.(Development)">; +def fsw_int_divmod : Flag<["-"], "fsw-int-divmod">, Group, + HelpText<"Enable sw64 core4 int-div/rem instructions">, Flags<[CC1Option]>; +def fsw_shift_word : Flag<["-"], "fsw-shift-word">, Group, + HelpText<"Enable sw64 core4 int-shift instructions">, Flags<[CC1Option]>; +def fsw_rev : Flag<["-"], "fsw-rev">, Group, + HelpText<"Enable sw64 core4 byte-rev instructions">, Flags<[CC1Option]>; +def fsw_recip : Flag<["-"], "fsw-recip">, Group, + HelpText<"Enable sw64 core4 fp-rec instructions">, Flags<[CC1Option]>; +def fsw_fprnd : Flag<["-"], "fsw-fprnd">, Group, + HelpText<"Enable sw64 core4 fp-round instructions">, Flags<[CC1Option]>; +def fsw_cmov : Flag<["-"], "fsw-cmov">, Group, + HelpText<"Enable sw64 core4 fp-cmov instructions">, Flags<[CC1Option]>; +def fsw_auto_inc_dec : Flag<["-"], "fsw-auto-inc-dec">, Group, + HelpText<"Enable sw64 core4 post-inc instructions">, Flags<[CC1Option]>; +def 
fsw_use_cas : Flag<["-"], "fsw-use-cas">, Group, + HelpText<"Enable sw64 core4 atomic-cas instructions">, Flags<[CC1Option]>; +def msw64_relax : Flag<["-"], "sw64-mrelax">, Group, + HelpText<"Enable linker relaxation">; +def msw64_no_relax : Flag<["-"], "sw64-mno-relax">, Group, + HelpText<"Disable linker relaxation">; +def msw6a : Flag<["-"], "sw6a">, + Alias, AliasArgs<["sw6a"]>, Group, + HelpText<"sw6a">, Flags<[HelpHidden]>; +def msw6b : Flag<["-"], "sw6b">, + Alias, AliasArgs<["sw6b"]>, Group, + HelpText<"sw6b">, Flags<[HelpHidden]>; +def mswEv : Flag<["-"], "mswEv">, Group; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">, Flags<[CC1Option]>, Group, MarshallingInfoFlag>; @@ -4297,6 +4334,12 @@ def mmsa : Flag<["-"], "mmsa">, Group, HelpText<"Enable MSA ASE (MIPS only)">; def mno_msa : Flag<["-"], "mno-msa">, Group, HelpText<"Disable MSA ASE (MIPS only)">; + +def msimd : Flag<["-"], "msimd">, Group, + HelpText<"Enable SIMD (SW64 only)">; +def mno_simd : Flag<["-"], "mno-simd">, Group, + HelpText<"Disable SIMD (SW64 only)">; + def mmt : Flag<["-"], "mmt">, Group, HelpText<"Enable MT ASE (MIPS only)">; def mno_mt : Flag<["-"], "mno-mt">, Group, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b2ab6d0f8445..b41933afb5a4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13621,6 +13621,9 @@ private: bool CheckMipsBuiltinCpu(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall); bool CheckMipsBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall); + bool CheckSw64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool CheckSw64VectorMemoryIntr(unsigned BuiltinID, CallExpr *TheCall); + bool CheckSw64VectorShift(unsigned BuiltinID, CallExpr *TheCall); bool CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 76000156fece..62a8c227a3a4 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -884,6 +884,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { case TargetCXXABI::WatchOS: case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericSW64: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: case TargetCXXABI::XL: @@ -9009,6 +9010,57 @@ CreateAAPCSABIBuiltinVaListDecl(const ASTContext *Context) { return Context->buildImplicitTypedef(T, "__builtin_va_list"); } +static TypedefDecl *CreateSw64ABIBuiltinVaListDecl(const ASTContext *Context) { + // struct __va_list { + RecordDecl *VaListTagDecl = Context->buildImplicitRecord("__va_list"); + + if (Context->getLangOpts().CPlusPlus) { + + // namespace std { + // struct __va_list { + NamespaceDecl *NS; + NS = NamespaceDecl::Create(const_cast(*Context), + Context->getTranslationUnitDecl(), + /*Inline*/ false, SourceLocation(), + SourceLocation(), &Context->Idents.get("std"), + /*PrevDecl*/ nullptr, /*Nested=*/false); + NS->setImplicit(); + VaListTagDecl->setDeclContext(NS); + } + + VaListTagDecl->startDefinition(); + + const size_t NumFields = 2; + QualType FieldTypes[NumFields]; + const char *FieldNames[NumFields]; + + // unsigned gp_offset; + FieldTypes[0] = Context->getPointerType(Context->VoidTy); + FieldNames[0] = "__stack"; + + // 
unsigned fp_offset; + FieldTypes[1] = Context->IntTy; + FieldNames[1] = "__offs"; + + // Create fields + for (unsigned i = 0; i < NumFields; ++i) { + FieldDecl *Field = FieldDecl::Create( + const_cast(*Context), VaListTagDecl, SourceLocation(), + SourceLocation(), &Context->Idents.get(FieldNames[i]), FieldTypes[i], + /*TInfo=*/nullptr, + /*BitWidth=*/nullptr, + /*Mutable=*/false, ICIS_NoInit); + Field->setAccess(AS_public); + VaListTagDecl->addDecl(Field); + } + VaListTagDecl->completeDefinition(); + Context->VaListTagDecl = VaListTagDecl; + QualType VaListTagType = Context->getRecordType(VaListTagDecl); + + // }; + return Context->buildImplicitTypedef(VaListTagType, "__builtin_va_list"); +} + static TypedefDecl * CreateSystemZBuiltinVaListDecl(const ASTContext *Context) { // struct __va_list_tag { @@ -9136,6 +9188,8 @@ static TypedefDecl *CreateVaListDecl(const ASTContext *Context, return CreateSystemZBuiltinVaListDecl(Context); case TargetInfo::HexagonBuiltinVaList: return CreateHexagonBuiltinVaListDecl(Context); + case TargetInfo::Sw64ABIBuiltinVaList: + return CreateSw64ABIBuiltinVaListDecl(Context); } llvm_unreachable("Unhandled __builtin_va_list type kind"); @@ -12041,6 +12095,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericSW64: case TargetCXXABI::iOS: case TargetCXXABI::WebAssembly: case TargetCXXABI::WatchOS: @@ -12062,6 +12117,7 @@ MangleContext *ASTContext::createDeviceMangleContext(const TargetInfo &T) { case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericSW64: case TargetCXXABI::iOS: case TargetCXXABI::WebAssembly: case TargetCXXABI::WatchOS: diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index caa1b6002e6f..e830db015d0c 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -109,6 +109,7 @@ add_clang_library(clangBasic Targets/RISCV.cpp Targets/SPIR.cpp Targets/Sparc.cpp + Targets/Sw64.cpp Targets/SystemZ.cpp Targets/TCE.cpp Targets/VE.cpp diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index b14d11333412..432f34f94414 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -35,6 +35,7 @@ #include "Targets/RISCV.h" #include "Targets/SPIR.h" #include "Targets/Sparc.h" +#include "Targets/Sw64.h" #include "Targets/SystemZ.h" #include "Targets/TCE.h" #include "Targets/VE.h" @@ -132,6 +133,9 @@ std::unique_ptr AllocateTarget(const llvm::Triple &Triple, case llvm::Triple::lanai: return std::make_unique(Triple, Opts); + case llvm::Triple::sw_64: + return std::make_unique(Triple, Opts); + case llvm::Triple::aarch64_32: if (Triple.isOSDarwin()) return std::make_unique(Triple, Opts); diff --git a/clang/lib/Basic/Targets/Sw64.cpp b/clang/lib/Basic/Targets/Sw64.cpp new file mode 100644 index 000000000000..c622a4b7a4e1 --- /dev/null +++ b/clang/lib/Basic/Targets/Sw64.cpp @@ -0,0 +1,125 @@ +//===--- Sw64.cpp - Implement Sw64 target feature support ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Sw64 TargetInfo objects. 
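The __builtin_va_list that CreateSw64ABIBuiltinVaListDecl builds above is a plain two-field record; note that the "gp_offset"/"fp_offset" comments in that hunk are leftovers and do not match the fields actually created (__stack and __offs). A rough C equivalent, for illustration only and not part of the patch:

    /* Illustrative sketch of the va_list record created above. */
    typedef struct __va_list {
      void *__stack;   /* pointer into the argument save area       */
      int   __offs;    /* byte offset of the next variadic argument */
    } sw64_va_list;    /* "sw64_va_list" is a hypothetical name     */
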
+// +//===----------------------------------------------------------------------===// + +#include "Sw64.h" +#include "Targets.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/MacroBuilder.h" +#include "clang/Basic/TargetBuiltins.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Sw64TargetParser.h" + +using namespace clang; +using namespace clang::targets; + +ArrayRef Sw64TargetInfo::getGCCRegNames() const { + static const char *const GCCRegNames[] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", + "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", + "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", + "$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31", + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", + "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", + "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", + "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31"}; + return llvm::makeArrayRef(GCCRegNames); +} + +ArrayRef Sw64TargetInfo::getGCCRegAliases() const { + static const TargetInfo::GCCRegAlias GCCRegAliases[] = { + {{"v0"}, "$0"}, {{"t0"}, "$1"}, {{"t1"}, "$2"}, {{"t2"}, "$3"}, + {{"t3"}, "$4"}, {{"t4"}, "$5"}, {{"t5"}, "$6"}, {{"t6"}, "$7"}, + {{"t7"}, "$8"}, {{"s0"}, "$9"}, {{"s1"}, "$10"}, {{"s2"}, "$11"}, + {{"s3"}, "$12"}, {{"s4"}, "$13"}, {{"s5"}, "$14"}, {{"fp"}, "$15"}, + {{"a0"}, "$16"}, {{"a1"}, "$17"}, {{"a2"}, "$18"}, {{"a3"}, "$19"}, + {{"a4"}, "$20"}, {{"a5"}, "$21"}, {{"t8"}, "$22"}, {{"t9"}, "$23"}, + {{"t10"}, "$24"}, {{"t11"}, "$25"}, {{"ra"}, "$26"}, {{"t12"}, "$27"}, + {{"at"}, "$28"}, {{"gp"}, "$29"}, {{"sp"}, "$30"}, {{"zero"}, "$31"}}; + return llvm::makeArrayRef(GCCRegAliases); +} + +const Builtin::Info Sw64TargetInfo::BuiltinInfo[] = { +#define BUILTIN(ID, TYPE, ATTRS) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ + {#ID, TYPE, ATTRS, HEADER, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#include "clang/Basic/BuiltinsSw64.def" +}; + +void Sw64TargetInfo::fillValidCPUList( + SmallVectorImpl &Values) const { + llvm::Sw64::fillValidCPUArchList(Values, true); +} + +bool Sw64TargetInfo::isValidTuneCPUName(StringRef Name) const { + return llvm::Sw64::checkTuneCPUKind(llvm::Sw64::parseTuneCPUKind(Name, true), + /*Is64Bit=*/true); +} + +void Sw64TargetInfo::fillValidTuneCPUList( + SmallVectorImpl &Values) const { + llvm::Sw64::fillValidTuneCPUArchList(Values, true); +} + +bool Sw64TargetInfo::isValidCPUName(StringRef Name) const { + return llvm::Sw64::parseCPUArch(Name) != llvm::Sw64::CK_INVALID; +} + +bool Sw64TargetInfo::setCPU(const std::string &Name) { + return isValidCPUName(Name); +} + +void Sw64TargetInfo::getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + DefineStd(Builder, "sw_64", Opts); + + Builder.defineMacro("__REGISTER_PREFIX__", ""); + Builder.defineMacro("__LONG_DOUBLE_128__"); + + Builder.defineMacro("__ELF__"); + Builder.defineMacro("__sw_64__"); + Builder.defineMacro("__sw_64_sw6a__"); + Builder.defineMacro("__sw_64"); + // Consistent with GCC + Builder.defineMacro("__gnu_linux__"); + + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); + + DefineStd(Builder, "unix", Opts); + DefineStd(Builder, "linux", Opts); + + if (HasCore4) + 
Builder.defineMacro("__sw_64_sw8a__"); + + if (Opts.CPlusPlus) + Builder.defineMacro("_GNU_SOURCE"); +} + +/// Return true if has this feature, need to sync with handleTargetFeatures. +bool Sw64TargetInfo::hasFeature(StringRef Feature) const { + return llvm::StringSwitch(Feature) + .Case("sw_64", true) + .Case("core3b", HasCore3) + .Case("core4", HasCore4) + .Case("simd", HasSIMD) + .Default(false); +} + +ArrayRef Sw64TargetInfo::getTargetBuiltins() const { + return llvm::makeArrayRef(BuiltinInfo, clang::Sw64::LastTSBuiltin - + Builtin::FirstTSBuiltin); +} diff --git a/clang/lib/Basic/Targets/Sw64.h b/clang/lib/Basic/Targets/Sw64.h new file mode 100644 index 000000000000..791d893a7ea3 --- /dev/null +++ b/clang/lib/Basic/Targets/Sw64.h @@ -0,0 +1,141 @@ +//===--- Sw64.h - Declare Sw64 target feature support ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares Sw64 TargetInfo objects. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_SW64_H +#define LLVM_CLANG_LIB_BASIC_TARGETS_SW64_H + +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Sw64TargetParser.h" +#include "llvm/TargetParser/Triple.h" + +namespace clang { +namespace targets { + +class LLVM_LIBRARY_VISIBILITY Sw64TargetInfo : public TargetInfo { + static const Builtin::Info BuiltinInfo[]; + bool HasCore3 = false; + bool HasCore4 = false; + + // for futrure update + // change data length + void setDataLayout() { + StringRef Layout; + Layout = + "e-m:e-p:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n64-S128-v256:256"; + resetDataLayout(Layout.str()); + } + + bool HasSIMD; + +public: + Sw64TargetInfo(const llvm::Triple &Triple, const TargetOptions &) + : TargetInfo(Triple), HasSIMD(false) { + NoAsmVariants = true; + MCountName = ""; + setABI("sw_64"); + UseZeroLengthBitfieldAlignment = false; + IntMaxType = SignedLong; + } + + bool setABI(const std::string &Name) override { + set64ABITypes(); + return true; + } + + void set64ABITypes(void) { + LongWidth = LongAlign = 64; + PointerWidth = PointerAlign = 64; + LongDoubleWidth = LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); + MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; + DoubleAlign = LongLongAlign = 64; + SuitableAlign = 128; + MaxVectorAlign = 256; + SizeType = UnsignedLong; + PtrDiffType = SignedLong; + IntPtrType = SignedLong; + WCharType = SignedInt; + WIntType = UnsignedInt; + } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override; + + ArrayRef getTargetBuiltins() const override; + + BuiltinVaListKind getBuiltinVaListKind() const override { + return TargetInfo::Sw64ABIBuiltinVaList; + } + + ArrayRef getGCCRegNames() const override; + + ArrayRef getGCCRegAliases() const override; + + std::string_view getClobbers() const override { return ""; } + + bool hasFeature(StringRef Feature) const override; + bool handleTargetFeatures(std::vector &Features, + DiagnosticsEngine &Diags) override { + for (const auto &Feature : Features) { + if (Feature == "+simd") + HasSIMD = true; + if (Feature == "+core3b") + HasCore3 = true; + if (Feature == "+core4") + HasCore4 = true; + } + setDataLayout(); + return true; + }; + + bool 
isValidCPUName(StringRef Name) const override; + bool setCPU(const std::string &Name) override; + void fillValidCPUList(SmallVectorImpl &Values) const override; + bool isValidTuneCPUName(StringRef Name) const override; + void fillValidTuneCPUList(SmallVectorImpl &Values) const override; + bool validateAsmConstraint(const char *&Name, + TargetInfo::ConstraintInfo &Info) const override { + switch (*Name) { + default: + return false; + case 'I': // Signed 16-bit constant + case 'J': // Integer 0 + case 'K': // Unsigned 16-bit constant + case 'L': // Signed 32-bit constant, lower 16-bit zeros (for lui) + case 'M': // Constants not loadable via lui, addiu, or ori + case 'N': // Constant -1 to -65535 + case 'O': // A signed 15-bit constant + case 'P': // A constant between 1 go 65535 + return true; + } + } + // Return the register number that __builtin_eh_return_regno would return with + // the specified argument. + // + // This corresponds with TargetLowering's getExceptionPointerRegister and + // getExceptionSelectorRegister in the backend. + int getEHDataRegisterNumber(unsigned RegNo) const override { + if (RegNo == 0) + return 16; + if (RegNo == 1) + return 17; + return -1; + } + + bool allowsLargerPreferedTypeAlignment() const override { return false; } + bool hasBitIntType() const override { return true; } +}; +} // namespace targets +} // namespace clang +#endif diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8f87c4d46109..f63fac117516 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -48,6 +48,7 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsSw64.h" #include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" @@ -5601,6 +5602,8 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); + case llvm::Triple::sw_64: + return CGF->EmitSw64BuiltinExpr(BuiltinID, E, ReturnValue); default: return nullptr; } @@ -20428,3 +20431,108 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); return Builder.CreateCall(F, Ops, ""); } + +Value *CodeGenFunction::EmitSw64BuiltinExpr(unsigned BuiltinID, + const CallExpr *E, + ReturnValueSlot ReturnValue) { + SmallVector Ops; + llvm::Type *ResultType = ConvertType(E->getType()); + Intrinsic::ID ID = Intrinsic::not_intrinsic; + + switch (BuiltinID) { + default: + llvm_unreachable("unexpected builtin ID"); + case Sw64::BI__builtin_sw_vload: + ID = Intrinsic::sw64_vload; + break; + case Sw64::BI__builtin_sw_vloade: + ID = Intrinsic::sw64_vloade; + break; + case Sw64::BI__builtin_sw_vloadu: + ID = Intrinsic::sw64_vloadu; + break; + case Sw64::BI__builtin_sw_vload_u: + ID = Intrinsic::sw64_vload_u; + break; + case Sw64::BI__builtin_sw_vloadnc: + ID = Intrinsic::sw64_vloadnc; + break; + case Sw64::BI__builtin_sw_vstore: + ID = Intrinsic::sw64_vstore; + break; + case Sw64::BI__builtin_sw_vstoreu: + ID = Intrinsic::sw64_vstoreu; + break; + case Sw64::BI__builtin_sw_vstore_u: + ID = Intrinsic::sw64_vstore_u; + break; + case Sw64::BI__builtin_sw_vstoreul: + ID = Intrinsic::sw64_vstoreul; + break; + case Sw64::BI__builtin_sw_vstoreuh: + ID = Intrinsic::sw64_vstoreuh; + break; + case Sw64::BI__builtin_sw_vstorenc: + ID = Intrinsic::sw64_vstorenc; + break; + case 
Sw64::BI__builtin_sw_vsll: + ID = Intrinsic::sw64_vsll; + break; + case Sw64::BI__builtin_sw_vsrl: + ID = Intrinsic::sw64_vsrl; + break; + case Sw64::BI__builtin_sw_vsra: + ID = Intrinsic::sw64_vsra; + break; + case Sw64::BI__builtin_sw_vrol: + ID = Intrinsic::sw64_vrol; + break; + } + + if (BuiltinID == Sw64::BI__builtin_sw_vload || + BuiltinID == Sw64::BI__builtin_sw_vloade || + BuiltinID == Sw64::BI__builtin_sw_vloadu || + BuiltinID == Sw64::BI__builtin_sw_vload_u || + BuiltinID == Sw64::BI__builtin_sw_vloadnc) { + bool isLoadExt = BuiltinID == Sw64::BI__builtin_sw_vloade; + + Value *LoadAddr = EmitScalarExpr(E->getArg(0)); + QualType Ty = E->getType(); + llvm::Type *ArgTy = LoadAddr->getType(); + llvm::Type *RealResTy = ConvertType(Ty); + llvm::Type *ResPTy = RealResTy->getPointerTo(); + // if target is Load duplicated in vector, do not emit BitCast + ResPTy = isLoadExt ? LoadAddr->getType() : ResPTy; + if (!isLoadExt) { + LoadAddr = Builder.CreateBitCast(LoadAddr, ResPTy); + } + llvm::Type *Tys[2] = {RealResTy, ResPTy}; + Function *F = CGM.getIntrinsic(ID, Tys); + return Builder.CreateCall(F, LoadAddr, "vload"); + } else if (BuiltinID == Sw64::BI__builtin_sw_vstore || + BuiltinID == Sw64::BI__builtin_sw_vstoreu || + BuiltinID == Sw64::BI__builtin_sw_vstore_u || + BuiltinID == Sw64::BI__builtin_sw_vstoreuh || + BuiltinID == Sw64::BI__builtin_sw_vstoreul || + BuiltinID == Sw64::BI__builtin_sw_vstorenc) { + Value *StoreVal = EmitScalarExpr(E->getArg(0)); + Value *StoreAddr = EmitScalarExpr(E->getArg(1)); + QualType Ty = E->getArg(0)->getType(); + llvm::Type *StoreTy = StoreVal->getType(); + StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); + Function *F = + CGM.getIntrinsic(ID, {StoreVal->getType(), StoreAddr->getType()}); + return Builder.CreateCall(F, {StoreVal, StoreAddr}, ""); + } else if (BuiltinID == Sw64::BI__builtin_sw_vsll || + BuiltinID == Sw64::BI__builtin_sw_vsra || + BuiltinID == Sw64::BI__builtin_sw_vsrl || + BuiltinID == Sw64::BI__builtin_sw_vrol) { + Value *ShiftVal = EmitScalarExpr(E->getArg(0)); + Value *ShiftImm = EmitScalarExpr(E->getArg(1)); + QualType Ty = E->getArg(0)->getType(); + + Function *F = + CGM.getIntrinsic(ID, {ShiftVal->getType(), ShiftImm->getType()}); + return Builder.CreateCall(F, {ShiftVal, ShiftImm}, ""); + } +} diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index 1debeb6d9cce..a575aa57d75e 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -108,6 +108,7 @@ add_clang_library(clangCodeGen Targets/RISCV.cpp Targets/SPIR.cpp Targets/Sparc.cpp + Targets/Sw64.cpp Targets/SystemZ.cpp Targets/TCE.cpp Targets/VE.cpp diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 143e0707b942..afcc0a5b927c 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4310,6 +4310,8 @@ public: llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitSw64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue); llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index eabc4aabea06..4651228817b5 100644 --- 
a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -90,6 +90,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { case TargetCXXABI::iOS: case TargetCXXABI::WatchOS: case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericSW64: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: case TargetCXXABI::XL: @@ -268,6 +269,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) { return createX86_64TargetCodeGenInfo(CGM, AVXLevel); } } + case llvm::Triple::sw_64: + return createSw64TargetCodeGenInfo(CGM); case llvm::Triple::hexagon: return createHexagonTargetCodeGenInfo(CGM); case llvm::Triple::lanai: diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index ede9efb019ce..32b441813bb9 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -570,6 +570,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { case TargetCXXABI::GenericMIPS: return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + case TargetCXXABI::GenericSW64: + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + case TargetCXXABI::WebAssembly: return new WebAssemblyCXXABI(CGM); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 14ed5e5d2d2c..a1fc372d46f6 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -459,6 +459,9 @@ std::unique_ptr createLoongArchTargetCodeGenInfo(CodeGenModule &CGM, unsigned GRLen, unsigned FLen); +std::unique_ptr +createSw64TargetCodeGenInfo(CodeGenModule &CGM); + std::unique_ptr createM68kTargetCodeGenInfo(CodeGenModule &CGM); diff --git a/clang/lib/CodeGen/Targets/Sw64.cpp b/clang/lib/CodeGen/Targets/Sw64.cpp new file mode 100644 index 000000000000..0752efaef3c6 --- /dev/null +++ b/clang/lib/CodeGen/Targets/Sw64.cpp @@ -0,0 +1,545 @@ +//===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes wrap the information about a call or function +// definition used to handle ABI compliancy. +// +//===----------------------------------------------------------------------===// + +#include "TargetInfo.h" +#include "ABIInfoImpl.h" +#include "clang/Basic/DiagnosticFrontend.h" +#include "llvm/ADT/SmallBitVector.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// SW64 ABI Implementation. +//===----------------------------------------------------------------------===// + +namespace { +class Sw64ABIInfo : public ABIInfo { + /// Similar to llvm::CCState, but for Clang. 
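The EmitSw64BuiltinExpr hunk above (in CGBuiltin.cpp) handles the new builtins in three groups: vector loads, vector stores, and vector shifts, each lowered to the corresponding llvm.sw64.* intrinsic. As a usage sketch only: the authoritative prototypes live in BuiltinsSw64.def, which is not shown in this excerpt, so the vector width and argument shapes below are inferred from the code-generation logic and should be read as assumptions.

    /* Hypothetical usage; the vector type and prototypes are assumptions. */
    typedef int v8si __attribute__((vector_size(32)));   /* 256-bit vector */

    v8si demo(const int *src, int *dst) {
      v8si v = __builtin_sw_vload(src);   /* lowered to llvm.sw64.vload  */
      v = __builtin_sw_vsll(v, 3);        /* lowered to llvm.sw64.vsll   */
      __builtin_sw_vstore(v, dst);        /* lowered to llvm.sw64.vstore */
      return v;
    }
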
+ struct CCState { + CCState(CGFunctionInfo &FI) + : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()), + Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {} + + llvm::SmallBitVector IsPreassigned; + unsigned CC = CallingConv::CC_C; + unsigned FreeRegs = 0; + unsigned FreeSSERegs = 0; + RequiredArgs Required; + bool IsDelegateCall = false; + }; + unsigned MinABIStackAlignInBytes, StackAlignInBytes; + void CoerceToIntArgs(uint64_t TySize, + SmallVectorImpl &ArgList) const; + llvm::Type *HandleAggregates(QualType Ty, uint64_t TySize) const; + llvm::Type *returnAggregateInRegs(QualType RetTy, uint64_t Size) const; + llvm::Type *getPaddingType(uint64_t Align, uint64_t Offset) const; + +public: + Sw64ABIInfo(CodeGenTypes &CGT) + : ABIInfo(CGT), MinABIStackAlignInBytes(8), StackAlignInBytes(16) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset, + CCState &State) const; + ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; + void computeInfo(CGFunctionInfo &FI) const override; + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + ABIArgInfo extendType(QualType Ty) const; +}; + +class Sw64TargetCodeGenInfo : public TargetCodeGenInfo { + unsigned SizeOfUnwindException; + +public: + Sw64TargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique(CGT)), + SizeOfUnwindException(32) {} + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { + return 30; + } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const FunctionDecl *FD = dyn_cast_or_null(D); + if (!FD) + return; + + // Other attributes do not have a meaning for declarations. + if (GV->isDeclaration()) + return; + + // FIXME:Interrupte Attr doesn`t write in SW64. + // const auto *attr = FD->getAttr(); + // if(!attr) + // return + // const char *Kind; + // ... + // + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override; + + unsigned getSizeOfUnwindException() const override { + return SizeOfUnwindException; + } +}; +} // namespace + +void Sw64ABIInfo::CoerceToIntArgs( + uint64_t TySize, SmallVectorImpl &ArgList) const { + llvm::IntegerType *IntTy = + llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); + + // Add (TySize / MinABIStackAlignInBytes) args of IntTy. + for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) + ArgList.push_back(IntTy); + + // If necessary, add one more integer type to ArgList. + unsigned R = TySize % (MinABIStackAlignInBytes * 8); + + if (R) + ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); +} + +// In N32/64, an aligned double precision floating point field is passed in +// a register. +llvm::Type *Sw64ABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { + SmallVector ArgList, IntArgList; + + if (Ty->isComplexType()) + return CGT.ConvertType(Ty); + + const RecordType *RT = Ty->getAs(); + + // Unions/vectors are passed in integer registers. 
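Sw64ABIInfo::CoerceToIntArgs above simply slices an aggregate into 64-bit integer pieces plus one smaller integer for any remainder. For example (illustrative only), a 20-byte aggregate is coerced by that helper into the LLVM type { i64, i64, i32 }:

    /* Illustrative only: 20 bytes = 160 bits -> two i64 pieces + one i32. */
    struct S { int a[5]; };   /* sizeof(struct S) == 20 on this ABI */
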
+ if (!RT || !RT->isStructureOrClassType()) { + CoerceToIntArgs(TySize, ArgList); + return llvm::StructType::get(getVMContext(), ArgList); + } + + const RecordDecl *RD = RT->getDecl(); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + assert(!(TySize % 8) && "Size of structure must be multiple of 8."); + + uint64_t LastOffset = 0; + unsigned idx = 0; + llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); + + // Iterate over fields in the struct/class and check if there are any aligned + // double fields. + for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); + i != e; ++i, ++idx) { + const QualType Ty = i->getType(); + const BuiltinType *BT = Ty->getAs(); + + if (!BT || BT->getKind() != BuiltinType::Double) + continue; + + uint64_t Offset = Layout.getFieldOffset(idx); + if (Offset % 64) // Ignore doubles that are not aligned. + continue; + + // Add ((Offset - LastOffset) / 64) args of type i64. + for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) + ArgList.push_back(I64); + + // Add double type. + // ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); + ArgList.push_back(llvm::Type::getInt64Ty(getVMContext())); + LastOffset = Offset + 64; + } + + CoerceToIntArgs(TySize - LastOffset, IntArgList); + ArgList.append(IntArgList.begin(), IntArgList.end()); + + return llvm::StructType::get(getVMContext(), ArgList); +} + +llvm::Type *Sw64ABIInfo::getPaddingType(uint64_t OrigOffset, + uint64_t Offset) const { + if (OrigOffset + MinABIStackAlignInBytes > Offset) + return nullptr; + + return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); +} + +ABIArgInfo Sw64ABIInfo::classifyArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + if (isAggregateTypeForABI(Ty)) { + // Records with non trivial destructors/constructors should not be passed + // by value. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + return getNaturalAlignIndirect(Ty); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs()) + Ty = EnumTy->getDecl()->getIntegerType(); + + if (const BuiltinType *BuiltinTy = Ty->getAs()) { + if (BuiltinTy->getKind() == BuiltinType::LongDouble && + getContext().getTypeSize(Ty) == 128) + return getNaturalAlignIndirect(Ty, false); + } + return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect(); +} +ABIArgInfo Sw64ABIInfo::getIndirectResult(QualType Ty, bool ByVal, + CCState &State) const { + if (!ByVal) { + if (State.FreeRegs) { + --State.FreeRegs; // Non-byval indirects just use one pointer. + return getNaturalAlignIndirectInReg(Ty); + } + return getNaturalAlignIndirect(Ty, false); + } + + // Compute the byval alignment. + unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + /*Realign=*/TypeAlign > + MinABIStackAlignInBytes); +} + +ABIArgInfo Sw64ABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset, + CCState &State) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + // Check with the C++ ABI first. 
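The one-argument classifyArgumentType overload above boils down to a few rules: aggregates and 128-bit long double are passed indirectly, promotable integers are extended, and everything else is passed directly. In C terms (illustrative only, not part of the patch):

    /* Illustrative mapping of parameter types to the rules above. */
    struct Big { char buf[64]; };
    void f(struct Big b);     /* aggregate            -> passed indirectly  */
    void g(long double x);    /* 128-bit long double  -> passed indirectly  */
    void h(short s);          /* promotable integer   -> extended (promoted) */
    void k(long n);           /* anything else        -> passed directly    */
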
+ const RecordType *RT = Ty->getAs(); + if (RT) { + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) { + return getIndirectResult(Ty, /*ByVal=*/false, State); + } else if (RAA == CGCXXABI::RAA_DirectInMemory) { + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + } + } + + if (Ty->isVectorType()) { + uint64_t Size = getContext().getTypeSize(Ty); + if (Size > 256) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + else if (Size < 128) { + llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + + if (Ty->isAnyComplexType()) { + if (getContext().getTypeSize(Ty) <= 128) { + return ABIArgInfo::getDirect(); + } else { + return getNaturalAlignIndirect(Ty, false); + } + } + + uint64_t OrigOffset = Offset; + uint64_t TySize = getContext().getTypeSize(Ty); + uint64_t Align = getContext().getTypeAlign(Ty) / 8; + + Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), + (uint64_t)StackAlignInBytes); + unsigned CurrOffset = llvm::alignTo(Offset, Align); + Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; + + if (isAggregateTypeForABI(Ty)) { + // Ignore empty aggregates. + if (TySize == 0) + return ABIArgInfo::getIgnore(); + + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + Offset = OrigOffset + MinABIStackAlignInBytes; + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + } + llvm::LLVMContext &LLVMContext = getVMContext(); + unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 63) / 64; + if (SizeInRegs <= State.FreeRegs) { + llvm::IntegerType *Int64 = llvm::Type::getInt64Ty(LLVMContext); + SmallVector Elements(SizeInRegs, Int64); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + return ABIArgInfo::getDirectInReg(Result); + } else { + // If we have reached here, aggregates are passed directly by coercing to + // another structure type. Padding is inserted if the offset of the + // aggregate is unaligned. + ABIArgInfo ArgInfo = + ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, + getPaddingType(OrigOffset, CurrOffset)); + ArgInfo.setInReg(true); + return ArgInfo; + } + } + + if (const BuiltinType *BuiltinTy = Ty->getAs()) { + if (BuiltinTy->getKind() == BuiltinType::LongDouble && + getContext().getTypeSize(Ty) == 128) + return getNaturalAlignIndirect(Ty, false); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to the GPR width. + if (Ty->isIntegralOrEnumerationType()) + return extendType(Ty); + + return ABIArgInfo::getDirect(nullptr, 0, + getPaddingType(OrigOffset, CurrOffset)); +} + +llvm::Type *Sw64ABIInfo::returnAggregateInRegs(QualType RetTy, + uint64_t Size) const { + const RecordType *RT = RetTy->getAs(); + SmallVector RTList; + + if (RT && RT->isStructureOrClassType()) { + const RecordDecl *RD = RT->getDecl(); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + unsigned FieldCnt = Layout.getFieldCount(); + + // N32/64 returns struct/classes in floating point registers if the + // following conditions are met: + // 1. The size of the struct/class is no larger than 128-bit. + // 2. The struct/class has one or two fields all of which are floating + // point types. + // 3. The offset of the first field is zero (this follows what gcc does). + // + // Any other composite results are returned in integer registers. 
+ // + if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) { + RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end(); + for (; b != e; ++b) { + const BuiltinType *BT = b->getType()->getAs(); + + if (!BT || !BT->isFloatingPoint()) + break; + + RTList.push_back(CGT.ConvertType(b->getType())); + } + if (b == e) + return llvm::StructType::get(getVMContext(), RTList, + RD->hasAttr()); + + RTList.clear(); + } + } + + CoerceToIntArgs(Size, RTList); + return llvm::StructType::get(getVMContext(), RTList); +} + +ABIArgInfo Sw64ABIInfo::classifyReturnType(QualType RetTy) const { + uint64_t Size = getContext().getTypeSize(RetTy); + + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + // However, N32/N64 ignores zero sized return values. + if (Size == 0) + return ABIArgInfo::getIgnore(); + + // Large vector types should be returned via memory. + if (RetTy->isVectorType() && Size == 256) + return ABIArgInfo::getDirect(); + + if (const auto *BT = RetTy->getAs()) + if (BT->getKind() == BuiltinType::LongDouble || Size >= 128) + return getNaturalAlignIndirect(RetTy); + + if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { + if ((RetTy->hasFloatingRepresentation() && Size <= 128) || + (!RetTy->hasFloatingRepresentation() && Size <= 64)) { + if (RetTy->isComplexType()) + return ABIArgInfo::getDirect(); + + if (RetTy->isComplexIntegerType() || + (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) { + ABIArgInfo ArgInfo = + ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); + ArgInfo.setInReg(true); + return ArgInfo; + } + } + + return getNaturalAlignIndirect(RetTy); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + if (isPromotableIntegerTypeForABI(RetTy)) + return ABIArgInfo::getExtend(RetTy); + + if ((RetTy->isUnsignedIntegerOrEnumerationType() || + RetTy->isSignedIntegerOrEnumerationType()) && + Size == 32) + return ABIArgInfo::getSignExtend(RetTy); + + return ABIArgInfo::getDirect(); +} + +void Sw64ABIInfo::computeInfo(CGFunctionInfo &FI) const { + + CCState State(FI); + if (FI.getHasRegParm()) { + State.FreeRegs = FI.getRegParm(); + } else { + State.FreeRegs = 6; + } + + ABIArgInfo &RetInfo = FI.getReturnInfo(); + if (!getCXXABI().classifyReturnType(FI)) + RetInfo = classifyReturnType(FI.getReturnType()); + + // Check if a pointer to an aggregate is passed as a hidden argument. + uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0; + + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, Offset, State); +} + +Address Sw64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType OrigTy) const { + + QualType Ty = OrigTy; + auto TyAlign = getContext().getTypeInfoInChars(Ty).Align; + if (!Ty->isStructureOrClassType() && (TyAlign.getQuantity() <= 8)) { + ABIArgInfo AI = classifyArgumentType(Ty); + return EmitVAArgInstr(CGF, VAListAddr, OrigTy, AI); + } + + bool DidPromote = false; + auto TyInfo = getContext().getTypeInfoInChars(Ty); + + // The alignment of things in the argument area is never larger than + // StackAlignInBytes. + TyInfo.Align = + std::min(TyInfo.Align, CharUnits::fromQuantity(StackAlignInBytes)); + + bool IsIndirect = false; + bool AllowHigherAlign = true; + + CharUnits DirectSize, DirectAlign; + if (IsIndirect) { + DirectAlign = CGF.getPointerAlign(); + } else { + DirectAlign = TyInfo.Align; + } + // Cast the address we've calculated to the right type. 
+ llvm::Type *DirectTy = CGF.ConvertTypeForMem(Ty), *ElementTy = DirectTy; + if (IsIndirect) + DirectTy = DirectTy->getPointerTo(0); + + CharUnits SlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes); + + // Handle vaList specified on Sw64, struct{char *ptr, int offset} + Address vaList_ptr_p = CGF.Builder.CreateStructGEP(VAListAddr, 0); + llvm::Value *vaList_ptr = CGF.Builder.CreateLoad(vaList_ptr_p); + Address vaList_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1); + llvm::Value *vaList_offset = CGF.Builder.CreateLoad(vaList_offset_p); + + uint64_t TySize = TyInfo.Width.getQuantity(); + llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, TySize); + CGF.Builder.CreateStore(CGF.Builder.CreateAdd(vaList_offset, Offset), + vaList_offset_p); + + llvm::Value *GPAddr = + CGF.Builder.CreateGEP(CGF.Int8Ty, vaList_ptr, vaList_offset); + + // If the CC aligns values higher than the slot size, do so if needed. + Address Addr = Address::invalid(); + if (AllowHigherAlign && DirectAlign > SlotSize) { + Addr = Address(emitRoundPointerUpToAlignment(CGF, GPAddr, DirectAlign), + CGF.Int8Ty, DirectAlign); + } else { + Addr = Address(GPAddr, CGF.Int8Ty, SlotSize); + } + + Addr = Addr.withElementType(DirectTy); + + if (IsIndirect) { + Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, TyInfo.Align); + } + + // If there was a promotion, "unpromote" into a temporary. + // TODO: can we just use a pointer into a subset of the original slot? + if (DidPromote) { + Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp"); + llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr); + + // Truncate down to the right width. + llvm::Type *IntTy = + (OrigTy->isIntegerType() ? Temp.getElementType() : CGF.IntPtrTy); + llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy); + if (OrigTy->isPointerType()) + V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType()); + + CGF.Builder.CreateStore(V, Temp); + Addr = Temp; + } + + return Addr; +} + +ABIArgInfo Sw64ABIInfo::extendType(QualType Ty) const { + int TySize = getContext().getTypeSize(Ty); + + // SW64 ABI requires unsigned 32 bit integers to be sign extended. + if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) + return ABIArgInfo::getSignExtend(Ty); + + return ABIArgInfo::getExtend(Ty); +} + +bool Sw64TargetCodeGenInfo::initDwarfEHRegSizeTable( + CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { + // SW have much different from Mips. This should be rewrite. + + // This information comes from gcc's implementation, which seems to + // as canonical as it gets. + + // Everything on Sw64 is 4 bytes. Double-precision FP registers + // are aliased to pairs of single-precision FP registers. + llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); + + // 0-31 are the general purpose registers, $0 - $31. + // 32-63 are the floating-point registers, $f0 - $f31. + // 64 and 65 are the multiply/divide registers, $hi and $lo. + // 66 is the (notional, I think) register for signal-handler return. + AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65); + + // 67-74 are the floating-point status registers, $fcc0 - $fcc7. + // They are one bit wide and ignored here. + + // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31. + // (coprocessor 1 is the FP unit) + // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31. + // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31. + // 176-181 are the DSP accumulator registers. 
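Taken together, the EmitVAArg lowering above walks the two-field va_list by loading the save-area pointer and the current offset, bumping the stored offset by the argument's size, and reading the argument at the old offset (re-aligning the address when the type needs more than the 8-byte slot). A minimal C sketch of that walk, for illustration only and ignoring the alignment and promotion corner cases:

    /* Illustrative sketch only; mirrors the { void *__stack; int __offs; }
       va_list and the load/advance sequence emitted above. */
    typedef struct { char *__stack; int __offs; } sw64_va_list;

    static void *sw64_va_next(sw64_va_list *ap, int size) {
      void *arg = ap->__stack + ap->__offs;  /* argument lives at ptr + offset   */
      ap->__offs += size;                    /* advance offset for the next read */
      return arg;
    }
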
+ AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181); + return false; +} + +std::unique_ptr +CodeGen::createSw64TargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique(CGM.getTypes()); +} diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index ac30007588b1..a1757224b2d8 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -44,6 +44,7 @@ add_clang_library(clangDriver ToolChains/Arch/PPC.cpp ToolChains/Arch/RISCV.cpp ToolChains/Arch/Sparc.cpp + ToolChains/Arch/Sw64.cpp ToolChains/Arch/SystemZ.cpp ToolChains/Arch/VE.cpp ToolChains/Arch/X86.cpp @@ -94,6 +95,7 @@ add_clang_library(clangDriver ToolChains/XCore.cpp ToolChains/PPCLinux.cpp ToolChains/PPCFreeBSD.cpp + ToolChains/Sw64Toolchain.cpp ToolChains/InterfaceStubs.cpp ToolChains/ZOS.cpp Types.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 28b33c8862e4..6a1c6ca2c867 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -49,6 +49,7 @@ #include "ToolChains/RISCVToolchain.h" #include "ToolChains/SPIRV.h" #include "ToolChains/Solaris.h" +#include "ToolChains/Sw64Toolchain.h" #include "ToolChains/TCE.h" #include "ToolChains/VEToolchain.h" #include "ToolChains/WebAssembly.h" @@ -6497,6 +6498,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::csky: TC = std::make_unique(*this, Target, Args); break; + case llvm::Triple::sw_64: + TC = std::make_unique(*this, Target, Args); + break; default: if (Target.getVendor() == llvm::Triple::Myriad) TC = std::make_unique(*this, Target, diff --git a/clang/lib/Driver/ToolChains/Arch/Sw64.cpp b/clang/lib/Driver/ToolChains/Arch/Sw64.cpp new file mode 100644 index 000000000000..895175d223ad --- /dev/null +++ b/clang/lib/Driver/ToolChains/Arch/Sw64.cpp @@ -0,0 +1,94 @@ +//===--------- Sw64.cpp - Sw64 Helpers for Tools ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64.h" +#include "ToolChains/CommonArgs.h" +#include "clang/Driver/Options.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/Sw64TargetParser.h" + +using namespace clang::driver; +using namespace clang::driver::tools; +using namespace clang; +using namespace llvm::opt; + +const char *Sw64::getSw64TargetCPU(const ArgList &Args) { + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ)) { + StringRef Mcpu = llvm::Sw64::getMcpuFromMArch(A->getValue()); + if (Mcpu != "") + return Mcpu.data(); + else + return A->getValue(); + } + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) + return A->getValue(); + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) + return A->getValue(); + return "sw6b"; +} + +void Sw64::getSw64TargetFeatures(const Driver &D, const ArgList &Args, + std::vector &Features) { + // -m(no-)simd overrides use of the vector facility. 
+ AddTargetFeature(Args, Features, options::OPT_msimd, options::OPT_mno_simd, + "simd"); + + if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { + StringRef Mcpu = A->getValue(); + if (Mcpu.startswith("sw6b") || Mcpu.startswith("sw4d")) + Features.push_back("+core3b"); + else if (Mcpu.startswith("sw8a")) + Features.push_back("+core4"); + } + + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { + StringRef March = A->getValue(); + if (March.startswith("core3b")) + Features.push_back("+core3b"); + else if (March.startswith("core4")) + Features.push_back("+core4"); + } + + if (Args.hasArg(options::OPT_ffixed_sw_1)) + Features.push_back("+reserve-r1"); + if (Args.hasArg(options::OPT_ffixed_sw_2)) + Features.push_back("+reserve-r2"); + if (Args.hasArg(options::OPT_ffixed_sw_3)) + Features.push_back("+reserve-r3"); + if (Args.hasArg(options::OPT_ffixed_sw_4)) + Features.push_back("+reserve-r4"); + if (Args.hasArg(options::OPT_ffixed_sw_5)) + Features.push_back("+reserve-r5"); + if (Args.hasArg(options::OPT_ffixed_sw_6)) + Features.push_back("+reserve-r6"); + if (Args.hasArg(options::OPT_ffixed_sw_7)) + Features.push_back("+reserve-r7"); + if (Args.hasArg(options::OPT_ffixed_sw_8)) + Features.push_back("+reserve-r8"); + if (Args.hasArg(options::OPT_ffixed_sw_9)) + Features.push_back("+reserve-r9"); + if (Args.hasArg(options::OPT_ffixed_sw_10)) + Features.push_back("+reserve-r10"); + if (Args.hasArg(options::OPT_ffixed_sw_11)) + Features.push_back("+reserve-r11"); + if (Args.hasArg(options::OPT_ffixed_sw_12)) + Features.push_back("+reserve-r12"); + if (Args.hasArg(options::OPT_ffixed_sw_13)) + Features.push_back("+reserve-r13"); + if (Args.hasArg(options::OPT_ffixed_sw_14)) + Features.push_back("+reserve-r14"); + if (Args.hasArg(options::OPT_ffixed_sw_22)) + Features.push_back("+reserve-r22"); + if (Args.hasArg(options::OPT_ffixed_sw_23)) + Features.push_back("+reserve-r23"); + if (Args.hasArg(options::OPT_ffixed_sw_24)) + Features.push_back("+reserve-r24"); + if (Args.hasArg(options::OPT_ffixed_sw_25)) + Features.push_back("+reserve-r25"); +} diff --git a/clang/lib/Driver/ToolChains/Arch/Sw64.h b/clang/lib/Driver/ToolChains/Arch/Sw64.h new file mode 100644 index 000000000000..cc319026b2d1 --- /dev/null +++ b/clang/lib/Driver/ToolChains/Arch/Sw64.h @@ -0,0 +1,34 @@ +//===--- Sw64.h - Sw64-specific Tool Helpers --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
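Tying the driver and target pieces together: getSw64TargetFeatures above maps -mcpu=sw8a (or -march=core4) to the +core4 feature, handleTargetFeatures in Sw64TargetInfo sets HasCore4 from it, and getTargetDefines (earlier in this patch) then predefines __sw_64_sw8a__, so source code can be conditionalized in the usual way. A small illustrative snippet, not part of the patch:

    /* Illustrative only; the macros are predefined by getTargetDefines above. */
    #if defined(__sw_64__) && defined(__sw_64_sw8a__)
      /* path that may rely on core4 (sw8a) features */
    #else
      /* generic sw_64 path */
    #endif
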
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_SW64_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_SW64_H + +#include "clang/Driver/Driver.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/Option.h" +#include "llvm/TargetParser/Triple.h" +#include + +namespace clang { +namespace driver { +namespace tools { +namespace Sw64 { + +const char *getSw64TargetCPU(const llvm::opt::ArgList &Args); + +void getSw64TargetFeatures(const Driver &D, const llvm::opt::ArgList &Args, + std::vector &Features); + +} // end namespace Sw64 +} // end namespace tools +} // end namespace driver +} // end namespace clang + +#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_SW64_H diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7e78e4d8d351..fd441db9b1c9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -17,6 +17,7 @@ #include "Arch/PPC.h" #include "Arch/RISCV.h" #include "Arch/Sparc.h" +#include "Arch/Sw64.h" #include "Arch/SystemZ.h" #include "Arch/VE.h" #include "Arch/X86.h" @@ -53,6 +54,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/RISCVISAInfo.h" +#include "llvm/Support/Sw64TargetParser.h" #include "llvm/Support/YAMLParser.h" #include "llvm/TargetParser/ARMTargetParserCommon.h" #include "llvm/TargetParser/Host.h" @@ -478,6 +480,7 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args, case llvm::Triple::mips64el: case llvm::Triple::mips: case llvm::Triple::mipsel: + case llvm::Triple::sw_64: case llvm::Triple::systemz: case llvm::Triple::x86: case llvm::Triple::x86_64: @@ -1735,6 +1738,10 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, AddSparcTargetArgs(Args, CmdArgs); break; + case llvm::Triple::sw_64: + AddSw64TargetArgs(Args, CmdArgs); + break; + case llvm::Triple::systemz: AddSystemZTargetArgs(Args, CmdArgs); break; @@ -2233,6 +2240,34 @@ void Clang::AddSparcTargetArgs(const ArgList &Args, } } +void Clang::AddSw64TargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + std::string TuneCPU; + + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { + StringRef Name = A->getValue(); + + Name = llvm::Sw64::resolveTuneCPUAlias(Name, true); + TuneCPU = std::string(Name); + } + if (!TuneCPU.empty()) { + CmdArgs.push_back("-tune-cpu"); + CmdArgs.push_back(Args.MakeArgString(TuneCPU)); + } + + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + StringRef OOpt; + if (A->getOption().matches(options::OPT_O)) + OOpt = A->getValue(); + + if (A->getOption().matches(options::OPT_O0) || OOpt == "1" || OOpt == "s") + return; + + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-loop-prefetch-writes=true"); + } +} + void Clang::AddSystemZTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { @@ -5096,6 +5131,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_Wa_COMMA, options::OPT_Xassembler, options::OPT_mllvm, + options::OPT_fsw_int_divmod, + options::OPT_fsw_shift_word, + options::OPT_fsw_rev, + options::OPT_fsw_recip, + options::OPT_fsw_fprnd, + options::OPT_fsw_cmov, + options::OPT_fsw_auto_inc_dec, + options::OPT_fsw_use_cas, }; for (const auto &A : Args) if (llvm::is_contained(kBitcodeOptionIgnorelist, A->getOption().getID())) @@ -5287,6 +5330,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, 
unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(TC, Args); + if (TC.getArch() == llvm::Triple::sw_64 && + RelocationModel != llvm::Reloc::PIC_) + RelocationModel = llvm::Reloc::PIC_; + Arg *LastPICDataRelArg = Args.getLastArg(options::OPT_mno_pic_data_is_text_relative, options::OPT_mpic_data_is_text_relative); @@ -5649,6 +5696,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else if (TC.getTriple().isPPC() && (A->getOption().getID() != options::OPT_mlong_double_80)) A->render(Args, CmdArgs); + else if (TC.getTriple().isSw64()) + A->render(Args, CmdArgs); else D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << TripleStr; @@ -6623,6 +6672,46 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ); + if (Args.getLastArg(options::OPT_fsw_int_divmod)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-int-divmod"); + } + + if (Args.getLastArg(options::OPT_fsw_shift_word)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-shift-word"); + } + + if (Args.getLastArg(options::OPT_fsw_rev)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-rev"); + } + + if (Args.getLastArg(options::OPT_fsw_recip)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-recip"); + } + + if (Args.getLastArg(options::OPT_fsw_fprnd)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-fprnd"); + } + + if (Args.getLastArg(options::OPT_fsw_cmov)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-cmov"); + } + + if (Args.getLastArg(options::OPT_fsw_auto_inc_dec)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-auto-inc-dec"); + } + + if (Args.getLastArg(options::OPT_fsw_use_cas)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-use-cas"); + } + // -fno-strict-overflow implies -fwrapv if it isn't disabled, but // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. 
if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index 64fc86b6b0a7..667fe246d80d 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -69,6 +69,8 @@ private: llvm::opt::ArgStringList &CmdArgs) const; void AddSparcTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + void AddSw64TargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; void AddSystemZTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; void AddX86TargetArgs(const llvm::opt::ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index e01b21e102b1..0f599fdbeef3 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -16,6 +16,7 @@ #include "Arch/PPC.h" #include "Arch/RISCV.h" #include "Arch/Sparc.h" +#include "Arch/Sw64.h" #include "Arch/SystemZ.h" #include "Arch/VE.h" #include "Arch/X86.h" @@ -514,6 +515,9 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, case llvm::Triple::loongarch32: case llvm::Triple::loongarch64: return loongarch::getLoongArchTargetCPU(Args, T); + + case llvm::Triple::sw_64: + return Sw64::getSw64TargetCPU(Args); } } @@ -610,6 +614,9 @@ void tools::getTargetFeatureList(const Driver &D, case llvm::Triple::loongarch64: loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); break; + case llvm::Triple::sw_64: + Sw64::getSw64TargetFeatures(D, Args, Features); + break; } #ifdef ENABLE_CLASSIC_FLANG } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 243724ef528f..20900620a80c 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -14,6 +14,7 @@ #include "Arch/PPC.h" #include "Arch/RISCV.h" #include "Arch/Sparc.h" +#include "Arch/Sw64.h" #include "Arch/SystemZ.h" #include "CommonArgs.h" #include "Linux.h" @@ -287,6 +288,8 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { return "elf64ve"; case llvm::Triple::csky: return "cskyelf_linux"; + case llvm::Triple::sw_64: + return "elf64sw_64"; default: return nullptr; } @@ -974,6 +977,11 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, CmdArgs.push_back(Args.MakeArgString("-mmsa")); } + if (Arg *A = Args.getLastArg(options::OPT_msimd, options::OPT_mno_simd)) { + if (A->getOption().matches(options::OPT_msimd)) + CmdArgs.push_back(Args.MakeArgString("-msimd")); + } + Args.AddLastArg(CmdArgs, options::OPT_mhard_float, options::OPT_msoft_float); @@ -2495,6 +2503,12 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", "s390x-suse-linux", "s390x-redhat-linux"}; + static const char *const Sw64LibDirs[] = {"/lib64", "/lib", + "/lib/gcc/sw_64-sunway-linux-gnu/", + "/sw_64-sunway-linux-gnu/lib"}; + static const char *const Sw64Triples[] = { + "sw_64-sunway-linux-gnu", "sw_64-unknown-linux-gnu", "sw_64-linux-gnu", + "sw_64-openEuler-linux"}; using std::begin; using std::end; @@ -2748,6 +2762,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( LibDirs.append(begin(SystemZLibDirs), end(SystemZLibDirs)); TripleAliases.append(begin(SystemZTriples), end(SystemZTriples)); break; + case llvm::Triple::sw_64: + LibDirs.append(begin(Sw64LibDirs), end(Sw64LibDirs)); + 
TripleAliases.append(begin(Sw64Triples), end(Sw64Triples)); + break; default: // By default, just rely on the standard lib directories and the original // triple. @@ -3364,4 +3382,9 @@ void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs, if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, true)) CC1Args.push_back("-fno-use-init-array"); + if (getTriple().getArch() == llvm::Triple::sw_64 && + DriverArgs.hasArg(options::OPT_mieee)) { + CC1Args.push_back("-mllvm"); + CC1Args.push_back("-mieee"); + } } diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 186b45e3c437..9557e5f1e348 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -12,6 +12,7 @@ #include "Arch/Mips.h" #include "Arch/PPC.h" #include "Arch/RISCV.h" +#include "Arch/Sw64.h" #include "CommonArgs.h" #include "clang/Config/config.h" #include "clang/Driver/Distro.h" @@ -164,6 +165,8 @@ std::string Linux::getMultiarchTriple(const Driver &D, return "sparc64-linux-gnu"; case llvm::Triple::systemz: return "s390x-linux-gnu"; + case llvm::Triple::sw_64: + return "sw_64-linux-gnu"; } return TargetTriple.str(); } @@ -256,6 +259,10 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) const bool IsHexagon = Arch == llvm::Triple::hexagon; const bool IsRISCV = Triple.isRISCV(); const bool IsCSKY = Triple.isCSKY(); + const bool IsSw64 = Triple.isSw64(); + + if (IsSw64 && !SysRoot.empty()) + ExtraOpts.push_back("--sysroot=" + SysRoot); if (IsCSKY && !SelectedMultilibs.empty()) SysRoot = SysRoot + SelectedMultilibs.back().osSuffix(); @@ -330,6 +337,11 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) addPathIfExists(D, concat(SysRoot, "/usr", OSLibDir, ABIName), Paths); } + if (IsSw64) { + addPathIfExists(D, SysRoot + "/usr/lib/gcc/sw_64-sunway-linux-gnu/", Paths); + addPathIfExists(D, SysRoot + "/usr/sw_64-sunway-linux-gnu/lib", Paths); + } + Generic_GCC::AddMultiarchPaths(D, SysRoot, OSLibDir, Paths); addPathIfExists(D, concat(SysRoot, "/lib"), Paths); @@ -645,6 +657,10 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { LibDir = "lib64"; Loader = "ld-linux.so.2"; break; + case llvm::Triple::sw_64: + LibDir = "lib"; + Loader = "ld-linux.so.2"; + break; case llvm::Triple::systemz: LibDir = "lib"; Loader = "ld64.so.1"; diff --git a/clang/lib/Driver/ToolChains/Sw64Toolchain.cpp b/clang/lib/Driver/ToolChains/Sw64Toolchain.cpp new file mode 100644 index 000000000000..9992b350dda5 --- /dev/null +++ b/clang/lib/Driver/ToolChains/Sw64Toolchain.cpp @@ -0,0 +1,184 @@ +//===--- Sw64Toolchain.cpp - Sw64 ToolChain Implementations -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Sw64Toolchain.h" +#include "CommonArgs.h" +#include "Gnu.h" +#include "clang/Config/config.h" +#include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/VirtualFileSystem.h" + +using namespace clang; +using namespace clang::driver; +using namespace clang::driver::tools; +using namespace clang::driver::toolchains; +using namespace llvm::opt; + +void Sw64::Assembler::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + claimNoWarnArgs(Args); + ArgStringList CmdArgs; + + Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); + + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + + for (const auto &II : Inputs) + CmdArgs.push_back(II.getFilename()); + + const char *Exec = Args.MakeArgString( + getToolChain().GetProgramPath("sw_64-sunway-linux-gnu-as")); + C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), + Exec, CmdArgs, Inputs, Output)); +} + +void Sw64::Linker::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + const Driver &D = getToolChain().getDriver(); + ArgStringList CmdArgs; + + if (Output.isFilename()) { + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + } else { + assert(Output.isNothing() && "Invalid output."); + } + + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { + CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o"))); + CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); + CmdArgs.push_back( + Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); + CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); + } + + Args.AddAllArgs(CmdArgs, + {options::OPT_L, options::OPT_T_Group, options::OPT_e}); + + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); + + getToolChain().addProfileRTLibs(Args, CmdArgs); + + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { + if (D.CCCIsCXX()) { + if (getToolChain().ShouldLinkCXXStdlib(Args)) + getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); + CmdArgs.push_back("-lm"); + } + } + + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { + if (Args.hasArg(options::OPT_pthread)) + CmdArgs.push_back("-lpthread"); + CmdArgs.push_back("-lc"); + CmdArgs.push_back("-lgcc"); + CmdArgs.push_back("-lgcc_s"); + CmdArgs.push_back( + Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); + } + + const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); + C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), + Exec, CmdArgs, Inputs, Output)); +} + +/// Sw64Toolchain - Sw64 tool chain which can call as(1) and ld(1) directly. 
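+///
+/// For example (an illustrative sketch only; the exact command lines depend on
+/// the installed toolchain and the driver options), a build such as
+///   clang --target=sw_64-sunway-linux-gnu -fno-integrated-as hello.c -o hello
+/// is expected to assemble through sw_64-sunway-linux-gnu-as and then link the
+/// crt1.o/crti.o/crtbegin.o startup files, the object files, and
+/// -lc/-lgcc/-lgcc_s, as constructed by Sw64::Assembler::ConstructJob and
+/// Sw64::Linker::ConstructJob above.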
+ +Sw64Toolchain::Sw64Toolchain(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args) + : Generic_ELF(D, Triple, Args) { + getFilePaths().push_back(getDriver().Dir + "/../lib"); + getFilePaths().push_back("/usr/lib"); +} + +Tool *Sw64Toolchain::buildAssembler() const { + return new Sw64::Assembler(*this); +} + +Tool *Sw64Toolchain::buildLinker() const { return new Sw64::Linker(*this); } + +void Sw64Toolchain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + const Driver &D = getDriver(); + + if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + return; + + if (!DriverArgs.hasArg(options::OPT_nostdlibinc)) + addSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/local/include"); + + if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "include"); + addSystemInclude(DriverArgs, CC1Args, P); + } + if (DriverArgs.hasArg(options::OPT_nostdlibinc)) + return; + + // Check for configure-time C include directories. + StringRef CIncludeDirs(C_INCLUDE_DIRS); + if (CIncludeDirs != "") { + SmallVector dirs; + CIncludeDirs.split(dirs, ":"); + for (StringRef dir : dirs) { + StringRef Prefix = + llvm::sys::path::is_absolute(dir) ? StringRef(D.SysRoot) : ""; + addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir); + } + return; + } + + // Add include directories specific to the selected multilib set and multilib. + if (GCCInstallation.isValid()) { + const MultilibSet::IncludeDirsFunc &Callback = + Multilibs.includeDirsCallback(); + if (Callback) { + for (const auto &Path : Callback(GCCInstallation.getMultilib())) + addExternCSystemIncludeIfExists( + DriverArgs, CC1Args, GCCInstallation.getInstallPath() + Path); + } + } + + addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include"); +} + +void Sw64Toolchain::addLibStdCxxIncludePaths( + const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const { + // We need a detected GCC installation on Sw64 (similar to Linux) + // to provide libstdc++'s headers. + if (!GCCInstallation.isValid()) + return; + + // By default, look for the C++ headers in an include directory adjacent to + // the lib directory of the GCC installation. + // On Sw64 this usually looks like /usr/gcc/X.Y/include/c++/X.Y.Z + StringRef LibDir = GCCInstallation.getParentLibPath(); + StringRef TripleStr = GCCInstallation.getTriple().str(); + const Multilib &Multilib = GCCInstallation.getMultilib(); + const GCCVersion &Version = GCCInstallation.getVersion(); + + // The primary search for libstdc++ supports multiarch variants. + addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text, + TripleStr, Multilib.includeSuffix(), DriverArgs, + CC1Args); +} diff --git a/clang/lib/Driver/ToolChains/Sw64Toolchain.h b/clang/lib/Driver/ToolChains/Sw64Toolchain.h new file mode 100644 index 000000000000..c32f628b812d --- /dev/null +++ b/clang/lib/Driver/ToolChains/Sw64Toolchain.h @@ -0,0 +1,79 @@ +//===--- Sw64Toolchain.h - Sw64 ToolChain Implementations -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SW64TOOLCHAIN_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SW64TOOLCHAIN_H + +#include "Gnu.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/Option.h" + +namespace clang { +namespace driver { +namespace toolchains { + +class LLVM_LIBRARY_VISIBILITY Sw64Toolchain : public Generic_ELF { +public: + Sw64Toolchain(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args); + void + AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + + void + addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + + unsigned GetDefaultDwarfVersion() const override { return 2; } + +protected: + Tool *buildAssembler() const override; + Tool *buildLinker() const override; +}; + +} // end namespace toolchains + +/// Sw64 -- Directly call GNU Binutils assembler and linker +namespace tools { +namespace Sw64 { +class LLVM_LIBRARY_VISIBILITY Assembler : public Tool { +public: + Assembler(const ToolChain &TC) : Tool("sw_64::Assembler", "assembler", TC) {} + + bool hasIntegratedCPP() const override { return false; } + + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + +class LLVM_LIBRARY_VISIBILITY Linker : public Tool { +public: + Linker(const ToolChain &TC) : Tool("sw_64::Linker", "linker", TC) {} + + bool hasIntegratedCPP() const override { return false; } + bool isLinkJob() const override { return true; } + + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; +} // end namespace Sw64 +} // end namespace tools + +} // end namespace driver +} // end namespace clang + +#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SW64TOOLCHAIN_H diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp index 8c5134e25013..75bf3bc28b51 100644 --- a/clang/lib/Driver/XRayArgs.cpp +++ b/clang/lib/Driver/XRayArgs.cpp @@ -53,6 +53,7 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: + case llvm::Triple::sw_64: break; default: D.Diag(diag::err_drv_unsupported_opt_for_target) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 6ec2e3fc2af0..04d0f1c9f7a7 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1733,7 +1733,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, const llvm::Triple::ArchType DebugEntryValueArchs[] = { llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64, llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips, - llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el}; + llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el, + llvm::Triple::sw_64}; if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() && llvm::is_contained(DebugEntryValueArchs, T.getArch())) diff --git a/clang/lib/Headers/CMakeLists.txt 
b/clang/lib/Headers/CMakeLists.txt index 356009ae9157..abba9f45221c 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -108,6 +108,10 @@ set(sifive_files sifive_vector.h ) +set(sw64_files + sw64intrin.h + ) + set(systemz_files s390intrin.h vecintrin.h @@ -260,6 +264,7 @@ set(files ${ppc_htm_files} ${riscv_files} ${sifive_files} + ${sw64_files} ${systemz_files} ${ve_files} ${x86_files} @@ -463,6 +468,7 @@ add_header_target("mips-resource-headers" "${mips_msa_files}") add_header_target("ppc-resource-headers" "${ppc_files};${ppc_wrapper_files}") add_header_target("ppc-htm-resource-headers" "${ppc_htm_files}") add_header_target("riscv-resource-headers" "${riscv_files};${riscv_generated_files}") +add_header_target("sw64-resource-headers" "${sw64_files}") add_header_target("systemz-resource-headers" "${systemz_files}") add_header_target("ve-resource-headers" "${ve_files}") add_header_target("webassembly-resource-headers" "${webassembly_files}") @@ -608,6 +614,12 @@ install( EXCLUDE_FROM_ALL COMPONENT riscv-resource-headers) +install( + FILES ${sw64_files} + DESTINATION ${header_install_dir} + EXCLUDE_FROM_ALL + COMPONENT sw64-resource-headers) + install( FILES ${systemz_files} DESTINATION ${header_install_dir} diff --git a/clang/lib/Headers/sw64intrin.h b/clang/lib/Headers/sw64intrin.h new file mode 100644 index 000000000000..86a20c53a7ac --- /dev/null +++ b/clang/lib/Headers/sw64intrin.h @@ -0,0 +1,1590 @@ + +#ifndef __SW64INTRIN_H +#define __SW64INTRIN_H + +#include <stdint.h> +#include <stdio.h> +#include + +typedef int8_t charv32 __attribute__((__vector_size__(32), __aligned__(32))); +typedef uint8_t ucharv32 __attribute__((__vector_size__(32), __aligned__(32))); +typedef int16_t shortv16 __attribute__((__vector_size__(32), __aligned__(32))); +typedef uint16_t ushortv16 + __attribute__((__vector_size__(32), __aligned__(32))); +typedef int32_t intv8 __attribute__((__vector_size__(32), __aligned__(32))); +typedef uint32_t uintv8 __attribute__((__vector_size__(32), __aligned__(32))); +typedef int64_t longv4 __attribute__((__vector_size__(32), __aligned__(32))); +typedef uint64_t ulongv4 __attribute__((__vector_size__(32), __aligned__(32))); + +// As the sw64 floatv4 type is a very special case, we leave it like this for now.
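+// Unlike the 32-byte integer and double vector types above, floatv4 below is
+// declared with __vector_size__(16): four 32-bit floats in 16 bytes, so
+// sizeof(floatv4) == 16 while sizeof(doublev4) == 32.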
+typedef float floatv4 __attribute__((__vector_size__(16), __aligned__(16))); +typedef double doublev4 __attribute__((__vector_size__(32), __aligned__(32))); +// special case for int256 +typedef long long int256 __attribute__((__vector_size__(32), __aligned__(32))); +typedef unsigned long long uint256 + __attribute__((__vector_size__(32), __aligned__(32))); + +// special case for bytes compare +typedef int32_t int1v32_t; +// special case for half transform +typedef unsigned short float16v4_t + __attribute__((__vector_size__(8), __aligned__(8))); +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("simd"), \ + __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS_CORE4 \ + __attribute__((__always_inline__, __nodebug__, __target__("core4,simd"), \ + __min_vector_width__(256))) + +static __inline void simd_fprint_charv32(FILE *fp, charv32 a) { + union { + char __a[32]; + charv32 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[31], __u.__a[30], + __u.__a[29], __u.__a[28], __u.__a[27], __u.__a[26], __u.__a[25], + __u.__a[24]); + fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[23], __u.__a[22], + __u.__a[21], __u.__a[20], __u.__a[19], __u.__a[18], __u.__a[17], + __u.__a[16]); + fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[15], __u.__a[14], + __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9], + __u.__a[8]); + fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_ucharv32(FILE *fp, ucharv32 a) { + union { + unsigned char __a[32]; + ucharv32 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[31], __u.__a[30], + __u.__a[29], __u.__a[28], __u.__a[27], __u.__a[26], __u.__a[25], + __u.__a[24]); + fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[23], __u.__a[22], + __u.__a[21], __u.__a[20], __u.__a[19], __u.__a[18], __u.__a[17], + __u.__a[16]); + fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[15], __u.__a[14], + __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9], + __u.__a[8]); + fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_shortv16(FILE *fp, shortv16 a) { + union { + short __a[16]; + shortv16 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[15], __u.__a[14], + __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9], + __u.__a[8]); + fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_ushortv16(FILE *fp, ushortv16 a) { + union { + unsigned short __a[16]; + ushortv16 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[15], __u.__a[14], + __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9], + __u.__a[8]); + fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_intv8(FILE *fp, intv8 a) { + union { + int __a[8]; + intv8 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %d, %d, %d, %d, %d, %d, %d, %d ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], 
__u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_uintv8(FILE *fp, uintv8 a) { + union { + unsigned int __a[8]; + uintv8 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %u, %u, %u, %u, %u, %u, %u, %u ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_longv4(FILE *fp, longv4 a) { + union { + long __a[4]; + longv4 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %ld, %ld, %ld, %ld ]\n", __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_ulongv4(FILE *fp, ulongv4 a) { + union { + unsigned long __a[4]; + ulongv4 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %lu, %lu, %lu, %lu ]\n", __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_floatv4(FILE *fp, floatv4 a) { + union { + float __a[4]; + floatv4 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %.8e, %.8e, %.8e, %.8e ]\n", __u.__a[3], __u.__a[2], + __u.__a[1], __u.__a[0]); +} + +static __inline void simd_fprint_doublev4(FILE *fp, doublev4 a) { + union { + double __a[4]; + doublev4 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %.16e, %.16e, %.16e, %.16e ]\n", __u.__a[3], __u.__a[2], + __u.__a[1], __u.__a[0]); +} + +static __inline void simd_fprint_int256(FILE *fp, int256 a) { + volatile union { + long __a[4]; + int256 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ 0x%lx, 0x%lx, 0x%lx, 0x%lx ]\n", __u.__a[3], __u.__a[2], + __u.__a[1], __u.__a[0]); +} + +static __inline void simd_fprint_uint256(FILE *fp, uint256 a) { + volatile union { + unsigned long __a[4]; + uint256 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ 0x%lx, 0x%lx, 0x%lx, 0x%lx ]\n", __u.__a[3], __u.__a[2], + __u.__a[1], __u.__a[0]); +} + +static __inline void simd_print_charv32(charv32 arg) { + simd_fprint_charv32(stdout, arg); +} +static __inline void simd_print_ucharv32(ucharv32 arg) { + simd_fprint_ucharv32(stdout, arg); +} +static __inline void simd_print_shortv16(shortv16 arg) { + simd_fprint_shortv16(stdout, arg); +} +static __inline void simd_print_ushortv16(ushortv16 arg) { + simd_fprint_ushortv16(stdout, arg); +} +static __inline void simd_print_intv8(intv8 arg) { + simd_fprint_intv8(stdout, arg); +} +static __inline void simd_print_uintv8(uintv8 arg) { + simd_fprint_uintv8(stdout, arg); +} +static __inline void simd_print_longv4(longv4 arg) { + simd_fprint_longv4(stdout, arg); +} +static __inline void simd_print_ulongv4(ulongv4 arg) { + simd_fprint_ulongv4(stdout, arg); +} +static __inline void simd_print_floatv4(floatv4 arg) { + simd_fprint_floatv4(stdout, arg); +} +static __inline void simd_print_doublev4(doublev4 arg) { + simd_fprint_doublev4(stdout, arg); +} +static __inline void simd_print_int256(int256 arg) { + simd_fprint_int256(stdout, arg); +} +static __inline void simd_print_uint256(uint256 arg) { + simd_fprint_uint256(stdout, arg); +} + +// Vector Load Intrinsic + +#define simd_load(dest, src) \ + do { \ + (dest) = __builtin_sw_vload(src); \ + } while (0) + +#define simd_loadu(dest, src) \ + do { \ + (dest) = __builtin_sw_vloadu(src); \ + } while (0) + +#define simd_load_u(dest, src) \ + do { \ + (dest) = __builtin_sw_vload_u(src); \ + } while (0) + +#define simd_loade(dest, src) \ + do { \ + (dest) = __builtin_sw_vloade(src); \ + } while (0) + +#define simd_vload_nc(dest, src) \ + do { \ + (dest) = __builtin_sw_vloadnc(src); \ + } while (0) + +#define simd_store(src, dest) \ + do { \ + __builtin_sw_vstore(src, dest); \ + } while (0) + +#define simd_storeu(src, dest) \ + do { \ + 
__builtin_sw_vstoreu(src, dest); \ + } while (0) + +#define simd_store_u(src, dest) \ + do { \ + __builtin_sw_vstore_u(src, dest); \ + } while (0) + +#define simd_storeuh(src, dest) \ + do { \ + uint64_t __ptr = (uint64_t)dest + (uint64_t)sizeof(src); \ + __builtin_sw_vstoreuh(src, (__typeof__(dest))__ptr); \ + } while (0) + +#define simd_storeul(src, dest) \ + do { \ + __builtin_sw_vstoreul(src, dest); \ + } while (0) + +#define simd_vstore_nc(src, dest) \ + do { \ + __builtin_sw_vstorenc(src, dest); \ + } while (0) + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_loads(const float *__ptr) { + return *(floatv4 *)__ptr; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_loadd(const double *__ptr) { + return *(doublev4 *)__ptr; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_stores(const float *__ptr, + floatv4 a) { + *(floatv4 *)__ptr = a; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_stored(const double *__ptr, + doublev4 a) { + *(doublev4 *)__ptr = a; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_loadew(const int32_t *__ptr) { + int32_t __a = *__ptr; + return __extension__(intv8){__a, __a, __a, __a, __a, __a, __a, __a}; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_loadel(const int64_t *__ptr) { + int64_t __a = *__ptr; + return __extension__(longv4){__a, __a, __a, __a}; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_loades(const float *__ptr) { + float __a = *__ptr; + return __extension__(floatv4){__a, __a, __a, __a}; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_loaded(const double *__ptr) { + double __a = *__ptr; + return __extension__(doublev4){__a, __a, __a, __a}; +} + +// Vector Setting Intrinsic Sw64 + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_set_charv32( + int8_t __b31, int8_t __b30, int8_t __b29, int8_t __b28, int8_t __b27, + int8_t __b26, int8_t __b25, int8_t __b24, int8_t __b23, int8_t __b22, + int8_t __b21, int8_t __b20, int8_t __b19, int8_t __b18, int8_t __b17, + int8_t __b16, int8_t __b15, int8_t __b14, int8_t __b13, int8_t __b12, + int8_t __b11, int8_t __b10, int8_t __b09, int8_t __b08, int8_t __b07, + int8_t __b06, int8_t __b05, int8_t __b04, int8_t __b03, int8_t __b02, + int8_t __b01, int8_t __b00) { + return __extension__(charv32){__b31, __b30, __b29, __b28, __b27, __b26, __b25, + __b24, __b23, __b22, __b21, __b20, __b19, __b18, + __b17, __b16, __b15, __b14, __b13, __b12, __b11, + __b10, __b09, __b08, __b07, __b06, __b05, __b04, + __b03, __b02, __b01, __b00}; +} +#define simd_set_ucharv32 simd_set_charv32 + +static __inline__ shortv16 __DEFAULT_FN_ATTRS +simd_set_shortv16(int16_t __b15, int16_t __b14, int16_t __b13, int16_t __b12, + int16_t __b11, int16_t __b10, int16_t __b09, int16_t __b08, + int16_t __b07, int16_t __b06, int16_t __b05, int16_t __b04, + int16_t __b03, int16_t __b02, int16_t __b01, int16_t __b00) { + return __extension__(shortv16){__b15, __b14, __b13, __b12, __b11, __b10, + __b09, __b08, __b07, __b06, __b05, __b04, + __b03, __b02, __b01, __b00}; +} +#define simd_set_ushortv16 simd_set_shortv16 + +static __inline__ intv8 __DEFAULT_FN_ATTRS +simd_set_intv8(int32_t __b07, int32_t __b06, int32_t __b05, int32_t __b04, + int32_t __b03, int32_t __b02, int32_t __b01, int32_t __b00) { + return __extension__(intv8){__b07, __b06, __b05, __b04, + __b03, __b02, __b01, __b00}; +} +#define simd_set_uintv8 simd_set_intv8 + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_set_longv4(int64_t __b03, + int64_t __b02, + int64_t __b01, + int64_t __b00) { + return __extension__(longv4){__b03, __b02, 
__b01, __b00}; +} +#define simd_set_ulongv4 simd_set_longv4 +#define simd_set_int256 simd_set_longv4 +#define simd_set_uint256 simd_set_longv4 + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_set_floatv4(float __b03, + float __b02, + float __b01, + float __b00) { + return __extension__(floatv4){__b03, __b02, __b01, __b00}; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_set_doublev4(double __b03, + double __b02, + double __b01, + double __b00) { + return __extension__(doublev4){__b03, __b02, __b01, __b00}; +} + +// Integer Araith Intrinsic Sw64 +// Caculate adden for given vector as int32_tx8, +// it isn't normal overflow result. +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vaddw(intv8 a, intv8 b) { + return a + b; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vaddwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return a + tmp; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsubw(intv8 a, intv8 b) { + return a - b; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsubwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return a - tmp; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddw(intv8 a, intv8 b) { + return __builtin_sw_vucaddw(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vucaddw(a, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubw(intv8 a, intv8 b) { + return __builtin_sw_vucsubw(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vucsubw(a, tmp); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vaddl(longv4 a, longv4 b) { + return a + b; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vaddli(longv4 a, + const int64_t __b) { + longv4 __tmp = __extension__(longv4){__b, __b, __b, __b}; + return a + __tmp; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vsubl(longv4 a, longv4 b) { + return a - b; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vsubli(longv4 a, + const int64_t __b) { + longv4 __tmp = __extension__(longv4){__b, __b, __b, __b}; + return a - __tmp; +} + +// for core3 simd doesn't support v16i16, v32i8 +// it must use v8i32 instead. 
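+// A minimal usage sketch for the load/arithmetic/store intrinsics above
+// (illustrative only; the buffers and values are made up for the example):
+//   int32_t src[8], dst[8];
+//   intv8 va, vb;
+//   simd_load(va, src);        // va = vector loaded from src
+//   vb = simd_vaddwi(va, 10);  // element-wise va + 10
+//   vb = simd_vucaddw(va, vb); // "uc" add variant; not a plain wrapping add (see above)
+//   simd_store(vb, dst);       // write vb back to dst
+// The *i immediate forms simply splat the scalar operand into a temporary
+// vector and reuse the corresponding vector operation, as defined above.
+// The __sw_64_sw8a__ block below adds shortv16/charv32 variants of the
+// vucadd/vucsub operations; on core3 the intv8 forms after #else are used instead.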
+#ifdef __sw_64_sw8a__ +static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucaddh(shortv16 a, + shortv16 b) { + return __builtin_sw_vucaddh_v16hi(a, b); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucaddhi(shortv16 a, + const int b) { + int16_t __b = (int16_t)b; + shortv16 tmp = + __extension__(shortv16){__b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b}; + return __builtin_sw_vucaddh_v16hi(a, tmp); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucsubh(shortv16 a, + shortv16 b) { + return __builtin_sw_vucsubh_v16hi(a, b); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucsubhi(shortv16 a, + const int b) { + int16_t __b = (int16_t)b; + shortv16 tmp = + __extension__(shortv16){__b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b}; + return __builtin_sw_vucsubh_v16hi(a, tmp); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucaddb(charv32 a, + charv32 b) { + return __builtin_sw_vucaddb_v32qi(a, b); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucaddbi(charv32 a, + const int b) { + int8_t __b = (int8_t)b; + charv32 tmp = __extension__(charv32){__b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b}; + return __builtin_sw_vucaddb_v32qi(a, tmp); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucsubb(charv32 a, + charv32 b) { + charv32 tmp = + __extension__(charv32){b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b}; + return __builtin_sw_vucsubb_v32qi(a, b); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucsubbi(charv32 a, + const int b) { + int8_t __b = (int8_t)b; + charv32 tmp = __extension__(charv32){__b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b}; + return __builtin_sw_vucsubb_v32qi(a, tmp); +} +#else +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddh(intv8 a, intv8 b) { + return __builtin_sw_vucaddh(a, b); +} + +#define simd_vucaddhi __builtin_sw_vucaddhi + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubh(intv8 a, intv8 b) { + return __builtin_sw_vucsubh(a, b); +} + +#define simd_vucsubhi __builtin_sw_vucsubhi + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddb(intv8 a, intv8 b) { + return __builtin_sw_vucaddb(a, b); +} + +#define simd_vucaddbi __builtin_sw_vucaddbi + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubb(intv8 a, intv8 b) { + return __builtin_sw_vucsubb(a, b); +} + +#define simd_vucsubbi __builtin_sw_vucsubbi +#endif + +static __inline__ int32_t __DEFAULT_FN_ATTRS_CORE4 simd_vsumw(intv8 a) { + return __builtin_sw_vsumw(a); +} + +static __inline__ int64_t __DEFAULT_FN_ATTRS_CORE4 simd_vsuml(longv4 a) { + return __builtin_sw_vsuml(a); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_ctpopow(int256 a) { + return __builtin_sw_ctpopow(a); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_ctlzow(int256 a) { + return __builtin_sw_ctlzow(a); +} + +// Vector Shift intrinsics +// Gerate vsll(b|h|w|l) instruction due to Type define + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vsllw(uintv8 a, int i) { + return __builtin_sw_vsll(a, (int64_t)i); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vsrlw(uintv8 a, int i) { + return __builtin_sw_vsrl(a, (int64_t)i); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsraw(intv8 a, int i) { + 
return __builtin_sw_vsra(a, (int64_t)i); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vrolw(intv8 a, int i) { + return __builtin_sw_vrol(a, (int64_t)i); +} + +#define simd_vsllwi simd_vsllw +#define simd_vsrlwi simd_vsrlw +#define simd_vsrawi simd_vsraw +#define simd_vrolwi simd_vrolw + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vsllb(charv32 a, + int i) { + return __builtin_sw_vsll(a, (int64_t)i); +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vsrlb(ucharv32 a, + int i) { + return __builtin_sw_vsrl(a, (int64_t)i); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vsrab(charv32 a, + int i) { + return __builtin_sw_vsra(a, (int64_t)i); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vrolb(charv32 a, + int i) { + return __builtin_sw_vrol(a, (int64_t)i); +} + +#define simd_vsllbi simd_vsllb +#define simd_vsrlbi simd_vsrlb +#define simd_vsrabi simd_vsrab +#define simd_vrolbi simd_vrolb + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vslll(longv4 a, int i) { + return __builtin_sw_vsll(a, (int64_t)i); +} + +static __inline__ ulongv4 __DEFAULT_FN_ATTRS_CORE4 simd_vsrll(ulongv4 a, + int i) { + return __builtin_sw_vsrl(a, (int64_t)i); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vsral(longv4 a, int i) { + return __builtin_sw_vsra(a, (int64_t)i); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vroll(longv4 a, int i) { + return __builtin_sw_vrol(a, (int64_t)i); +} + +#define simd_vsllli simd_vslll +#define simd_vsrlli simd_vsrll +#define simd_vsrali simd_vsral +#define simd_vrolli simd_vroll + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vsllh(shortv16 a, + int i) { + return __builtin_sw_vsll(a, (int64_t)i); +} + +static __inline__ ushortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vsrlh(ushortv16 a, + int i) { + return __builtin_sw_vsrl(a, (int64_t)i); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vsrah(shortv16 a, + int i) { + return __builtin_sw_vsra(a, (int64_t)i); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vrolh(shortv16 a, + int i) { + return __builtin_sw_vrol(a, (int64_t)i); +} + +#define simd_vsllhi simd_vsllh +#define simd_vsrlhi simd_vsrlh +#define simd_vsrahi simd_vsrah +#define simd_vrolhi simd_vrolh + +static __inline__ int256 __DEFAULT_FN_ATTRS simd_srlow(int256 a, int i) { + return __builtin_sw_srlow(a, (int64_t)i); +} + +static __inline__ int256 __DEFAULT_FN_ATTRS simd_sllow(int256 a, int i) { + return __builtin_sw_sllow(a, (int64_t)i); +} + +static __inline__ int256 __DEFAULT_FN_ATTRS simd_sraow(int256 a, int i) { + return __builtin_sw_sraow(a, (int64_t)i); +} + +#define simd_srlowi simd_srlow +#define simd_sllowi simd_sllow +#define simd_sraowi simd_sraow + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vslls1(floatv4 a) { + return __builtin_sw_vslls(a, 64); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vslls2(floatv4 a) { + return __builtin_sw_vslls(a, 128); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vslls3(floatv4 a) { + return __builtin_sw_vslls(a, 192); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vslld1(doublev4 a) { + return __builtin_sw_vslld(a, 64); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vslld2(doublev4 a) { + return __builtin_sw_vslld(a, 128); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vslld3(doublev4 a) { + return __builtin_sw_vslld(a, 192); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsrls1(floatv4 a) { + return __builtin_sw_vsrls(a, 64); +} + +static 
__inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsrls2(floatv4 a) { + return __builtin_sw_vsrls(a, 128); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsrls3(floatv4 a) { + return __builtin_sw_vsrls(a, 192); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsrld1(doublev4 a) { + return __builtin_sw_vsrld(a, 64); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsrld2(doublev4 a) { + return __builtin_sw_vsrld(a, 128); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsrld3(doublev4 a) { + return __builtin_sw_vsrld(a, 192); +} + +// Integer Compare Inst + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_vcmpgew(intv8 a, intv8 b) { + return __builtin_sw_vcmpgew(a, b); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_vcmpgewi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpgew(a, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpeqw(intv8 a, intv8 b) { + return __builtin_sw_vcmpeqw(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpeqwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpeqw(a, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmplew(intv8 a, intv8 b) { + return __builtin_sw_vcmplew(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmplewi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmplew(a, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpltw(intv8 a, intv8 b) { + return __builtin_sw_vcmpltw(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpltwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpltw(a, tmp); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpulew(uintv8 a, uintv8 b) { + return __builtin_sw_vcmpulew(a, b); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpulewi(uintv8 a, + const uint32_t b) { + uintv8 tmp = __extension__(uintv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpulew(a, tmp); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpultw(uintv8 a, uintv8 b) { + return __builtin_sw_vcmpultw(a, b); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpultwi(uintv8 a, + const uint32_t b) { + uintv8 tmp = __extension__(uintv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpultw(a, tmp); +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vcmpueqb(ucharv32 a, + ucharv32 b) { + ucharv32 res = (ucharv32)__builtin_sw_vcmpueqb(a, b); + return res; +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 +simd_vcmpueqbi(ucharv32 a, const uint32_t b) { + uint8_t __b = (uint8_t)b; + ucharv32 tmp = __extension__(ucharv32){ + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b}; + ucharv32 res = (ucharv32)__builtin_sw_vcmpueqb(a, tmp); + return res; +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vcmpugtb(ucharv32 a, + ucharv32 b) { + ucharv32 res = (ucharv32)__builtin_sw_vcmpugtb(a, b); + return res; +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 +simd_vcmpugtbi(ucharv32 a, const uint32_t b) { + uint8_t __b = (uint8_t)b; + ucharv32 tmp = __extension__(ucharv32){ + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, 
__b, __b, __b, __b}; + ucharv32 res = (ucharv32)__builtin_sw_vcmpugtb(a, tmp); + return res; +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxb(charv32 a, + charv32 b) { + return __builtin_sw_vmaxb(a, b); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxh(shortv16 a, + shortv16 b) { + return __builtin_sw_vmaxh(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxw(intv8 a, intv8 b) { + return __builtin_sw_vmaxw(a, b); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxl(longv4 a, + longv4 b) { + return __builtin_sw_vmaxl(a, b); +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxb(ucharv32 a, + ucharv32 b) { + return __builtin_sw_vumaxb(a, b); +} + +static __inline__ ushortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxh(ushortv16 a, + ushortv16 b) { + return __builtin_sw_vumaxh(a, b); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxw(uintv8 a, + uintv8 b) { + return __builtin_sw_vumaxw(a, b); +} + +static __inline__ ulongv4 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxl(ulongv4 a, + ulongv4 b) { + return __builtin_sw_vumaxl(a, b); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vminb(charv32 a, + charv32 b) { + return __builtin_sw_vminb(a, b); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vminh(shortv16 a, + shortv16 b) { + return __builtin_sw_vminh(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS_CORE4 simd_vminw(intv8 a, intv8 b) { + return __builtin_sw_vminw(a, b); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vminl(longv4 a, + longv4 b) { + return __builtin_sw_vminl(a, b); +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vuminb(ucharv32 a, + ucharv32 b) { + return __builtin_sw_vuminb(a, b); +} + +static __inline__ ushortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vuminh(ushortv16 a, + ushortv16 b) { + return __builtin_sw_vuminh(a, b); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS_CORE4 simd_vuminw(uintv8 a, + uintv8 b) { + return __builtin_sw_vuminw(a, b); +} + +static __inline__ ulongv4 __DEFAULT_FN_ATTRS_CORE4 simd_vuminl(ulongv4 a, + ulongv4 b) { + return __builtin_sw_vuminl(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vseleqw(intv8 a, intv8 b, + intv8 c) { + return __builtin_sw_vseleqw(a, b, c); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellew(intv8 a, intv8 b, + intv8 c) { + return __builtin_sw_vsellew(a, b, c); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vselltw(intv8 a, intv8 b, + intv8 c) { + return __builtin_sw_vselltw(a, b, c); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellbcw(intv8 a, intv8 b, + intv8 c) { + return __builtin_sw_vsellbcw(a, b, c); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vseleqwi(intv8 a, intv8 b, + int32_t c) { + intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; + return __builtin_sw_vseleqw(a, b, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellewi(intv8 a, intv8 b, + int32_t c) { + intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; + return __builtin_sw_vsellew(a, b, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vselltwi(intv8 a, intv8 b, + int32_t c) { + intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; + return __builtin_sw_vselltw(a, b, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellbcwi(intv8 a, intv8 b, + int32_t c) { + intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; + return __builtin_sw_vsellbcw(a, b, tmp); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS 
simd_vseleql(longv4 a, longv4 b, + longv4 c) { + doublev4 tmp_a = (doublev4)a; + doublev4 tmp_b = (doublev4)b; + doublev4 tmp_c = (doublev4)c; + return (longv4)__builtin_sw_vfseleqd(tmp_a, tmp_b, tmp_c); +} + +// Vector Logic Operation + +#define simd_vlog(a, b, c, opcode) __builtin_sw_vlogzz(a, b, c, opcode) + +#define simd_vand(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vand##SUFFIX(TYPE a, \ + TYPE b) { \ + return a & b; \ + } + +simd_vand(b, charv32) +simd_vand(h, shortv16) +simd_vand(w, intv8) +simd_vand(l, longv4) + +#define simd_vbic(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vbic##SUFFIX(TYPE a, \ + TYPE b) { \ + return a & ~b; \ + } + +simd_vbic(b, charv32) +simd_vbic(h, shortv16) +simd_vbic(w, intv8) +simd_vbic(l, longv4) + +#define simd_vbis(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vbis##SUFFIX(TYPE a, \ + TYPE b) { \ + return a | b; \ + } + +simd_vbis(b, charv32) +simd_vbis(h, shortv16) +simd_vbis(w, intv8) +simd_vbis(l, longv4) + +#define simd_vornot(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vornot##SUFFIX(TYPE a, \ + TYPE b) { \ + return a | ~b; \ + } + +simd_vornot(b, charv32) +simd_vornot(h, shortv16) +simd_vornot(w, intv8) +simd_vornot(l, longv4) + +#define simd_vxor(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vxor##SUFFIX(TYPE a, \ + TYPE b) { \ + return a ^ b; \ + } + +simd_vxor(b, charv32) +simd_vxor(h, shortv16) +simd_vxor(w, intv8) +simd_vxor(l, longv4) + +#define simd_veqv(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_veqv##SUFFIX(TYPE a, \ + TYPE b) { \ + return ~(a ^ b); \ + } + +simd_veqv(b, charv32) +simd_veqv(h, shortv16) +simd_veqv(w, intv8) +simd_veqv(l, longv4) + +// float arithmetic Operation + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vadds(floatv4 a, floatv4 b) { + return a + b; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vaddd(doublev4 a, + doublev4 b) { + return a + b; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsubs(floatv4 a, floatv4 b) { + return a - b; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsubd(doublev4 a, + doublev4 b) { + return a - b; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vmuls(floatv4 a, floatv4 b) { + return a * b; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vmuld(doublev4 a, + doublev4 b) { + return a * b; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vdivs(floatv4 a, floatv4 b) { + return a / b; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vdivd(doublev4 a, + doublev4 b) { + return a / b; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsqrts(floatv4 a) { + return __builtin_sw_vsqrts(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsqrtd(doublev4 a) { + return __builtin_sw_vsqrtd(a); +} + +static __inline__ float __DEFAULT_FN_ATTRS_CORE4 simd_vsums(floatv4 a) { + return __builtin_sw_vsums(a); +} + +static __inline__ double __DEFAULT_FN_ATTRS_CORE4 simd_vsumd(doublev4 a) { + return __builtin_sw_vsumd(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrecs(floatv4 a) { + return __builtin_sw_vfrecs(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrecd(doublev4 a) { + return __builtin_sw_vfrecd(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmpeqs(floatv4 a, + floatv4 b) { + return __builtin_sw_vfcmpeqs(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmples(floatv4 a, + floatv4 b) { + return __builtin_sw_vfcmples(a, b); +} + 
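+// The vfcmp* intrinsics produce a per-element mask that is normally consumed
+// by the vfsel* intrinsics defined further below. For example (an illustrative
+// sketch), an element-wise maximum can be written as:
+//   floatv4 m  = simd_vfcmplts(a, b);     // mask of the elements where a < b
+//   floatv4 mx = simd_vfseleqs(m, a, b);  // keep a where the mask is clear, else b
+// This is the same compare-then-select pattern used by simd_reduc_smaxs near
+// the end of this header.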
+static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmplts(floatv4 a, + floatv4 b) { + return __builtin_sw_vfcmplts(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmpuns(floatv4 a, + floatv4 b) { + return __builtin_sw_vfcmpuns(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpeqd(doublev4 a, + doublev4 b) { + return __builtin_sw_vfcmpeqd(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpled(doublev4 a, + doublev4 b) { + return __builtin_sw_vfcmpled(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpltd(doublev4 a, + doublev4 b) { + return __builtin_sw_vfcmpltd(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpund(doublev4 a, + doublev4 b) { + return __builtin_sw_vfcmpund(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtsd(floatv4 a) { + return __builtin_sw_vfcvtsd(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtds(doublev4 a) { + return __builtin_sw_vfcvtds(a); +} + +#define simd_vfcvtsh(a, b, c) __builtin_sw_vfcvtsh(a, b, c) +#define simd_vfcvths(a, b) __builtin_sw_vfcvths(a, b) + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtls(longv4 a) { + return __builtin_sw_vfcvtls(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtld(longv4 a) { + return __builtin_sw_vfcvtld(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtsl(floatv4 a) { + doublev4 tmp = __builtin_sw_vfcvtsd(a); + return __builtin_sw_vfcvtdl(tmp); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl(doublev4 a) { + return __builtin_sw_vfcvtdl(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_g(doublev4 a) { + return __builtin_sw_vfcvtdl_g(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_p(doublev4 a) { + return __builtin_sw_vfcvtdl_p(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_z(doublev4 a) { + return __builtin_sw_vfcvtdl_z(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_n(doublev4 a) { + return __builtin_sw_vfcvtdl_n(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris(floatv4 a) { + return __builtin_sw_vfris(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_g(floatv4 a) { + return __builtin_sw_vfris_g(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_p(floatv4 a) { + return __builtin_sw_vfris_p(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_z(floatv4 a) { + return __builtin_sw_vfris_z(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_n(floatv4 a) { + return __builtin_sw_vfris_n(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid(doublev4 a) { + return __builtin_sw_vfrid(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_g(doublev4 a) { + return __builtin_sw_vfrid_g(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_p(doublev4 a) { + return __builtin_sw_vfrid_p(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_z(doublev4 a) { + return __builtin_sw_vfrid_z(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_n(doublev4 a) { + return __builtin_sw_vfrid_n(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxs(floatv4 a, + floatv4 b) { + return __builtin_sw_vmaxs(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxd(doublev4 a, + doublev4 b) { + return 
__builtin_sw_vmaxd(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vmins(floatv4 a, + floatv4 b) { + return __builtin_sw_vmins(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vmind(doublev4 a, + doublev4 b) { + return __builtin_sw_vmind(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpyss(floatv4 a, floatv4 b) { + return __builtin_sw_vcpyss(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpyses(floatv4 a, + floatv4 b) { + return __builtin_sw_vcpyses(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpysns(floatv4 a, + floatv4 b) { + return __builtin_sw_vcpysns(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpysd(doublev4 a, + doublev4 b) { + return __builtin_sw_vcpysd(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpysed(doublev4 a, + doublev4 b) { + return __builtin_sw_vcpysed(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpysnd(doublev4 a, + doublev4 b) { + return __builtin_sw_vcpysnd(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfseleqs(floatv4 cond, + floatv4 a, + floatv4 b) { + return __builtin_sw_vfseleqs(cond, a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfsellts(floatv4 cond, + floatv4 a, + floatv4 b) { + return __builtin_sw_vfsellts(cond, a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfselles(floatv4 cond, + floatv4 a, + floatv4 b) { + return __builtin_sw_vfselles(cond, a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfseleqd(doublev4 cond, + doublev4 a, + doublev4 b) { + return __builtin_sw_vfseleqd(cond, a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfselltd(doublev4 cond, + doublev4 a, + doublev4 b) { + return __builtin_sw_vfselltd(cond, a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfselled(doublev4 cond, + doublev4 a, + doublev4 b) { + return __builtin_sw_vfselled(cond, a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vmas(floatv4 a, floatv4 b, + floatv4 c) { + return a * b + c; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vmss(floatv4 a, floatv4 b, + floatv4 c) { + return a * b - c; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vnmas(floatv4 a, floatv4 b, + floatv4 c) { + return -a * b + c; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vnmss(floatv4 a, floatv4 b, + floatv4 c) { + return -(a * b + c); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vmad(doublev4 a, doublev4 b, + doublev4 c) { + return a * b + c; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vmsd(doublev4 a, doublev4 b, + doublev4 c) { + return a * b - c; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vnmad(doublev4 a, doublev4 b, + doublev4 c) { + return -a * b + c; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vnmsd(doublev4 a, doublev4 b, + doublev4 c) { + return -(a * b + c); +} + +// SIMD element Operation + +#ifdef __sw_64_sw8a__ +#define simd_vinsb(elt, vect, num) __builtin_sw_vinsb(elt, vect, num) +#define simd_vinsh(elt, vect, num) __builtin_sw_vinsh(elt, vect, num) +#endif + +#define simd_vinsw(elt, vect, num) __builtin_sw_vinsw(elt, vect, num) +#define simd_vinsl(elt, vect, num) __builtin_sw_vinsl(elt, vect, num) +#define simd_vinsfs(elt, vect, num) __builtin_sw_vinsfs(elt, vect, num) +#define simd_vinsfd(elt, vect, num) __builtin_sw_vinsfd(elt, vect, num) + +#define simd_vinsw0(elt, vect) simd_vinsw(elt, vect, 0) +#define simd_vinsw1(elt, vect) simd_vinsw(elt, vect, 1) 
+#define simd_vinsw2(elt, vect) simd_vinsw(elt, vect, 2) +#define simd_vinsw3(elt, vect) simd_vinsw(elt, vect, 3) +#define simd_vinsw4(elt, vect) simd_vinsw(elt, vect, 4) +#define simd_vinsw5(elt, vect) simd_vinsw(elt, vect, 5) +#define simd_vinsw6(elt, vect) simd_vinsw(elt, vect, 6) +#define simd_vinsw7(elt, vect) simd_vinsw(elt, vect, 7) + +#define simd_vinsl0(elt, vect) simd_vinsl(elt, vect, 0) +#define simd_vinsl1(elt, vect) simd_vinsl(elt, vect, 1) +#define simd_vinsl2(elt, vect) simd_vinsl(elt, vect, 2) +#define simd_vinsl3(elt, vect) simd_vinsl(elt, vect, 3) + +#define simd_vinsfs0(elt, vect) simd_vinsfs(elt, vect, 0) +#define simd_vinsfs1(elt, vect) simd_vinsfs(elt, vect, 1) +#define simd_vinsfs2(elt, vect) simd_vinsfs(elt, vect, 2) +#define simd_vinsfs3(elt, vect) simd_vinsfs(elt, vect, 3) + +#define simd_vinsfd0(elt, vect) simd_vinsfd(elt, vect, 0) +#define simd_vinsfd1(elt, vect) simd_vinsfd(elt, vect, 1) +#define simd_vinsfd2(elt, vect) simd_vinsfd(elt, vect, 2) +#define simd_vinsfd3(elt, vect) simd_vinsfd(elt, vect, 3) + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vinsectlb(charv32 __a, + charv32 __b) { + return __builtin_shufflevector( + __a, __b, 0, 0 + 32, 1, 1 + 32, 2, 2 + 32, 3, 3 + 32, 4, 4 + 32, 5, + 5 + 32, 6, 6 + 32, 7, 7 + 32, 8, 8 + 32, 9, 9 + 32, 10, 10 + 32, 11, + 11 + 32, 12, 12 + 32, 13, 13 + 32, 14, 14 + 32, 15, 15 + 32); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 +simd_vinsectlh(shortv16 __a, shortv16 __b) { + return __builtin_shufflevector(__a, __b, 0, 0 + 16, 1, 1 + 16, 2, 2 + 16, 3, + 3 + 16, 4, 4 + 16, 5, 5 + 16, 6, 6 + 16, 7, + 7 + 16); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS_CORE4 simd_vinsectlw(intv8 __a, + intv8 __b) { + return __builtin_shufflevector(__a, __b, 0, 0 + 8, 1, 1 + 8, 2, 2 + 8, 3, + 3 + 8); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vinsectll(longv4 __a, + longv4 __b) { + return __builtin_shufflevector(__a, __b, 0, 0 + 4, 1, 1 + 4); +} + +#ifdef __sw_64_sw8a__ +#define simd_vshfq(__a, __b, idx) __builtin_sw_vshfq(__a, __b, idx) +#endif + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vshfqb(charv32 __a, + charv32 __b) { + return __builtin_sw_vshfqb(__a, __b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vshfw(intv8 __a, intv8 __b, + int64_t idx) { + return __builtin_sw_vshfw(__a, __b, idx); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vconw(intv8 __a, intv8 __b, + void *ptr) { + return __builtin_sw_vconw(__a, __b, ptr); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vconl(intv8 __a, intv8 __b, + void *ptr) { + return __builtin_sw_vconl(__a, __b, ptr); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcons(floatv4 __a, + floatv4 __b, + void *ptr) { + return __builtin_sw_vcons(__a, __b, ptr); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcond(doublev4 __a, + doublev4 __b, + void *ptr) { + return __builtin_sw_vcond(__a, __b, ptr); +} + +#define simd_vextw(vect, num) __builtin_sw_vextw(vect, num) +#define simd_vextl(vect, num) __builtin_sw_vextl(vect, num) +#define simd_vextfs(vect, num) __builtin_sw_vextfs(vect, num) +#define simd_vextfd(vect, num) __builtin_sw_vextfd(vect, num) + +#define simd_vextw0(args) simd_vextw(args, 0) +#define simd_vextw1(args) simd_vextw(args, 1) +#define simd_vextw2(args) simd_vextw(args, 2) +#define simd_vextw3(args) simd_vextw(args, 3) +#define simd_vextw4(args) simd_vextw(args, 4) +#define simd_vextw5(args) simd_vextw(args, 5) +#define simd_vextw6(args) simd_vextw(args, 6) +#define simd_vextw7(args) 
simd_vextw(args, 7) + +#define simd_vextl0(args) simd_vextl(args, 0) +#define simd_vextl1(args) simd_vextl(args, 1) +#define simd_vextl2(args) simd_vextl(args, 2) +#define simd_vextl3(args) simd_vextl(args, 3) + +#define simd_vextfs0(args) simd_vextfs(args, 0) +#define simd_vextfs1(args) simd_vextfs(args, 1) +#define simd_vextfs2(args) simd_vextfs(args, 2) +#define simd_vextfs3(args) simd_vextfs(args, 3) + +#define simd_vextfd0(args) simd_vextfd(args, 0) +#define simd_vextfd1(args) simd_vextfd(args, 1) +#define simd_vextfd2(args) simd_vextfd(args, 2) +#define simd_vextfd3(args) simd_vextfd(args, 3) + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vcpyb(int8_t b) { + return __extension__(charv32){b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b}; +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vcpyh(int16_t b) { + return __extension__(shortv16){b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b}; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcpyw(int32_t b) { + return __extension__(intv8){b, b, b, b, b, b, b, b}; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vcpyl(int64_t __a) { + return __extension__(longv4){__a, __a, __a, __a}; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpyfs(float __a) { + return __extension__(floatv4){__a, __a, __a, __a}; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpyfd(double __a) { + return __extension__(doublev4){__a, __a, __a, __a}; +} + +// Test for core3 + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_reduc_plusw(intv8 __a) { + intv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + __a = __a + __shf; + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __a = __a + __shf; + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __a = __a + __shf; + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ float __DEFAULT_FN_ATTRS simd_reduc_pluss(floatv4 __a) { + floatv4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + __a = __a + __shf; + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __a = __a + __shf; + return __builtin_sw_vextfs(__a, 0); +} + +static __inline__ double __DEFAULT_FN_ATTRS simd_reduc_plusd(doublev4 __a) { + doublev4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + __a = __a + __shf; + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __a = __a + __shf; + return __builtin_sw_vextfd(__a, 0); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_reduc_smaxw(intv8 __a) { + intv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + intv8 __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS simd_reduc_umaxw(uintv8 __a) { + uintv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + uintv8 __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __cmp = simd_vcmpultw(__a, __shf); + __a = 
simd_vseleqw(__cmp, __a, __shf); + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_reduc_sminw(intv8 __a) { + intv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + intv8 __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS simd_reduc_uminw(uintv8 __a) { + uintv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + uintv8 __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ float __DEFAULT_FN_ATTRS simd_reduc_smaxs(floatv4 __a) { + floatv4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + floatv4 __cmp = simd_vfcmplts(__a, __shf); + __a = simd_vfseleqs(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __cmp = simd_vfcmplts(__a, __shf); + __a = simd_vfseleqs(__cmp, __a, __shf); + return __builtin_sw_vextfs(__a, 0); +} + +static __inline__ double __DEFAULT_FN_ATTRS simd_reduc_smaxd(doublev4 __a) { + doublev4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + doublev4 __cmp = simd_vfcmpltd(__a, __shf); + __a = simd_vfseleqd(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __cmp = simd_vfcmpltd(__a, __shf); + __a = simd_vfseleqd(__cmp, __a, __shf); + return __builtin_sw_vextfd(__a, 0); +} + +static __inline__ float __DEFAULT_FN_ATTRS simd_reduc_smins(floatv4 __a) { + floatv4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + floatv4 __cmp = simd_vfcmplts(__a, __shf); + __a = simd_vfseleqs(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __cmp = simd_vfcmplts(__a, __shf); + __a = simd_vfseleqs(__cmp, __shf, __a); + return __builtin_sw_vextfs(__a, 0); +} + +static __inline__ double __DEFAULT_FN_ATTRS simd_reduc_smind(doublev4 __a) { + doublev4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + doublev4 __cmp = simd_vfcmpltd(__a, __shf); + __a = simd_vfseleqd(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __cmp = simd_vfcmpltd(__a, __shf); + __a = simd_vfseleqd(__cmp, __shf, __a); + return __builtin_sw_vextfd(__a, 0); +} +#endif diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 5ee20554c4cf..d8c57afe8523 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2008,6 +2008,8 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case llvm::Triple::mips64: case llvm::Triple::mips64el: return CheckMipsBuiltinFunctionCall(TI, BuiltinID, TheCall); + case llvm::Triple::sw_64: + return CheckSw64BuiltinFunctionCall(BuiltinID, TheCall); case llvm::Triple::systemz: return CheckSystemZBuiltinFunctionCall(BuiltinID, TheCall); case llvm::Triple::x86: @@ -5799,6 +5801,140 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
return SemaBuiltinConstantArgRange(TheCall, i, l, u); } +bool Sema::CheckSw64VectorMemoryIntr(unsigned BuiltinID, CallExpr *TheCall) { + DeclRefExpr *DRE = + cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); + if (BuiltinID == Sw64::BI__builtin_sw_vload || + BuiltinID == Sw64::BI__builtin_sw_vloadu || + BuiltinID == Sw64::BI__builtin_sw_vload_u || + BuiltinID == Sw64::BI__builtin_sw_vloade || + BuiltinID == Sw64::BI__builtin_sw_vloadnc || + BuiltinID == Sw64::BI__builtin_sw_vstore || + BuiltinID == Sw64::BI__builtin_sw_vstoreu || + BuiltinID == Sw64::BI__builtin_sw_vstore_u || + BuiltinID == Sw64::BI__builtin_sw_vstoreuh || + BuiltinID == Sw64::BI__builtin_sw_vstoreul || + BuiltinID == Sw64::BI__builtin_sw_vstorenc) { + + bool isLoad = BuiltinID == Sw64::BI__builtin_sw_vload || + BuiltinID == Sw64::BI__builtin_sw_vloadu || + BuiltinID == Sw64::BI__builtin_sw_vload_u || + BuiltinID == Sw64::BI__builtin_sw_vloade || + BuiltinID == Sw64::BI__builtin_sw_vloadnc; + + bool isLoadExt = BuiltinID == Sw64::BI__builtin_sw_vloade; + + bool isExtMem = BuiltinID == Sw64::BI__builtin_sw_vloadu || + BuiltinID == Sw64::BI__builtin_sw_vload_u || + BuiltinID == Sw64::BI__builtin_sw_vloade || + BuiltinID == Sw64::BI__builtin_sw_vstoreu || + BuiltinID == Sw64::BI__builtin_sw_vstore_u || + BuiltinID == Sw64::BI__builtin_sw_vstoreuh || + BuiltinID == Sw64::BI__builtin_sw_vstoreul; + + if (checkArgCount(*this, TheCall, isLoad ? 1 : 2)) + return true; + + Expr *PointerArg = TheCall->getArg(isLoad ? 0 : 1); + ExprResult PointerArgRes = DefaultFunctionArrayLvalueConversion(PointerArg); + if (PointerArgRes.isInvalid()) + return true; + PointerArg = PointerArgRes.get(); + TheCall->setArg(isLoad ? 0 : 1, PointerArg); + + const PointerType *pointerType = + PointerArg->getType()->getAs<PointerType>(); + QualType ValType = pointerType->getPointeeType(); + QualType VecTy; + bool isVoidPtr = pointerType->isVoidPointerType(); + if (isExtMem) { + if (Context.getTypeSize(ValType) < 32 && !isVoidPtr) { + Diag(DRE->getBeginLoc(), diag::err_invalid_sw64_type_code); + return true; + } + } + + if (ValType->isFloatingType() && + (BuiltinID == Sw64::BI__builtin_sw_vloadnc)) { + if (Context.getTypeSize(ValType) <= 32) { + Diag(DRE->getBeginLoc(), diag::err_invalid_sw64_type_code); + return true; + } + } + + // If the builtin is a store, it has no return value; do nothing.
+ if (!isLoad) + return false; + + if (ValType->isIntegerType()) + VecTy = + Context.getExtVectorType(ValType, 256 / Context.getTypeSize(ValType)); + else { + assert(ValType->isFloatingType() && + "Builtin Value should be Integer or Floating type!"); + VecTy = Context.getExtVectorType(ValType, 4); + } + if (isLoad) { + TheCall->setType(VecTy); + return false; + } + } + return true; +} + +bool Sema::CheckSw64VectorShift(unsigned BuiltinID, CallExpr *TheCall) { + DeclRefExpr *DRE = + cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); + if (BuiltinID == Sw64::BI__builtin_sw_vsll || + BuiltinID == Sw64::BI__builtin_sw_vsrl || + BuiltinID == Sw64::BI__builtin_sw_vsra || + BuiltinID == Sw64::BI__builtin_sw_vrol) { + Expr *ShiftArg = TheCall->getArg(0); + Expr *ShiftImm = TheCall->getArg(1); + QualType ValType = ShiftArg->getType(); + QualType Imm = ShiftImm->getType(); + + if (checkArgCount(*this, TheCall, 2)) + return true; + + if (ValType->isFloatingType() || + !(ValType->isVectorType() && Imm->isIntegerType())) { + Diag(DRE->getBeginLoc(), diag::err_invalid_sw64_type_code); + return true; + } + + TheCall->setType(ValType); + return false; + } + return true; +} + +bool Sema::CheckSw64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + DeclRefExpr *DRE = + cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); + switch (BuiltinID) { + case Sw64::BI__builtin_sw_vload: + case Sw64::BI__builtin_sw_vloadu: + case Sw64::BI__builtin_sw_vload_u: + case Sw64::BI__builtin_sw_vloade: + case Sw64::BI__builtin_sw_vloadnc: + case Sw64::BI__builtin_sw_vstore: + case Sw64::BI__builtin_sw_vstoreu: + case Sw64::BI__builtin_sw_vstore_u: + case Sw64::BI__builtin_sw_vstoreuh: + case Sw64::BI__builtin_sw_vstoreul: + case Sw64::BI__builtin_sw_vstorenc: + return CheckSw64VectorMemoryIntr(BuiltinID, TheCall); + case Sw64::BI__builtin_sw_vsll: + case Sw64::BI__builtin_sw_vsrl: + case Sw64::BI__builtin_sw_vsra: + case Sw64::BI__builtin_sw_vrol: + return CheckSw64VectorShift(BuiltinID, TheCall); + } + + return false; +} + bool Sema::CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index ed69e802c95d..0d1e28fa9dce 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7744,6 +7744,19 @@ handleWebAssemblyImportNameAttr(Sema &S, Decl *D, const ParsedAttr &AL) { FD->addAttr(::new (S.Context) WebAssemblyImportNameAttr(S.Context, AL, Str)); } +static void handleSw64InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (!isFunctionOrMethod(D)) { + S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type) + << AL << AL.isRegularKeywordAttribute() << ExpectedFunction; + return; + } + + if (!AL.checkExactlyNumArgs(S, 0)) + return; + + handleSimpleAttribute<Sw64InterruptAttr>(S, D, AL); +} + static void handleRISCVInterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // Warn about repeated attributes.
@@ -7826,6 +7839,9 @@ static void handleInterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { case llvm::Triple::riscv64: handleRISCVInterruptAttr(S, D, AL); break; + case llvm::Triple::sw_64: + handleSw64InterruptAttr(S, D, AL); + break; default: handleARMInterruptAttr(S, D, AL); break; -- Gitee From 5298e93560dddcd7570a5c9d47625526081bd2f1 Mon Sep 17 00:00:00 2001 From: swcompiler Date: Thu, 23 Jan 2025 14:51:38 +0800 Subject: [PATCH 3/3] [Sw64] Add Sw64 target support for openmp --- openmp/README.rst | 2 +- openmp/runtime/CMakeLists.txt | 9 +- openmp/runtime/README.txt | 1 + .../runtime/cmake/LibompGetArchitecture.cmake | 2 + openmp/runtime/cmake/LibompMicroTests.cmake | 3 + openmp/runtime/cmake/LibompUtils.cmake | 2 + openmp/runtime/cmake/config-ix.cmake | 3 +- openmp/runtime/src/kmp_affinity.h | 11 ++ openmp/runtime/src/kmp_os.h | 8 +- openmp/runtime/src/kmp_platform.h | 6 +- openmp/runtime/src/kmp_runtime.cpp | 3 +- openmp/runtime/src/z_Linux_asm.S | 157 +++++++++++++++++- openmp/runtime/src/z_Linux_util.cpp | 2 +- openmp/runtime/test/ompt/callback.h | 10 ++ openmp/runtime/tools/lib/Platform.pm | 7 +- openmp/runtime/tools/lib/Uname.pm | 2 + 16 files changed, 215 insertions(+), 13 deletions(-) diff --git a/openmp/README.rst b/openmp/README.rst index 2cdd38220d52..103cc0dd5f19 100644 --- a/openmp/README.rst +++ b/openmp/README.rst @@ -137,7 +137,7 @@ Options for all Libraries Options for ``libomp`` ---------------------- -**LIBOMP_ARCH** = ``aarch64|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64`` +**LIBOMP_ARCH** = ``aarch64|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|sw_64`` The default value for this option is chosen based on probing the compiler for architecture macros (e.g., is ``__x86_64__`` predefined by compiler?). diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 2b7a3eb5bfce..58265a9eaaa2 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD}) # If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake libomp_get_architecture(LIBOMP_DETECTED_ARCH) set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING - "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64).") + "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64/sw_64).") # Should assertions be enabled? They are on by default. 
set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") @@ -63,6 +63,8 @@ else() # Part of LLVM build set(LIBOMP_ARCH riscv64) elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64") set(LIBOMP_ARCH loongarch64) + elseif(LIBOMP_NATIVE_ARCH MATCHES "sw_64") + set(LIBOMP_ARCH sw_64) else() # last ditch effort libomp_get_architecture(LIBOMP_ARCH) @@ -83,7 +85,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64") endif() endif() -libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64) +libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64 sw_64) set(LIBOMP_LIB_TYPE normal CACHE STRING "Performance,Profiling,Stubs library (normal/profile/stubs)") @@ -162,6 +164,7 @@ set(MIPS64 FALSE) set(MIPS FALSE) set(RISCV64 FALSE) set(LOONGARCH64 FALSE) +set(SW64 FALSE) if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture set(IA32 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture @@ -188,6 +191,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture set(RISCV64 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture set(LOONGARCH64 TRUE) +elseif("${LIBOMP_ARCH}" STREQUAL "sw_64") # SW64 architecture + set(SW64 TRUE) endif() # Set some flags based on build_type diff --git a/openmp/runtime/README.txt b/openmp/runtime/README.txt index ddd8b0e4282d..2ecc429d92d5 100644 --- a/openmp/runtime/README.txt +++ b/openmp/runtime/README.txt @@ -55,6 +55,7 @@ Architectures Supported * MIPS and MIPS64 architecture * RISCV64 architecture * LoongArch64 architecture +* SW64 architecture Supported RTL Build Configurations ================================== diff --git a/openmp/runtime/cmake/LibompGetArchitecture.cmake b/openmp/runtime/cmake/LibompGetArchitecture.cmake index c338493bad53..9ca2dfc5d7c7 100644 --- a/openmp/runtime/cmake/LibompGetArchitecture.cmake +++ b/openmp/runtime/cmake/LibompGetArchitecture.cmake @@ -49,6 +49,8 @@ function(libomp_get_architecture return_arch) #error ARCHITECTURE=riscv64 #elif defined(__loongarch__) && __loongarch_grlen == 64 #error ARCHITECTURE=loongarch64 + #elif defined(__sw_64__) + #error ARCHITECTURE=sw_64 #else #error ARCHITECTURE=UnknownArchitecture #endif diff --git a/openmp/runtime/cmake/LibompMicroTests.cmake b/openmp/runtime/cmake/LibompMicroTests.cmake index 88deb461dbaf..ff911af4b0b5 100644 --- a/openmp/runtime/cmake/LibompMicroTests.cmake +++ b/openmp/runtime/cmake/LibompMicroTests.cmake @@ -217,6 +217,9 @@ else() elseif(${LOONGARCH64}) libomp_append(libomp_expected_library_deps libc.so.6) libomp_append(libomp_expected_library_deps ld.so.1) + elseif(${SW64}) + libomp_append(libomp_expected_library_deps libc.so.6.1) + libomp_append(libomp_expected_library_deps ld-linux.so.2) endif() libomp_append(libomp_expected_library_deps libpthread.so.0 IF_FALSE STUBS_LIBRARY) libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC) diff --git a/openmp/runtime/cmake/LibompUtils.cmake b/openmp/runtime/cmake/LibompUtils.cmake index b5ffc97fca3d..ce62b077bd22 100644 --- a/openmp/runtime/cmake/LibompUtils.cmake +++ b/openmp/runtime/cmake/LibompUtils.cmake @@ -111,6 +111,8 @@ function(libomp_get_legal_arch return_arch_string) set(${return_arch_string} "RISCV64" PARENT_SCOPE) elseif(${LOONGARCH64}) set(${return_arch_string} "LOONGARCH64" PARENT_SCOPE) + elseif(${SW64}) + set(${return_arch_string} "SW64" 
PARENT_SCOPE) else() set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE) libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}") diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake index 9869aeab0354..6cbac229eaed 100644 --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -325,7 +325,8 @@ else() (LIBOMP_ARCH STREQUAL ppc64le) OR (LIBOMP_ARCH STREQUAL ppc64) OR (LIBOMP_ARCH STREQUAL riscv64) OR - (LIBOMP_ARCH STREQUAL loongarch64)) + (LIBOMP_ARCH STREQUAL loongarch64) OR + (LIBOMP_ARCH STREQUAL sw_64)) AND # OS supported? ((WIN32 AND LIBOMP_HAVE_PSAPI) OR APPLE OR (NOT WIN32 AND LIBOMP_HAVE_WEAK_ATTRIBUTE))) set(LIBOMP_HAVE_OMPT_SUPPORT TRUE) diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h index f27dd9a5339e..bb9fdc410a73 100644 --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -281,6 +281,17 @@ public: #elif __NR_sched_getaffinity != 123 #error Wrong code for getaffinity system call. #endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_SW64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 395 +#elif __NR_sched_setaffinity != 395 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 396 +#elif __NR_sched_getaffinity != 396 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ #else #error Unknown or unsupported architecture #endif /* KMP_ARCH_* */ diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index fec589ab6018..ee1d2c7e3fd5 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -178,7 +178,8 @@ typedef unsigned long long kmp_uint64; #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ + KMP_ARCH_SW64 #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC #else #error "Can't determine size_t printf format specifier."
@@ -213,7 +214,7 @@ typedef kmp_uint32 kmp_uint; #define KMP_INT_MIN ((kmp_int32)0x80000000) // stdarg handling -#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && \ +#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_SW64) && \ (KMP_OS_FREEBSD || KMP_OS_LINUX) typedef va_list *kmp_va_list; #define kmp_va_deref(ap) (*(ap)) @@ -1043,7 +1044,8 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); #endif /* KMP_OS_WINDOWS */ #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ + KMP_ARCH_SW64 #if KMP_OS_WINDOWS #undef KMP_MB #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index fcfd8bc5d8d9..780ff3b185b3 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -93,6 +93,7 @@ #define KMP_ARCH_MIPS64 0 #define KMP_ARCH_RISCV64 0 #define KMP_ARCH_LOONGARCH64 0 +#define KMP_ARCH_SW64 0 #if KMP_OS_WINDOWS #if defined(_M_AMD64) || defined(__x86_64) @@ -142,6 +143,9 @@ #elif defined __loongarch__ && __loongarch_grlen == 64 #undef KMP_ARCH_LOONGARCH64 #define KMP_ARCH_LOONGARCH64 1 +#elif defined __sw_64__ +#undef KMP_ARCH_SW64 +#define KMP_ARCH_SW64 1 #endif #endif @@ -206,7 +210,7 @@ // TODO: Fixme - This is clever, but really fugly #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ - KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64) + KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_SW64) #error Unknown or unsupported architecture #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index c63bd1c63bfd..e86d132fee0f 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -8827,7 +8827,8 @@ __kmp_determine_reduction_method( int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ + KMP_ARCH_SW64 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index 27b063f09e7a..be3494f9589f 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -2060,6 +2060,159 @@ __kmp_invoke_microtask: #endif /* KMP_ARCH_LOONGARCH64 */ +#if KMP_ARCH_SW64 + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)(int *gtid, int *tid, ...); +// +// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, +// void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)(&gtid, &tid, argv[0], ...); +// +// return 1; +// } +// +// Parameters: +// $16: pkfn +// $17: gtid +// $18: tid +// $19: argc +// $20: p_argv +// $21: exit_frame_ptr +// +// Locals: +// __gtid: gtid param pushed on stack so can pass &gtid to pkfn +// __tid: tid param pushed on stack so can pass &tid to pkfn +// +// Temp.
registers: +// +// $1: used to calculate the dynamic stack size / used to hold pkfn address +// $2: used as temporary for stack placement calculation +// $3: used as temporary for stack arguments +// $4: used as temporary for number of remaining pkfn parms +// $5: used to traverse p_argv array +// +// return: $0 (always 1/TRUE) + +__gtid = -20 +__tid = -24 +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + .globl __kmp_invoke_microtask + .p2align 1 + .type __kmp_invoke_microtask,@function +__kmp_invoke_microtask: + .cfi_startproc + + // First, save $26 and $15 + ldi $30,-16($30) + stl $26, 8($30) + stl $15, 0($30) + ldi $15,16($30) + .cfi_def_cfa $15, 0 + .cfi_offset $26, -8 + .cfi_offset $15, -16 + + // Compute the dynamic stack size: + // + // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by + // reference + // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' + // function by register. Given that we have 6 of such registers (a[0-5]) + // and two + 'argc' arguments (consider &gtid and &tid), we need to + // reserve max(0, argc - 4)*8 extra bytes + // + // The total number of bytes is then max(0, argc - 4)*8 + 8 + + // Compute max(0, argc - 4) using the following bithack: + // max(0, x) = x - (x & (x >> 31)), where x := argc - 4 + // Source: http://graphics.stanford.edu/~seander/bithacks.html//IntegerMinOrMax + subw $19, 4, $1 + sellt $1, 0, $1, $1 + + ldi $1,1($1) + s8addl $1,0,$1 + subl $30, $1, $30 + + // Align the stack to 16 bytes + bic $30, 0xf, $30 + mov $16, $27 + mov $19, $4 + mov $20, $5 + +#if OMPT_SUPPORT + // Save frame pointer into exit_frame + stl $15, 0($21) +#endif + + // Prepare arguments for the pkfn function (first 6 using $16-$21 registers) + + stw $17, __gtid($15) + stw $18, __tid($15) + + ldi $16, __gtid($15) + ldi $17, __tid($15) + + beq $4, .L_kmp_3 + ldl $18, 0($5) + + ldi $4,-1($4) + beq $4, .L_kmp_3 + ldl $19, 8($5) + + ldi $4,-1($4) + beq $4, .L_kmp_3 + ldl $20, 16($5) + + ldi $4,-1($4) + beq $4, .L_kmp_3 + ldl $21, 24($5) + + // Prepare any additional argument passed through the stack + ldi $5, 32($5) + mov $30, $2 + br $31, .L_kmp_2 +.L_kmp_1: + ldl $3, 0($5) + stl $3, 0($2) + ldi $5, 8($5) + ldi $2, 8($2) +.L_kmp_2: + ldi $4, -1($4) + bne $4, .L_kmp_1 + +.L_kmp_3: + // Call pkfn function + call $26, ($27), 0 + + // Restore stack and return + + ldi $0, 1($31) + + ldi $30,-16($15) + ldl $15, 0($30) + ldl $26, 8($30) + ldi $30,16($30) + ret $31,($26),1 +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask + .cfi_endproc + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_SW64 */ + #if KMP_ARCH_ARM || KMP_ARCH_MIPS .data COMMON .gomp_critical_user_, 32, 3 @@ -2073,7 +2226,7 @@ __kmp_unnamed_critical_addr: #endif #endif /* KMP_ARCH_ARM */ -#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 +#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_SW64 #ifndef KMP_PREFIX_UNDERSCORE # define KMP_PREFIX_UNDERSCORE(x) x #endif @@ -2088,7 +2241,7 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 #endif #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || - KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */ + KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_SW64 */ #if KMP_OS_LINUX # if KMP_ARCH_ARM || KMP_ARCH_AARCH64 diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 260b982af200..cdfb14687500 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -2452,7 +2452,7 @@ finish: // Clean up and exit. #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_ARM) + KMP_ARCH_ARM || KMP_ARCH_SW64) // we really only need the case with 1 argument, because CLANG always build // a struct of pointers to shared variables referenced in the outlined function diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h index 8180b3d2663f..32af133ade73 100644 --- a/openmp/runtime/test/ompt/callback.h +++ b/openmp/runtime/test/ompt/callback.h @@ -212,6 +212,16 @@ ompt_label_##id: printf("%" PRIu64 ": current_address=%p or %p\n", \ ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12) #endif +#elif KMP_ARCH_SW64 +// On SW64 the NOP instruction is 4 bytes long, can be followed by some other +// instructions (more bytes). +#define print_possible_return_addresses(addr) \ + printf("%" PRIu64 ": current_address=%p or %p or %p or %p or %p or %p or " \ + "%p or %p or %p or %p\n", \ + ompt_get_thread_data()->value, ((char *)addr) - 16, \ + ((char *)addr) - 20, ((char *)addr) - 24, ((char *)addr) - 28, \ + ((char *)addr) - 32, ((char *)addr) - 36, ((char *)addr) - 40, \ + ((char *)addr) - 44, ((char *)addr) - 48, ((char *)addr) - 52) #elif KMP_ARCH_LOONGARCH64 // On LoongArch64 the NOP instruction is 4 bytes long, can be followed by // inserted jump instruction (another 4 bytes long). And an additional jump diff --git a/openmp/runtime/tools/lib/Platform.pm b/openmp/runtime/tools/lib/Platform.pm index d62d450e9e5d..c7da02499c00 100644 --- a/openmp/runtime/tools/lib/Platform.pm +++ b/openmp/runtime/tools/lib/Platform.pm @@ -65,6 +65,8 @@ sub canon_arch($) { $arch = "riscv64"; } elsif ( $arch =~ m{\Aloongarch64} ) { $arch = "loongarch64"; + } elsif ( $arch =~ m{\Asw_64} ) { + $arch = "sw_64"; } else { $arch = undef; }; # if @@ -100,6 +102,7 @@ sub canon_mic_arch($) { "mips" => "MIPS", "mips64" => "MIPS64", "riscv64" => "RISC-V (64-bit)", + "sw_64" => "SW64", ); sub legal_arch($) { @@ -230,6 +233,8 @@ sub target_options() { $_host_arch = "riscv64"; } elsif ( $hardware_platform eq "loongarch64" ) { $_host_arch = "loongarch64"; + } elsif ( $hardware_platform eq "sw_64" ) { + $_host_arch = "sw_64"; } else { die "Unsupported host hardware platform: \"$hardware_platform\"; stopped"; }; # if @@ -419,7 +424,7 @@ the script assumes host architecture is target one. Input string is an architecture name to canonize. The function recognizes many variants, for example: C<32e>, C, C, etc. Returned string is a canonized architecture name, -one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C or C is input string is not recognized. +one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C, C or C is input string is not recognized. 
=item B diff --git a/openmp/runtime/tools/lib/Uname.pm b/openmp/runtime/tools/lib/Uname.pm index 8a976addcff0..d21550711cc3 100644 --- a/openmp/runtime/tools/lib/Uname.pm +++ b/openmp/runtime/tools/lib/Uname.pm @@ -160,6 +160,8 @@ if ( 0 ) { $values{ hardware_platform } = "riscv64"; } elsif ( $values{ machine } =~ m{\Aloongarch64\z} ) { $values{ hardware_platform } = "loongarch64"; + } elsif ( $values{ machine } =~ m{\Asw_64\z} ) { + $values{ hardware_platform } = "sw_64"; } else { die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped"; }; # if -- Gitee
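
Note (not part of the patch series): the Sw64 reductions in the simd.h hunk above lower an 8 x i32 sum to three shuffle-and-add steps. The following portable C sketch models that tree reduction under the assumption that it mirrors simd_reduc_plusw's lane arithmetic; reduc_plusw_scalar and the main driver are hypothetical names used only for illustration and do not require the Sw64 toolchain.

/* Scalar model of the pairwise tree reduction done by simd_reduc_plusw:
 * fold lane i+1 into the even lanes, then lane i+2 into lanes 0 and 4,
 * then lane 4 into lane 0; lane 0 then holds the total, which the header
 * extracts with __builtin_sw_vextw(__a, 0). */
#include <stdint.h>
#include <stdio.h>

static int32_t reduc_plusw_scalar(const int32_t v[8]) {
  int32_t t[8];
  for (int i = 0; i < 8; ++i)
    t[i] = v[i];
  for (int i = 0; i < 8; i += 2) /* shuffle {1,1,3,3,5,5,7,7} + add */
    t[i] += t[i + 1];
  for (int i = 0; i < 8; i += 4) /* shuffle {2,2,2,2,6,6,6,6} + add */
    t[i] += t[i + 2];
  t[0] += t[4];                  /* shuffle {4,4,4,4,4,4,4,4} + add */
  return t[0];
}

int main(void) {
  const int32_t v[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  printf("%d\n", reduc_plusw_scalar(v)); /* prints 36 */
  return 0;
}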