From 30add3b1cf70e1daa02f9390d66db35957d69665 Mon Sep 17 00:00:00 2001 From: swcompiler Date: Fri, 16 May 2025 15:37:22 +0800 Subject: [PATCH] Add Sw64 support for clang --- ...64-Add-Sw64-target-support-for-clang.patch | 4437 +++++++++++++++++ clang.spec | 8 +- 2 files changed, 4444 insertions(+), 1 deletion(-) create mode 100755 0001-Sw64-Add-Sw64-target-support-for-clang.patch diff --git a/0001-Sw64-Add-Sw64-target-support-for-clang.patch b/0001-Sw64-Add-Sw64-target-support-for-clang.patch new file mode 100755 index 0000000..0864c33 --- /dev/null +++ b/0001-Sw64-Add-Sw64-target-support-for-clang.patch @@ -0,0 +1,4437 @@ +From 106809c7dbed97439ce9656055486858d36e6732 Mon Sep 17 00:00:00 2001 +From: xiaol +Date: Tue, 20 May 2025 17:16:36 +0800 +Subject: [PATCH 2/5] clang + +--- + clang/include/clang/Basic/Attr.td | 12 + + clang/include/clang/Basic/AttrDocs.td | 11 + + clang/include/clang/Basic/BuiltinsSw64.def | 249 +++ + .../clang/Basic/DiagnosticDriverKinds.td | 2 + + .../clang/Basic/DiagnosticSemaKinds.td | 4 + + clang/include/clang/Basic/TargetBuiltins.h | 13 +- + clang/include/clang/Basic/TargetCXXABI.def | 3 + + clang/include/clang/Basic/TargetCXXABI.h | 6 + + clang/include/clang/Basic/TargetInfo.h | 3 + + clang/include/clang/Driver/Options.td | 50 + + clang/include/clang/Sema/Sema.h | 3 + + clang/lib/AST/ASTContext.cpp | 56 + + clang/lib/Basic/CMakeLists.txt | 1 + + clang/lib/Basic/Targets.cpp | 4 + + clang/lib/Basic/Targets/Sw64.cpp | 125 ++ + clang/lib/Basic/Targets/Sw64.h | 141 ++ + clang/lib/CodeGen/CGBuiltin.cpp | 108 ++ + clang/lib/CodeGen/CMakeLists.txt | 1 + + clang/lib/CodeGen/CodeGenFunction.h | 2 + + clang/lib/CodeGen/CodeGenModule.cpp | 3 + + clang/lib/CodeGen/ItaniumCXXABI.cpp | 3 + + clang/lib/CodeGen/TargetInfo.h | 3 + + clang/lib/CodeGen/Targets/Sw64.cpp | 545 ++++++ + clang/lib/Driver/CMakeLists.txt | 2 + + clang/lib/Driver/Driver.cpp | 4 + + clang/lib/Driver/ToolChains/Arch/Sw64.cpp | 94 + + clang/lib/Driver/ToolChains/Arch/Sw64.h | 34 + + clang/lib/Driver/ToolChains/Clang.cpp | 89 + + clang/lib/Driver/ToolChains/Clang.h | 2 + + clang/lib/Driver/ToolChains/CommonArgs.cpp | 7 + + clang/lib/Driver/ToolChains/Gnu.cpp | 23 + + clang/lib/Driver/ToolChains/Linux.cpp | 23 +- + clang/lib/Driver/ToolChains/Sw64Toolchain.cpp | 184 ++ + clang/lib/Driver/ToolChains/Sw64Toolchain.h | 79 + + clang/lib/Driver/XRayArgs.cpp | 1 + + clang/lib/Frontend/CompilerInvocation.cpp | 3 +- + clang/lib/Headers/CMakeLists.txt | 13 + + clang/lib/Headers/simd.h | 5 + + clang/lib/Headers/sw64intrin.h | 1590 +++++++++++++++++ + clang/lib/Sema/SemaChecking.cpp | 136 ++ + clang/lib/Sema/SemaDeclAttr.cpp | 16 + + 41 files changed, 3648 insertions(+), 5 deletions(-) + create mode 100644 clang/include/clang/Basic/BuiltinsSw64.def + create mode 100644 clang/lib/Basic/Targets/Sw64.cpp + create mode 100644 clang/lib/Basic/Targets/Sw64.h + create mode 100644 clang/lib/CodeGen/Targets/Sw64.cpp + create mode 100644 clang/lib/Driver/ToolChains/Arch/Sw64.cpp + create mode 100644 clang/lib/Driver/ToolChains/Arch/Sw64.h + create mode 100644 clang/lib/Driver/ToolChains/Sw64Toolchain.cpp + create mode 100644 clang/lib/Driver/ToolChains/Sw64Toolchain.h + create mode 100644 clang/lib/Headers/simd.h + create mode 100644 clang/lib/Headers/sw64intrin.h + +diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td +index d5204b286..6ea5e5ee9 100644 +--- a/clang/include/clang/Basic/Attr.td ++++ b/clang/include/clang/Basic/Attr.td +@@ -428,6 +428,7 @@ def TargetX86 : TargetArch<["x86"]>; + def 
TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
+ def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
+ def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
++def TargetSw64 : TargetArch<["sw_64"]>;
+ def TargetWindows : TargetSpec {
+   let OSes = ["Win32"];
+ }
+@@ -891,6 +892,17 @@ def AVRSignal : InheritableAttr, TargetSpecificAttr<TargetAVR> {
+   let Documentation = [AVRSignalDocs];
+ }
+
++def Sw64Interrupt : InheritableAttr, TargetSpecificAttr<TargetSw64> {
++  let Spellings = [GCC<"interrupt">];
++  let Subjects = SubjectList<[Function]>;
++  let Args = [EnumArgument<"Interrupt", "InterruptType",
++                           ["user", "supervisor", "machine"],
++                           ["user", "supervisor", "machine"],
++                           1>];
++  let ParseKind = "Interrupt";
++  let Documentation = [Sw64InterruptDocs];
++}
++
+ def AsmLabel : InheritableAttr {
+   let Spellings = [CustomKeyword<"asm">, CustomKeyword<"__asm__">];
+   let Args = [
+diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
+index 2c9502312..c59c6efd1 100644
+--- a/clang/include/clang/Basic/AttrDocs.td
++++ b/clang/include/clang/Basic/AttrDocs.td
+@@ -2375,6 +2375,17 @@ of the type before passing to the attribute.
+   }];
+ }
+
++def Sw64InterruptDocs : Documentation {
++  let Category = DocCatFunction;
++  let Heading = "interrupt (SW64)";
++  let Content = [{
++Clang supports the GNU style ``__attribute__((interrupt))`` attribute on SW64
++targets. This attribute may be attached to a function definition and instructs
++the backend to generate appropriate function entry/exit code so that it can be
++used directly as an interrupt service routine.
++  }];
++}
++
+ def AVRInterruptDocs : Documentation {
+   let Category = DocCatFunction;
+   let Heading = "interrupt (AVR)";
+diff --git a/clang/include/clang/Basic/BuiltinsSw64.def b/clang/include/clang/Basic/BuiltinsSw64.def
+new file mode 100644
+index 000000000..d3e85bf6c
+--- /dev/null
++++ b/clang/include/clang/Basic/BuiltinsSw64.def
+@@ -0,0 +1,249 @@
++//===--- BuiltinsSw64.def - Sw64 Builtin function database ----*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file defines the Sw64-specific builtin function database. Users of
++// this file must define the BUILTIN macro to make use of this information.
++//
++//===----------------------------------------------------------------------===//
++
++// The format of this database matches clang/Basic/Builtins.def.
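++// A consumer defines BUILTIN before including this file; for example,
++// TargetBuiltins.h below in this patch builds the builtin ID enum with:
++//
++//   #define BUILTIN(ID, TYPE, ATTRS) BI##ID,
++//   #include "clang/Basic/BuiltinsSw64.def"
++//
++// Type strings use the Builtins.def encoding: "V8i" is a vector of eight
++// ints, "Li" is long, "Ui" is unsigned int, "v" is void, and "." marks a
++// variadic signature. Attribute letters: "n" nothrow, "c" const, "t"
++// custom type checking in Sema (see CheckSw64BuiltinFunctionCall), and
++// "V:256:" a minimum required vector width of 256 bits.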
++ ++BUILTIN(__builtin_bitrev, "UiUi", "nc") ++BUILTIN(__builtin_getid, "Si", "nc") ++BUILTIN(__builtin_getps, "UiUi", "n") ++BUILTIN(__builtin_setps, "vUiUi", "n") ++ ++BUILTIN(__builtin_sw64_crc32b, "LiLiLi", "n") ++BUILTIN(__builtin_sw64_crc32h, "LiLiLi", "n") ++BUILTIN(__builtin_sw64_crc32w, "LiLiLi", "n") ++BUILTIN(__builtin_sw64_crc32l, "LiLiLi", "n") ++BUILTIN(__builtin_sw64_crc32cb, "LiLiLi", "n") ++BUILTIN(__builtin_sw64_crc32ch, "LiLiLi", "n") ++BUILTIN(__builtin_sw64_crc32cw, "LiLiLi", "n") ++BUILTIN(__builtin_sw64_crc32cl, "LiLiLi", "n") ++ ++BUILTIN(__builtin_sw64_sbt, "LiLiLi", "n") ++BUILTIN(__builtin_sw64_cbt, "LiLiLi", "n") ++ ++BUILTIN(__builtin_sw_vaddw, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vsubw, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vucaddw, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vucsubw, "V8iV8iV8i", "n") ++ ++BUILTIN(__builtin_sw_vaddl, "V4LiV4LiV4Li", "n") ++BUILTIN(__builtin_sw_vsubl, "V4LiV4LiV4Li", "n") ++ ++BUILTIN(__builtin_sw_vucaddh, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vucsubh, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vucaddb, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vucsubb, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vucaddhi, "V8iV8iLi", "n") ++BUILTIN(__builtin_sw_vucsubhi, "V8iV8iLi", "n") ++BUILTIN(__builtin_sw_vucaddbi, "V8iV8iLi", "n") ++BUILTIN(__builtin_sw_vucsubbi, "V8iV8iLi", "n") ++ ++BUILTIN(__builtin_sw_vucaddh_v16hi, "V16sV16sV16s", "n") ++BUILTIN(__builtin_sw_vucsubh_v16hi, "V16sV16sV16s", "n") ++BUILTIN(__builtin_sw_vucaddb_v32qi, "V32cV32cV32c", "n") ++BUILTIN(__builtin_sw_vucsubb_v32qi, "V32cV32cV32c", "n") ++ ++BUILTIN(__builtin_sw_vsumw, "LiV8i", "n") ++BUILTIN(__builtin_sw_vsuml, "LiV4Li", "n") ++BUILTIN(__builtin_sw_ctpopow, "LiV8i", "n") ++BUILTIN(__builtin_sw_ctlzow, "LiV8i", "n") ++ ++BUILTIN(__builtin_sw_vsll, "v.", "t") ++BUILTIN(__builtin_sw_vsrl, "v.", "t") ++BUILTIN(__builtin_sw_vsra, "v.", "t") ++BUILTIN(__builtin_sw_vrol, "v.", "t") ++ ++BUILTIN(__builtin_sw_vsllw, "V8iV8iLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsrlw, "V8iV8iLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsraw, "V8iV8iLi", "ncV:256:") ++BUILTIN(__builtin_sw_vrolw, "V8iV8iLi", "ncV:256:") ++ ++BUILTIN(__builtin_sw_vsllb, "V32cV32cLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsrlb, "V32cV32cLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsrab, "V32cV32cLi", "ncV:256:") ++BUILTIN(__builtin_sw_vrolb, "V32cV32cLi", "ncV:256:") ++ ++BUILTIN(__builtin_sw_vslll, "V4LiV4LiLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsrll, "V4LiV4LiLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsral, "V4LiV4LiLi", "ncV:256:") ++BUILTIN(__builtin_sw_vroll, "V4LiV4LiLi", "ncV:256:") ++ ++BUILTIN(__builtin_sw_vsllh, "V16sV16sLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsrlh, "V16sV16sLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsrah, "V16sV16sLi", "ncV:256:") ++BUILTIN(__builtin_sw_vrolh, "V16sV16sLi", "ncV:256:") ++ ++BUILTIN(__builtin_sw_sllow, "V4LiV4LiLi", "ncV:256:") ++BUILTIN(__builtin_sw_srlow, "V4LiV4LiLi", "ncV:256:") ++BUILTIN(__builtin_sw_sraow, "V4LiV4LiLi", "ncV:256:") ++ ++BUILTIN(__builtin_sw_vslls, "V4fV4fLi", "ncV:256:") ++BUILTIN(__builtin_sw_vslld, "V4dV4dLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsrls, "V4fV4fLi", "ncV:256:") ++BUILTIN(__builtin_sw_vsrld, "V4dV4dLi", "ncV:256:") ++ ++BUILTIN(__builtin_sw_vcmpgew, "LiV8iV8i", "n") ++BUILTIN(__builtin_sw_vcmpeqw, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vcmplew, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vcmpltw, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vcmpulew, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vcmpultw, "V8iV8iV8i", "n") 
++BUILTIN(__builtin_sw_vcmpueqb, "V32cV32cV32c", "n") ++BUILTIN(__builtin_sw_vcmpugtb, "V32cV32cV32c", "n") ++ ++BUILTIN(__builtin_sw_vmaxb, "V32cV32cV32c", "n") ++BUILTIN(__builtin_sw_vmaxh, "V16sV16sV16s", "n") ++BUILTIN(__builtin_sw_vmaxw, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vmaxl, "V4LiV4LiV4Li", "n") ++ ++BUILTIN(__builtin_sw_vumaxb, "V32cV32cV32c", "n") ++BUILTIN(__builtin_sw_vumaxh, "V16sV16sV16s", "n") ++BUILTIN(__builtin_sw_vumaxw, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vumaxl, "V4LiV4LiV4Li", "n") ++ ++BUILTIN(__builtin_sw_vminb, "V32cV32cV32c", "n") ++BUILTIN(__builtin_sw_vminh, "V16sV16sV16s", "n") ++BUILTIN(__builtin_sw_vminw, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vminl, "V4LiV4LiV4Li", "n") ++ ++BUILTIN(__builtin_sw_vuminb, "V32cV32cV32c", "n") ++BUILTIN(__builtin_sw_vuminh, "V16sV16sV16s", "n") ++BUILTIN(__builtin_sw_vuminw, "V8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vuminl, "V4LiV4LiV4Li", "n") ++ ++BUILTIN(__builtin_sw_vseleqw, "V8iV8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vsellew, "V8iV8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vselltw, "V8iV8iV8iV8i", "n") ++BUILTIN(__builtin_sw_vsellbcw, "V8iV8iV8iV8i", "n") ++ ++BUILTIN(__builtin_sw_vseleqwi, "V8iV8iV8iLi", "n") ++BUILTIN(__builtin_sw_vsellewi, "V8iV8iV8iLi", "n") ++BUILTIN(__builtin_sw_vselltwi, "V8iV8iV8iLi", "n") ++BUILTIN(__builtin_sw_vsellbcwi, "V8iV8iV8iLi", "n") ++ ++BUILTIN(__builtin_sw_vxor, "V4LiV4LiV4Li", "n") ++BUILTIN(__builtin_sw_vnot, "V4LiV4LiV4Li", "n") ++BUILTIN(__builtin_sw_vorr, "V4LiV4LiV4Li", "n") ++BUILTIN(__builtin_sw_vbic, "V4LiV4LiV4Li", "n") ++BUILTIN(__builtin_sw_vornot, "V4LiV4LiV4Li", "n") ++BUILTIN(__builtin_sw_veqv, "V4LiV4LiV4Li", "n") ++ ++BUILTIN(__builtin_sw_vsqrts, "V4fV4f", "n") ++BUILTIN(__builtin_sw_vsqrtd, "V4dV4d", "n") ++ ++BUILTIN(__builtin_sw_vsums, "fV4f", "n") ++BUILTIN(__builtin_sw_vsumd, "dV4d", "n") ++ ++BUILTIN(__builtin_sw_vfrecs, "V4fV4f", "n") ++BUILTIN(__builtin_sw_vfrecd, "V4dV4d", "n") ++ ++BUILTIN(__builtin_sw_vfcmpeqs, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vfcmplts, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vfcmples, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vfcmpuns, "V4fV4fV4f", "n") ++ ++BUILTIN(__builtin_sw_vfcmpeqd, "V4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vfcmpltd, "V4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vfcmpled, "V4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vfcmpund, "V4dV4dV4d", "n") ++ ++BUILTIN(__builtin_sw_vfcvtsd, "V4dV4f", "n") ++BUILTIN(__builtin_sw_vfcvtds, "V4fV4d", "n") ++BUILTIN(__builtin_sw_vfcvtld, "V4dV4Li", "n") ++BUILTIN(__builtin_sw_vfcvtls, "V4fV4Li", "n") ++BUILTIN(__builtin_sw_vfcvtsh, "V4dV4fV4fLi", "n") ++BUILTIN(__builtin_sw_vfcvths, "V4fV4dLi", "n") ++ ++BUILTIN(__builtin_sw_vfcvtdl, "V4LiV4d", "n") ++BUILTIN(__builtin_sw_vfcvtdl_g, "V4LiV4d", "n") ++BUILTIN(__builtin_sw_vfcvtdl_p, "V4LiV4d", "n") ++BUILTIN(__builtin_sw_vfcvtdl_z, "V4LiV4d", "n") ++BUILTIN(__builtin_sw_vfcvtdl_n, "V4LiV4d", "n") ++ ++BUILTIN(__builtin_sw_vfris, "V4fV4f", "n") ++BUILTIN(__builtin_sw_vfris_g, "V4fV4f", "n") ++BUILTIN(__builtin_sw_vfris_p, "V4fV4f", "n") ++BUILTIN(__builtin_sw_vfris_z, "V4fV4f", "n") ++BUILTIN(__builtin_sw_vfris_n, "V4fV4f", "n") ++ ++BUILTIN(__builtin_sw_vfrid, "V4dV4d", "n") ++BUILTIN(__builtin_sw_vfrid_g, "V4dV4d", "n") ++BUILTIN(__builtin_sw_vfrid_p, "V4dV4d", "n") ++BUILTIN(__builtin_sw_vfrid_z, "V4dV4d", "n") ++BUILTIN(__builtin_sw_vfrid_n, "V4dV4d", "n") ++ ++BUILTIN(__builtin_sw_vmaxs, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vmaxd, "V4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vmins, "V4fV4fV4f", "n") 
++BUILTIN(__builtin_sw_vmind, "V4dV4dV4d", "n") ++ ++BUILTIN(__builtin_sw_vcpyss, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vcpyses, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vcpysns, "V4fV4fV4f", "n") ++ ++BUILTIN(__builtin_sw_vcpysd, "V4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vcpysed, "V4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vcpysnd, "V4dV4dV4d", "n") ++ ++BUILTIN(__builtin_sw_vfseleqs, "V4fV4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vfsellts, "V4fV4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vfselles, "V4fV4fV4fV4f", "n") ++ ++BUILTIN(__builtin_sw_vfseleqd, "V4dV4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vfselltd, "V4dV4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vfselled, "V4dV4dV4dV4d", "n") ++ ++BUILTIN(__builtin_sw_vmas, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vmss, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vnmas, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vnmss, "V4fV4fV4f", "n") ++BUILTIN(__builtin_sw_vmad, "V4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vmsd, "V4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vnmad, "V4dV4dV4d", "n") ++BUILTIN(__builtin_sw_vnmsd, "V4dV4dV4d", "n") ++ ++BUILTIN(__builtin_sw_vinsb, "V32cLiV32cLi", "n") ++BUILTIN(__builtin_sw_vinsh, "V16sLiV16sLi", "n") ++BUILTIN(__builtin_sw_vinsw, "V8iLiV8iLi", "n") ++BUILTIN(__builtin_sw_vinsl, "V4LiLiV4LiLi", "n") ++BUILTIN(__builtin_sw_vinsfs, "V4ffV4fLi", "n") ++BUILTIN(__builtin_sw_vinsfd, "V4ddV4dLi", "n") ++ ++BUILTIN(__builtin_sw_vextw, "LiV8iLi", "n") ++BUILTIN(__builtin_sw_vextl, "LiV4LiLi", "n") ++BUILTIN(__builtin_sw_vextfs, "fV4fLi", "n") ++BUILTIN(__builtin_sw_vextfd, "dV4dLi", "n") ++ ++BUILTIN(__builtin_sw_vshfw, "V8iV8iV8iLi", "n") ++BUILTIN(__builtin_sw_vshfq, "V8iV8iV8iLi", "n") ++BUILTIN(__builtin_sw_vshfqb, "V32cV32cV32c", "n") ++ ++BUILTIN(__builtin_sw_vconw, "V8iV8iV8iv*", "n") ++BUILTIN(__builtin_sw_vconl, "V4LiV4LiV4Liv*", "n") ++BUILTIN(__builtin_sw_vcons, "V4fV4fV4fv*", "n") ++BUILTIN(__builtin_sw_vcond, "V4dV4dV4dv*", "n") ++ ++BUILTIN(__builtin_sw_vlogzz, "V4LiV4LiV4LiV4LiLi", "n") ++BUILTIN(__builtin_sw_vload, "v.", "t") ++BUILTIN(__builtin_sw_vloadu, "v.", "t") ++BUILTIN(__builtin_sw_vload_u, "v.", "t") ++BUILTIN(__builtin_sw_vloade, "v.", "t") ++BUILTIN(__builtin_sw_vloadnc, "v.", "t") ++BUILTIN(__builtin_sw_vstore, "v.", "t") ++BUILTIN(__builtin_sw_vstoreu, "v.", "t") ++BUILTIN(__builtin_sw_vstore_u, "v.", "t") ++BUILTIN(__builtin_sw_vstoreuh, "v.", "t") ++BUILTIN(__builtin_sw_vstoreul, "v.", "t") ++BUILTIN(__builtin_sw_vstorenc, "v.", "t") ++ ++#undef BUILTIN +diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td +index 29f932d21..835cb0794 100644 +--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td +@@ -31,6 +31,8 @@ def err_drv_invalid_riscv_arch_name : Error< + "invalid arch name '%0', %1">; + def err_drv_invalid_riscv_cpu_name_for_target : Error< + "cpu '%0' does not support rv%select{32|64}1">; ++def err_drv_invalid_sw64_ext_arch_name : Error< ++ "invalid arch name '%0', %1 '%2'">; + def warn_drv_invalid_arch_name_with_suggestion : Warning< + "ignoring invalid /arch: argument '%0'; for %select{64|32}1-bit expected one of %2">, + InGroup; +diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td +index 0e9762094..b74b381b3 100644 +--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td ++++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td +@@ -11909,4 +11909,8 @@ def err_wasm_builtin_arg_must_match_table_element_type : Error < + "%ordinal0 
argument must match the element type of the WebAssembly table in the %ordinal1 argument">; + def err_wasm_builtin_arg_must_be_integer_type : Error < + "%ordinal0 argument must be an integer">; ++ ++// Sw64-specific Diagnostics ++def err_invalid_sw64_type_code : Error< ++ "incompatible type for this __builtin_sw64 function">; + } // end of sema component. +diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h +index 8f7881abf..59487eb04 100644 +--- a/clang/include/clang/Basic/TargetBuiltins.h ++++ b/clang/include/clang/Basic/TargetBuiltins.h +@@ -174,6 +174,16 @@ namespace clang { + }; + } // namespace LoongArch + ++ /// Sw64 builtins ++ namespace Sw64 { ++ enum { ++ LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1, ++#define BUILTIN(ID, TYPE, ATTRS) BI##ID, ++#include "clang/Basic/BuiltinsSw64.def" ++ LastTSBuiltin ++ }; ++ } // namespace Sw64 ++ + /// Flags to identify the types for overloaded Neon builtins. + /// + /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. +@@ -369,7 +379,8 @@ namespace clang { + PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, + X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, + Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, +- SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); ++ SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, ++ Sw64::LastTSBuiltin}); + + } // end namespace clang. + +diff --git a/clang/include/clang/Basic/TargetCXXABI.def b/clang/include/clang/Basic/TargetCXXABI.def +index 9501cca76..70573e586 100644 +--- a/clang/include/clang/Basic/TargetCXXABI.def ++++ b/clang/include/clang/Basic/TargetCXXABI.def +@@ -88,6 +88,9 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") + /// - representation of member function pointers adjusted as in ARM. + ITANIUM_CXXABI(GenericMIPS, "mips") + ++/// The generic Sw64 ABI is a modified version of the Itanium ABI. ++ITANIUM_CXXABI(GenericSW64, "sw_64") ++ + /// The WebAssembly ABI is a modified version of the Itanium ABI. + /// + /// The changes from the Itanium ABI are: +diff --git a/clang/include/clang/Basic/TargetCXXABI.h b/clang/include/clang/Basic/TargetCXXABI.h +index c113a6a04..b62f97be5 100644 +--- a/clang/include/clang/Basic/TargetCXXABI.h ++++ b/clang/include/clang/Basic/TargetCXXABI.h +@@ -103,6 +103,9 @@ public: + case GenericMIPS: + return T.isMIPS(); + ++ case GenericSW64: ++ return T.isSw64(); ++ + case WebAssembly: + return T.isWasm(); + +@@ -165,6 +168,7 @@ public: + case GenericARM: + case GenericAArch64: + case GenericMIPS: ++ case GenericSW64: + // TODO: ARM-style pointers to member functions put the discriminator in + // the this adjustment, so they don't require functions to have any + // special alignment and could therefore also return false. 
+@@ -249,6 +253,7 @@ public: + case iOS: // old iOS compilers did not follow this rule + case Microsoft: + case GenericMIPS: ++ case GenericSW64: + case XL: + return true; + } +@@ -287,6 +292,7 @@ public: + case GenericARM: + case iOS: + case GenericMIPS: ++ case GenericSW64: + case XL: + return UseTailPaddingUnlessPOD03; + +diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h +index 61be52149..cf2fc307b 100644 +--- a/clang/include/clang/Basic/TargetInfo.h ++++ b/clang/include/clang/Basic/TargetInfo.h +@@ -332,6 +332,9 @@ public: + // } va_list[1]; + SystemZBuiltinVaList, + ++ // __builtin_va_list as defined by the Sw64 ABI ++ Sw64ABIBuiltinVaList, ++ + // typedef struct __va_list_tag { + // void *__current_saved_reg_area_pointer; + // void *__saved_reg_area_end_pointer; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index 6f72b19f8..caf82f7df 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -194,6 +194,8 @@ def m_riscv_Features_Group : OptionGroup<"">, + Group, DocName<"RISC-V">; + def m_loongarch_Features_Group : OptionGroup<"">, + Group, DocName<"LoongArch">; ++def m_sw_64_Features_Group : OptionGroup<"">, ++ Group, DocName<"SW64">; + + def m_libc_Group : OptionGroup<"">, Group, + Flags<[HelpHidden]>; +@@ -756,6 +758,17 @@ paths, for example if also specified with -isystem, the -I option + will be ignored}]>; + def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group, + MetaVarName<"">, HelpText<"Add directory to library search path">; ++def Sw64mieee : Flag<["-"], "mieee">, Group, Flags<[CC1Option]>, ++ HelpText<"Use mieee to set setfpec for sw_64 target.">; ++ ++foreach i = {0-31} in ++def Sw64ffixed_#i : Flag<["-"], "ffixed-"#i>, Group, ++ HelpText<"Reserve the "#i#" register (Sw64 only)">; ++def Sw64nofpregs : Flag<["-"], "mno-fp-regs">, Group, ++ HelpText<"Generate target code that does not use the floating point register group">; ++ ++def FS_LOAD : Flag<["-"], "fastload">, Group, Flags<[CC1Option]>, ++ HelpText<"enable fast load/store instrs in sw_64 target.(Development)">; + def MD : Flag<["-"], "MD">, Group, + HelpText<"Write a depfile containing user and system headers">; + def MMD : Flag<["-"], "MMD">, Group, +@@ -3176,6 +3189,23 @@ def ffinite_loops: Flag<["-"], "ffinite-loops">, Group, + def fno_finite_loops: Flag<["-"], "fno-finite-loops">, Group, + HelpText<"Do not assume that any loop is finite.">, Flags<[CC1Option]>; + ++def fsw_int_divmod : Flag<["-"], "fsw-int-divmod">, Group, ++ HelpText<"Enable sw64 core4 int-div/rem instructions">, Flags<[CC1Option]>; ++def fsw_shift_word : Flag<["-"], "fsw-shift-word">, Group, ++ HelpText<"Enable sw64 core4 int-shift instructions">, Flags<[CC1Option]>; ++def fsw_rev : Flag<["-"], "fsw-rev">, Group, ++ HelpText<"Enable sw64 core4 byte-rev instructions">, Flags<[CC1Option]>; ++def fsw_recip : Flag<["-"], "fsw-recip">, Group, ++ HelpText<"Enable sw64 core4 fp-rec instructions">, Flags<[CC1Option]>; ++def fsw_fprnd : Flag<["-"], "fsw-fprnd">, Group, ++ HelpText<"Enable sw64 core4 fp-round instructions">, Flags<[CC1Option]>; ++def fsw_cmov : Flag<["-"], "fsw-cmov">, Group, ++ HelpText<"Enable sw64 core4 fp-cmov instructions">, Flags<[CC1Option]>; ++def fsw_auto_inc_dec : Flag<["-"], "fsw-auto-inc-dec">, Group, ++ HelpText<"Enable sw64 core4 post-inc instructions">, Flags<[CC1Option]>; ++def fsw_use_cas : Flag<["-"], "fsw-use-cas">, Group, ++ HelpText<"Enable sw64 core4 atomic-cas 
instructions">, Flags<[CC1Option]>; ++ + def ftrigraphs : Flag<["-"], "ftrigraphs">, Group, + HelpText<"Process trigraph sequences">, Flags<[CC1Option]>; + def fno_trigraphs : Flag<["-"], "fno-trigraphs">, Group, +@@ -3760,6 +3790,20 @@ def mzos_hlq_clang_EQ : Joined<["-"], "mzos-hlq-clang=">, MetaVarName<"">, + HelpText<"High level qualifier for z/OS CSSLIB dataset">; + ++//SW_FIXME: ++def msw64_relax : Flag<["-"], "sw64-mrelax">, Group, ++ HelpText<"Enable linker relaxation">; ++def msw64_no_relax : Flag<["-"], "sw64-mno-relax">, Group, ++ HelpText<"Disable linker relaxation">; ++def msw6a : Flag<["-"], "sw6a">, ++ Alias, AliasArgs<["sw6a"]>, Group, ++ HelpText<"sw6a">, Flags<[HelpHidden]>; ++ ++def msw6b : Flag<["-"], "sw6b">, ++ Alias, AliasArgs<["sw6b"]>, Group, ++ HelpText<"sw6b">, Flags<[HelpHidden]>; ++ ++def mswEv : Flag<["-"], "mswEv">, Group; + def mno_constant_cfstrings : Flag<["-"], "mno-constant-cfstrings">, Group; + def mno_global_merge : Flag<["-"], "mno-global-merge">, Group, Flags<[CC1Option]>, + HelpText<"Disable merging of globals">; +@@ -4253,6 +4297,12 @@ def mmsa : Flag<["-"], "mmsa">, Group, + HelpText<"Enable MSA ASE (MIPS only)">; + def mno_msa : Flag<["-"], "mno-msa">, Group, + HelpText<"Disable MSA ASE (MIPS only)">; ++ ++def msimd : Flag<["-"], "msimd">, Group, ++ HelpText<"Enable SIMD (SW64 only)">; ++def mno_simd : Flag<["-"], "mno-simd">, Group, ++ HelpText<"Disable SIMD (SW64 only)">; ++ + def mmt : Flag<["-"], "mmt">, Group, + HelpText<"Enable MT ASE (MIPS only)">; + def mno_mt : Flag<["-"], "mno-mt">, Group, +diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h +index 3752a23fa..100a788b1 100644 +--- a/clang/include/clang/Sema/Sema.h ++++ b/clang/include/clang/Sema/Sema.h +@@ -13618,6 +13618,9 @@ private: + bool CheckMipsBuiltinCpu(const TargetInfo &TI, unsigned BuiltinID, + CallExpr *TheCall); + bool CheckMipsBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall); ++ bool CheckSw64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); ++ bool CheckSw64VectorMemoryIntr(unsigned BuiltinID, CallExpr *TheCall); ++ bool CheckSw64VectorShift(unsigned BuiltinID, CallExpr *TheCall); + bool CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall); + bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall); +diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp +index 76000156f..62a8c227a 100644 +--- a/clang/lib/AST/ASTContext.cpp ++++ b/clang/lib/AST/ASTContext.cpp +@@ -884,6 +884,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { + case TargetCXXABI::WatchOS: + case TargetCXXABI::GenericAArch64: + case TargetCXXABI::GenericMIPS: ++ case TargetCXXABI::GenericSW64: + case TargetCXXABI::GenericItanium: + case TargetCXXABI::WebAssembly: + case TargetCXXABI::XL: +@@ -9009,6 +9010,57 @@ CreateAAPCSABIBuiltinVaListDecl(const ASTContext *Context) { + return Context->buildImplicitTypedef(T, "__builtin_va_list"); + } + ++static TypedefDecl *CreateSw64ABIBuiltinVaListDecl(const ASTContext *Context) { ++ // struct __va_list { ++ RecordDecl *VaListTagDecl = Context->buildImplicitRecord("__va_list"); ++ ++ if (Context->getLangOpts().CPlusPlus) { ++ ++ // namespace std { ++ // struct __va_list { ++ NamespaceDecl *NS; ++ NS = NamespaceDecl::Create(const_cast(*Context), ++ Context->getTranslationUnitDecl(), ++ /*Inline*/ false, SourceLocation(), ++ SourceLocation(), &Context->Idents.get("std"), ++ /*PrevDecl*/ 
nullptr, /*Nested=*/false);
++    NS->setImplicit();
++    VaListTagDecl->setDeclContext(NS);
++  }
++
++  VaListTagDecl->startDefinition();
++
++  const size_t NumFields = 2;
++  QualType FieldTypes[NumFields];
++  const char *FieldNames[NumFields];
++
++  // void *__stack;
++  FieldTypes[0] = Context->getPointerType(Context->VoidTy);
++  FieldNames[0] = "__stack";
++
++  // int __offs;
++  FieldTypes[1] = Context->IntTy;
++  FieldNames[1] = "__offs";
++
++  // Create fields
++  for (unsigned i = 0; i < NumFields; ++i) {
++    FieldDecl *Field = FieldDecl::Create(
++        const_cast<ASTContext &>(*Context), VaListTagDecl, SourceLocation(),
++        SourceLocation(), &Context->Idents.get(FieldNames[i]), FieldTypes[i],
++        /*TInfo=*/nullptr,
++        /*BitWidth=*/nullptr,
++        /*Mutable=*/false, ICIS_NoInit);
++    Field->setAccess(AS_public);
++    VaListTagDecl->addDecl(Field);
++  }
++  VaListTagDecl->completeDefinition();
++  Context->VaListTagDecl = VaListTagDecl;
++  QualType VaListTagType = Context->getRecordType(VaListTagDecl);
++
++  // };
++  return Context->buildImplicitTypedef(VaListTagType, "__builtin_va_list");
++}
++
+ static TypedefDecl *
+ CreateSystemZBuiltinVaListDecl(const ASTContext *Context) {
+   // struct __va_list_tag {
+@@ -9136,6 +9188,8 @@ static TypedefDecl *CreateVaListDecl(const ASTContext *Context,
+     return CreateSystemZBuiltinVaListDecl(Context);
+   case TargetInfo::HexagonBuiltinVaList:
+     return CreateHexagonBuiltinVaListDecl(Context);
++  case TargetInfo::Sw64ABIBuiltinVaList:
++    return CreateSw64ABIBuiltinVaListDecl(Context);
+   }
+
+   llvm_unreachable("Unhandled __builtin_va_list type kind");
+@@ -12041,6 +12095,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
+   case TargetCXXABI::GenericItanium:
+   case TargetCXXABI::GenericARM:
+   case TargetCXXABI::GenericMIPS:
++  case TargetCXXABI::GenericSW64:
+   case TargetCXXABI::iOS:
+   case TargetCXXABI::WebAssembly:
+   case TargetCXXABI::WatchOS:
+@@ -12062,6 +12117,7 @@ MangleContext *ASTContext::createDeviceMangleContext(const TargetInfo &T) {
+   case TargetCXXABI::GenericItanium:
+   case TargetCXXABI::GenericARM:
+   case TargetCXXABI::GenericMIPS:
++  case TargetCXXABI::GenericSW64:
+   case TargetCXXABI::iOS:
+   case TargetCXXABI::WebAssembly:
+   case TargetCXXABI::WatchOS:
+diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt
+index caa1b6002..e830db015 100644
+--- a/clang/lib/Basic/CMakeLists.txt
++++ b/clang/lib/Basic/CMakeLists.txt
+@@ -109,6 +109,7 @@ add_clang_library(clangBasic
+   Targets/RISCV.cpp
+   Targets/SPIR.cpp
+   Targets/Sparc.cpp
++  Targets/Sw64.cpp
+   Targets/SystemZ.cpp
+   Targets/TCE.cpp
+   Targets/VE.cpp
+diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
+index 636b59fd1..1dbf40b13 100644
+--- a/clang/lib/Basic/Targets.cpp
++++ b/clang/lib/Basic/Targets.cpp
+@@ -35,6 +35,7 @@
+ #include "Targets/RISCV.h"
+ #include "Targets/SPIR.h"
+ #include "Targets/Sparc.h"
++#include "Targets/Sw64.h"
+ #include "Targets/SystemZ.h"
+ #include "Targets/TCE.h"
+ #include "Targets/VE.h"
+@@ -132,6 +133,9 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple,
+   case llvm::Triple::lanai:
+     return std::make_unique<LanaiTargetInfo>(Triple, Opts);
+
++  case llvm::Triple::sw_64:
++    return std::make_unique<Sw64TargetInfo>(Triple, Opts);
++
+   case llvm::Triple::aarch64_32:
+     if (Triple.isOSDarwin())
+       return std::make_unique<DarwinAArch64TargetInfo>(Triple, Opts);
+diff --git a/clang/lib/Basic/Targets/Sw64.cpp b/clang/lib/Basic/Targets/Sw64.cpp
+new file mode 100644
+index 000000000..c622a4b7a
+--- /dev/null
++++ b/clang/lib/Basic/Targets/Sw64.cpp
+@@ -0,0 +1,125 @@
++//===--- Sw64.cpp - Implement Sw64 target feature support ---------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file implements Sw64 TargetInfo objects.
++//
++//===----------------------------------------------------------------------===//
++
++#include "Sw64.h"
++#include "Targets.h"
++#include "clang/Basic/Builtins.h"
++#include "clang/Basic/LangOptions.h"
++#include "clang/Basic/MacroBuilder.h"
++#include "clang/Basic/TargetBuiltins.h"
++#include "llvm/ADT/StringSwitch.h"
++#include "llvm/Support/Sw64TargetParser.h"
++
++using namespace clang;
++using namespace clang::targets;
++
++ArrayRef<const char *> Sw64TargetInfo::getGCCRegNames() const {
++  static const char *const GCCRegNames[] = {
++      "$0",   "$1",   "$2",   "$3",   "$4",   "$5",   "$6",   "$7",
++      "$8",   "$9",   "$10",  "$11",  "$12",  "$13",  "$14",  "$15",
++      "$16",  "$17",  "$18",  "$19",  "$20",  "$21",  "$22",  "$23",
++      "$24",  "$25",  "$26",  "$27",  "$28",  "$29",  "$30",  "$31",
++      "$f0",  "$f1",  "$f2",  "$f3",  "$f4",  "$f5",  "$f6",  "$f7",
++      "$f8",  "$f9",  "$f10", "$f11", "$f12", "$f13", "$f14", "$f15",
++      "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",
++      "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31"};
++  return llvm::makeArrayRef(GCCRegNames);
++}
++
++ArrayRef<TargetInfo::GCCRegAlias> Sw64TargetInfo::getGCCRegAliases() const {
++  static const TargetInfo::GCCRegAlias GCCRegAliases[] = {
++      {{"v0"}, "$0"},   {{"t0"}, "$1"},   {{"t1"}, "$2"},   {{"t2"}, "$3"},
++      {{"t3"}, "$4"},   {{"t4"}, "$5"},   {{"t5"}, "$6"},   {{"t6"}, "$7"},
++      {{"t7"}, "$8"},   {{"s0"}, "$9"},   {{"s1"}, "$10"},  {{"s2"}, "$11"},
++      {{"s3"}, "$12"},  {{"s4"}, "$13"},  {{"s5"}, "$14"},  {{"fp"}, "$15"},
++      {{"a0"}, "$16"},  {{"a1"}, "$17"},  {{"a2"}, "$18"},  {{"a3"}, "$19"},
++      {{"a4"}, "$20"},  {{"a5"}, "$21"},  {{"t8"}, "$22"},  {{"t9"}, "$23"},
++      {{"t10"}, "$24"}, {{"t11"}, "$25"}, {{"ra"}, "$26"},  {{"t12"}, "$27"},
++      {{"at"}, "$28"},  {{"gp"}, "$29"},  {{"sp"}, "$30"},  {{"zero"}, "$31"}};
++  return llvm::makeArrayRef(GCCRegAliases);
++}
++
++const Builtin::Info Sw64TargetInfo::BuiltinInfo[] = {
++#define BUILTIN(ID, TYPE, ATTRS)                                              \
++  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
++#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                   \
++  {#ID, TYPE, ATTRS, HEADER, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
++#include "clang/Basic/BuiltinsSw64.def"
++};
++
++void Sw64TargetInfo::fillValidCPUList(
++    SmallVectorImpl<StringRef> &Values) const {
++  llvm::Sw64::fillValidCPUArchList(Values, true);
++}
++
++bool Sw64TargetInfo::isValidTuneCPUName(StringRef Name) const {
++  return llvm::Sw64::checkTuneCPUKind(llvm::Sw64::parseTuneCPUKind(Name, true),
++                                      /*Is64Bit=*/true);
++}
++
++void Sw64TargetInfo::fillValidTuneCPUList(
++    SmallVectorImpl<StringRef> &Values) const {
++  llvm::Sw64::fillValidTuneCPUArchList(Values, true);
++}
++
++bool Sw64TargetInfo::isValidCPUName(StringRef Name) const {
++  return llvm::Sw64::parseCPUArch(Name) != llvm::Sw64::CK_INVALID;
++}
++
++bool Sw64TargetInfo::setCPU(const std::string &Name) {
++  return isValidCPUName(Name);
++}
++
++void Sw64TargetInfo::getTargetDefines(const LangOptions &Opts,
++                                      MacroBuilder &Builder) const {
++  DefineStd(Builder, "sw_64", Opts);
++
++  Builder.defineMacro("__REGISTER_PREFIX__", "");
++  Builder.defineMacro("__LONG_DOUBLE_128__");
++
++  Builder.defineMacro("__ELF__");
++  Builder.defineMacro("__sw_64__");
Builder.defineMacro("__sw_64_sw6a__"); ++ Builder.defineMacro("__sw_64"); ++ // Consistent with GCC ++ Builder.defineMacro("__gnu_linux__"); ++ ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); ++ Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); ++ ++ DefineStd(Builder, "unix", Opts); ++ DefineStd(Builder, "linux", Opts); ++ ++ if (HasCore4) ++ Builder.defineMacro("__sw_64_sw8a__"); ++ ++ if (Opts.CPlusPlus) ++ Builder.defineMacro("_GNU_SOURCE"); ++} ++ ++/// Return true if has this feature, need to sync with handleTargetFeatures. ++bool Sw64TargetInfo::hasFeature(StringRef Feature) const { ++ return llvm::StringSwitch(Feature) ++ .Case("sw_64", true) ++ .Case("core3b", HasCore3) ++ .Case("core4", HasCore4) ++ .Case("simd", HasSIMD) ++ .Default(false); ++} ++ ++ArrayRef Sw64TargetInfo::getTargetBuiltins() const { ++ return llvm::makeArrayRef(BuiltinInfo, clang::Sw64::LastTSBuiltin - ++ Builtin::FirstTSBuiltin); ++} +diff --git a/clang/lib/Basic/Targets/Sw64.h b/clang/lib/Basic/Targets/Sw64.h +new file mode 100644 +index 000000000..791d893a7 +--- /dev/null ++++ b/clang/lib/Basic/Targets/Sw64.h +@@ -0,0 +1,141 @@ ++//===--- Sw64.h - Declare Sw64 target feature support ---------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file declares Sw64 TargetInfo objects. ++// ++//===----------------------------------------------------------------------===// ++#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_SW64_H ++#define LLVM_CLANG_LIB_BASIC_TARGETS_SW64_H ++ ++#include "clang/Basic/TargetInfo.h" ++#include "clang/Basic/TargetOptions.h" ++#include "llvm/Support/Compiler.h" ++#include "llvm/Support/Sw64TargetParser.h" ++#include "llvm/TargetParser/Triple.h" ++ ++namespace clang { ++namespace targets { ++ ++class LLVM_LIBRARY_VISIBILITY Sw64TargetInfo : public TargetInfo { ++ static const Builtin::Info BuiltinInfo[]; ++ bool HasCore3 = false; ++ bool HasCore4 = false; ++ ++ // for futrure update ++ // change data length ++ void setDataLayout() { ++ StringRef Layout; ++ Layout = ++ "e-m:e-p:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n64-S128-v256:256"; ++ resetDataLayout(Layout.str()); ++ } ++ ++ bool HasSIMD; ++ ++public: ++ Sw64TargetInfo(const llvm::Triple &Triple, const TargetOptions &) ++ : TargetInfo(Triple), HasSIMD(false) { ++ NoAsmVariants = true; ++ MCountName = ""; ++ setABI("sw_64"); ++ UseZeroLengthBitfieldAlignment = false; ++ IntMaxType = SignedLong; ++ } ++ ++ bool setABI(const std::string &Name) override { ++ set64ABITypes(); ++ return true; ++ } ++ ++ void set64ABITypes(void) { ++ LongWidth = LongAlign = 64; ++ PointerWidth = PointerAlign = 64; ++ LongDoubleWidth = LongDoubleAlign = 128; ++ LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; ++ DoubleAlign = LongLongAlign = 64; ++ SuitableAlign = 128; ++ MaxVectorAlign = 256; ++ SizeType = UnsignedLong; ++ PtrDiffType = SignedLong; ++ IntPtrType = SignedLong; ++ WCharType = SignedInt; ++ WIntType = UnsignedInt; ++ } ++ ++ void getTargetDefines(const LangOptions &Opts, ++ MacroBuilder &Builder) const override; ++ ++ ArrayRef getTargetBuiltins() const override; ++ ++ BuiltinVaListKind getBuiltinVaListKind() const override { 
++    return TargetInfo::Sw64ABIBuiltinVaList;
++  }
++
++  ArrayRef<const char *> getGCCRegNames() const override;
++
++  ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override;
++
++  std::string_view getClobbers() const override { return ""; }
++
++  bool hasFeature(StringRef Feature) const override;
++  bool handleTargetFeatures(std::vector<std::string> &Features,
++                            DiagnosticsEngine &Diags) override {
++    for (const auto &Feature : Features) {
++      if (Feature == "+simd")
++        HasSIMD = true;
++      if (Feature == "+core3b")
++        HasCore3 = true;
++      if (Feature == "+core4")
++        HasCore4 = true;
++    }
++    setDataLayout();
++    return true;
++  };
++
++  bool isValidCPUName(StringRef Name) const override;
++  bool setCPU(const std::string &Name) override;
++  void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
++  bool isValidTuneCPUName(StringRef Name) const override;
++  void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const override;
++  bool validateAsmConstraint(const char *&Name,
++                             TargetInfo::ConstraintInfo &Info) const override {
++    switch (*Name) {
++    default:
++      return false;
++    case 'I': // Signed 16-bit constant
++    case 'J': // Integer 0
++    case 'K': // Unsigned 16-bit constant
++    case 'L': // Signed 32-bit constant, lower 16-bit zeros (for lui)
++    case 'M': // Constants not loadable via lui, addiu, or ori
++    case 'N': // Constant -1 to -65535
++    case 'O': // A signed 15-bit constant
++    case 'P': // A constant between 1 and 65535
++      return true;
++    }
++  }
++  // Return the register number that __builtin_eh_return_regno would return
++  // with the specified argument.
++  //
++  // This corresponds with TargetLowering's getExceptionPointerRegister and
++  // getExceptionSelectorRegister in the backend.
++  int getEHDataRegisterNumber(unsigned RegNo) const override {
++    if (RegNo == 0)
++      return 16;
++    if (RegNo == 1)
++      return 17;
++    return -1;
++  }
++
++  bool allowsLargerPreferedTypeAlignment() const override { return false; }
++  bool hasBitIntType() const override { return true; }
++};
++} // namespace targets
++} // namespace clang
++#endif
+diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
+index e512762fa..c88996ef5 100644
+--- a/clang/lib/CodeGen/CGBuiltin.cpp
++++ b/clang/lib/CodeGen/CGBuiltin.cpp
+@@ -48,6 +48,7 @@
+ #include "llvm/IR/IntrinsicsR600.h"
+ #include "llvm/IR/IntrinsicsRISCV.h"
+ #include "llvm/IR/IntrinsicsS390.h"
++#include "llvm/IR/IntrinsicsSw64.h"
+ #include "llvm/IR/IntrinsicsVE.h"
+ #include "llvm/IR/IntrinsicsWebAssembly.h"
+ #include "llvm/IR/IntrinsicsX86.h"
+@@ -5587,6 +5588,8 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
+   case llvm::Triple::riscv32:
+   case llvm::Triple::riscv64:
+     return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
++  case llvm::Triple::sw_64:
++    return CGF->EmitSw64BuiltinExpr(BuiltinID, E, ReturnValue);
+   default:
+     return nullptr;
+   }
+@@ -20414,3 +20417,108 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
+   llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
+   return Builder.CreateCall(F, Ops, "");
+ }
++
++Value *CodeGenFunction::EmitSw64BuiltinExpr(unsigned BuiltinID,
++                                            const CallExpr *E,
++                                            ReturnValueSlot ReturnValue) {
++  SmallVector<Value *> Ops;
++  llvm::Type *ResultType = ConvertType(E->getType());
++  Intrinsic::ID ID = Intrinsic::not_intrinsic;
++
++  switch (BuiltinID) {
++  default:
++    llvm_unreachable("unexpected builtin ID");
++  case Sw64::BI__builtin_sw_vload:
++    ID = Intrinsic::sw64_vload;
++    break;
++  case Sw64::BI__builtin_sw_vloade:
++    ID = Intrinsic::sw64_vloade;
++    break;
++  case 
Sw64::BI__builtin_sw_vloadu: ++ ID = Intrinsic::sw64_vloadu; ++ break; ++ case Sw64::BI__builtin_sw_vload_u: ++ ID = Intrinsic::sw64_vload_u; ++ break; ++ case Sw64::BI__builtin_sw_vloadnc: ++ ID = Intrinsic::sw64_vloadnc; ++ break; ++ case Sw64::BI__builtin_sw_vstore: ++ ID = Intrinsic::sw64_vstore; ++ break; ++ case Sw64::BI__builtin_sw_vstoreu: ++ ID = Intrinsic::sw64_vstoreu; ++ break; ++ case Sw64::BI__builtin_sw_vstore_u: ++ ID = Intrinsic::sw64_vstore_u; ++ break; ++ case Sw64::BI__builtin_sw_vstoreul: ++ ID = Intrinsic::sw64_vstoreul; ++ break; ++ case Sw64::BI__builtin_sw_vstoreuh: ++ ID = Intrinsic::sw64_vstoreuh; ++ break; ++ case Sw64::BI__builtin_sw_vstorenc: ++ ID = Intrinsic::sw64_vstorenc; ++ break; ++ case Sw64::BI__builtin_sw_vsll: ++ ID = Intrinsic::sw64_vsll; ++ break; ++ case Sw64::BI__builtin_sw_vsrl: ++ ID = Intrinsic::sw64_vsrl; ++ break; ++ case Sw64::BI__builtin_sw_vsra: ++ ID = Intrinsic::sw64_vsra; ++ break; ++ case Sw64::BI__builtin_sw_vrol: ++ ID = Intrinsic::sw64_vrol; ++ break; ++ } ++ ++ if (BuiltinID == Sw64::BI__builtin_sw_vload || ++ BuiltinID == Sw64::BI__builtin_sw_vloade || ++ BuiltinID == Sw64::BI__builtin_sw_vloadu || ++ BuiltinID == Sw64::BI__builtin_sw_vload_u || ++ BuiltinID == Sw64::BI__builtin_sw_vloadnc) { ++ bool isLoadExt = BuiltinID == Sw64::BI__builtin_sw_vloade; ++ ++ Value *LoadAddr = EmitScalarExpr(E->getArg(0)); ++ QualType Ty = E->getType(); ++ llvm::Type *ArgTy = LoadAddr->getType(); ++ llvm::Type *RealResTy = ConvertType(Ty); ++ llvm::Type *ResPTy = RealResTy->getPointerTo(); ++ // if target is Load duplicated in vector, do not emit BitCast ++ ResPTy = isLoadExt ? LoadAddr->getType() : ResPTy; ++ if (!isLoadExt) { ++ LoadAddr = Builder.CreateBitCast(LoadAddr, ResPTy); ++ } ++ llvm::Type *Tys[2] = {RealResTy, ResPTy}; ++ Function *F = CGM.getIntrinsic(ID, Tys); ++ return Builder.CreateCall(F, LoadAddr, "vload"); ++ } else if (BuiltinID == Sw64::BI__builtin_sw_vstore || ++ BuiltinID == Sw64::BI__builtin_sw_vstoreu || ++ BuiltinID == Sw64::BI__builtin_sw_vstore_u || ++ BuiltinID == Sw64::BI__builtin_sw_vstoreuh || ++ BuiltinID == Sw64::BI__builtin_sw_vstoreul || ++ BuiltinID == Sw64::BI__builtin_sw_vstorenc) { ++ Value *StoreVal = EmitScalarExpr(E->getArg(0)); ++ Value *StoreAddr = EmitScalarExpr(E->getArg(1)); ++ QualType Ty = E->getArg(0)->getType(); ++ llvm::Type *StoreTy = StoreVal->getType(); ++ StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); ++ Function *F = ++ CGM.getIntrinsic(ID, {StoreVal->getType(), StoreAddr->getType()}); ++ return Builder.CreateCall(F, {StoreVal, StoreAddr}, ""); ++ } else if (BuiltinID == Sw64::BI__builtin_sw_vsll || ++ BuiltinID == Sw64::BI__builtin_sw_vsra || ++ BuiltinID == Sw64::BI__builtin_sw_vsrl || ++ BuiltinID == Sw64::BI__builtin_sw_vrol) { ++ Value *ShiftVal = EmitScalarExpr(E->getArg(0)); ++ Value *ShiftImm = EmitScalarExpr(E->getArg(1)); ++ QualType Ty = E->getArg(0)->getType(); ++ ++ Function *F = ++ CGM.getIntrinsic(ID, {ShiftVal->getType(), ShiftImm->getType()}); ++ return Builder.CreateCall(F, {ShiftVal, ShiftImm}, ""); ++ } ++} +diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt +index 1debeb6d9..a575aa57d 100644 +--- a/clang/lib/CodeGen/CMakeLists.txt ++++ b/clang/lib/CodeGen/CMakeLists.txt +@@ -108,6 +108,7 @@ add_clang_library(clangCodeGen + Targets/RISCV.cpp + Targets/SPIR.cpp + Targets/Sparc.cpp ++ Targets/Sw64.cpp + Targets/SystemZ.cpp + Targets/TCE.cpp + Targets/VE.cpp +diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
+index 143e0707b..afcc0a5b9 100644
+--- a/clang/lib/CodeGen/CodeGenFunction.h
++++ b/clang/lib/CodeGen/CodeGenFunction.h
+@@ -4310,6 +4310,8 @@ public:
+   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+   llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+   llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
++  llvm::Value *EmitSw64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
++                                   ReturnValueSlot ReturnValue);
+   llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+   llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
+                                           const CallExpr *E);
+diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
+index f09d1129b..fed1ac787 100644
+--- a/clang/lib/CodeGen/CodeGenModule.cpp
++++ b/clang/lib/CodeGen/CodeGenModule.cpp
+@@ -90,6 +90,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
+   case TargetCXXABI::iOS:
+   case TargetCXXABI::WatchOS:
+   case TargetCXXABI::GenericMIPS:
++  case TargetCXXABI::GenericSW64:
+   case TargetCXXABI::GenericItanium:
+   case TargetCXXABI::WebAssembly:
+   case TargetCXXABI::XL:
+@@ -268,6 +269,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
+       return createX86_64TargetCodeGenInfo(CGM, AVXLevel);
+     }
+   }
++  case llvm::Triple::sw_64:
++    return createSw64TargetCodeGenInfo(CGM);
+   case llvm::Triple::hexagon:
+     return createHexagonTargetCodeGenInfo(CGM);
+   case llvm::Triple::lanai:
+diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
+index ede9efb01..32b441813 100644
+--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
++++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
+@@ -570,6 +570,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) {
+   case TargetCXXABI::GenericMIPS:
+     return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true);
+
++  case TargetCXXABI::GenericSW64:
++    return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true);
++
+   case TargetCXXABI::WebAssembly:
+     return new WebAssemblyCXXABI(CGM);
+
+diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
+index 14ed5e5d2..a1fc372d4 100644
+--- a/clang/lib/CodeGen/TargetInfo.h
++++ b/clang/lib/CodeGen/TargetInfo.h
+@@ -459,6 +459,9 @@ std::unique_ptr<TargetCodeGenInfo>
+ createLoongArchTargetCodeGenInfo(CodeGenModule &CGM, unsigned GRLen,
+                                  unsigned FLen);
+
++std::unique_ptr<TargetCodeGenInfo>
++createSw64TargetCodeGenInfo(CodeGenModule &CGM);
++
+ std::unique_ptr<TargetCodeGenInfo>
+ createM68kTargetCodeGenInfo(CodeGenModule &CGM);
+
+diff --git a/clang/lib/CodeGen/Targets/Sw64.cpp b/clang/lib/CodeGen/Targets/Sw64.cpp
+new file mode 100644
+index 000000000..0752efaef
+--- /dev/null
++++ b/clang/lib/CodeGen/Targets/Sw64.cpp
+@@ -0,0 +1,545 @@
++//===---- Sw64.cpp - Encapsulate Sw64 target details ------------*- C++ -*-===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++//
++// These classes wrap the information about a call or function
++// definition used to handle ABI compliancy.
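++//
++// A note on provenance, inferred from the N32/64 references in the comments
++// below: the argument and return classification here follows the MIPS
++// N32/N64 scheme, adapted to 64-bit GPR slots and 256-bit SIMD vectors.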
++//
++//===----------------------------------------------------------------------===//
++
++#include "TargetInfo.h"
++#include "ABIInfoImpl.h"
++#include "clang/Basic/DiagnosticFrontend.h"
++#include "llvm/ADT/SmallBitVector.h"
++
++using namespace clang;
++using namespace clang::CodeGen;
++
++//===----------------------------------------------------------------------===//
++// SW64 ABI Implementation.
++//===----------------------------------------------------------------------===//
++
++namespace {
++class Sw64ABIInfo : public ABIInfo {
++  /// Similar to llvm::CCState, but for Clang.
++  struct CCState {
++    CCState(CGFunctionInfo &FI)
++        : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()),
++          Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {}
++
++    llvm::SmallBitVector IsPreassigned;
++    unsigned CC = CallingConv::CC_C;
++    unsigned FreeRegs = 0;
++    unsigned FreeSSERegs = 0;
++    RequiredArgs Required;
++    bool IsDelegateCall = false;
++  };
++  unsigned MinABIStackAlignInBytes, StackAlignInBytes;
++  void CoerceToIntArgs(uint64_t TySize,
++                       SmallVectorImpl<llvm::Type *> &ArgList) const;
++  llvm::Type *HandleAggregates(QualType Ty, uint64_t TySize) const;
++  llvm::Type *returnAggregateInRegs(QualType RetTy, uint64_t Size) const;
++  llvm::Type *getPaddingType(uint64_t Align, uint64_t Offset) const;
++
++public:
++  Sw64ABIInfo(CodeGenTypes &CGT)
++      : ABIInfo(CGT), MinABIStackAlignInBytes(8), StackAlignInBytes(16) {}
++
++  ABIArgInfo classifyReturnType(QualType RetTy) const;
++  ABIArgInfo classifyArgumentType(QualType RetTy) const;
++  ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset,
++                                  CCState &State) const;
++  ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
++  void computeInfo(CGFunctionInfo &FI) const override;
++  Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
++                    QualType Ty) const override;
++  ABIArgInfo extendType(QualType Ty) const;
++};
++
++class Sw64TargetCodeGenInfo : public TargetCodeGenInfo {
++  unsigned SizeOfUnwindException;
++
++public:
++  Sw64TargetCodeGenInfo(CodeGenTypes &CGT)
++      : TargetCodeGenInfo(std::make_unique<Sw64ABIInfo>(CGT)),
++        SizeOfUnwindException(32) {}
++
++  int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
++    return 30;
++  }
++
++  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
++                           CodeGen::CodeGenModule &CGM) const override {
++    const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
++    if (!FD)
++      return;
++
++    // Other attributes do not have a meaning for declarations.
++    if (GV->isDeclaration())
++      return;
++
++    // FIXME: The interrupt attribute is not handled for SW64 yet.
++    // const auto *attr = FD->getAttr<Sw64InterruptAttr>();
++    // if (!attr)
++    //   return;
++    // const char *Kind;
++    // ...
++    //
++  }
++
++  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
++                               llvm::Value *Address) const override;
++
++  unsigned getSizeOfUnwindException() const override {
++    return SizeOfUnwindException;
++  }
++};
++} // namespace
++
++void Sw64ABIInfo::CoerceToIntArgs(
++    uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const {
++  llvm::IntegerType *IntTy =
++      llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8);
++
++  // Add (TySize / MinABIStackAlignInBytes) args of IntTy.
++  for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N)
++    ArgList.push_back(IntTy);
++
++  // If necessary, add one more integer type to ArgList.
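++  // Worked example (illustrative): with MinABIStackAlignInBytes == 8, a
++  // 20-byte aggregate (TySize == 160) coerces to { i64, i64, i32 }: two
++  // full 64-bit slots plus a 32-bit remainder.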
++  unsigned R = TySize % (MinABIStackAlignInBytes * 8);
++
++  if (R)
++    ArgList.push_back(llvm::IntegerType::get(getVMContext(), R));
++}
++
++// In N32/64, an aligned double precision floating point field is passed in
++// a register.
++llvm::Type *Sw64ABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const {
++  SmallVector<llvm::Type *, 8> ArgList, IntArgList;
++
++  if (Ty->isComplexType())
++    return CGT.ConvertType(Ty);
++
++  const RecordType *RT = Ty->getAs<RecordType>();
++
++  // Unions/vectors are passed in integer registers.
++  if (!RT || !RT->isStructureOrClassType()) {
++    CoerceToIntArgs(TySize, ArgList);
++    return llvm::StructType::get(getVMContext(), ArgList);
++  }
++
++  const RecordDecl *RD = RT->getDecl();
++  const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
++  assert(!(TySize % 8) && "Size of structure must be multiple of 8.");
++
++  uint64_t LastOffset = 0;
++  unsigned idx = 0;
++  llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64);
++
++  // Iterate over fields in the struct/class and check if there are any aligned
++  // double fields.
++  for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
++       i != e; ++i, ++idx) {
++    const QualType Ty = i->getType();
++    const BuiltinType *BT = Ty->getAs<BuiltinType>();
++
++    if (!BT || BT->getKind() != BuiltinType::Double)
++      continue;
++
++    uint64_t Offset = Layout.getFieldOffset(idx);
++    if (Offset % 64) // Ignore doubles that are not aligned.
++      continue;
++
++    // Add ((Offset - LastOffset) / 64) args of type i64.
++    for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j)
++      ArgList.push_back(I64);
++
++    // Add double type.
++    // ArgList.push_back(llvm::Type::getDoubleTy(getVMContext()));
++    ArgList.push_back(llvm::Type::getInt64Ty(getVMContext()));
++    LastOffset = Offset + 64;
++  }
++
++  CoerceToIntArgs(TySize - LastOffset, IntArgList);
++  ArgList.append(IntArgList.begin(), IntArgList.end());
++
++  return llvm::StructType::get(getVMContext(), ArgList);
++}
++
++llvm::Type *Sw64ABIInfo::getPaddingType(uint64_t OrigOffset,
++                                        uint64_t Offset) const {
++  if (OrigOffset + MinABIStackAlignInBytes > Offset)
++    return nullptr;
++
++  return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8);
++}
++
++ABIArgInfo Sw64ABIInfo::classifyArgumentType(QualType Ty) const {
++  Ty = useFirstFieldIfTransparentUnion(Ty);
++  if (isAggregateTypeForABI(Ty)) {
++    // Records with non-trivial destructors/constructors should not be passed
++    // by value.
++    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
++      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
++
++    return getNaturalAlignIndirect(Ty);
++  }
++
++  // Treat an enum type as its underlying type.
++  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
++    Ty = EnumTy->getDecl()->getIntegerType();
++
++  if (const BuiltinType *BuiltinTy = Ty->getAs<BuiltinType>()) {
++    if (BuiltinTy->getKind() == BuiltinType::LongDouble &&
++        getContext().getTypeSize(Ty) == 128)
++      return getNaturalAlignIndirect(Ty, false);
++  }
++  return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
++                                           : ABIArgInfo::getDirect();
++}
++ABIArgInfo Sw64ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
++                                          CCState &State) const {
++  if (!ByVal) {
++    if (State.FreeRegs) {
++      --State.FreeRegs; // Non-byval indirects just use one pointer.
++      return getNaturalAlignIndirectInReg(Ty);
++    }
++    return getNaturalAlignIndirect(Ty, false);
++  }
++
++  // Compute the byval alignment.
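++  // Worked example (illustrative): a 32-byte-aligned aggregate yields
++  // TypeAlign == 32 > MinABIStackAlignInBytes (8), so the byval copy is
++  // passed with Realign == true.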
++  unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
++  return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
++                                 /*Realign=*/TypeAlign >
++                                     MinABIStackAlignInBytes);
++}
++
++ABIArgInfo Sw64ABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset,
++                                             CCState &State) const {
++  Ty = useFirstFieldIfTransparentUnion(Ty);
++  // Check with the C++ ABI first.
++  const RecordType *RT = Ty->getAs<RecordType>();
++  if (RT) {
++    CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
++    if (RAA == CGCXXABI::RAA_Indirect) {
++      return getIndirectResult(Ty, /*ByVal=*/false, State);
++    } else if (RAA == CGCXXABI::RAA_DirectInMemory) {
++      return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
++    }
++  }
++
++  if (Ty->isVectorType()) {
++    uint64_t Size = getContext().getTypeSize(Ty);
++    if (Size > 256)
++      return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
++    else if (Size < 128) {
++      llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
++      return ABIArgInfo::getDirect(CoerceTy);
++    }
++  }
++
++  if (Ty->isAnyComplexType()) {
++    if (getContext().getTypeSize(Ty) <= 128) {
++      return ABIArgInfo::getDirect();
++    } else {
++      return getNaturalAlignIndirect(Ty, false);
++    }
++  }
++
++  uint64_t OrigOffset = Offset;
++  uint64_t TySize = getContext().getTypeSize(Ty);
++  uint64_t Align = getContext().getTypeAlign(Ty) / 8;
++
++  Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes),
++                   (uint64_t)StackAlignInBytes);
++  unsigned CurrOffset = llvm::alignTo(Offset, Align);
++  Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8;
++
++  if (isAggregateTypeForABI(Ty)) {
++    // Ignore empty aggregates.
++    if (TySize == 0)
++      return ABIArgInfo::getIgnore();
++
++    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
++      Offset = OrigOffset + MinABIStackAlignInBytes;
++      return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
++    }
++    llvm::LLVMContext &LLVMContext = getVMContext();
++    unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 63) / 64;
++    if (SizeInRegs <= State.FreeRegs) {
++      llvm::IntegerType *Int64 = llvm::Type::getInt64Ty(LLVMContext);
++      SmallVector<llvm::Type *> Elements(SizeInRegs, Int64);
++      llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
++      return ABIArgInfo::getDirectInReg(Result);
++    } else {
++      // If we have reached here, aggregates are passed directly by coercing to
++      // another structure type. Padding is inserted if the offset of the
++      // aggregate is unaligned.
++      ABIArgInfo ArgInfo =
++          ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0,
++                                getPaddingType(OrigOffset, CurrOffset));
++      ArgInfo.setInReg(true);
++      return ArgInfo;
++    }
++  }
++
++  if (const BuiltinType *BuiltinTy = Ty->getAs<BuiltinType>()) {
++    if (BuiltinTy->getKind() == BuiltinType::LongDouble &&
++        getContext().getTypeSize(Ty) == 128)
++      return getNaturalAlignIndirect(Ty, false);
++  }
++
++  // Treat an enum type as its underlying type.
++  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
++    Ty = EnumTy->getDecl()->getIntegerType();
++
++  // All integral types are promoted to the GPR width.
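++  // Worked example (illustrative): a signed short argument is widened to
++  // the 64-bit GPR width via extendType(), i.e. passed as an extended
++  // (signext) value rather than a bare i16.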
++  if (Ty->isIntegralOrEnumerationType())
++    return extendType(Ty);
++
++  return ABIArgInfo::getDirect(nullptr, 0,
++                               getPaddingType(OrigOffset, CurrOffset));
++}
++
++llvm::Type *Sw64ABIInfo::returnAggregateInRegs(QualType RetTy,
++                                               uint64_t Size) const {
++  const RecordType *RT = RetTy->getAs<RecordType>();
++  SmallVector<llvm::Type *, 8> RTList;
++
++  if (RT && RT->isStructureOrClassType()) {
++    const RecordDecl *RD = RT->getDecl();
++    const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
++    unsigned FieldCnt = Layout.getFieldCount();
++
++    // N32/64 returns structs/classes in floating point registers if the
++    // following conditions are met:
++    // 1. The size of the struct/class is no larger than 128-bit.
++    // 2. The struct/class has one or two fields all of which are floating
++    //    point types.
++    // 3. The offset of the first field is zero (this follows what gcc does).
++    //
++    // Any other composite results are returned in integer registers.
++    //
++    if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) {
++      RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end();
++      for (; b != e; ++b) {
++        const BuiltinType *BT = b->getType()->getAs<BuiltinType>();
++
++        if (!BT || !BT->isFloatingPoint())
++          break;
++
++        RTList.push_back(CGT.ConvertType(b->getType()));
++      }
++      if (b == e)
++        return llvm::StructType::get(getVMContext(), RTList,
++                                     RD->hasAttr<PackedAttr>());
++
++      RTList.clear();
++    }
++  }
++
++  CoerceToIntArgs(Size, RTList);
++  return llvm::StructType::get(getVMContext(), RTList);
++}
++
++ABIArgInfo Sw64ABIInfo::classifyReturnType(QualType RetTy) const {
++  uint64_t Size = getContext().getTypeSize(RetTy);
++
++  if (RetTy->isVoidType())
++    return ABIArgInfo::getIgnore();
++
++  // N32/64 ignores zero-sized return values.
++  if (Size == 0)
++    return ABIArgInfo::getIgnore();
++
++  // 256-bit vector types are returned directly in registers.
++  if (RetTy->isVectorType() && Size == 256)
++    return ABIArgInfo::getDirect();
++
++  if (const auto *BT = RetTy->getAs<BuiltinType>())
++    if (BT->getKind() == BuiltinType::LongDouble || Size >= 128)
++      return getNaturalAlignIndirect(RetTy);
++
++  if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) {
++    if ((RetTy->hasFloatingRepresentation() && Size <= 128) ||
++        (!RetTy->hasFloatingRepresentation() && Size <= 64)) {
++      if (RetTy->isComplexType())
++        return ABIArgInfo::getDirect();
++
++      if (RetTy->isComplexIntegerType() ||
++          (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) {
++        ABIArgInfo ArgInfo =
++            ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size));
++        ArgInfo.setInReg(true);
++        return ArgInfo;
++      }
++    }
++
++    return getNaturalAlignIndirect(RetTy);
++  }
++
++  // Treat an enum type as its underlying type.
++  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
++    RetTy = EnumTy->getDecl()->getIntegerType();
++
++  if (isPromotableIntegerTypeForABI(RetTy))
++    return ABIArgInfo::getExtend(RetTy);
++
++  if ((RetTy->isUnsignedIntegerOrEnumerationType() ||
++       RetTy->isSignedIntegerOrEnumerationType()) &&
++      Size == 32)
++    return ABIArgInfo::getSignExtend(RetTy);
++
++  return ABIArgInfo::getDirect();
++}
++
++void Sw64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
++
++  CCState State(FI);
++  if (FI.getHasRegParm()) {
++    State.FreeRegs = FI.getRegParm();
++  } else {
++    State.FreeRegs = 6;
++  }
++
++  ABIArgInfo &RetInfo = FI.getReturnInfo();
++  if (!getCXXABI().classifyReturnType(FI))
++    RetInfo = classifyReturnType(FI.getReturnType());
++
++  // Check if a pointer to an aggregate is passed as a hidden argument.
++  uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0;
++
++  for (auto &I : FI.arguments())
++    I.info = classifyArgumentType(I.type, Offset, State);
++}
++
++Address Sw64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
++                               QualType OrigTy) const {
++
++  QualType Ty = OrigTy;
++  auto TyAlign = getContext().getTypeInfoInChars(Ty).Align;
++  if (!Ty->isStructureOrClassType() && (TyAlign.getQuantity() <= 8)) {
++    ABIArgInfo AI = classifyArgumentType(Ty);
++    return EmitVAArgInstr(CGF, VAListAddr, OrigTy, AI);
++  }
++
++  bool DidPromote = false;
++  auto TyInfo = getContext().getTypeInfoInChars(Ty);
++
++  // The alignment of things in the argument area is never larger than
++  // StackAlignInBytes.
++  TyInfo.Align =
++      std::min(TyInfo.Align, CharUnits::fromQuantity(StackAlignInBytes));
++
++  bool IsIndirect = false;
++  bool AllowHigherAlign = true;
++
++  CharUnits DirectSize, DirectAlign;
++  if (IsIndirect) {
++    DirectAlign = CGF.getPointerAlign();
++  } else {
++    DirectAlign = TyInfo.Align;
++  }
++  // Cast the address we've calculated to the right type.
++  llvm::Type *DirectTy = CGF.ConvertTypeForMem(Ty), *ElementTy = DirectTy;
++  if (IsIndirect)
++    DirectTy = DirectTy->getPointerTo(0);
++
++  CharUnits SlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes);
++
++  // Handle the va_list defined for Sw64: struct { char *ptr; int offset; }.
++  Address vaList_ptr_p = CGF.Builder.CreateStructGEP(VAListAddr, 0);
++  llvm::Value *vaList_ptr = CGF.Builder.CreateLoad(vaList_ptr_p);
++  Address vaList_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1);
++  llvm::Value *vaList_offset = CGF.Builder.CreateLoad(vaList_offset_p);
++
++  uint64_t TySize = TyInfo.Width.getQuantity();
++  llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, TySize);
++  CGF.Builder.CreateStore(CGF.Builder.CreateAdd(vaList_offset, Offset),
++                          vaList_offset_p);
++
++  llvm::Value *GPAddr =
++      CGF.Builder.CreateGEP(CGF.Int8Ty, vaList_ptr, vaList_offset);
++
++  // If the CC aligns values higher than the slot size, do so if needed.
++  Address Addr = Address::invalid();
++  if (AllowHigherAlign && DirectAlign > SlotSize) {
++    Addr = Address(emitRoundPointerUpToAlignment(CGF, GPAddr, DirectAlign),
++                   CGF.Int8Ty, DirectAlign);
++  } else {
++    Addr = Address(GPAddr, CGF.Int8Ty, SlotSize);
++  }
++
++  Addr = Addr.withElementType(DirectTy);
++
++  if (IsIndirect) {
++    Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, TyInfo.Align);
++  }
++
++  // If there was a promotion, "unpromote" into a temporary.
++  // TODO: can we just use a pointer into a subset of the original slot?
++  if (DidPromote) {
++    Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp");
++    llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr);
++
++    // Truncate down to the right width.
++    llvm::Type *IntTy =
++        (OrigTy->isIntegerType() ? Temp.getElementType() : CGF.IntPtrTy);
++    llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy);
++    if (OrigTy->isPointerType())
++      V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType());
++
++    CGF.Builder.CreateStore(V, Temp);
++    Addr = Temp;
++  }
++
++  return Addr;
++}
++
++ABIArgInfo Sw64ABIInfo::extendType(QualType Ty) const {
++  int TySize = getContext().getTypeSize(Ty);
++
++  // The SW64 ABI requires unsigned 32-bit integers to be sign-extended.
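++  // Illustrative example: a uint32_t argument holding 0x80000000 is passed
++  // in a 64-bit GPR as 0xFFFFFFFF80000000, i.e. it is sign- rather than
++  // zero-extended.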
++  if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
++    return ABIArgInfo::getSignExtend(Ty);
++
++  return ABIArgInfo::getExtend(Ty);
++}
++
++bool Sw64TargetCodeGenInfo::initDwarfEHRegSizeTable(
++    CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const {
++  // SW64 differs significantly from MIPS here; this table should be
++  // rewritten.
++
++  // This information comes from gcc's implementation, which seems to be
++  // as canonical as it gets.
++
++  // Everything on Sw64 is 4 bytes. Double-precision FP registers
++  // are aliased to pairs of single-precision FP registers.
++  llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
++
++  // 0-31 are the general purpose registers, $0 - $31.
++  // 32-63 are the floating-point registers, $f0 - $f31.
++  // 64 and 65 are the multiply/divide registers, $hi and $lo.
++  // 66 is the (notional, I think) register for signal-handler return.
++  AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65);
++
++  // 67-74 are the floating-point status registers, $fcc0 - $fcc7.
++  // They are one bit wide and ignored here.
++
++  // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31.
++  // (coprocessor 1 is the FP unit)
++  // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31.
++  // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31.
++  // 176-181 are the DSP accumulator registers.
++  AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181);
++  return false;
++}
++
++std::unique_ptr<TargetCodeGenInfo>
++CodeGen::createSw64TargetCodeGenInfo(CodeGenModule &CGM) {
++  return std::make_unique<Sw64TargetCodeGenInfo>(CGM.getTypes());
++}
+diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt
+index a6bd2d41e..e5be06fae 100644
+--- a/clang/lib/Driver/CMakeLists.txt
++++ b/clang/lib/Driver/CMakeLists.txt
+@@ -38,6 +38,7 @@ add_clang_library(clangDriver
+   ToolChains/Arch/PPC.cpp
+   ToolChains/Arch/RISCV.cpp
+   ToolChains/Arch/Sparc.cpp
++  ToolChains/Arch/Sw64.cpp
+   ToolChains/Arch/SystemZ.cpp
+   ToolChains/Arch/VE.cpp
+   ToolChains/Arch/X86.cpp
+@@ -88,6 +89,7 @@ add_clang_library(clangDriver
+   ToolChains/XCore.cpp
+   ToolChains/PPCLinux.cpp
+   ToolChains/PPCFreeBSD.cpp
++  ToolChains/Sw64Toolchain.cpp
+   ToolChains/InterfaceStubs.cpp
+   ToolChains/ZOS.cpp
+   Types.cpp
+diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
+index bdbdad936..f03fc3d34 100644
+--- a/clang/lib/Driver/Driver.cpp
++++ b/clang/lib/Driver/Driver.cpp
+@@ -49,6 +49,7 @@
+ #include "ToolChains/RISCVToolchain.h"
+ #include "ToolChains/SPIRV.h"
+ #include "ToolChains/Solaris.h"
++#include "ToolChains/Sw64Toolchain.h"
+ #include "ToolChains/TCE.h"
+ #include "ToolChains/VEToolchain.h"
+ #include "ToolChains/WebAssembly.h"
+@@ -6337,6 +6338,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
+   case llvm::Triple::csky:
+     TC = std::make_unique<toolchains::CSKYToolChain>(*this, Target, Args);
+     break;
++  case llvm::Triple::sw_64:
++    TC = std::make_unique<toolchains::Sw64Toolchain>(*this, Target, Args);
++    break;
+   default:
+     if (Target.getVendor() == llvm::Triple::Myriad)
+       TC = std::make_unique<toolchains::MyriadToolChain>(*this, Target,
+diff --git a/clang/lib/Driver/ToolChains/Arch/Sw64.cpp b/clang/lib/Driver/ToolChains/Arch/Sw64.cpp
+new file mode 100644
+index 000000000..744c26b3b
+--- /dev/null
++++ b/clang/lib/Driver/ToolChains/Arch/Sw64.cpp
+@@ -0,0 +1,94 @@
++//===--------- Sw64.cpp - Sw64 Helpers for Tools ----------------*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++
++#include "Sw64.h"
++#include "ToolChains/CommonArgs.h"
++#include "clang/Driver/Options.h"
++#include "llvm/Option/ArgList.h"
++#include "llvm/Support/Sw64TargetParser.h"
++
++using namespace clang::driver;
++using namespace clang::driver::tools;
++using namespace clang;
++using namespace llvm::opt;
++
++const char *Sw64::getSw64TargetCPU(const ArgList &Args) {
++  if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ)) {
++    StringRef Mcpu = llvm::Sw64::getMcpuFromMArch(A->getValue());
++    if (Mcpu != "")
++      return Mcpu.data();
++    else
++      return A->getValue();
++  }
++  if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ))
++    return A->getValue();
++  if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ))
++    return A->getValue();
++  return "sw6b";
++}
++
++void Sw64::getSw64TargetFeatures(const Driver &D, const ArgList &Args,
++                                 std::vector<StringRef> &Features) {
++  // -m(no-)simd overrides use of the vector facility.
++  AddTargetFeature(Args, Features, options::OPT_msimd, options::OPT_mno_simd,
++                   "simd");
++
++  if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
++    StringRef Mcpu = A->getValue();
++    if (Mcpu.startswith("sw6b") || Mcpu.startswith("sw4d"))
++      Features.push_back("+core3b");
++    else if (Mcpu.startswith("sw8a"))
++      Features.push_back("+core4");
++  }
++
++  if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
++    StringRef March = A->getValue();
++    if (March.startswith("core3b"))
++      Features.push_back("+core3b");
++    else if (March.startswith("core4"))
++      Features.push_back("+core4");
++  }
++
++  if (Args.hasArg(options::OPT_Sw64ffixed_1))
++    Features.push_back("+reserve-r1");
++  if (Args.hasArg(options::OPT_Sw64ffixed_2))
++    Features.push_back("+reserve-r2");
++  if (Args.hasArg(options::OPT_Sw64ffixed_3))
++    Features.push_back("+reserve-r3");
++  if (Args.hasArg(options::OPT_Sw64ffixed_4))
++    Features.push_back("+reserve-r4");
++  if (Args.hasArg(options::OPT_Sw64ffixed_5))
++    Features.push_back("+reserve-r5");
++  if (Args.hasArg(options::OPT_Sw64ffixed_6))
++    Features.push_back("+reserve-r6");
++  if (Args.hasArg(options::OPT_Sw64ffixed_7))
++    Features.push_back("+reserve-r7");
++  if (Args.hasArg(options::OPT_Sw64ffixed_8))
++    Features.push_back("+reserve-r8");
++  if (Args.hasArg(options::OPT_Sw64ffixed_9))
++    Features.push_back("+reserve-r9");
++  if (Args.hasArg(options::OPT_Sw64ffixed_10))
++    Features.push_back("+reserve-r10");
++  if (Args.hasArg(options::OPT_Sw64ffixed_11))
++    Features.push_back("+reserve-r11");
++  if (Args.hasArg(options::OPT_Sw64ffixed_12))
++    Features.push_back("+reserve-r12");
++  if (Args.hasArg(options::OPT_Sw64ffixed_13))
++    Features.push_back("+reserve-r13");
++  if (Args.hasArg(options::OPT_Sw64ffixed_14))
++    Features.push_back("+reserve-r14");
++  if (Args.hasArg(options::OPT_Sw64ffixed_22))
++    Features.push_back("+reserve-r22");
++  if (Args.hasArg(options::OPT_Sw64ffixed_23))
++    Features.push_back("+reserve-r23");
++  if (Args.hasArg(options::OPT_Sw64ffixed_24))
++    Features.push_back("+reserve-r24");
++  if (Args.hasArg(options::OPT_Sw64ffixed_25))
++    Features.push_back("+reserve-r25");
++}
+diff --git a/clang/lib/Driver/ToolChains/Arch/Sw64.h b/clang/lib/Driver/ToolChains/Arch/Sw64.h
+new file mode 100644
+index 000000000..cc319026b
+--- /dev/null
++++ b/clang/lib/Driver/ToolChains/Arch/Sw64.h
+@@ -0,0 +1,34 @@
++//===--- Sw64.h - Sw64-specific Tool Helpers --------------*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_SW64_H
++#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_SW64_H
++
++#include "clang/Driver/Driver.h"
++#include "llvm/ADT/StringRef.h"
++#include "llvm/Option/Option.h"
++#include "llvm/TargetParser/Triple.h"
++#include <vector>
++
++namespace clang {
++namespace driver {
++namespace tools {
++namespace Sw64 {
++
++const char *getSw64TargetCPU(const llvm::opt::ArgList &Args);
++
++void getSw64TargetFeatures(const Driver &D, const llvm::opt::ArgList &Args,
++                           std::vector<llvm::StringRef> &Features);
++
++} // end namespace Sw64
++} // end namespace tools
++} // end namespace driver
++} // end namespace clang
++
++#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_SW64_H
+diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
+index dd989c255..c55296533 100644
+--- a/clang/lib/Driver/ToolChains/Clang.cpp
++++ b/clang/lib/Driver/ToolChains/Clang.cpp
+@@ -17,6 +17,7 @@
+ #include "Arch/PPC.h"
+ #include "Arch/RISCV.h"
+ #include "Arch/Sparc.h"
++#include "Arch/Sw64.h"
+ #include "Arch/SystemZ.h"
+ #include "Arch/VE.h"
+ #include "Arch/X86.h"
+@@ -53,6 +54,7 @@
+ #include "llvm/Support/Path.h"
+ #include "llvm/Support/Process.h"
+ #include "llvm/Support/RISCVISAInfo.h"
++#include "llvm/Support/Sw64TargetParser.h"
+ #include "llvm/Support/YAMLParser.h"
+ #include "llvm/TargetParser/ARMTargetParserCommon.h"
+ #include "llvm/TargetParser/Host.h"
+@@ -478,6 +480,7 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args,
+   case llvm::Triple::mips64el:
+   case llvm::Triple::mips:
+   case llvm::Triple::mipsel:
++  case llvm::Triple::sw_64:
+   case llvm::Triple::systemz:
+   case llvm::Triple::x86:
+   case llvm::Triple::x86_64:
+@@ -1735,6 +1738,10 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple,
+     AddSparcTargetArgs(Args, CmdArgs);
+     break;
+
++  case llvm::Triple::sw_64:
++    AddSw64TargetArgs(Args, CmdArgs);
++    break;
++
+   case llvm::Triple::systemz:
+     AddSystemZTargetArgs(Args, CmdArgs);
+     break;
+@@ -2233,6 +2240,34 @@ void Clang::AddSparcTargetArgs(const ArgList &Args,
+   }
+ }
+
++void Clang::AddSw64TargetArgs(const ArgList &Args,
++                              ArgStringList &CmdArgs) const {
++  std::string TuneCPU;
++
++  if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) {
++    StringRef Name = A->getValue();
++
++    Name = llvm::Sw64::resolveTuneCPUAlias(Name, true);
++    TuneCPU = std::string(Name);
++  }
++  if (!TuneCPU.empty()) {
++    CmdArgs.push_back("-tune-cpu");
++    CmdArgs.push_back(Args.MakeArgString(TuneCPU));
++  }
++
++  if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
++    StringRef OOpt;
++    if (A->getOption().matches(options::OPT_O))
++      OOpt = A->getValue();
++
++    if (A->getOption().matches(options::OPT_O0) || OOpt == "1" || OOpt == "s")
++      return;
++
++    CmdArgs.push_back("-mllvm");
++    CmdArgs.push_back("-loop-prefetch-writes=true");
++  }
++}
++
+ void Clang::AddSystemZTargetArgs(const ArgList &Args,
+                                  ArgStringList &CmdArgs) const {
+   if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) {
+@@ -5040,6 +5075,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
+                                           options::OPT_Wa_COMMA,
+                                           options::OPT_Xassembler,
+                                           options::OPT_mllvm,
++                                          options::OPT_fsw_int_divmod,
++                                          options::OPT_fsw_shift_word,
++                                          options::OPT_fsw_rev,
++                                          options::OPT_fsw_recip,
++                                          
options::OPT_fsw_fprnd, ++ options::OPT_fsw_cmov, ++ options::OPT_fsw_auto_inc_dec, ++ options::OPT_fsw_use_cas, + }; + for (const auto &A : Args) + if (llvm::is_contained(kBitcodeOptionIgnorelist, A->getOption().getID())) +@@ -5231,6 +5274,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + unsigned PICLevel; + bool IsPIE; + std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(TC, Args); ++ if (TC.getArch() == llvm::Triple::sw_64 && ++ RelocationModel != llvm::Reloc::PIC_) ++ RelocationModel = llvm::Reloc::PIC_; ++ + Arg *LastPICDataRelArg = + Args.getLastArg(options::OPT_mno_pic_data_is_text_relative, + options::OPT_mpic_data_is_text_relative); +@@ -5533,6 +5580,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + else if (TC.getTriple().isPPC() && + (A->getOption().getID() != options::OPT_mlong_double_80)) + A->render(Args, CmdArgs); ++ else if (TC.getTriple().isSw64()) ++ A->render(Args, CmdArgs); + else + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getAsString(Args) << TripleStr; +@@ -6486,6 +6535,46 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + + Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ); + ++ if (Args.getLastArg(options::OPT_fsw_int_divmod)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-sw-int-divmod"); ++ } ++ ++ if (Args.getLastArg(options::OPT_fsw_shift_word)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-sw-shift-word"); ++ } ++ ++ if (Args.getLastArg(options::OPT_fsw_rev)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-sw-rev"); ++ } ++ ++ if (Args.getLastArg(options::OPT_fsw_recip)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-sw-recip"); ++ } ++ ++ if (Args.getLastArg(options::OPT_fsw_fprnd)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-sw-fprnd"); ++ } ++ ++ if (Args.getLastArg(options::OPT_fsw_cmov)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-sw-cmov"); ++ } ++ ++ if (Args.getLastArg(options::OPT_fsw_auto_inc_dec)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-sw-auto-inc-dec"); ++ } ++ ++ if (Args.getLastArg(options::OPT_fsw_use_cas)) { ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back("-sw-use-cas"); ++ } ++ + // -fno-strict-overflow implies -fwrapv if it isn't disabled, but + // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. 
+ if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { +diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h +index 64fc86b6b..667fe246d 100644 +--- a/clang/lib/Driver/ToolChains/Clang.h ++++ b/clang/lib/Driver/ToolChains/Clang.h +@@ -69,6 +69,8 @@ private: + llvm::opt::ArgStringList &CmdArgs) const; + void AddSparcTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; ++ void AddSw64TargetArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; + void AddSystemZTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + void AddX86TargetArgs(const llvm::opt::ArgList &Args, +diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp +index 0d6907b8e..39a62c4f1 100644 +--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp +@@ -16,6 +16,7 @@ + #include "Arch/PPC.h" + #include "Arch/RISCV.h" + #include "Arch/Sparc.h" ++#include "Arch/Sw64.h" + #include "Arch/SystemZ.h" + #include "Arch/VE.h" + #include "Arch/X86.h" +@@ -478,6 +479,9 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + return loongarch::getLoongArchTargetCPU(Args, T); ++ ++ case llvm::Triple::sw_64: ++ return Sw64::getSw64TargetCPU(Args); + } + } + +@@ -566,6 +570,9 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + case llvm::Triple::loongarch64: + loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); + break; ++ case llvm::Triple::sw_64: ++ Sw64::getSw64TargetFeatures(D, Args, Features); ++ break; + } + + for (auto Feature : unifyTargetFeatures(Features)) { +diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp +index b5f143d33..44e083a20 100644 +--- a/clang/lib/Driver/ToolChains/Gnu.cpp ++++ b/clang/lib/Driver/ToolChains/Gnu.cpp +@@ -14,6 +14,7 @@ + #include "Arch/PPC.h" + #include "Arch/RISCV.h" + #include "Arch/Sparc.h" ++#include "Arch/Sw64.h" + #include "Arch/SystemZ.h" + #include "CommonArgs.h" + #include "Linux.h" +@@ -287,6 +288,8 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { + return "elf64ve"; + case llvm::Triple::csky: + return "cskyelf_linux"; ++ case llvm::Triple::sw_64: ++ return "elf64sw_64"; + default: + return nullptr; + } +@@ -921,6 +924,11 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, + CmdArgs.push_back(Args.MakeArgString("-mmsa")); + } + ++ if (Arg *A = Args.getLastArg(options::OPT_msimd, options::OPT_mno_simd)) { ++ if (A->getOption().matches(options::OPT_msimd)) ++ CmdArgs.push_back(Args.MakeArgString("-msimd")); ++ } ++ + Args.AddLastArg(CmdArgs, options::OPT_mhard_float, + options::OPT_msoft_float); + +@@ -2442,6 +2450,11 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", + "s390x-suse-linux", "s390x-redhat-linux"}; + ++ static const char *const Sw64LibDirs[] = {"/lib64", "/lib", ++ "/lib/gcc/sw_64-sunway-linux-gnu/", ++ "/sw_64-sunway-linux-gnu/lib"}; ++ static const char *const Sw64Triples[] = { ++ "sw_64-sunway-linux-gnu", "sw_64-unknown-linux-gnu", "sw_64-linux-gnu"}; + + using std::begin; + using std::end; +@@ -2695,6 +2708,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( + LibDirs.append(begin(SystemZLibDirs), end(SystemZLibDirs)); + 
TripleAliases.append(begin(SystemZTriples), end(SystemZTriples)); + break; ++ case llvm::Triple::sw_64: ++ LibDirs.append(begin(Sw64LibDirs), end(Sw64LibDirs)); ++ TripleAliases.append(begin(Sw64Triples), end(Sw64Triples)); ++ break; + default: + // By default, just rely on the standard lib directories and the original + // triple. +@@ -2956,6 +2973,7 @@ Generic_GCC::getDefaultUnwindTableLevel(const ArgList &Args) const { + // rhbz#1655546 + case llvm::Triple::systemz: + case llvm::Triple::arm: ++ case llvm::Triple::sw_64: + return UnwindTableLevel::Asynchronous; + default: + return UnwindTableLevel::None; +@@ -3315,4 +3333,9 @@ void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs, + if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, + options::OPT_fno_use_init_array, true)) + CC1Args.push_back("-fno-use-init-array"); ++ if (getTriple().getArch() == llvm::Triple::sw_64 && ++ DriverArgs.hasArg(options::OPT_Sw64mieee)) { ++ CC1Args.push_back("-mllvm"); ++ CC1Args.push_back("-mieee"); ++ } + } +diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp +index 1ba222bf8..9ffb0c1ec 100644 +--- a/clang/lib/Driver/ToolChains/Linux.cpp ++++ b/clang/lib/Driver/ToolChains/Linux.cpp +@@ -12,6 +12,7 @@ + #include "Arch/Mips.h" + #include "Arch/PPC.h" + #include "Arch/RISCV.h" ++#include "Arch/Sw64.h" + #include "CommonArgs.h" + #include "clang/Config/config.h" + #include "clang/Driver/Distro.h" +@@ -164,6 +165,8 @@ std::string Linux::getMultiarchTriple(const Driver &D, + return "sparc64-linux-gnu"; + case llvm::Triple::systemz: + return "s390x-linux-gnu"; ++ case llvm::Triple::sw_64: ++ return "sw_64-linux-gnu"; + } + return TargetTriple.str(); + } +@@ -256,6 +259,10 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) + const bool IsHexagon = Arch == llvm::Triple::hexagon; + const bool IsRISCV = Triple.isRISCV(); + const bool IsCSKY = Triple.isCSKY(); ++ const bool IsSw64 = Triple.isSw64(); ++ ++ if (IsSw64 && !SysRoot.empty()) ++ ExtraOpts.push_back("--sysroot=" + SysRoot); + + if (IsCSKY && !SelectedMultilibs.empty()) + SysRoot = SysRoot + SelectedMultilibs.back().osSuffix(); +@@ -330,6 +337,11 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) + addPathIfExists(D, concat(SysRoot, "/usr", OSLibDir, ABIName), Paths); + } + ++ if (IsSw64) { ++ addPathIfExists(D, SysRoot + "/usr/lib/gcc/sw_64-sunway-linux-gnu/", Paths); ++ addPathIfExists(D, SysRoot + "/usr/sw_64-sunway-linux-gnu/lib", Paths); ++ } ++ + Generic_GCC::AddMultiarchPaths(D, SysRoot, OSLibDir, Paths); + + addPathIfExists(D, concat(SysRoot, "/lib"), Paths); +@@ -577,6 +589,10 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { + LibDir = "lib64"; + Loader = "ld-linux.so.2"; + break; ++ case llvm::Triple::sw_64: ++ LibDir = "lib"; ++ Loader = "ld-linux.so.2"; ++ break; + case llvm::Triple::systemz: + LibDir = "lib"; + Loader = "ld64.so.1"; +@@ -773,6 +789,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { + const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; + const bool IsMIPS = getTriple().isMIPS32(); + const bool IsMIPS64 = getTriple().isMIPS64(); ++ const bool IsSw64 = getTriple().isSw64(); + const bool IsPowerPC64 = getTriple().getArch() == llvm::Triple::ppc64 || + getTriple().getArch() == llvm::Triple::ppc64le; + const bool IsAArch64 = getTriple().getArch() == llvm::Triple::aarch64 || +@@ -798,15 +815,15 @@ SanitizerMask Linux::getSupportedSanitizers() const { + if (IsX86_64 || IsMIPS64 || 
IsAArch64 || IsLoongArch64)
+     Res |= SanitizerKind::DataFlow;
+   if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 ||
+-      IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64)
++      IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64 || IsSw64)
+     Res |= SanitizerKind::Leak;
+   if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ ||
+-      IsLoongArch64)
++      IsLoongArch64 || IsSw64)
+     Res |= SanitizerKind::Thread;
+   if (IsX86_64 || IsSystemZ)
+     Res |= SanitizerKind::KernelMemory;
+   if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch ||
+-      IsPowerPC64 || IsHexagon || IsLoongArch64 || IsRISCV64)
++      IsPowerPC64 || IsHexagon || IsLoongArch64 || IsRISCV64 || IsSw64)
+     Res |= SanitizerKind::Scudo;
+   if (IsX86_64 || IsAArch64 || IsRISCV64) {
+     Res |= SanitizerKind::HWAddress;
+diff --git a/clang/lib/Driver/ToolChains/Sw64Toolchain.cpp b/clang/lib/Driver/ToolChains/Sw64Toolchain.cpp
+new file mode 100644
+index 000000000..9992b350d
+--- /dev/null
++++ b/clang/lib/Driver/ToolChains/Sw64Toolchain.cpp
+@@ -0,0 +1,184 @@
++//===--- Sw64Toolchain.cpp - Sw64 ToolChain Implementations -----*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++
++#include "Sw64Toolchain.h"
++#include "CommonArgs.h"
++#include "Gnu.h"
++#include "clang/Config/config.h"
++#include "clang/Driver/Compilation.h"
++#include "clang/Driver/Driver.h"
++#include "clang/Driver/DriverDiagnostic.h"
++#include "clang/Driver/Options.h"
++#include "llvm/Option/ArgList.h"
++#include "llvm/Support/FileSystem.h"
++#include "llvm/Support/Path.h"
++#include "llvm/Support/VirtualFileSystem.h"
++
++using namespace clang;
++using namespace clang::driver;
++using namespace clang::driver::tools;
++using namespace clang::driver::toolchains;
++using namespace llvm::opt;
++
++void Sw64::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
++                                   const InputInfo &Output,
++                                   const InputInfoList &Inputs,
++                                   const ArgList &Args,
++                                   const char *LinkingOutput) const {
++  claimNoWarnArgs(Args);
++  ArgStringList CmdArgs;
++
++  Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
++
++  CmdArgs.push_back("-o");
++  CmdArgs.push_back(Output.getFilename());
++
++  for (const auto &II : Inputs)
++    CmdArgs.push_back(II.getFilename());
++
++  const char *Exec = Args.MakeArgString(
++      getToolChain().GetProgramPath("sw_64-sunway-linux-gnu-as"));
++  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
++                                         Exec, CmdArgs, Inputs, Output));
++}
++
++void Sw64::Linker::ConstructJob(Compilation &C, const JobAction &JA,
++                                const InputInfo &Output,
++                                const InputInfoList &Inputs,
++                                const ArgList &Args,
++                                const char *LinkingOutput) const {
++  const Driver &D = getToolChain().getDriver();
++  ArgStringList CmdArgs;
++
++  if (Output.isFilename()) {
++    CmdArgs.push_back("-o");
++    CmdArgs.push_back(Output.getFilename());
++  } else {
++    assert(Output.isNothing() && "Invalid output.");
++  }
++
++  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
++    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
++    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
++    CmdArgs.push_back(
++        Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
++    CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
++  }
++
++  Args.AddAllArgs(CmdArgs,
++                  {options::OPT_L, options::OPT_T_Group, options::OPT_e});
++
++  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
++
++  getToolChain().addProfileRTLibs(Args, CmdArgs);
++
++  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
++    if (D.CCCIsCXX()) {
++      if (getToolChain().ShouldLinkCXXStdlib(Args))
++        getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
++      CmdArgs.push_back("-lm");
++    }
++  }
++
++  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
++    if (Args.hasArg(options::OPT_pthread))
++      CmdArgs.push_back("-lpthread");
++    CmdArgs.push_back("-lc");
++    CmdArgs.push_back("-lgcc");
++    CmdArgs.push_back("-lgcc_s");
++    CmdArgs.push_back(
++        Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
++  }
++
++  const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
++  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
++                                         Exec, CmdArgs, Inputs, Output));
++}
++
++/// Sw64Toolchain - Sw64 tool chain which can call as(1) and ld(1) directly.
++
++Sw64Toolchain::Sw64Toolchain(const Driver &D, const llvm::Triple &Triple,
++                             const ArgList &Args)
++    : Generic_ELF(D, Triple, Args) {
++  getFilePaths().push_back(getDriver().Dir + "/../lib");
++  getFilePaths().push_back("/usr/lib");
++}
++
++Tool *Sw64Toolchain::buildAssembler() const {
++  return new Sw64::Assembler(*this);
++}
++
++Tool *Sw64Toolchain::buildLinker() const { return new Sw64::Linker(*this); }
++
++void Sw64Toolchain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
++                                              ArgStringList &CC1Args) const {
++  const Driver &D = getDriver();
++
++  if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc))
++    return;
++
++  if (!DriverArgs.hasArg(options::OPT_nostdlibinc))
++    addSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/local/include");
++
++  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
++    SmallString<128> P(D.ResourceDir);
++    llvm::sys::path::append(P, "include");
++    addSystemInclude(DriverArgs, CC1Args, P);
++  }
++  if (DriverArgs.hasArg(options::OPT_nostdlibinc))
++    return;
++
++  // Check for configure-time C include directories.
++  StringRef CIncludeDirs(C_INCLUDE_DIRS);
++  if (CIncludeDirs != "") {
++    SmallVector<StringRef, 5> dirs;
++    CIncludeDirs.split(dirs, ":");
++    for (StringRef dir : dirs) {
++      StringRef Prefix =
++          llvm::sys::path::is_absolute(dir) ? StringRef(D.SysRoot) : "";
++      addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir);
++    }
++    return;
++  }
++
++  // Add include directories specific to the selected multilib set and multilib.
++  if (GCCInstallation.isValid()) {
++    const MultilibSet::IncludeDirsFunc &Callback =
++        Multilibs.includeDirsCallback();
++    if (Callback) {
++      for (const auto &Path : Callback(GCCInstallation.getMultilib()))
++        addExternCSystemIncludeIfExists(
++            DriverArgs, CC1Args, GCCInstallation.getInstallPath() + Path);
++    }
++  }
++
++  addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include");
++}
++
++void Sw64Toolchain::addLibStdCxxIncludePaths(
++    const llvm::opt::ArgList &DriverArgs,
++    llvm::opt::ArgStringList &CC1Args) const {
++  // We need a detected GCC installation on Sw64 (similar to Linux)
++  // to provide libstdc++'s headers.
++  if (!GCCInstallation.isValid())
++    return;
++
++  // By default, look for the C++ headers in an include directory adjacent to
++  // the lib directory of the GCC installation.
++ // On Sw64 this usually looks like /usr/gcc/X.Y/include/c++/X.Y.Z ++ StringRef LibDir = GCCInstallation.getParentLibPath(); ++ StringRef TripleStr = GCCInstallation.getTriple().str(); ++ const Multilib &Multilib = GCCInstallation.getMultilib(); ++ const GCCVersion &Version = GCCInstallation.getVersion(); ++ ++ // The primary search for libstdc++ supports multiarch variants. ++ addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text, ++ TripleStr, Multilib.includeSuffix(), DriverArgs, ++ CC1Args); ++} +diff --git a/clang/lib/Driver/ToolChains/Sw64Toolchain.h b/clang/lib/Driver/ToolChains/Sw64Toolchain.h +new file mode 100644 +index 000000000..c32f628b8 +--- /dev/null ++++ b/clang/lib/Driver/ToolChains/Sw64Toolchain.h +@@ -0,0 +1,79 @@ ++//===--- Sw64Toolchain.h - Sw64 ToolChain Implementations -------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SW64TOOLCHAIN_H ++#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SW64TOOLCHAIN_H ++ ++#include "Gnu.h" ++#include "clang/Driver/Driver.h" ++#include "clang/Driver/DriverDiagnostic.h" ++#include "clang/Driver/Tool.h" ++#include "clang/Driver/ToolChain.h" ++#include "llvm/ADT/StringRef.h" ++#include "llvm/Option/Option.h" ++ ++namespace clang { ++namespace driver { ++namespace toolchains { ++ ++class LLVM_LIBRARY_VISIBILITY Sw64Toolchain : public Generic_ELF { ++public: ++ Sw64Toolchain(const Driver &D, const llvm::Triple &Triple, ++ const llvm::opt::ArgList &Args); ++ void ++ AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, ++ llvm::opt::ArgStringList &CC1Args) const override; ++ ++ void ++ addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, ++ llvm::opt::ArgStringList &CC1Args) const override; ++ ++ unsigned GetDefaultDwarfVersion() const override { return 2; } ++ ++protected: ++ Tool *buildAssembler() const override; ++ Tool *buildLinker() const override; ++}; ++ ++} // end namespace toolchains ++ ++/// Sw64 -- Directly call GNU Binutils assembler and linker ++namespace tools { ++namespace Sw64 { ++class LLVM_LIBRARY_VISIBILITY Assembler : public Tool { ++public: ++ Assembler(const ToolChain &TC) : Tool("sw_64::Assembler", "assembler", TC) {} ++ ++ bool hasIntegratedCPP() const override { return false; } ++ ++ void ConstructJob(Compilation &C, const JobAction &JA, ++ const InputInfo &Output, const InputInfoList &Inputs, ++ const llvm::opt::ArgList &TCArgs, ++ const char *LinkingOutput) const override; ++}; ++ ++class LLVM_LIBRARY_VISIBILITY Linker : public Tool { ++public: ++ Linker(const ToolChain &TC) : Tool("sw_64::Linker", "linker", TC) {} ++ ++ bool hasIntegratedCPP() const override { return false; } ++ bool isLinkJob() const override { return true; } ++ ++ void ConstructJob(Compilation &C, const JobAction &JA, ++ const InputInfo &Output, const InputInfoList &Inputs, ++ const llvm::opt::ArgList &TCArgs, ++ const char *LinkingOutput) const override; ++}; ++} // end namespace Sw64 ++} // end namespace tools ++ ++} // end namespace driver ++} // end namespace clang ++ ++#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SW64TOOLCHAIN_H +diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp +index 8c5134e25..75bf3bc28 100644 +--- a/clang/lib/Driver/XRayArgs.cpp ++++ b/clang/lib/Driver/XRayArgs.cpp +@@ -53,6 +53,7 @@ 
XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
+   case llvm::Triple::mipsel:
+   case llvm::Triple::mips64:
+   case llvm::Triple::mips64el:
++  case llvm::Triple::sw_64:
+     break;
+   default:
+     D.Diag(diag::err_drv_unsupported_opt_for_target)
+diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
+index 1fba91bed..cb88dbb30 100644
+--- a/clang/lib/Frontend/CompilerInvocation.cpp
++++ b/clang/lib/Frontend/CompilerInvocation.cpp
+@@ -1701,7 +1701,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
+   const llvm::Triple::ArchType DebugEntryValueArchs[] = {
+       llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64,
+       llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips,
+-      llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el};
++      llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el,
++      llvm::Triple::sw_64};
+
+   if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() &&
+       llvm::is_contained(DebugEntryValueArchs, T.getArch()))
+diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
+index 356009ae9..c25178f3c 100644
+--- a/clang/lib/Headers/CMakeLists.txt
++++ b/clang/lib/Headers/CMakeLists.txt
+@@ -108,6 +108,11 @@ set(sifive_files
+   sifive_vector.h
+   )
+
++set(sw64_files
++  sw64intrin.h
++  simd.h
++  )
++
+ set(systemz_files
+   s390intrin.h
+   vecintrin.h
+@@ -260,6 +265,7 @@ set(files
+   ${ppc_htm_files}
+   ${riscv_files}
+   ${sifive_files}
++  ${sw64_files}
+   ${systemz_files}
+   ${ve_files}
+   ${x86_files}
+@@ -463,6 +469,7 @@ add_header_target("mips-resource-headers" "${mips_msa_files}")
+ add_header_target("ppc-resource-headers" "${ppc_files};${ppc_wrapper_files}")
+ add_header_target("ppc-htm-resource-headers" "${ppc_htm_files}")
+ add_header_target("riscv-resource-headers" "${riscv_files};${riscv_generated_files}")
++add_header_target("sw64-resource-headers" "${sw64_files}")
+ add_header_target("systemz-resource-headers" "${systemz_files}")
+ add_header_target("ve-resource-headers" "${ve_files}")
+ add_header_target("webassembly-resource-headers" "${webassembly_files}")
+@@ -608,6 +615,12 @@ install(
+   EXCLUDE_FROM_ALL
+   COMPONENT riscv-resource-headers)
+
++install(
++  FILES ${sw64_files}
++  DESTINATION ${header_install_dir}
++  EXCLUDE_FROM_ALL
++  COMPONENT sw64-resource-headers)
++
+ install(
+   FILES ${systemz_files}
+   DESTINATION ${header_install_dir}
+diff --git a/clang/lib/Headers/simd.h b/clang/lib/Headers/simd.h
+new file mode 100644
+index 000000000..aa0cda534
+--- /dev/null
++++ b/clang/lib/Headers/simd.h
+@@ -0,0 +1,5 @@
++#ifndef __SW64SIMD
++#define __SW64SIMD
++
++#include "sw64intrin.h"
++#endif // __SW64SIMD
+diff --git a/clang/lib/Headers/sw64intrin.h b/clang/lib/Headers/sw64intrin.h
+new file mode 100644
+index 000000000..86a20c53a
+--- /dev/null
++++ b/clang/lib/Headers/sw64intrin.h
+@@ -0,0 +1,1590 @@
++
++#ifndef __SW64INTRIN_H
++#define __SW64INTRIN_H
++
++#include <stdint.h>
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef int8_t charv32 __attribute__((__vector_size__(32), __aligned__(32)));
++typedef uint8_t ucharv32 __attribute__((__vector_size__(32), __aligned__(32)));
++typedef int16_t shortv16 __attribute__((__vector_size__(32), __aligned__(32)));
++typedef uint16_t ushortv16
++    __attribute__((__vector_size__(32), __aligned__(32)));
++typedef int32_t intv8 __attribute__((__vector_size__(32), __aligned__(32)));
++typedef uint32_t uintv8 __attribute__((__vector_size__(32), __aligned__(32)));
++typedef int64_t longv4 __attribute__((__vector_size__(32), __aligned__(32)));
++typedef uint64_t ulongv4 __attribute__((__vector_size__(32), __aligned__(32)));
++
++// On the sw64 target, floatv4 is a special case (a 128-bit vector); we leave
++// it this way for now.
++typedef float floatv4 __attribute__((__vector_size__(16), __aligned__(16)));
++typedef double doublev4 __attribute__((__vector_size__(32), __aligned__(32)));
++// Special case for int256.
++typedef long long int256 __attribute__((__vector_size__(32), __aligned__(32)));
++typedef unsigned long long uint256
++    __attribute__((__vector_size__(32), __aligned__(32)));
++
++// Special case for byte compares.
++typedef int32_t int1v32_t;
++// Special case for half-precision transforms.
++typedef unsigned short float16v4_t
++    __attribute__((__vector_size__(8), __aligned__(8)));
++#define __DEFAULT_FN_ATTRS                                                    \
++  __attribute__((__always_inline__, __nodebug__, __target__("simd"),         \
++                 __min_vector_width__(256)))
++#define __DEFAULT_FN_ATTRS_CORE4                                              \
++  __attribute__((__always_inline__, __nodebug__, __target__("core4,simd"),   \
++                 __min_vector_width__(256)))
++
++static __inline void simd_fprint_charv32(FILE *fp, charv32 a) {
++  union {
++    char __a[32];
++    charv32 __v;
++  } __u;
++  __u.__v = a;
++  fprintf(fp, "[ %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[31], __u.__a[30],
++          __u.__a[29], __u.__a[28], __u.__a[27], __u.__a[26], __u.__a[25],
++          __u.__a[24]);
++  fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[23], __u.__a[22],
++          __u.__a[21], __u.__a[20], __u.__a[19], __u.__a[18], __u.__a[17],
++          __u.__a[16]);
++  fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[15], __u.__a[14],
++          __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9],
++          __u.__a[8]);
++  fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d ]\n", __u.__a[7], __u.__a[6],
++          __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1],
++          __u.__a[0]);
++}
++
++static __inline void simd_fprint_ucharv32(FILE *fp, ucharv32 a) {
++  union {
++    unsigned char __a[32];
++    ucharv32 __v;
++  } __u;
++  __u.__v = a;
++  fprintf(fp, "[ %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[31], __u.__a[30],
++          __u.__a[29], __u.__a[28], __u.__a[27], __u.__a[26], __u.__a[25],
++          __u.__a[24]);
++  fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[23], __u.__a[22],
++          __u.__a[21], __u.__a[20], __u.__a[19], __u.__a[18], __u.__a[17],
++          __u.__a[16]);
++  fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[15], __u.__a[14],
++          __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9],
++          __u.__a[8]);
++  fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u ]\n", __u.__a[7], __u.__a[6],
++          __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1],
++          __u.__a[0]);
++}
++
++static __inline void simd_fprint_shortv16(FILE *fp, shortv16 a) {
++  union {
++    short __a[16];
++    shortv16 __v;
++  } __u;
++  __u.__v = a;
++  fprintf(fp, "[ %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[15], __u.__a[14],
++          __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9],
++          __u.__a[8]);
++  fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d ]\n", __u.__a[7], __u.__a[6],
++          __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1],
++          __u.__a[0]);
++}
++
++static __inline void simd_fprint_ushortv16(FILE *fp, ushortv16 a) {
++  union {
++    unsigned short __a[16];
++    ushortv16 __v;
++  } __u;
++  __u.__v = a;
++  fprintf(fp, "[ %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[15], __u.__a[14],
++          __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9],
++          __u.__a[8]);
++  fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u ]\n", __u.__a[7], __u.__a[6],
++          __u.__a[5], __u.__a[4],
__u.__a[3], __u.__a[2], __u.__a[1], ++ __u.__a[0]); ++} ++ ++static __inline void simd_fprint_intv8(FILE *fp, intv8 a) { ++ union { ++ int __a[8]; ++ intv8 __v; ++ } __u; ++ __u.__v = a; ++ fprintf(fp, "[ %d, %d, %d, %d, %d, %d, %d, %d ]\n", __u.__a[7], __u.__a[6], ++ __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], ++ __u.__a[0]); ++} ++ ++static __inline void simd_fprint_uintv8(FILE *fp, uintv8 a) { ++ union { ++ unsigned int __a[8]; ++ uintv8 __v; ++ } __u; ++ __u.__v = a; ++ fprintf(fp, "[ %u, %u, %u, %u, %u, %u, %u, %u ]\n", __u.__a[7], __u.__a[6], ++ __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], ++ __u.__a[0]); ++} ++ ++static __inline void simd_fprint_longv4(FILE *fp, longv4 a) { ++ union { ++ long __a[4]; ++ longv4 __v; ++ } __u; ++ __u.__v = a; ++ fprintf(fp, "[ %ld, %ld, %ld, %ld ]\n", __u.__a[3], __u.__a[2], __u.__a[1], ++ __u.__a[0]); ++} ++ ++static __inline void simd_fprint_ulongv4(FILE *fp, ulongv4 a) { ++ union { ++ unsigned long __a[4]; ++ ulongv4 __v; ++ } __u; ++ __u.__v = a; ++ fprintf(fp, "[ %lu, %lu, %lu, %lu ]\n", __u.__a[3], __u.__a[2], __u.__a[1], ++ __u.__a[0]); ++} ++ ++static __inline void simd_fprint_floatv4(FILE *fp, floatv4 a) { ++ union { ++ float __a[4]; ++ floatv4 __v; ++ } __u; ++ __u.__v = a; ++ fprintf(fp, "[ %.8e, %.8e, %.8e, %.8e ]\n", __u.__a[3], __u.__a[2], ++ __u.__a[1], __u.__a[0]); ++} ++ ++static __inline void simd_fprint_doublev4(FILE *fp, doublev4 a) { ++ union { ++ double __a[4]; ++ doublev4 __v; ++ } __u; ++ __u.__v = a; ++ fprintf(fp, "[ %.16e, %.16e, %.16e, %.16e ]\n", __u.__a[3], __u.__a[2], ++ __u.__a[1], __u.__a[0]); ++} ++ ++static __inline void simd_fprint_int256(FILE *fp, int256 a) { ++ volatile union { ++ long __a[4]; ++ int256 __v; ++ } __u; ++ __u.__v = a; ++ fprintf(fp, "[ 0x%lx, 0x%lx, 0x%lx, 0x%lx ]\n", __u.__a[3], __u.__a[2], ++ __u.__a[1], __u.__a[0]); ++} ++ ++static __inline void simd_fprint_uint256(FILE *fp, uint256 a) { ++ volatile union { ++ unsigned long __a[4]; ++ uint256 __v; ++ } __u; ++ __u.__v = a; ++ fprintf(fp, "[ 0x%lx, 0x%lx, 0x%lx, 0x%lx ]\n", __u.__a[3], __u.__a[2], ++ __u.__a[1], __u.__a[0]); ++} ++ ++static __inline void simd_print_charv32(charv32 arg) { ++ simd_fprint_charv32(stdout, arg); ++} ++static __inline void simd_print_ucharv32(ucharv32 arg) { ++ simd_fprint_ucharv32(stdout, arg); ++} ++static __inline void simd_print_shortv16(shortv16 arg) { ++ simd_fprint_shortv16(stdout, arg); ++} ++static __inline void simd_print_ushortv16(ushortv16 arg) { ++ simd_fprint_ushortv16(stdout, arg); ++} ++static __inline void simd_print_intv8(intv8 arg) { ++ simd_fprint_intv8(stdout, arg); ++} ++static __inline void simd_print_uintv8(uintv8 arg) { ++ simd_fprint_uintv8(stdout, arg); ++} ++static __inline void simd_print_longv4(longv4 arg) { ++ simd_fprint_longv4(stdout, arg); ++} ++static __inline void simd_print_ulongv4(ulongv4 arg) { ++ simd_fprint_ulongv4(stdout, arg); ++} ++static __inline void simd_print_floatv4(floatv4 arg) { ++ simd_fprint_floatv4(stdout, arg); ++} ++static __inline void simd_print_doublev4(doublev4 arg) { ++ simd_fprint_doublev4(stdout, arg); ++} ++static __inline void simd_print_int256(int256 arg) { ++ simd_fprint_int256(stdout, arg); ++} ++static __inline void simd_print_uint256(uint256 arg) { ++ simd_fprint_uint256(stdout, arg); ++} ++ ++// Vector Load Intrinsic ++ ++#define simd_load(dest, src) \ ++ do { \ ++ (dest) = __builtin_sw_vload(src); \ ++ } while (0) ++ ++#define simd_loadu(dest, src) \ ++ do { \ ++ (dest) = __builtin_sw_vloadu(src); \ ++ } while (0) ++ 
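++// Illustrative usage of the load/store macros (a sketch only; assumes a
++// 32-byte-aligned buffer, and uses simd_vaddwi, which is defined further
++// below):
++//
++//   int32_t buf[8] __attribute__((__aligned__(32)));
++//   intv8 v;
++//   simd_load(v, buf);      // aligned 256-bit load
++//   v = simd_vaddwi(v, 1);  // add 1 to each of the 8 int32 lanes
++//   simd_store(v, buf);     // aligned 256-bit store
++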
++#define simd_load_u(dest, src)                                                \
++  do {                                                                        \
++    (dest) = __builtin_sw_vload_u(src);                                       \
++  } while (0)
++
++#define simd_loade(dest, src)                                                 \
++  do {                                                                        \
++    (dest) = __builtin_sw_vloade(src);                                        \
++  } while (0)
++
++#define simd_vload_nc(dest, src)                                              \
++  do {                                                                        \
++    (dest) = __builtin_sw_vloadnc(src);                                       \
++  } while (0)
++
++#define simd_store(src, dest)                                                 \
++  do {                                                                        \
++    __builtin_sw_vstore(src, dest);                                           \
++  } while (0)
++
++#define simd_storeu(src, dest)                                                \
++  do {                                                                        \
++    __builtin_sw_vstoreu(src, dest);                                          \
++  } while (0)
++
++#define simd_store_u(src, dest)                                               \
++  do {                                                                        \
++    __builtin_sw_vstore_u(src, dest);                                         \
++  } while (0)
++
++#define simd_storeuh(src, dest)                                               \
++  do {                                                                        \
++    uint64_t __ptr = (uint64_t)(dest) + (uint64_t)sizeof(src);                \
++    __builtin_sw_vstoreuh(src, (__typeof__(dest))__ptr);                      \
++  } while (0)
++
++#define simd_storeul(src, dest)                                               \
++  do {                                                                        \
++    __builtin_sw_vstoreul(src, dest);                                         \
++  } while (0)
++
++#define simd_vstore_nc(src, dest)                                             \
++  do {                                                                        \
++    __builtin_sw_vstorenc(src, dest);                                         \
++  } while (0)
++
++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_loads(const float *__ptr) {
++  return *(floatv4 *)__ptr;
++}
++
++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_loadd(const double *__ptr) {
++  return *(doublev4 *)__ptr;
++}
++
++// Store helpers: write the vector through __ptr.
++static __inline__ void __DEFAULT_FN_ATTRS simd_stores(float *__ptr,
++                                                      floatv4 a) {
++  *(floatv4 *)__ptr = a;
++}
++
++static __inline__ void __DEFAULT_FN_ATTRS simd_stored(double *__ptr,
++                                                      doublev4 a) {
++  *(doublev4 *)__ptr = a;
++}
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_loadew(const int32_t *__ptr) {
++  int32_t __a = *__ptr;
++  return __extension__(intv8){__a, __a, __a, __a, __a, __a, __a, __a};
++}
++
++static __inline__ longv4 __DEFAULT_FN_ATTRS simd_loadel(const int64_t *__ptr) {
++  int64_t __a = *__ptr;
++  return __extension__(longv4){__a, __a, __a, __a};
++}
++
++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_loades(const float *__ptr) {
++  float __a = *__ptr;
++  return __extension__(floatv4){__a, __a, __a, __a};
++}
++
++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_loaded(const double *__ptr) {
++  double __a = *__ptr;
++  return __extension__(doublev4){__a, __a, __a, __a};
++}
++
++// Vector Set Intrinsics for Sw64
++
++static __inline__ charv32 __DEFAULT_FN_ATTRS simd_set_charv32(
++    int8_t __b31, int8_t __b30, int8_t __b29, int8_t __b28, int8_t __b27,
++    int8_t __b26, int8_t __b25, int8_t __b24, int8_t __b23, int8_t __b22,
++    int8_t __b21, int8_t __b20, int8_t __b19, int8_t __b18, int8_t __b17,
++    int8_t __b16, int8_t __b15, int8_t __b14, int8_t __b13, int8_t __b12,
++    int8_t __b11, int8_t __b10, int8_t __b09, int8_t __b08, int8_t __b07,
++    int8_t __b06, int8_t __b05, int8_t __b04, int8_t __b03, int8_t __b02,
++    int8_t __b01, int8_t __b00) {
++  return __extension__(charv32){__b31, __b30, __b29, __b28, __b27, __b26, __b25,
++                                __b24, __b23, __b22, __b21, __b20, __b19, __b18,
++                                __b17, __b16, __b15, __b14, __b13, __b12, __b11,
++                                __b10, __b09, __b08, __b07, __b06, __b05, __b04,
++                                __b03, __b02, __b01, __b00};
++}
++#define simd_set_ucharv32 simd_set_charv32
++
++static __inline__ shortv16 __DEFAULT_FN_ATTRS
++simd_set_shortv16(int16_t __b15, int16_t __b14, int16_t __b13, int16_t __b12,
++                  int16_t __b11, int16_t __b10, int16_t __b09, int16_t __b08,
++                  int16_t __b07, int16_t __b06, int16_t __b05, int16_t __b04,
++                  int16_t __b03, int16_t __b02, int16_t __b01, int16_t __b00) {
++  return __extension__(shortv16){__b15, __b14, __b13, __b12, __b11, __b10,
++                                 __b09, __b08, __b07, __b06, __b05, __b04,
++                                 __b03, __b02, __b01, __b00};
++}
++#define simd_set_ushortv16 simd_set_shortv16
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS
++simd_set_intv8(int32_t __b07, int32_t __b06, int32_t __b05, int32_t __b04,
++               int32_t __b03, int32_t __b02, int32_t __b01, int32_t __b00) {
++  return __extension__(intv8){__b07, __b06, __b05, __b04,
++                              __b03, __b02, __b01, __b00};
++}
++#define simd_set_uintv8 simd_set_intv8
++
++static __inline__ longv4 __DEFAULT_FN_ATTRS simd_set_longv4(int64_t __b03,
++                                                            int64_t __b02,
++                                                            int64_t __b01,
++                                                            int64_t __b00) {
++  return __extension__(longv4){__b03, __b02, __b01, __b00};
++}
++#define simd_set_ulongv4 simd_set_longv4
++#define simd_set_int256 simd_set_longv4
++#define simd_set_uint256 simd_set_longv4
++
++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_set_floatv4(float __b03,
++                                                              float __b02,
++                                                              float __b01,
++                                                              float __b00) {
++  return __extension__(floatv4){__b03, __b02, __b01, __b00};
++}
++
++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_set_doublev4(double __b03,
++                                                                double __b02,
++                                                                double __b01,
++                                                                double __b00) {
++  return __extension__(doublev4){__b03, __b02, __b01, __b00};
++}
++
++// Integer Arithmetic Intrinsics for Sw64
++// Calculate the sum of the given vectors as 8 x int32_t; each lane wraps on
++// overflow rather than producing the usual scalar overflow result.
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vaddw(intv8 a, intv8 b) {
++  return a + b;
++}
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vaddwi(intv8 a,
++                                                       const int32_t b) {
++  intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b};
++  return a + tmp;
++}
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsubw(intv8 a, intv8 b) {
++  return a - b;
++}
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsubwi(intv8 a,
++                                                       const int32_t b) {
++  intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b};
++  return a - tmp;
++}
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddw(intv8 a, intv8 b) {
++  return __builtin_sw_vucaddw(a, b);
++}
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddwi(intv8 a,
++                                                         const int32_t b) {
++  intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b};
++  return __builtin_sw_vucaddw(a, tmp);
++}
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubw(intv8 a, intv8 b) {
++  return __builtin_sw_vucsubw(a, b);
++}
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubwi(intv8 a,
++                                                         const int32_t b) {
++  intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b};
++  return __builtin_sw_vucsubw(a, tmp);
++}
++
++static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vaddl(longv4 a, longv4 b) {
++  return a + b;
++}
++
++static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vaddli(longv4 a,
++                                                        const int64_t __b) {
++  longv4 __tmp = __extension__(longv4){__b, __b, __b, __b};
++  return a + __tmp;
++}
++
++static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vsubl(longv4 a, longv4 b) {
++  return a - b;
++}
++
++static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vsubli(longv4 a,
++                                                        const int64_t __b) {
++  longv4 __tmp = __extension__(longv4){__b, __b, __b, __b};
++  return a - __tmp;
++}
++
++// Core3 SIMD does not support v16i16 and v32i8;
++// the v8i32 forms must be used instead.
++#ifdef __sw_64_sw8a__
++static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucaddh(shortv16 a,
++                                                           shortv16 b) {
++  return __builtin_sw_vucaddh_v16hi(a, b);
++}
++
++static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucaddhi(shortv16 a,
++                                                            const int b) {
++  int16_t __b = (int16_t)b;
++  shortv16 tmp =
++      __extension__(shortv16){__b, __b, __b, __b, __b, __b, __b, __b,
++                              __b, __b, __b, __b, __b, __b, __b, __b};
++  return __builtin_sw_vucaddh_v16hi(a, tmp);
++}
++
++static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucsubh(shortv16 a,
++                                                           shortv16 b) {
++  return __builtin_sw_vucsubh_v16hi(a, b);
++}
++
++static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucsubhi(shortv16 a,
++                                                            const int b) {
++  int16_t __b = (int16_t)b;
++  shortv16 tmp =
++      __extension__(shortv16){__b, __b, __b, __b, __b, __b, __b, __b,
++                              __b, __b, __b, __b, __b, __b, __b, __b};
++  return __builtin_sw_vucsubh_v16hi(a, tmp);
++}
++
++static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucaddb(charv32 a,
++                                                          charv32 b) {
++  return __builtin_sw_vucaddb_v32qi(a, b);
++}
++
++static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucaddbi(charv32 a,
++                                                           const int b) {
++  int8_t __b = (int8_t)b;
++  charv32 tmp = __extension__(charv32){__b, __b, __b, __b, __b, __b, __b, __b,
++                                       __b, __b, __b, __b, __b, __b, __b, __b,
++                                       __b, __b, __b, __b, __b, __b, __b, __b,
++                                       __b, __b, __b, __b, __b, __b, __b, __b};
++  return __builtin_sw_vucaddb_v32qi(a, tmp);
++}
++
++static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucsubb(charv32 a,
++                                                          charv32 b) {
++  return __builtin_sw_vucsubb_v32qi(a, b);
++}
++
++static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucsubbi(charv32 a,
++                                                           const int b) {
++  int8_t __b = (int8_t)b;
++  charv32 tmp = __extension__(charv32){__b, __b, __b, __b, __b, __b, __b, __b,
++                                       __b, __b, __b, __b, __b, __b, __b, __b,
++                                       __b, __b, __b, __b, __b, __b, __b, __b,
++                                       __b, __b, __b, __b, __b, __b, __b, __b};
++  return __builtin_sw_vucsubb_v32qi(a, tmp);
++}
++#else
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddh(intv8 a, intv8 b) {
++  return __builtin_sw_vucaddh(a, b);
++}
++
++#define simd_vucaddhi __builtin_sw_vucaddhi
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubh(intv8 a, intv8 b) {
++  return __builtin_sw_vucsubh(a, b);
++}
++
++#define simd_vucsubhi __builtin_sw_vucsubhi
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddb(intv8 a, intv8 b) {
++  return __builtin_sw_vucaddb(a, b);
++}
++
++#define simd_vucaddbi __builtin_sw_vucaddbi
++
++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubb(intv8 a, intv8 b) {
++  return __builtin_sw_vucsubb(a, b);
++}
++
++#define simd_vucsubbi __builtin_sw_vucsubbi
++#endif
++
++static __inline__ int32_t __DEFAULT_FN_ATTRS_CORE4 simd_vsumw(intv8 a) {
++  return __builtin_sw_vsumw(a);
++}
++
++static __inline__ int64_t __DEFAULT_FN_ATTRS_CORE4 simd_vsuml(longv4 a) {
++  return __builtin_sw_vsuml(a);
++}
++
++static __inline__ int32_t __DEFAULT_FN_ATTRS simd_ctpopow(int256 a) {
++  return __builtin_sw_ctpopow(a);
++}
++
++static __inline__ int32_t __DEFAULT_FN_ATTRS simd_ctlzow(int256 a) {
++  return __builtin_sw_ctlzow(a);
++}
++
++// Vector Shift Intrinsics
++// Generate vsll(b|h|w|l) instructions according to the vector element type.
++
++static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vsllw(uintv8 a, int i) {
++  return __builtin_sw_vsll(a, (int64_t)i);
++}
++
++static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vsrlw(uintv8 a, int i) {
++  return
__builtin_sw_vsrl(a, (int64_t)i); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsraw(intv8 a, int i) { ++ return __builtin_sw_vsra(a, (int64_t)i); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vrolw(intv8 a, int i) { ++ return __builtin_sw_vrol(a, (int64_t)i); ++} ++ ++#define simd_vsllwi simd_vsllw ++#define simd_vsrlwi simd_vsrlw ++#define simd_vsrawi simd_vsraw ++#define simd_vrolwi simd_vrolw ++ ++static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vsllb(charv32 a, ++ int i) { ++ return __builtin_sw_vsll(a, (int64_t)i); ++} ++ ++static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vsrlb(ucharv32 a, ++ int i) { ++ return __builtin_sw_vsrl(a, (int64_t)i); ++} ++ ++static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vsrab(charv32 a, ++ int i) { ++ return __builtin_sw_vsra(a, (int64_t)i); ++} ++ ++static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vrolb(charv32 a, ++ int i) { ++ return __builtin_sw_vrol(a, (int64_t)i); ++} ++ ++#define simd_vsllbi simd_vsllb ++#define simd_vsrlbi simd_vsrlb ++#define simd_vsrabi simd_vsrab ++#define simd_vrolbi simd_vrolb ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vslll(longv4 a, int i) { ++ return __builtin_sw_vsll(a, (int64_t)i); ++} ++ ++static __inline__ ulongv4 __DEFAULT_FN_ATTRS_CORE4 simd_vsrll(ulongv4 a, ++ int i) { ++ return __builtin_sw_vsrl(a, (int64_t)i); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vsral(longv4 a, int i) { ++ return __builtin_sw_vsra(a, (int64_t)i); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vroll(longv4 a, int i) { ++ return __builtin_sw_vrol(a, (int64_t)i); ++} ++ ++#define simd_vsllli simd_vslll ++#define simd_vsrlli simd_vsrll ++#define simd_vsrali simd_vsral ++#define simd_vrolli simd_vroll ++ ++static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vsllh(shortv16 a, ++ int i) { ++ return __builtin_sw_vsll(a, (int64_t)i); ++} ++ ++static __inline__ ushortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vsrlh(ushortv16 a, ++ int i) { ++ return __builtin_sw_vsrl(a, (int64_t)i); ++} ++ ++static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vsrah(shortv16 a, ++ int i) { ++ return __builtin_sw_vsra(a, (int64_t)i); ++} ++ ++static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vrolh(shortv16 a, ++ int i) { ++ return __builtin_sw_vrol(a, (int64_t)i); ++} ++ ++#define simd_vsllhi simd_vsllh ++#define simd_vsrlhi simd_vsrlh ++#define simd_vsrahi simd_vsrah ++#define simd_vrolhi simd_vrolh ++ ++static __inline__ int256 __DEFAULT_FN_ATTRS simd_srlow(int256 a, int i) { ++ return __builtin_sw_srlow(a, (int64_t)i); ++} ++ ++static __inline__ int256 __DEFAULT_FN_ATTRS simd_sllow(int256 a, int i) { ++ return __builtin_sw_sllow(a, (int64_t)i); ++} ++ ++static __inline__ int256 __DEFAULT_FN_ATTRS simd_sraow(int256 a, int i) { ++ return __builtin_sw_sraow(a, (int64_t)i); ++} ++ ++#define simd_srlowi simd_srlow ++#define simd_sllowi simd_sllow ++#define simd_sraowi simd_sraow ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vslls1(floatv4 a) { ++ return __builtin_sw_vslls(a, 64); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vslls2(floatv4 a) { ++ return __builtin_sw_vslls(a, 128); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vslls3(floatv4 a) { ++ return __builtin_sw_vslls(a, 192); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vslld1(doublev4 a) { ++ return __builtin_sw_vslld(a, 64); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vslld2(doublev4 a) { ++ return __builtin_sw_vslld(a, 128); ++} 
++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vslld3(doublev4 a) { ++ return __builtin_sw_vslld(a, 192); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsrls1(floatv4 a) { ++ return __builtin_sw_vsrls(a, 64); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsrls2(floatv4 a) { ++ return __builtin_sw_vsrls(a, 128); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsrls3(floatv4 a) { ++ return __builtin_sw_vsrls(a, 192); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsrld1(doublev4 a) { ++ return __builtin_sw_vsrld(a, 64); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsrld2(doublev4 a) { ++ return __builtin_sw_vsrld(a, 128); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsrld3(doublev4 a) { ++ return __builtin_sw_vsrld(a, 192); ++} ++ ++// Integer Compare Inst ++ ++static __inline__ int32_t __DEFAULT_FN_ATTRS simd_vcmpgew(intv8 a, intv8 b) { ++ return __builtin_sw_vcmpgew(a, b); ++} ++ ++static __inline__ int32_t __DEFAULT_FN_ATTRS simd_vcmpgewi(intv8 a, ++ const int32_t b) { ++ intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; ++ return __builtin_sw_vcmpgew(a, tmp); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpeqw(intv8 a, intv8 b) { ++ return __builtin_sw_vcmpeqw(a, b); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpeqwi(intv8 a, ++ const int32_t b) { ++ intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; ++ return __builtin_sw_vcmpeqw(a, tmp); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmplew(intv8 a, intv8 b) { ++ return __builtin_sw_vcmplew(a, b); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmplewi(intv8 a, ++ const int32_t b) { ++ intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; ++ return __builtin_sw_vcmplew(a, tmp); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpltw(intv8 a, intv8 b) { ++ return __builtin_sw_vcmpltw(a, b); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpltwi(intv8 a, ++ const int32_t b) { ++ intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; ++ return __builtin_sw_vcmpltw(a, tmp); ++} ++ ++static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpulew(uintv8 a, uintv8 b) { ++ return __builtin_sw_vcmpulew(a, b); ++} ++ ++static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpulewi(uintv8 a, ++ const uint32_t b) { ++ uintv8 tmp = __extension__(uintv8){b, b, b, b, b, b, b, b}; ++ return __builtin_sw_vcmpulew(a, tmp); ++} ++ ++static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpultw(uintv8 a, uintv8 b) { ++ return __builtin_sw_vcmpultw(a, b); ++} ++ ++static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpultwi(uintv8 a, ++ const uint32_t b) { ++ uintv8 tmp = __extension__(uintv8){b, b, b, b, b, b, b, b}; ++ return __builtin_sw_vcmpultw(a, tmp); ++} ++ ++static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vcmpueqb(ucharv32 a, ++ ucharv32 b) { ++ ucharv32 res = (ucharv32)__builtin_sw_vcmpueqb(a, b); ++ return res; ++} ++ ++static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 ++simd_vcmpueqbi(ucharv32 a, const uint32_t b) { ++ uint8_t __b = (uint8_t)b; ++ ucharv32 tmp = __extension__(ucharv32){ ++ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, ++ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, ++ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b}; ++ ucharv32 res = (ucharv32)__builtin_sw_vcmpueqb(a, tmp); ++ return res; ++} ++ ++static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vcmpugtb(ucharv32 a, ++ ucharv32 b) { ++ ucharv32 res = 
(ucharv32)__builtin_sw_vcmpugtb(a, b); ++ return res; ++} ++ ++static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 ++simd_vcmpugtbi(ucharv32 a, const uint32_t b) { ++ uint8_t __b = (uint8_t)b; ++ ucharv32 tmp = __extension__(ucharv32){ ++ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, ++ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, ++ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b}; ++ ucharv32 res = (ucharv32)__builtin_sw_vcmpugtb(a, tmp); ++ return res; ++} ++ ++static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxb(charv32 a, ++ charv32 b) { ++ return __builtin_sw_vmaxb(a, b); ++} ++ ++static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxh(shortv16 a, ++ shortv16 b) { ++ return __builtin_sw_vmaxh(a, b); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxw(intv8 a, intv8 b) { ++ return __builtin_sw_vmaxw(a, b); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxl(longv4 a, ++ longv4 b) { ++ return __builtin_sw_vmaxl(a, b); ++} ++ ++static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxb(ucharv32 a, ++ ucharv32 b) { ++ return __builtin_sw_vumaxb(a, b); ++} ++ ++static __inline__ ushortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxh(ushortv16 a, ++ ushortv16 b) { ++ return __builtin_sw_vumaxh(a, b); ++} ++ ++static __inline__ uintv8 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxw(uintv8 a, ++ uintv8 b) { ++ return __builtin_sw_vumaxw(a, b); ++} ++ ++static __inline__ ulongv4 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxl(ulongv4 a, ++ ulongv4 b) { ++ return __builtin_sw_vumaxl(a, b); ++} ++ ++static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vminb(charv32 a, ++ charv32 b) { ++ return __builtin_sw_vminb(a, b); ++} ++ ++static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vminh(shortv16 a, ++ shortv16 b) { ++ return __builtin_sw_vminh(a, b); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS_CORE4 simd_vminw(intv8 a, intv8 b) { ++ return __builtin_sw_vminw(a, b); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vminl(longv4 a, ++ longv4 b) { ++ return __builtin_sw_vminl(a, b); ++} ++ ++static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vuminb(ucharv32 a, ++ ucharv32 b) { ++ return __builtin_sw_vuminb(a, b); ++} ++ ++static __inline__ ushortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vuminh(ushortv16 a, ++ ushortv16 b) { ++ return __builtin_sw_vuminh(a, b); ++} ++ ++static __inline__ uintv8 __DEFAULT_FN_ATTRS_CORE4 simd_vuminw(uintv8 a, ++ uintv8 b) { ++ return __builtin_sw_vuminw(a, b); ++} ++ ++static __inline__ ulongv4 __DEFAULT_FN_ATTRS_CORE4 simd_vuminl(ulongv4 a, ++ ulongv4 b) { ++ return __builtin_sw_vuminl(a, b); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vseleqw(intv8 a, intv8 b, ++ intv8 c) { ++ return __builtin_sw_vseleqw(a, b, c); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellew(intv8 a, intv8 b, ++ intv8 c) { ++ return __builtin_sw_vsellew(a, b, c); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vselltw(intv8 a, intv8 b, ++ intv8 c) { ++ return __builtin_sw_vselltw(a, b, c); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellbcw(intv8 a, intv8 b, ++ intv8 c) { ++ return __builtin_sw_vsellbcw(a, b, c); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vseleqwi(intv8 a, intv8 b, ++ int32_t c) { ++ intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; ++ return __builtin_sw_vseleqw(a, b, tmp); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellewi(intv8 a, intv8 b, ++ int32_t c) { ++ intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; 
++ return __builtin_sw_vsellew(a, b, tmp); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vselltwi(intv8 a, intv8 b, ++ int32_t c) { ++ intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; ++ return __builtin_sw_vselltw(a, b, tmp); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellbcwi(intv8 a, intv8 b, ++ int32_t c) { ++ intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; ++ return __builtin_sw_vsellbcw(a, b, tmp); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vseleql(longv4 a, longv4 b, ++ longv4 c) { ++ doublev4 tmp_a = (doublev4)a; ++ doublev4 tmp_b = (doublev4)b; ++ doublev4 tmp_c = (doublev4)c; ++ return (longv4)__builtin_sw_vfseleqd(tmp_a, tmp_b, tmp_c); ++} ++ ++// Vector Logic Operation ++ ++#define simd_vlog(a, b, c, opcode) __builtin_sw_vlogzz(a, b, c, opcode) ++ ++#define simd_vand(SUFFIX, TYPE) \ ++ static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vand##SUFFIX(TYPE a, \ ++ TYPE b) { \ ++ return a & b; \ ++ } ++ ++simd_vand(b, charv32) ++simd_vand(h, shortv16) ++simd_vand(w, intv8) ++simd_vand(l, longv4) ++ ++#define simd_vbic(SUFFIX, TYPE) \ ++ static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vbic##SUFFIX(TYPE a, \ ++ TYPE b) { \ ++ return a & ~b; \ ++ } ++ ++simd_vbic(b, charv32) ++simd_vbic(h, shortv16) ++simd_vbic(w, intv8) ++simd_vbic(l, longv4) ++ ++#define simd_vbis(SUFFIX, TYPE) \ ++ static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vbis##SUFFIX(TYPE a, \ ++ TYPE b) { \ ++ return a | b; \ ++ } ++ ++simd_vbis(b, charv32) ++simd_vbis(h, shortv16) ++simd_vbis(w, intv8) ++simd_vbis(l, longv4) ++ ++#define simd_vornot(SUFFIX, TYPE) \ ++ static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vornot##SUFFIX(TYPE a, \ ++ TYPE b) { \ ++ return a | ~b; \ ++ } ++ ++simd_vornot(b, charv32) ++simd_vornot(h, shortv16) ++simd_vornot(w, intv8) ++simd_vornot(l, longv4) ++ ++#define simd_vxor(SUFFIX, TYPE) \ ++ static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vxor##SUFFIX(TYPE a, \ ++ TYPE b) { \ ++ return a ^ b; \ ++ } ++ ++simd_vxor(b, charv32) ++simd_vxor(h, shortv16) ++simd_vxor(w, intv8) ++simd_vxor(l, longv4) ++ ++#define simd_veqv(SUFFIX, TYPE) \ ++ static __inline__ TYPE __DEFAULT_FN_ATTRS simd_veqv##SUFFIX(TYPE a, \ ++ TYPE b) { \ ++ return ~(a ^ b); \ ++ } ++ ++simd_veqv(b, charv32) ++simd_veqv(h, shortv16) ++simd_veqv(w, intv8) ++simd_veqv(l, longv4) ++ ++// float arithmetic Operation ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vadds(floatv4 a, floatv4 b) { ++ return a + b; ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vaddd(doublev4 a, ++ doublev4 b) { ++ return a + b; ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsubs(floatv4 a, floatv4 b) { ++ return a - b; ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsubd(doublev4 a, ++ doublev4 b) { ++ return a - b; ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vmuls(floatv4 a, floatv4 b) { ++ return a * b; ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vmuld(doublev4 a, ++ doublev4 b) { ++ return a * b; ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vdivs(floatv4 a, floatv4 b) { ++ return a / b; ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vdivd(doublev4 a, ++ doublev4 b) { ++ return a / b; ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsqrts(floatv4 a) { ++ return __builtin_sw_vsqrts(a); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsqrtd(doublev4 a) { ++ return __builtin_sw_vsqrtd(a); ++} ++ ++static __inline__ float __DEFAULT_FN_ATTRS_CORE4 simd_vsums(floatv4 a) { ++ return 
__builtin_sw_vsums(a); ++} ++ ++static __inline__ double __DEFAULT_FN_ATTRS_CORE4 simd_vsumd(doublev4 a) { ++ return __builtin_sw_vsumd(a); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrecs(floatv4 a) { ++ return __builtin_sw_vfrecs(a); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrecd(doublev4 a) { ++ return __builtin_sw_vfrecd(a); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmpeqs(floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vfcmpeqs(a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmples(floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vfcmples(a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmplts(floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vfcmplts(a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmpuns(floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vfcmpuns(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpeqd(doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vfcmpeqd(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpled(doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vfcmpled(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpltd(doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vfcmpltd(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpund(doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vfcmpund(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtsd(floatv4 a) { ++ return __builtin_sw_vfcvtsd(a); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtds(doublev4 a) { ++ return __builtin_sw_vfcvtds(a); ++} ++ ++#define simd_vfcvtsh(a, b, c) __builtin_sw_vfcvtsh(a, b, c) ++#define simd_vfcvths(a, b) __builtin_sw_vfcvths(a, b) ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtls(longv4 a) { ++ return __builtin_sw_vfcvtls(a); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtld(longv4 a) { ++ return __builtin_sw_vfcvtld(a); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtsl(floatv4 a) { ++ doublev4 tmp = __builtin_sw_vfcvtsd(a); ++ return __builtin_sw_vfcvtdl(tmp); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl(doublev4 a) { ++ return __builtin_sw_vfcvtdl(a); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_g(doublev4 a) { ++ return __builtin_sw_vfcvtdl_g(a); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_p(doublev4 a) { ++ return __builtin_sw_vfcvtdl_p(a); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_z(doublev4 a) { ++ return __builtin_sw_vfcvtdl_z(a); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_n(doublev4 a) { ++ return __builtin_sw_vfcvtdl_n(a); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris(floatv4 a) { ++ return __builtin_sw_vfris(a); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_g(floatv4 a) { ++ return __builtin_sw_vfris_g(a); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_p(floatv4 a) { ++ return __builtin_sw_vfris_p(a); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_z(floatv4 a) { ++ return __builtin_sw_vfris_z(a); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_n(floatv4 a) { ++ return __builtin_sw_vfris_n(a); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid(doublev4 a) { ++ return 
__builtin_sw_vfrid(a); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_g(doublev4 a) { ++ return __builtin_sw_vfrid_g(a); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_p(doublev4 a) { ++ return __builtin_sw_vfrid_p(a); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_z(doublev4 a) { ++ return __builtin_sw_vfrid_z(a); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_n(doublev4 a) { ++ return __builtin_sw_vfrid_n(a); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxs(floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vmaxs(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxd(doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vmaxd(a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vmins(floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vmins(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vmind(doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vmind(a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpyss(floatv4 a, floatv4 b) { ++ return __builtin_sw_vcpyss(a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpyses(floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vcpyses(a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpysns(floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vcpysns(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpysd(doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vcpysd(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpysed(doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vcpysed(a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpysnd(doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vcpysnd(a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfseleqs(floatv4 cond, ++ floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vfseleqs(cond, a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfsellts(floatv4 cond, ++ floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vfsellts(cond, a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfselles(floatv4 cond, ++ floatv4 a, ++ floatv4 b) { ++ return __builtin_sw_vfselles(cond, a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfseleqd(doublev4 cond, ++ doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vfseleqd(cond, a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfselltd(doublev4 cond, ++ doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vfselltd(cond, a, b); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfselled(doublev4 cond, ++ doublev4 a, ++ doublev4 b) { ++ return __builtin_sw_vfselled(cond, a, b); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vmas(floatv4 a, floatv4 b, ++ floatv4 c) { ++ return a * b + c; ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vmss(floatv4 a, floatv4 b, ++ floatv4 c) { ++ return a * b - c; ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vnmas(floatv4 a, floatv4 b, ++ floatv4 c) { ++ return -a * b + c; ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vnmss(floatv4 a, floatv4 b, ++ floatv4 c) { ++ return -(a * b + c); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vmad(doublev4 a, doublev4 b, ++ doublev4 c) { ++ return a * b + c; ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vmsd(doublev4 a, doublev4 b, ++ doublev4 c) { ++ return a * b - 
c; ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vnmad(doublev4 a, doublev4 b, ++ doublev4 c) { ++ return -a * b + c; ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vnmsd(doublev4 a, doublev4 b, ++ doublev4 c) { ++ return -(a * b + c); ++} ++ ++// SIMD element Operation ++ ++#ifdef __sw_64_sw8a__ ++#define simd_vinsb(elt, vect, num) __builtin_sw_vinsb(elt, vect, num) ++#define simd_vinsh(elt, vect, num) __builtin_sw_vinsh(elt, vect, num) ++#endif ++ ++#define simd_vinsw(elt, vect, num) __builtin_sw_vinsw(elt, vect, num) ++#define simd_vinsl(elt, vect, num) __builtin_sw_vinsl(elt, vect, num) ++#define simd_vinsfs(elt, vect, num) __builtin_sw_vinsfs(elt, vect, num) ++#define simd_vinsfd(elt, vect, num) __builtin_sw_vinsfd(elt, vect, num) ++ ++#define simd_vinsw0(elt, vect) simd_vinsw(elt, vect, 0) ++#define simd_vinsw1(elt, vect) simd_vinsw(elt, vect, 1) ++#define simd_vinsw2(elt, vect) simd_vinsw(elt, vect, 2) ++#define simd_vinsw3(elt, vect) simd_vinsw(elt, vect, 3) ++#define simd_vinsw4(elt, vect) simd_vinsw(elt, vect, 4) ++#define simd_vinsw5(elt, vect) simd_vinsw(elt, vect, 5) ++#define simd_vinsw6(elt, vect) simd_vinsw(elt, vect, 6) ++#define simd_vinsw7(elt, vect) simd_vinsw(elt, vect, 7) ++ ++#define simd_vinsl0(elt, vect) simd_vinsl(elt, vect, 0) ++#define simd_vinsl1(elt, vect) simd_vinsl(elt, vect, 1) ++#define simd_vinsl2(elt, vect) simd_vinsl(elt, vect, 2) ++#define simd_vinsl3(elt, vect) simd_vinsl(elt, vect, 3) ++ ++#define simd_vinsfs0(elt, vect) simd_vinsfs(elt, vect, 0) ++#define simd_vinsfs1(elt, vect) simd_vinsfs(elt, vect, 1) ++#define simd_vinsfs2(elt, vect) simd_vinsfs(elt, vect, 2) ++#define simd_vinsfs3(elt, vect) simd_vinsfs(elt, vect, 3) ++ ++#define simd_vinsfd0(elt, vect) simd_vinsfd(elt, vect, 0) ++#define simd_vinsfd1(elt, vect) simd_vinsfd(elt, vect, 1) ++#define simd_vinsfd2(elt, vect) simd_vinsfd(elt, vect, 2) ++#define simd_vinsfd3(elt, vect) simd_vinsfd(elt, vect, 3) ++ ++static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vinsectlb(charv32 __a, ++ charv32 __b) { ++ return __builtin_shufflevector( ++ __a, __b, 0, 0 + 32, 1, 1 + 32, 2, 2 + 32, 3, 3 + 32, 4, 4 + 32, 5, ++ 5 + 32, 6, 6 + 32, 7, 7 + 32, 8, 8 + 32, 9, 9 + 32, 10, 10 + 32, 11, ++ 11 + 32, 12, 12 + 32, 13, 13 + 32, 14, 14 + 32, 15, 15 + 32); ++} ++ ++static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 ++simd_vinsectlh(shortv16 __a, shortv16 __b) { ++ return __builtin_shufflevector(__a, __b, 0, 0 + 16, 1, 1 + 16, 2, 2 + 16, 3, ++ 3 + 16, 4, 4 + 16, 5, 5 + 16, 6, 6 + 16, 7, ++ 7 + 16); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS_CORE4 simd_vinsectlw(intv8 __a, ++ intv8 __b) { ++ return __builtin_shufflevector(__a, __b, 0, 0 + 8, 1, 1 + 8, 2, 2 + 8, 3, ++ 3 + 8); ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vinsectll(longv4 __a, ++ longv4 __b) { ++ return __builtin_shufflevector(__a, __b, 0, 0 + 4, 1, 1 + 4); ++} ++ ++#ifdef __sw_64_sw8a__ ++#define simd_vshfq(__a, __b, idx) __builtin_sw_vshfq(__a, __b, idx) ++#endif ++ ++static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vshfqb(charv32 __a, ++ charv32 __b) { ++ return __builtin_sw_vshfqb(__a, __b); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vshfw(intv8 __a, intv8 __b, ++ int64_t idx) { ++ return __builtin_sw_vshfw(__a, __b, idx); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vconw(intv8 __a, intv8 __b, ++ void *ptr) { ++ return __builtin_sw_vconw(__a, __b, ptr); ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vconl(intv8 __a, intv8 __b, ++ void *ptr) { 
++ return __builtin_sw_vconl(__a, __b, ptr); ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcons(floatv4 __a, ++ floatv4 __b, ++ void *ptr) { ++ return __builtin_sw_vcons(__a, __b, ptr); ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcond(doublev4 __a, ++ doublev4 __b, ++ void *ptr) { ++ return __builtin_sw_vcond(__a, __b, ptr); ++} ++ ++#define simd_vextw(vect, num) __builtin_sw_vextw(vect, num) ++#define simd_vextl(vect, num) __builtin_sw_vextl(vect, num) ++#define simd_vextfs(vect, num) __builtin_sw_vextfs(vect, num) ++#define simd_vextfd(vect, num) __builtin_sw_vextfd(vect, num) ++ ++#define simd_vextw0(args) simd_vextw(args, 0) ++#define simd_vextw1(args) simd_vextw(args, 1) ++#define simd_vextw2(args) simd_vextw(args, 2) ++#define simd_vextw3(args) simd_vextw(args, 3) ++#define simd_vextw4(args) simd_vextw(args, 4) ++#define simd_vextw5(args) simd_vextw(args, 5) ++#define simd_vextw6(args) simd_vextw(args, 6) ++#define simd_vextw7(args) simd_vextw(args, 7) ++ ++#define simd_vextl0(args) simd_vextl(args, 0) ++#define simd_vextl1(args) simd_vextl(args, 1) ++#define simd_vextl2(args) simd_vextl(args, 2) ++#define simd_vextl3(args) simd_vextl(args, 3) ++ ++#define simd_vextfs0(args) simd_vextfs(args, 0) ++#define simd_vextfs1(args) simd_vextfs(args, 1) ++#define simd_vextfs2(args) simd_vextfs(args, 2) ++#define simd_vextfs3(args) simd_vextfs(args, 3) ++ ++#define simd_vextfd0(args) simd_vextfd(args, 0) ++#define simd_vextfd1(args) simd_vextfd(args, 1) ++#define simd_vextfd2(args) simd_vextfd(args, 2) ++#define simd_vextfd3(args) simd_vextfd(args, 3) ++ ++static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vcpyb(int8_t b) { ++ return __extension__(charv32){b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, ++ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b}; ++} ++ ++static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vcpyh(int16_t b) { ++ return __extension__(shortv16){b, b, b, b, b, b, b, b, ++ b, b, b, b, b, b, b, b}; ++} ++ ++static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcpyw(int32_t b) { ++ return __extension__(intv8){b, b, b, b, b, b, b, b}; ++} ++ ++static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vcpyl(int64_t __a) { ++ return __extension__(longv4){__a, __a, __a, __a}; ++} ++ ++static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpyfs(float __a) { ++ return __extension__(floatv4){__a, __a, __a, __a}; ++} ++ ++static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpyfd(double __a) { ++ return __extension__(doublev4){__a, __a, __a, __a}; ++} ++ ++// Test for core3 ++ ++static __inline__ int32_t __DEFAULT_FN_ATTRS simd_reduc_plusw(intv8 __a) { ++ intv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); ++ __a = __a + __shf; ++ __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); ++ __a = __a + __shf; ++ __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); ++ __a = __a + __shf; ++ return __builtin_sw_vextw(__a, 0); ++} ++ ++static __inline__ float __DEFAULT_FN_ATTRS simd_reduc_pluss(floatv4 __a) { ++ floatv4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); ++ __a = __a + __shf; ++ __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); ++ __a = __a + __shf; ++ return __builtin_sw_vextfs(__a, 0); ++} ++ ++static __inline__ double __DEFAULT_FN_ATTRS simd_reduc_plusd(doublev4 __a) { ++ doublev4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); ++ __a = __a + __shf; ++ __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); ++ __a = __a + __shf; ++ return __builtin_sw_vextfd(__a, 0); ++} ++ ++static 
__inline__ int32_t __DEFAULT_FN_ATTRS simd_reduc_smaxw(intv8 __a) {
++  intv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
++  intv8 __cmp = simd_vcmpltw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __a, __shf);
++  __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6);
++  __cmp = simd_vcmpltw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __a, __shf);
++  __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4);
++  __cmp = simd_vcmpltw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __a, __shf);
++  return __builtin_sw_vextw(__a, 0);
++}
++
++static __inline__ uint32_t __DEFAULT_FN_ATTRS simd_reduc_umaxw(uintv8 __a) {
++  uintv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
++  uintv8 __cmp = simd_vcmpultw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __a, __shf);
++  __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6);
++  __cmp = simd_vcmpultw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __a, __shf);
++  __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4);
++  __cmp = simd_vcmpultw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __a, __shf);
++  return __builtin_sw_vextw(__a, 0);
++}
++
++static __inline__ int32_t __DEFAULT_FN_ATTRS simd_reduc_sminw(intv8 __a) {
++  intv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
++  intv8 __cmp = simd_vcmpltw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __shf, __a);
++  __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6);
++  __cmp = simd_vcmpltw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __shf, __a);
++  __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4);
++  __cmp = simd_vcmpltw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __shf, __a);
++  return __builtin_sw_vextw(__a, 0);
++}
++
++static __inline__ uint32_t __DEFAULT_FN_ATTRS simd_reduc_uminw(uintv8 __a) {
++  uintv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7);
++  uintv8 __cmp = simd_vcmpultw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __shf, __a);
++  __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6);
++  __cmp = simd_vcmpultw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __shf, __a);
++  __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4);
++  __cmp = simd_vcmpultw(__a, __shf);
++  __a = simd_vseleqw(__cmp, __shf, __a);
++  return __builtin_sw_vextw(__a, 0);
++}
++
++static __inline__ float __DEFAULT_FN_ATTRS simd_reduc_smaxs(floatv4 __a) {
++  floatv4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
++  floatv4 __cmp = simd_vfcmplts(__a, __shf);
++  __a = simd_vfseleqs(__cmp, __a, __shf);
++  __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2);
++  __cmp = simd_vfcmplts(__a, __shf);
++  __a = simd_vfseleqs(__cmp, __a, __shf);
++  return __builtin_sw_vextfs(__a, 0);
++}
++
++static __inline__ double __DEFAULT_FN_ATTRS simd_reduc_smaxd(doublev4 __a) {
++  doublev4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
++  doublev4 __cmp = simd_vfcmpltd(__a, __shf);
++  __a = simd_vfseleqd(__cmp, __a, __shf);
++  __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2);
++  __cmp = simd_vfcmpltd(__a, __shf);
++  __a = simd_vfseleqd(__cmp, __a, __shf);
++  return __builtin_sw_vextfd(__a, 0);
++}
++
++static __inline__ float __DEFAULT_FN_ATTRS simd_reduc_smins(floatv4 __a) {
++  floatv4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
++  floatv4 __cmp = simd_vfcmplts(__a, __shf);
++  __a = simd_vfseleqs(__cmp, __shf, __a);
++  __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2);
++  __cmp = simd_vfcmplts(__a, __shf);
++  __a = simd_vfseleqs(__cmp, __shf, __a);
++  return 
__builtin_sw_vextfs(__a, 0);
++}
++
++static __inline__ double __DEFAULT_FN_ATTRS simd_reduc_smind(doublev4 __a) {
++  doublev4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3);
++  doublev4 __cmp = simd_vfcmpltd(__a, __shf);
++  __a = simd_vfseleqd(__cmp, __shf, __a);
++  __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2);
++  __cmp = simd_vfcmpltd(__a, __shf);
++  __a = simd_vfseleqd(__cmp, __shf, __a);
++  return __builtin_sw_vextfd(__a, 0);
++}
++#endif
+diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
+index 5ee20554c..d8c57afe8 100644
+--- a/clang/lib/Sema/SemaChecking.cpp
++++ b/clang/lib/Sema/SemaChecking.cpp
+@@ -2008,6 +2008,8 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
+   case llvm::Triple::mips64:
+   case llvm::Triple::mips64el:
+     return CheckMipsBuiltinFunctionCall(TI, BuiltinID, TheCall);
++  case llvm::Triple::sw_64:
++    return CheckSw64BuiltinFunctionCall(BuiltinID, TheCall);
+   case llvm::Triple::systemz:
+     return CheckSystemZBuiltinFunctionCall(BuiltinID, TheCall);
+   case llvm::Triple::x86:
+@@ -5799,6 +5801,140 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
+   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
+ }
+ 
++bool Sema::CheckSw64VectorMemoryIntr(unsigned BuiltinID, CallExpr *TheCall) {
++  DeclRefExpr *DRE =
++      cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
++  if (BuiltinID == Sw64::BI__builtin_sw_vload ||
++      BuiltinID == Sw64::BI__builtin_sw_vloadu ||
++      BuiltinID == Sw64::BI__builtin_sw_vload_u ||
++      BuiltinID == Sw64::BI__builtin_sw_vloade ||
++      BuiltinID == Sw64::BI__builtin_sw_vloadnc ||
++      BuiltinID == Sw64::BI__builtin_sw_vstore ||
++      BuiltinID == Sw64::BI__builtin_sw_vstoreu ||
++      BuiltinID == Sw64::BI__builtin_sw_vstore_u ||
++      BuiltinID == Sw64::BI__builtin_sw_vstoreuh ||
++      BuiltinID == Sw64::BI__builtin_sw_vstoreul ||
++      BuiltinID == Sw64::BI__builtin_sw_vstorenc) {
++
++    bool isLoad = BuiltinID == Sw64::BI__builtin_sw_vload ||
++                  BuiltinID == Sw64::BI__builtin_sw_vloadu ||
++                  BuiltinID == Sw64::BI__builtin_sw_vload_u ||
++                  BuiltinID == Sw64::BI__builtin_sw_vloade ||
++                  BuiltinID == Sw64::BI__builtin_sw_vloadnc;
++
++    bool isLoadExt = BuiltinID == Sw64::BI__builtin_sw_vloade;
++
++    bool isExtMem = BuiltinID == Sw64::BI__builtin_sw_vloadu ||
++                    BuiltinID == Sw64::BI__builtin_sw_vload_u ||
++                    BuiltinID == Sw64::BI__builtin_sw_vloade ||
++                    BuiltinID == Sw64::BI__builtin_sw_vstoreu ||
++                    BuiltinID == Sw64::BI__builtin_sw_vstore_u ||
++                    BuiltinID == Sw64::BI__builtin_sw_vstoreuh ||
++                    BuiltinID == Sw64::BI__builtin_sw_vstoreul;
++
++    if (checkArgCount(*this, TheCall, isLoad ? 1 : 2))
++      return true;
++
++    Expr *PointerArg = TheCall->getArg(isLoad ? 0 : 1);
++    ExprResult PointerArgRes = DefaultFunctionArrayLvalueConversion(PointerArg);
++    if (PointerArgRes.isInvalid())
++      return true;
++    PointerArg = PointerArgRes.get();
++    TheCall->setArg(isLoad ? 
0 : 1, PointerArg);
++
++    const PointerType *pointerType =
++        PointerArg->getType()->getAs<PointerType>();
++    QualType ValType = pointerType->getPointeeType();
++    QualType VecTy;
++    bool isVoidPtr = pointerType->isVoidPointerType();
++    if (isExtMem) {
++      if (Context.getTypeSize(ValType) < 32 && !isVoidPtr) {
++        Diag(DRE->getBeginLoc(), diag::err_invalid_sw64_type_code);
++        return true;
++      }
++    }
++
++    if (ValType->isFloatingType() &&
++        (BuiltinID == Sw64::BI__builtin_sw_vloadnc)) {
++      if (Context.getTypeSize(ValType) <= 32) {
++        Diag(DRE->getBeginLoc(), diag::err_invalid_sw64_type_code);
++        return true;
++      }
++    }
++
++    // If the builtin is a store it has no return value; nothing more to do.
++    if (!isLoad)
++      return false;
++
++    if (ValType->isIntegerType())
++      VecTy =
++          Context.getExtVectorType(ValType, 256 / Context.getTypeSize(ValType));
++    else {
++      assert(ValType->isFloatingType() &&
++             "Builtin Value should be Integer or Floating type!");
++      VecTy = Context.getExtVectorType(ValType, 4);
++    }
++    if (isLoad) {
++      TheCall->setType(VecTy);
++      return false;
++    }
++  }
++  return true;
++}
++
++bool Sema::CheckSw64VectorShift(unsigned BuiltinID, CallExpr *TheCall) {
++  DeclRefExpr *DRE =
++      cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
++  if (BuiltinID == Sw64::BI__builtin_sw_vsll ||
++      BuiltinID == Sw64::BI__builtin_sw_vsrl ||
++      BuiltinID == Sw64::BI__builtin_sw_vsra ||
++      BuiltinID == Sw64::BI__builtin_sw_vrol) {
++    Expr *ShiftArg = TheCall->getArg(0);
++    Expr *ShiftImm = TheCall->getArg(1);
++    QualType ValType = ShiftArg->getType();
++    QualType Imm = ShiftImm->getType();
++
++    if (checkArgCount(*this, TheCall, 2))
++      return true;
++
++    if (ValType->isFloatingType() ||
++        !(ValType->isVectorType() && Imm->isIntegerType())) {
++      Diag(DRE->getBeginLoc(), diag::err_invalid_sw64_type_code);
++      return true;
++    }
++
++    TheCall->setType(ValType);
++    return false;
++  }
++  return true;
++}
++
++bool Sema::CheckSw64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
++  DeclRefExpr *DRE =
++      cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
++  switch (BuiltinID) {
++  case Sw64::BI__builtin_sw_vload:
++  case Sw64::BI__builtin_sw_vloadu:
++  case Sw64::BI__builtin_sw_vload_u:
++  case Sw64::BI__builtin_sw_vloade:
++  case Sw64::BI__builtin_sw_vloadnc:
++  case Sw64::BI__builtin_sw_vstore:
++  case Sw64::BI__builtin_sw_vstoreu:
++  case Sw64::BI__builtin_sw_vstore_u:
++  case Sw64::BI__builtin_sw_vstoreuh:
++  case Sw64::BI__builtin_sw_vstoreul:
++  case Sw64::BI__builtin_sw_vstorenc:
++    return CheckSw64VectorMemoryIntr(BuiltinID, TheCall);
++  case Sw64::BI__builtin_sw_vsll:
++  case Sw64::BI__builtin_sw_vsrl:
++  case Sw64::BI__builtin_sw_vsra:
++  case Sw64::BI__builtin_sw_vrol:
++    return CheckSw64VectorShift(BuiltinID, TheCall);
++  }
++
++  return false;
++}
++
+ bool Sema::CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI,
+                                                unsigned BuiltinID,
+                                                CallExpr *TheCall) {
+diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
+index ed69e802c..0d1e28fa9 100644
+--- a/clang/lib/Sema/SemaDeclAttr.cpp
++++ b/clang/lib/Sema/SemaDeclAttr.cpp
+@@ -7744,6 +7744,19 @@ handleWebAssemblyImportNameAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+   FD->addAttr(::new (S.Context) WebAssemblyImportNameAttr(S.Context, AL, Str));
+ }
+ 
++static void handleSw64InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
++  if (!isFunctionOrMethod(D)) {
++    S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type)
++        << AL << AL.isRegularKeywordAttribute() << ExpectedFunction;
++    return;
++  }
++
++  if (!AL.checkExactlyNumArgs(S, 
0))
++    return;
++
++  handleSimpleAttribute<Sw64InterruptAttr>(S, D, AL);
++}
++
+ static void handleRISCVInterruptAttr(Sema &S, Decl *D,
+                                      const ParsedAttr &AL) {
+   // Warn about repeated attributes.
+@@ -7826,6 +7839,9 @@ static void handleInterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+   case llvm::Triple::riscv64:
+     handleRISCVInterruptAttr(S, D, AL);
+     break;
++  case llvm::Triple::sw_64:
++    handleSw64InterruptAttr(S, D, AL);
++    break;
+   default:
+     handleARMInterruptAttr(S, D, AL);
+     break;
+-- 
+2.33.0
+
diff --git a/clang.spec b/clang.spec
index 864de19..865ad6d 100644
--- a/clang.spec
+++ b/clang.spec
@@ -1,4 +1,4 @@
-%define anolis_release 7
+%define anolis_release 8
 
 %global toolchain clang
 
@@ -80,6 +80,9 @@ Patch37: 0024-LoongArch-Support-march-la64v1.0-and-march-la64v1.1-.patch
 Patch38: 0025-LoongArch-Support-la664-100068.patch
 Patch39: 0026-LoongArch-Fix-test-issue-of-init-loongarch.c.patch
 
+# Patches for Sw64
+Patch40: 0001-Sw64-Add-Sw64-target-support-for-clang.patch
+
 Patch100: 0001-Bring-back-riscv64-anolis-linux-triplet.patch
 # Patches for clang-tools-extra
 # See https://reviews.llvm.org/D120301
@@ -506,6 +509,9 @@ LD_LIBRARY_PATH=%{buildroot}/%{_libdir} %{__ninja} check-all -C %{__cmake_buildd
 %{python3_sitelib}/clang/
 
 %changelog
+* Fri May 16 2025 swcompiler - 17.0.6-8
+- Add Sw64 support for clang
+
 * Fri Apr 25 2025 Shangtong Guo - 17.0.6-7
 - add support for riscv64 build
-- 
Gitee