From bd5b02f5ab28a82003c94966f429e35df1e3fbe1 Mon Sep 17 00:00:00 2001 From: swcompiler Date: Thu, 23 Jan 2025 14:45:22 +0800 Subject: [PATCH 1/3] [Sw64] Add Sw64 target support for llvm --- llvm/CMakeLists.txt | 3 +- llvm/cmake/config-ix.cmake | 4 + llvm/cmake/config.guess | 9 + llvm/include/llvm/BinaryFormat/ELF.h | 34 + .../llvm/BinaryFormat/ELFRelocs/Sw64.def | 44 + .../llvm/CodeGen/MachineCombinerPattern.h | 10 + .../llvm/ExecutionEngine/Orc/OrcABISupport.h | 40 + llvm/include/llvm/IR/CMakeLists.txt | 1 + llvm/include/llvm/IR/Intrinsics.td | 1 + llvm/include/llvm/IR/IntrinsicsSw64.td | 651 +++ llvm/include/llvm/MC/MCAsmInfo.h | 4 + llvm/include/llvm/MC/MCExpr.h | 26 + llvm/include/llvm/Object/ELFObjectFile.h | 5 + llvm/include/llvm/Support/Sw64ABIFlags.h | 39 + .../include/llvm/Support/Sw64TargetParser.def | 28 + llvm/include/llvm/Support/Sw64TargetParser.h | 53 + llvm/include/llvm/TargetParser/Host.h | 1 + llvm/include/llvm/TargetParser/Triple.h | 25 +- llvm/include/module.modulemap | 1 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 81 + .../CodeGen/TargetLoweringObjectFileImpl.cpp | 7 + .../Orc/EPCIndirectionUtils.cpp | 3 + .../ExecutionEngine/Orc/IndirectionUtils.cpp | 10 + llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 4 + .../lib/ExecutionEngine/Orc/LazyReexports.cpp | 2 + .../lib/ExecutionEngine/Orc/OrcABISupport.cpp | 262 ++ .../RuntimeDyld/CMakeLists.txt | 1 + .../RuntimeDyld/RuntimeDyldELF.cpp | 59 + .../RuntimeDyld/RuntimeDyldELF.h | 7 + .../Targets/RuntimeDyldELFSw64.cpp | 217 + .../RuntimeDyld/Targets/RuntimeDyldELFSw64.h | 61 + llvm/lib/IR/Function.cpp | 1 + llvm/lib/MC/ELFObjectWriter.cpp | 33 + llvm/lib/MC/MCAsmStreamer.cpp | 11 +- llvm/lib/MC/MCELFStreamer.cpp | 10 + llvm/lib/MC/MCExpr.cpp | 50 + llvm/lib/MC/MCObjectFileInfo.cpp | 3 + llvm/lib/MC/MCSectionELF.cpp | 4 + llvm/lib/Object/ELF.cpp | 24 + llvm/lib/Object/RelocationResolver.cpp | 27 + llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/Sw64TargetParser.cpp | 96 + llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt | 13 + .../Target/Sw64/AsmParser/Sw64AsmParser.cpp | 2005 +++++++++ llvm/lib/Target/Sw64/CMakeLists.txt | 64 + .../Target/Sw64/Disassembler/CMakeLists.txt | 11 + .../Sw64/Disassembler/Sw64Disassembler.cpp | 390 ++ .../Target/Sw64/InstPrinter/CMakeLists.txt | 10 + .../Sw64/InstPrinter/Sw64InstPrinter.cpp | 148 + .../Target/Sw64/InstPrinter/Sw64InstPrinter.h | 57 + .../Target/Sw64/MCTargetDesc/CMakeLists.txt | 22 + .../Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp | 31 + .../Sw64/MCTargetDesc/Sw64ABIFlagsSection.h | 127 + .../Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp | 29 + .../Target/Sw64/MCTargetDesc/Sw64ABIInfo.h | 77 + .../Sw64/MCTargetDesc/Sw64AsmBackend.cpp | 317 ++ .../Target/Sw64/MCTargetDesc/Sw64AsmBackend.h | 96 + .../Target/Sw64/MCTargetDesc/Sw64BaseInfo.h | 146 + .../Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp | 463 ++ .../Sw64/MCTargetDesc/Sw64ELFStreamer.cpp | 108 + .../Sw64/MCTargetDesc/Sw64ELFStreamer.h | 83 + .../Target/Sw64/MCTargetDesc/Sw64FixupKinds.h | 174 + .../Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp | 42 + .../Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h | 32 + .../Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp | 451 ++ .../Sw64/MCTargetDesc/Sw64MCCodeEmitter.h | 111 + .../Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp | 176 + .../lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h | 97 + .../Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp | 189 + .../Sw64/MCTargetDesc/Sw64MCTargetDesc.h | 66 + .../Sw64/MCTargetDesc/Sw64OptionRecord.cpp | 32 + .../Sw64/MCTargetDesc/Sw64TargetStreamer.cpp | 388 ++ llvm/lib/Target/Sw64/README.txt | 7 + 
llvm/lib/Target/Sw64/Sw64.h | 56 + llvm/lib/Target/Sw64/Sw64.td | 154 + llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp | 308 ++ llvm/lib/Target/Sw64/Sw64BranchSelector.cpp | 81 + llvm/lib/Target/Sw64/Sw64CallingConv.td | 72 + llvm/lib/Target/Sw64/Sw64CombineLS.cpp | 63 + llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp | 1141 +++++ llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp | 334 ++ llvm/lib/Target/Sw64/Sw64FrameLowering.cpp | 456 ++ llvm/lib/Target/Sw64/Sw64FrameLowering.h | 82 + llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp | 138 + llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp | 1016 +++++ llvm/lib/Target/Sw64/Sw64ISelLowering.cpp | 3984 +++++++++++++++++ llvm/lib/Target/Sw64/Sw64ISelLowering.h | 476 ++ llvm/lib/Target/Sw64/Sw64InstrFormats.td | 452 ++ llvm/lib/Target/Sw64/Sw64InstrFormatsV.td | 389 ++ llvm/lib/Target/Sw64/Sw64InstrInfo.cpp | 1012 +++++ llvm/lib/Target/Sw64/Sw64InstrInfo.h | 143 + llvm/lib/Target/Sw64/Sw64InstrInfo.td | 2084 +++++++++ llvm/lib/Target/Sw64/Sw64InstrVector.td | 1767 ++++++++ llvm/lib/Target/Sw64/Sw64LLRP.cpp | 475 ++ llvm/lib/Target/Sw64/Sw64MCInstLower.cpp | 281 ++ llvm/lib/Target/Sw64/Sw64MCInstLower.h | 44 + .../Target/Sw64/Sw64MachineFunctionInfo.cpp | 33 + .../lib/Target/Sw64/Sw64MachineFunctionInfo.h | 69 + llvm/lib/Target/Sw64/Sw64MacroFusion.cpp | 65 + llvm/lib/Target/Sw64/Sw64MacroFusion.h | 28 + llvm/lib/Target/Sw64/Sw64OptionRecord.h | 67 + .../Target/Sw64/Sw64PreLegalizerCombiner.cpp | 96 + llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp | 296 ++ llvm/lib/Target/Sw64/Sw64RegisterInfo.h | 79 + llvm/lib/Target/Sw64/Sw64RegisterInfo.td | 306 ++ llvm/lib/Target/Sw64/Sw64Relocations.h | 30 + llvm/lib/Target/Sw64/Sw64SchedCore3.td | 213 + llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td | 57 + llvm/lib/Target/Sw64/Sw64SchedCore4.td | 75 + llvm/lib/Target/Sw64/Sw64Schedule.td | 86 + llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp | 54 + llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h | 34 + llvm/lib/Target/Sw64/Sw64Subtarget.cpp | 117 + llvm/lib/Target/Sw64/Sw64Subtarget.h | 163 + llvm/lib/Target/Sw64/Sw64TargetMachine.cpp | 193 + llvm/lib/Target/Sw64/Sw64TargetMachine.h | 61 + llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp | 121 + llvm/lib/Target/Sw64/Sw64TargetObjectFile.h | 49 + llvm/lib/Target/Sw64/Sw64TargetStreamer.h | 150 + .../Target/Sw64/Sw64TargetTransformInfo.cpp | 787 ++++ .../lib/Target/Sw64/Sw64TargetTransformInfo.h | 137 + llvm/lib/Target/Sw64/Sw64VectorVarDefine.td | 317 ++ .../lib/Target/Sw64/TargetInfo/CMakeLists.txt | 10 + .../Target/Sw64/TargetInfo/Sw64TargetInfo.cpp | 24 + .../Target/Sw64/TargetInfo/Sw64TargetInfo.h | 12 + llvm/lib/TargetParser/Host.cpp | 10 + llvm/lib/TargetParser/Triple.cpp | 22 + llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll | 2 +- llvm/test/ExecutionEngine/MCJIT/lit.local.cfg | 2 + .../MCJIT/remote/lit.local.cfg | 4 + .../ExecutionEngine/OrcLazy/lit.local.cfg | 1 + .../tools/llvm-reduce/file-output-type.test | 1 + third-party/benchmark/src/cycleclock.h | 5 + 133 files changed, 26692 insertions(+), 7 deletions(-) create mode 100644 llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def create mode 100644 llvm/include/llvm/IR/IntrinsicsSw64.td create mode 100644 llvm/include/llvm/Support/Sw64ABIFlags.h create mode 100644 llvm/include/llvm/Support/Sw64TargetParser.def create mode 100644 llvm/include/llvm/Support/Sw64TargetParser.h create mode 100644 llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp create mode 100644 llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h create mode 100644 llvm/lib/Support/Sw64TargetParser.cpp 
create mode 100644 llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp create mode 100644 llvm/lib/Target/Sw64/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp create mode 100644 llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp create mode 100644 llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp create mode 100644 llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp create mode 100644 llvm/lib/Target/Sw64/README.txt create mode 100644 llvm/lib/Target/Sw64/Sw64.h create mode 100644 llvm/lib/Target/Sw64/Sw64.td create mode 100644 llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64BranchSelector.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64CallingConv.td create mode 100644 llvm/lib/Target/Sw64/Sw64CombineLS.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64FrameLowering.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64FrameLowering.h create mode 100644 llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ISelLowering.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64ISelLowering.h create mode 100644 llvm/lib/Target/Sw64/Sw64InstrFormats.td create mode 100644 llvm/lib/Target/Sw64/Sw64InstrFormatsV.td create mode 100644 llvm/lib/Target/Sw64/Sw64InstrInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64InstrInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64InstrInfo.td create mode 100644 llvm/lib/Target/Sw64/Sw64InstrVector.td create mode 100644 llvm/lib/Target/Sw64/Sw64LLRP.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64MCInstLower.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64MCInstLower.h create mode 100644 
llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64MacroFusion.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64MacroFusion.h create mode 100644 llvm/lib/Target/Sw64/Sw64OptionRecord.h create mode 100644 llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64RegisterInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64RegisterInfo.td create mode 100644 llvm/lib/Target/Sw64/Sw64Relocations.h create mode 100644 llvm/lib/Target/Sw64/Sw64SchedCore3.td create mode 100644 llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td create mode 100644 llvm/lib/Target/Sw64/Sw64SchedCore4.td create mode 100644 llvm/lib/Target/Sw64/Sw64Schedule.td create mode 100644 llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64Subtarget.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64Subtarget.h create mode 100644 llvm/lib/Target/Sw64/Sw64TargetMachine.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64TargetMachine.h create mode 100644 llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64TargetObjectFile.h create mode 100644 llvm/lib/Target/Sw64/Sw64TargetStreamer.h create mode 100644 llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp create mode 100644 llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h create mode 100644 llvm/lib/Target/Sw64/Sw64VectorVarDefine.td create mode 100644 llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt create mode 100644 llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp create mode 100644 llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index b0afb47a7243..3b69d027e006 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -441,6 +441,7 @@ set(LLVM_ALL_TARGETS PowerPC RISCV Sparc + Sw64 SystemZ VE WebAssembly @@ -458,7 +459,7 @@ set(LLVM_ALL_EXPERIMENTAL_TARGETS ) # List of targets with JIT support: -set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ) +set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ Sw64) set(LLVM_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index f63c3f1a351f..5d430f974bbf 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -512,6 +512,10 @@ elseif (LLVM_NATIVE_ARCH STREQUAL "m68k") set(LLVM_NATIVE_ARCH M68k) elseif (LLVM_NATIVE_ARCH MATCHES "loongarch") set(LLVM_NATIVE_ARCH LoongArch) +elseif (LLVM_NATIVE_ARCH MATCHES "sw64") + set(LLVM_NATIVE_ARCH Sw64) +elseif (LLVM_NATIVE_ARCH MATCHES "sw_64") + set(LLVM_NATIVE_ARCH Sw64) else () message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") endif () diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess index 71abbf939f97..121b903140eb 100644 --- a/llvm/cmake/config.guess +++ b/llvm/cmake/config.guess @@ -880,6 +880,15 @@ EOF if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} exit ;; + sw_64:Linux:*:* | sw_64:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + SW6) UNAME_MACHINE=sw_64 ;; + esac + UNAME_MACHINE=sw_64 + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-sunway-linux-gnu${LIBC} + exit ;; arm*:Linux:*:*) eval $set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index f5a7cdb387a6..bf13694cf142 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -320,6 +320,7 @@ enum { EM_VE = 251, // NEC SX-Aurora VE EM_CSKY = 252, // C-SKY 32-bit processor EM_LOONGARCH = 258, // LoongArch + EM_SW64 = 0x9916, // SW64 }; // Object file classes. @@ -604,6 +605,31 @@ enum { ODK_PAGESIZE = 11 // Page size information }; +// SW64 Specific e_flags +enum { + + EF_SW64_NOREORDER = 0x00000001, // Don't reorder instructions + EF_SW64_PIC = 0x00000002, // Position independent code + EF_SW64_CPIC = 0x00000004, // Call object with Position independent code + EF_SW64_ABI2 = 0x00000020, // File uses N32 ABI + EF_SW64_32BITMODE = 0x00000100, // Code compiled for a 64-bit machine + // in 32-bit mode + EF_SW64_FP64 = 0x00000200, // Code compiled for a 32-bit machine + // but uses 64-bit FP registers + EF_SW64_NAN2008 = 0x00000400, // Uses IEE 754-2008 NaN encoding + // ABI flags + EF_SW64_ABI_EABI64 = 0x00004000, // EABI in 64 bit mode. + EF_SW64_ABI = 0x0000f000, // Mask for selecting EF_SW64_ABI_ variant. + EF_SW64_32BIT = 0x00000001, // All addresses must be below 2GB. + EF_SW64_CANRELAX = 0x00000002 // All relocations needed for relaxation with + // code movement are present. +}; + +// ELF Relocation types for Sw64. +enum { +#include "ELFRelocs/Sw64.def" +}; + // Hexagon-specific e_flags enum { // Object processor version flags, bits[11:0] @@ -1075,6 +1101,11 @@ enum : unsigned { SHT_CSKY_ATTRIBUTES = 0x70000001U, + SHT_SW64_ABIFLAGS = 0x7000002a, // ABI information. + SHT_SW64_REGINFO = 0x70000002, // Register usage information + SHT_SW64_OPTIONS = 0x7000000d, // General options + SHT_SW64_DWARF = 0x7000001e, // DWARF debugging section. + SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type. SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. SHT_HIUSER = 0xffffffff // Highest type reserved for applications. @@ -1180,6 +1211,9 @@ enum : unsigned { // Section data is string data by default. SHF_MIPS_STRING = 0x80000000, + // Do not strip this section. 
+ SHF_SW64_NOSTRIP = 0x8000000, + // Make code section unreadable when in execute-only mode SHF_ARM_PURECODE = 0x20000000 }; diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def new file mode 100644 index 000000000000..0edecd02be6d --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/Sw64.def @@ -0,0 +1,44 @@ +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_SW_64_NONE, 0) +ELF_RELOC(R_SW_64_REFLONG, 1) +ELF_RELOC(R_SW_64_REFQUAD, 2) +ELF_RELOC(R_SW_64_GPREL32, 3) +ELF_RELOC(R_SW_64_LITERAL, 4) +ELF_RELOC(R_SW_64_LITUSE, 5) +ELF_RELOC(R_SW_64_GPDISP, 6) +ELF_RELOC(R_SW_64_BRADDR, 7) +ELF_RELOC(R_SW_64_HINT, 8) +ELF_RELOC(R_SW_64_SREL16, 9) +ELF_RELOC(R_SW_64_SREL32, 10) +ELF_RELOC(R_SW_64_SREL64, 11) +ELF_RELOC(R_SW_64_GPRELHIGH, 17) +ELF_RELOC(R_SW_64_GPRELLOW, 18) +ELF_RELOC(R_SW_64_GPREL16, 19) +ELF_RELOC(R_SW_64_COPY, 24) +ELF_RELOC(R_SW_64_GLOB_DAT, 25) +ELF_RELOC(R_SW_64_JMP_SLOT, 26) +ELF_RELOC(R_SW_64_RELATIVE, 27) +ELF_RELOC(R_SW_64_BRSGP, 28) +ELF_RELOC(R_SW_64_TLSGD, 29) +ELF_RELOC(R_SW_64_TLSLDM, 30) +ELF_RELOC(R_SW_64_DTPMOD64, 31) +ELF_RELOC(R_SW_64_GOTDTPREL, 32) +ELF_RELOC(R_SW_64_DTPREL64, 33) +ELF_RELOC(R_SW_64_DTPRELHI, 34) +ELF_RELOC(R_SW_64_DTPRELLO, 35) +ELF_RELOC(R_SW_64_DTPREL16, 36) +ELF_RELOC(R_SW_64_GOTTPREL, 37) +ELF_RELOC(R_SW_64_TPREL64, 38) +ELF_RELOC(R_SW_64_TPRELHI, 39) +ELF_RELOC(R_SW_64_TPRELLO, 40) +ELF_RELOC(R_SW_64_TPREL16, 41) +ELF_RELOC(R_SW_64_NUM, 42) +ELF_RELOC(R_SW_64_LITERAL_GOT, 43) +ELF_RELOC(R_SW_64_TLSREL_GOT, 44) +ELF_RELOC(R_SW_64_PC32, 48) +ELF_RELOC(R_SW_64_EH, 49) +ELF_RELOC(R_SW_64_DUMMY_LITERAL, 98) +ELF_RELOC(R_SW_64_DUMMY_LITUSE, 99) diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index 89eed7463bd7..f73715f6d30a 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -180,6 +180,16 @@ enum class MachineCombinerPattern { DPWSSD, FNMADD, + + // SW64 + VMULADDS_OP1, + VMULADDS_OP2, + VMULADDD_OP1, + VMULADDD_OP2, + VMULSUBS_OP1, + VMULSUBS_OP2, + VMULSUBD_OP1, + VMULSUBD_OP2, }; } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h index 5d25a3e85464..e514b0a46f52 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -416,6 +416,46 @@ public: unsigned NumStubs); }; +// @brief Sw64 support. +// +// Sw64 supports lazy JITing. +class OrcSw64 { +public: + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 40; + static constexpr unsigned StubSize = 32; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0x218; + + /// Write the resolver code into the given memory. The user is + /// responsible for allocating the memory and setting permissions. + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. 
+ static void writeResolverCode(char *ResolverWorkingMem, + ExecutorAddr ResolverTargetAddress, + ExecutorAddr ReentryFnAddr, + ExecutorAddr ReentryCtxAddr); + + /// Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(char *TrampolineBlockWorkingMem, + ExecutorAddr TrampolineBlockTargetAddress, + ExecutorAddr ResolverFnAddr, + unsigned NumTrampolines); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock(char *StubsBlockWorkingMem, + ExecutorAddr StubsBlockTargetAddress, + ExecutorAddr PointersBlockTargetAddress, + unsigned NumStubs); +}; + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt index 468d663796ed..7a1343b14dfe 100644 --- a/llvm/include/llvm/IR/CMakeLists.txt +++ b/llvm/include/llvm/IR/CMakeLists.txt @@ -22,4 +22,5 @@ tablegen(LLVM IntrinsicsWebAssembly.h -gen-intrinsic-enums -intrinsic-prefix=was tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86) tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore) tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve) +tablegen(LLVM IntrinsicsSw64.h -gen-intrinsic-enums -intrinsic-prefix=sw64) add_public_tablegen_target(intrinsics_gen) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e51c04fbad2f..20a8fa419465 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2535,6 +2535,7 @@ include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" include "llvm/IR/IntrinsicsRISCV.td" include "llvm/IR/IntrinsicsSPIRV.td" +include "llvm/IR/IntrinsicsSw64.td" include "llvm/IR/IntrinsicsVE.td" include "llvm/IR/IntrinsicsDirectX.td" include "llvm/IR/IntrinsicsLoongArch.td" diff --git a/llvm/include/llvm/IR/IntrinsicsSw64.td b/llvm/include/llvm/IR/IntrinsicsSw64.td new file mode 100644 index 000000000000..1d5671885a94 --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsSw64.td @@ -0,0 +1,651 @@ +//==- IntrinsicsSw64.td - Sw64 intrinsics -*- tablegen -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the Sw64-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "sw64" in { // All intrinsics start with "llvm.sw64.". 
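(Editorial note, not part of the submitted patch.) Each definition in this TableGen block pairs an llvm.sw64.* intrinsic with a ClangBuiltin name, so once the matching Clang builtin is wired up in the companion front-end patch, ordinary C/C++ code can reach the intrinsic directly. A minimal hedged sketch follows; the wrapper name swumulh is invented for illustration, and only __builtin_sw_64_umulh itself comes from the defs below.

    /* Sketch only: assumes a Clang build carrying the Sw64 builtins from this series. */
    unsigned long swumulh(unsigned long a, unsigned long b) {
      /* Expected to select the llvm.sw64.umulh intrinsic defined just below. */
      return __builtin_sw_64_umulh(a, b);
    }
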
+def int_sw64_umulh : ClangBuiltin<"__builtin_sw_64_umulh">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + +def int_sw64_crc32b : ClangBuiltin<"__builtin_sw64_crc32b">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32h : ClangBuiltin<"__builtin_sw64_crc32h">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32w : ClangBuiltin<"__builtin_sw64_crc32w">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32l : ClangBuiltin<"__builtin_sw64_crc32l">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32cb : ClangBuiltin<"__builtin_sw64_crc32cb">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32ch : ClangBuiltin<"__builtin_sw64_crc32ch">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32cw : ClangBuiltin<"__builtin_sw64_crc32cw">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_crc32cl : ClangBuiltin<"__builtin_sw64_crc32cl">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_sbt : ClangBuiltin<"__builtin_sw64_sbt">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_cbt : ClangBuiltin<"__builtin_sw64_cbt">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_ctpopow : ClangBuiltin<"__builtin_sw_ctpopow">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_ctlzow : ClangBuiltin<"__builtin_sw_ctlzow">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_reduc_plusw : ClangBuiltin<"__builtin_sw_reduc_plusw">, + Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_reduc_pluss : ClangBuiltin<"__builtin_sw_reduc_pluss">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_reduc_plusd : ClangBuiltin<"__builtin_sw_reduc_plusd">, + Intrinsic<[llvm_double_ty], [llvm_v4f64_ty], + [IntrNoMem]>; + +// SIMD Intrincs +def int_sw64_vaddw : ClangBuiltin<"__builtin_sw_vaddw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddw : ClangBuiltin<"__builtin_sw_vucaddw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddwi : ClangBuiltin<"__builtin_sw_vucaddwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vucsubw : ClangBuiltin<"__builtin_sw_vucsubw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucsubwi : ClangBuiltin<"__builtin_sw_vucsubwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddh : ClangBuiltin<"__builtin_sw_vucaddh">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddhi : ClangBuiltin<"__builtin_sw_vucaddhi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vucsubh : ClangBuiltin<"__builtin_sw_vucsubh">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucsubhi : ClangBuiltin<"__builtin_sw_vucsubhi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vucaddb : ClangBuiltin<"__builtin_sw_vucaddb">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucaddbi : 
ClangBuiltin<"__builtin_sw_vucaddbi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vucsubb : ClangBuiltin<"__builtin_sw_vucsubb">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vucsubbi : ClangBuiltin<"__builtin_sw_vucsubbi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vslls : ClangBuiltin<"__builtin_sw_vslls">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vslld : ClangBuiltin<"__builtin_sw_vslld">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrls : ClangBuiltin<"__builtin_sw_vsrls">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrld : ClangBuiltin<"__builtin_sw_vsrld">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; + +class sw64VectorIntArg + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>; +def int_sw64_vsll : sw64VectorIntArg; +def int_sw64_vsrl : sw64VectorIntArg; +def int_sw64_vsra : sw64VectorIntArg; +def int_sw64_vrol : sw64VectorIntArg; + +def int_sw64_vsllw : ClangBuiltin<"__builtin_sw_vsllw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsllh : ClangBuiltin<"__builtin_sw_vsllh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsllb : ClangBuiltin<"__builtin_sw_vslln">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vslll : ClangBuiltin<"__builtin_sw_vslll">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrlw : ClangBuiltin<"__builtin_sw_vsrlw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrlh : ClangBuiltin<"__builtin_sw_vsrlh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrlb : ClangBuiltin<"__builtin_sw_vsrlb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrll : ClangBuiltin<"__builtin_sw_vsrll">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsraw : ClangBuiltin<"__builtin_sw_vsraw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrah : ClangBuiltin<"__builtin_sw_vsrah">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsrab : ClangBuiltin<"__builtin_sw_vsrab">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsral : ClangBuiltin<"__builtin_sw_vsral">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_vrolw : ClangBuiltin<"__builtin_sw_vrolw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vrolwi : ClangBuiltin<"__builtin_sw_vrolwi">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vrolb : ClangBuiltin<"__builtin_sw_vrolb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vrolbi : ClangBuiltin<"__builtin_sw_vrolbi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vrolh : ClangBuiltin<"__builtin_sw_vrolh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vrolhi : ClangBuiltin<"__builtin_sw_vrolhi">, + 
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vroll : ClangBuiltin<"__builtin_sw_vroll">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vrolli : ClangBuiltin<"__builtin_sw_vrolli">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_sw64_sllow : ClangBuiltin<"__builtin_sw_sllow">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_srlow : ClangBuiltin<"__builtin_sw_srlow">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_sllowi : ClangBuiltin<"__builtin_sw_sllowi">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_srlowi : ClangBuiltin<"__builtin_sw_srlowi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_sw64_vsqrts : ClangBuiltin<"__builtin_sw_vsqrts">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vsqrtd : ClangBuiltin<"__builtin_sw_vsqrtd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; + +def int_sw64_vornotw : ClangBuiltin<"__builtin_sw_vornotw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_veqvw : ClangBuiltin<"__builtin_sw_veqvw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +def int_sw64_vfcmpeqs : ClangBuiltin<"__builtin_sw_vfcmpeqs">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcmples : ClangBuiltin<"__builtin_sw_vfcmples">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcmplts : ClangBuiltin<"__builtin_sw_vfcmplts">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcmpuns : ClangBuiltin<"__builtin_sw_vfcmpuns">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcmpeqd : ClangBuiltin<"__builtin_sw_vfcmpeqd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcmpled : ClangBuiltin<"__builtin_sw_vfcmpled">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcmpltd : ClangBuiltin<"__builtin_sw_vfcmpltd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcmpund : ClangBuiltin<"__builtin_sw_vfcmpund">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vcpyss : ClangBuiltin<"__builtin_sw_vcpyss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vcpysns : ClangBuiltin<"__builtin_sw_vcpysns">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vcpyses : ClangBuiltin<"__builtin_sw_vcpyses">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vcpysd : ClangBuiltin<"__builtin_sw_vcpysd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vcpysnd : ClangBuiltin<"__builtin_sw_vcpysnd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vcpysed : ClangBuiltin<"__builtin_sw_vcpysed">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vseleqw : ClangBuiltin<"__builtin_sw_vseleqw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vseleqwi : 
ClangBuiltin<"__builtin_sw_vseleqwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vselltw : ClangBuiltin<"__builtin_sw_vselltw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vselltwi : ClangBuiltin<"__builtin_sw_vselltwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsellew : ClangBuiltin<"__builtin_sw_vsellew">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vsellewi : ClangBuiltin<"__builtin_sw_vsellewi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vsellbcw : ClangBuiltin<"__builtin_sw_vsellbcw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vsellbcwi : ClangBuiltin<"__builtin_sw_vsellbcwi">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vfseleqs : ClangBuiltin<"__builtin_sw_vfseleqs">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfselles : ClangBuiltin<"__builtin_sw_vfselles">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfsellts : ClangBuiltin<"__builtin_sw_vfsellts">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfseleqd : ClangBuiltin<"__builtin_sw_vfseleqd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfselled : ClangBuiltin<"__builtin_sw_vfselled">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfselltd : ClangBuiltin<"__builtin_sw_vfselltd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +// Multiply-add instructions +// FIXME +def int_sw64_vnmsd : ClangBuiltin<"__builtin_sw_vnmsd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +// Vector Insert Intrinsic +def int_sw64_vinsfs : ClangBuiltin<"__builtin_sw_vinsfs">, + Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsfd : ClangBuiltin<"__builtin_sw_vinsfd">, + Intrinsic<[llvm_v4f64_ty], [llvm_double_ty, llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsw : ClangBuiltin<"__builtin_sw_vinsw">, + Intrinsic<[llvm_v8i32_ty], [llvm_i64_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsb : ClangBuiltin<"__builtin_sw_vinsb">, + Intrinsic<[llvm_v32i8_ty], [llvm_i64_ty, llvm_v32i8_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsh : ClangBuiltin<"__builtin_sw_vinsh">, + Intrinsic<[llvm_v16i16_ty], [llvm_i64_ty, llvm_v16i16_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vinsl : ClangBuiltin<"__builtin_sw_vinsl">, + Intrinsic<[llvm_v4i64_ty], [llvm_i64_ty, llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_vextfs : ClangBuiltin<"__builtin_sw_vextfs">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vextfd : ClangBuiltin<"__builtin_sw_vextfd">, + Intrinsic<[llvm_double_ty], [llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vextw : ClangBuiltin<"__builtin_sw_vextw">, + Intrinsic<[llvm_i64_ty], [llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vextl : 
ClangBuiltin<"__builtin_sw_vextl">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vconw : ClangBuiltin<"__builtin_sw_vconw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_ptr_ty], + [IntrNoMem, IntrArgMemOnly]>; +def int_sw64_vconl : ClangBuiltin<"__builtin_sw_vconl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_ptr_ty], + [IntrNoMem, IntrArgMemOnly]>; +def int_sw64_vcons : ClangBuiltin<"__builtin_sw_vcons">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_ptr_ty], + [IntrNoMem, IntrArgMemOnly]>; +def int_sw64_vcond : ClangBuiltin<"__builtin_sw_vcond">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_ptr_ty], + [IntrNoMem, IntrArgMemOnly]>; + +def int_sw64_vlogzz : ClangBuiltin<"__builtin_sw_vlogzz">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +def int_sw64_vshfw : ClangBuiltin<"__builtin_sw_vshfw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vucaddh_v16hi : ClangBuiltin<"__builtin_sw_vucaddh_v16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vucaddhi_v16hi : ClangBuiltin<"__builtin_sw_vucaddhi_v16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vucsubh_v16hi : ClangBuiltin<"__builtin_sw_vucsubh_v16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vucsubhi_v16hi : ClangBuiltin<"__builtin_sw_vucsubhi_v16hi">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vucaddb_v32qi : ClangBuiltin<"__builtin_sw_vucaddb_v32qi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vucaddbi_v32qi : ClangBuiltin<"__builtin_sw_vucaddbi_v32qi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vucsubb_v32qi : ClangBuiltin<"__builtin_sw_vucsubb_v32qi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vucsubbi_v32qi : ClangBuiltin<"__builtin_sw_vucsubbi_v32qi">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vmaxb : ClangBuiltin<"__builtin_sw_vmaxb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vminb : ClangBuiltin<"__builtin_sw_vminb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vmaxh : ClangBuiltin<"__builtin_sw_vmaxh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vminh : ClangBuiltin<"__builtin_sw_vminh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vmaxw : ClangBuiltin<"__builtin_sw_vmaxw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vminw : ClangBuiltin<"__builtin_sw_vminw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vmaxl : ClangBuiltin<"__builtin_sw_vmaxl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vminl : ClangBuiltin<"__builtin_sw_vminl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vumaxb : ClangBuiltin<"__builtin_sw_vumaxb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vuminb 
: ClangBuiltin<"__builtin_sw_vuminb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vumaxh : ClangBuiltin<"__builtin_sw_vumaxh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vuminh : ClangBuiltin<"__builtin_sw_vuminh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_vumaxw : ClangBuiltin<"__builtin_sw_vumaxw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vuminw : ClangBuiltin<"__builtin_sw_vuminw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vumaxl : ClangBuiltin<"__builtin_sw_vumaxl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vuminl : ClangBuiltin<"__builtin_sw_vuminl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_sraow : ClangBuiltin<"__builtin_sw_sraow">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_sraowi : ClangBuiltin<"__builtin_sw_sraowi">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_sw64_vcmpgew : ClangBuiltin<"__builtin_sw_vcmpgew">, + Intrinsic<[llvm_i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmpeqw : ClangBuiltin<"__builtin_sw_vcmpeqw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmplew : ClangBuiltin<"__builtin_sw_vcmplew">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmpltw : ClangBuiltin<"__builtin_sw_vcmpltw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmpulew : ClangBuiltin<"__builtin_sw_vcmpulew">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vcmpultw : ClangBuiltin<"__builtin_sw_vcmpultw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +def int_sw64_vcmpueqb : ClangBuiltin<"__builtin_sw_vcmpueqb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vcmpugtb : ClangBuiltin<"__builtin_sw_vcmpugtb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_vsumw : ClangBuiltin<"__builtin_sw_vsumw">, + Intrinsic<[llvm_i64_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vsuml : ClangBuiltin<"__builtin_sw_vsuml">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vbinvw : ClangBuiltin<"__builtin_sw_vbinvw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vwinv : ClangBuiltin<"__builtin_sw_vwinv">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vseleql : ClangBuiltin<"__builtin_sw_vseleql">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_veqvb : ClangBuiltin<"__builtin_sw_veqvb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_sw64_veqvh : ClangBuiltin<"__builtin_sw_veqvh">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_sw64_veqvl : ClangBuiltin<"__builtin_sw_veqvl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vsums : ClangBuiltin<"__builtin_sw_vsums">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vsumd : ClangBuiltin<"__builtin_sw_vsumd">, + 
Intrinsic<[llvm_double_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrecs : ClangBuiltin<"__builtin_sw_vfrecs">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfrecd : ClangBuiltin<"__builtin_sw_vfrecd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfris : ClangBuiltin<"__builtin_sw_vfris">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfris_g : ClangBuiltin<"__builtin_sw_vfris_g">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfris_p : ClangBuiltin<"__builtin_sw_vfris_p">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfris_z : ClangBuiltin<"__builtin_sw_vfris_z">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfris_n : ClangBuiltin<"__builtin_sw_vfris_n">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfrid : ClangBuiltin<"__builtin_sw_vfrid">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrid_g : ClangBuiltin<"__builtin_sw_vfrid_g">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrid_p : ClangBuiltin<"__builtin_sw_vfrid_p">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrid_z : ClangBuiltin<"__builtin_sw_vfrid_z">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfrid_n : ClangBuiltin<"__builtin_sw_vfrid_n">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vmaxs : ClangBuiltin<"__builtin_sw_vmaxs">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vmins : ClangBuiltin<"__builtin_sw_vmins">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vmaxd : ClangBuiltin<"__builtin_sw_vmaxd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vmind : ClangBuiltin<"__builtin_sw_vmind">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtsd : ClangBuiltin<"__builtin_sw_vfcvtsd">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +def int_sw64_vfcvtds : ClangBuiltin<"__builtin_sw_vfcvtds">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtsh : ClangBuiltin<"__builtin_sw_vfcvtsh">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vfcvths : ClangBuiltin<"__builtin_sw_vfcvths">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtls : ClangBuiltin<"__builtin_sw_vfcvtls">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtld : ClangBuiltin<"__builtin_sw_vfcvtld">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl : ClangBuiltin<"__builtin_sw_vfcvtdl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl_g : ClangBuiltin<"__builtin_sw_vfcvtdl_g">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl_p : ClangBuiltin<"__builtin_sw_vfcvtdl_p">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl_z : ClangBuiltin<"__builtin_sw_vfcvtdl_z">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtdl_n : ClangBuiltin<"__builtin_sw_vfcvtdl_n">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; +def int_sw64_vfcvtsl : 
ClangBuiltin<"__builtin_sw_vfcvtsl">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + +def int_sw64_vshfq : ClangBuiltin<"__builtin_sw_vshfq">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_sw64_vshfqb : ClangBuiltin<"__builtin_sw_vshfqb">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; + +def int_sw64_vsm3msw : ClangBuiltin<"__builtin_sw_vsm3msw">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_sw64_vsm3r : ClangBuiltin<"__builtin_sw_vsm3r">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vsm4key : ClangBuiltin<"__builtin_sw_vsm4key">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_sw64_vsm4r : ClangBuiltin<"__builtin_sw_vsm4r">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +def int_sw64_vldw : ClangBuiltin<"__builtin_sw_vldw">, + Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vldl : ClangBuiltin<"__builtin_sw_vldl">, + Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vlds : ClangBuiltin<"__builtin_sw_vlds">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vldd : ClangBuiltin<"__builtin_sw_vldd">, + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; + +def int_sw64_vload : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vloadu : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vload_u : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vloade : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vloadnc : Intrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_vstore : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstoreu : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstore_u : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstoreuh : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstoreul : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_sw64_vstorenc : Intrinsic<[], [llvm_anyvector_ty, llvm_anyptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; + +def int_sw64_loadu : ClangBuiltin<"__builtin_sw_loadu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_loadu_i : ClangBuiltin<"__builtin_sw_loadu_i">, + Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_loadu_l : ClangBuiltin<"__builtin_sw_loadu_l">, + Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_loadu_f : ClangBuiltin<"__builtin_sw_loadu_f">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_loadu_d : ClangBuiltin<"__builtin_sw_loadu_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_sw64_load_u : 
ClangBuiltin<"__builtin_sw_load_u">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; +} diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index c28cd1211235..125fdefd094f 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -536,6 +536,9 @@ protected: // %hi(), and similar unary operators. bool HasMipsExpressions = false; + // If true, then the assembler supports the .set directive. + bool HasSw64SetDirective = false; + // If true, use Motorola-style integers in Assembly (ex. $0ac). bool UseMotorolaIntegers = false; @@ -881,6 +884,7 @@ public: bool canRelaxRelocations() const { return RelaxELFRelocations; } void setRelaxELFRelocations(bool V) { RelaxELFRelocations = V; } bool hasMipsExpressions() const { return HasMipsExpressions; } + bool hasSw64SetDirective() const { return HasSw64SetDirective; } bool needsFunctionDescriptors() const { return NeedsFunctionDescriptors; } bool shouldUseMotorolaIntegers() const { return UseMotorolaIntegers; } }; diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h index 5bc5e04f79ff..5d788b02ae6d 100644 --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -314,6 +314,32 @@ public: VK_PPC_NOTOC, // symbol@notoc VK_PPC_PCREL_OPT, // .reloc expr, R_PPC64_PCREL_OPT, expr + VK_SW64_ELF_LITERAL, + VK_SW64_LITUSE_ADDR, + VK_SW64_LITUSE_BASE, + VK_SW64_LITUSE_BYTOFF, + VK_SW64_LITUSE_JSR, + VK_SW64_LITUSE_TLSGD, + VK_SW64_LITUSE_TLSLDM, + VK_SW64_LITUSE_JSRDIRECT, + VK_SW64_GPDISP, + VK_SW64_GPDISP_HI16, + VK_SW64_GPDISP_LO16, + VK_SW64_GPREL_HI16, + VK_SW64_GPREL_LO16, + VK_SW64_GPREL16, + VK_SW64_BRSGP, + VK_SW64_TLSGD, + VK_SW64_TLSLDM, + VK_SW64_GOTDTPREL16, + VK_SW64_DTPREL_HI16, + VK_SW64_DTPREL_LO16, + VK_SW64_DTPREL16, + VK_SW64_GOTTPREL16, + VK_SW64_TPREL_HI16, + VK_SW64_TPREL_LO16, + VK_SW64_TPREL16, + VK_COFF_IMGREL32, // symbol@imgrel (image-relative) VK_Hexagon_LO16, diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index f3016cc141b0..3c004b384e32 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -1254,6 +1254,8 @@ StringRef ELFObjectFile::getFileFormatName() const { return "elf64-ve"; case ELF::EM_LOONGARCH: return "elf64-loongarch"; + case ELF::EM_SW64: + return "elf64-sw_64"; default: return "elf64-unknown"; } @@ -1352,6 +1354,9 @@ template Triple::ArchType ELFObjectFile::getArch() const { case ELF::EM_XTENSA: return Triple::xtensa; + case ELF::EM_SW64: + return Triple::sw_64; + default: return Triple::UnknownArch; } diff --git a/llvm/include/llvm/Support/Sw64ABIFlags.h b/llvm/include/llvm/Support/Sw64ABIFlags.h new file mode 100644 index 000000000000..44fc9dbf102d --- /dev/null +++ b/llvm/include/llvm/Support/Sw64ABIFlags.h @@ -0,0 +1,39 @@ +//===--- Sw64ABIFlags.h - SW64 ABI flags ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the constants for the ABI flags structure contained +// in the .Sw64.abiflags section. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_Sw64ABIFLAGS_H +#define LLVM_SUPPORT_Sw64ABIFLAGS_H + +namespace llvm { +namespace Sw64 { + +// Values for the xxx_size bytes of an ABI flags structure. +enum AFL_REG { + AFL_REG_NONE = 0x00, // No registers + AFL_REG_32 = 0x01, // 32-bit registers + AFL_REG_64 = 0x02, // 64-bit registers + AFL_REG_128 = 0x03 // 128-bit registers +}; + +// Values for the flags1 word of an ABI flags structure. +enum AFL_FLAGS1 { AFL_FLAGS1_ODDSPREG = 1 }; + +enum AFL_EXT { + AFL_EXT_NONE = 0, // None + AFL_EXT_OCTEON = 5 // Cavium Networks Octeon +}; +} // namespace Sw64 +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/Sw64TargetParser.def b/llvm/include/llvm/Support/Sw64TargetParser.def new file mode 100644 index 000000000000..cb598dc25c23 --- /dev/null +++ b/llvm/include/llvm/Support/Sw64TargetParser.def @@ -0,0 +1,28 @@ +//===- Sw64TargetParser.def - Sw64 target parsing defines ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides defines to build up the Sw64 target parser's logic. +// +//===----------------------------------------------------------------------===// + +#ifndef PROC_ALIAS +#define PROC_ALIAS(NAME, SW64) +#endif + +#undef PROC_ALIAS + +#ifndef SW64_CPU +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) +#endif + +SW64_CPU(INVALID, {"invalid"}, FK_INVALID, {""}) +SW64_CPU(SW6B, {"sw6b"}, FK_64BIT, {"core3b"}) +SW64_CPU(SW4D, {"sw4d"}, FK_64BIT, {"core3b"}) +SW64_CPU(SW8A, {"sw8a"}, FK_64BIT, {"core4"}) + +#undef SW64_CPU diff --git a/llvm/include/llvm/Support/Sw64TargetParser.h b/llvm/include/llvm/Support/Sw64TargetParser.h new file mode 100644 index 000000000000..ceb0caff4a78 --- /dev/null +++ b/llvm/include/llvm/Support/Sw64TargetParser.h @@ -0,0 +1,53 @@ +//===-- Sw64TargetParser - Parser for Sw64 features -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise SW64 hardware features +// such as FPU/CPU/ARCH and extension names. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_SW64TARGETPARSER_H +#define LLVM_SUPPORT_SW64TARGETPARSER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include + +// FIXME:This should be made into class design,to avoid dupplication. 
+namespace llvm { +class StringRef; + +namespace Sw64 { + +enum CPUKind : unsigned { CK_INVALID = 0, CK_SW6B, CK_SW4D, CK_SW8A }; + +enum FeatureKind : unsigned { + FK_INVALID = 0, + FK_NONE = 1, + FK_STDEXTM = 1 << 2, + FK_STDEXTA = 1 << 3, + FK_STDEXTF = 1 << 4, + FK_STDEXTD = 1 << 5, + FK_STDEXTC = 1 << 6, + FK_64BIT = 1 << 7, +}; + +bool checkCPUKind(CPUKind Kind, bool IsSw64); +bool checkTuneCPUKind(CPUKind Kind, bool IsSw64); +CPUKind parseARCHKind(StringRef CPU); +CPUKind parseTuneCPUKind(StringRef CPU, bool IsSw64); +StringRef getMcpuFromMArch(StringRef CPU); +void fillValidCPUArchList(SmallVectorImpl &Values, bool IsSw64); +void fillValidTuneCPUArchList(SmallVectorImpl &Values, bool IsSw64); +StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsSw64); +CPUKind parseCPUArch(StringRef CPU); + +} // namespace Sw64 +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/TargetParser/Host.h b/llvm/include/llvm/TargetParser/Host.h index af72045a8fe6..8da1991e3035 100644 --- a/llvm/include/llvm/TargetParser/Host.h +++ b/llvm/include/llvm/TargetParser/Host.h @@ -67,6 +67,7 @@ namespace sys { StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForSPARC(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForBPF(); + StringRef getHostCPUNameForSW64(StringRef ProcCpuinfoContent); /// Helper functions to extract CPU details from CPUID on x86. namespace x86 { diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h index e0ac050b1c7b..91d4190836fb 100644 --- a/llvm/include/llvm/TargetParser/Triple.h +++ b/llvm/include/llvm/TargetParser/Triple.h @@ -17,6 +17,7 @@ #undef NetBSD #undef mips #undef sparc +#undef sw_64 namespace llvm { @@ -106,7 +107,8 @@ public: renderscript32, // 32-bit RenderScript renderscript64, // 64-bit RenderScript ve, // NEC SX-Aurora Vector Engine - LastArchType = ve + sw_64, // sw64:basic Arch for SW + LastArchType = sw_64 }; enum SubArchType { NoSubArch, @@ -153,6 +155,11 @@ public: MipsSubArch_r6, + Sw64SubArch_4d, + Sw64SubArch_6a, + Sw64SubArch_6b, + Sw64SubArch_8a, + PPCSubArch_spe, // SPIR-V sub-arch corresponds to its version. @@ -890,6 +897,21 @@ public: return isMIPS32() || isMIPS64(); } + /// Tests whether the target is SW64 64-bit (little endian). + bool isSw64() const { return getArch() == Triple::sw_64; } + + bool isSw6a() const { return getSubArch() == Triple::Sw64SubArch_6a; } + + bool isSw6b() const { return getSubArch() == Triple::Sw64SubArch_6b; } + + bool isSw4d() const { return getSubArch() == Triple::Sw64SubArch_4d; } + + bool isSw8a() const { return getSubArch() == Triple::Sw64SubArch_8a; } + + bool isSW() const { + return isSw64() || isSw6a() || isSw6b() || isSw4d() || isSw8a(); + } + /// Tests whether the target is PowerPC (32- or 64-bit LE or BE). 
bool isPPC() const { return getArch() == Triple::ppc || getArch() == Triple::ppc64 || @@ -1140,5 +1162,4 @@ public: } // End llvm namespace - #endif diff --git a/llvm/include/module.modulemap b/llvm/include/module.modulemap index 4c2ba437edb9..22256bb2cdcb 100644 --- a/llvm/include/module.modulemap +++ b/llvm/include/module.modulemap @@ -100,6 +100,7 @@ module LLVM_BinaryFormat { textual header "llvm/BinaryFormat/ELFRelocs/PowerPC.def" textual header "llvm/BinaryFormat/ELFRelocs/RISCV.def" textual header "llvm/BinaryFormat/ELFRelocs/Sparc.def" + textual header "llvm/BinaryFormat/ELFRelocs/Sw64.def" textual header "llvm/BinaryFormat/ELFRelocs/SystemZ.def" textual header "llvm/BinaryFormat/ELFRelocs/VE.def" textual header "llvm/BinaryFormat/ELFRelocs/x86_64.def" diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a84d35a6ea4e..5a2fca731ba7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -408,6 +408,87 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); NewLHS = Call.first; NewRHS = DAG.getConstant(0, dl, RetVT); + if (Triple(this->getTargetMachine().getTargetTriple()).getArch() == + Triple::sw_64) { + + ShouldInvertCC = false; + switch (CCCode) { + + case llvm::ISD::SETOGT: + case llvm::ISD::SETUGT: + case llvm::ISD::SETGT: + // from: + // ldi $1,0($31) + // cmplt $1,$0,$0 + // to: + // + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; + NewRHS = DAG.getConstant(0, dl, RetVT); + break; + case llvm::ISD::SETOGE: + case llvm::ISD::SETUGE: + case llvm::ISD::SETGE: + // from: + // ldi $1,-1($31) + // cmplt $1,$0,$0 + // to: + // ldi $1 0($31) + // complt $1,$0,$0 + + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; + NewRHS = DAG.getConstant(1, dl, RetVT); + break; + case llvm::ISD::SETOLT: + case llvm::ISD::SETULT: + case llvm::ISD::SETLT: + + // from: + // cmplt $0,0,$0 + // to: + // cmplt $31,$0,$0 + + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewRHS = Call.first; + NewLHS = DAG.getConstant(0, dl, RetVT); + break; + case llvm::ISD::SETOLE: + case llvm::ISD::SETULE: + case llvm::ISD::SETLE: + // from: + // cmplt $0,-1,$0 + // to: + // cmplt $31,$0,$0 + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewRHS = Call.first; + NewLHS = DAG.getConstant(1, dl, RetVT); + break; + case llvm::ISD::SETUEQ: + case llvm::ISD::SETOEQ: + case llvm::ISD::SETEQ: + // from: + // cmplt $0,0,$0 + // to: + // cmplt $0,-1,$0 + // + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; + NewRHS = DAG.getConstant(1, dl, RetVT); + break; + case llvm::ISD::SETONE: + ShouldInvertCC = true; + Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; + NewRHS = DAG.getConstant(1, dl, RetVT); + break; + case llvm::ISD::SETO: + ShouldInvertCC = true; + LLVM_FALLTHROUGH; + default: + break; + } + } CCCode = getCmpLibcallCC(LC1); if (ShouldInvertCC) { diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4ffffd85ee53..0980e8238365 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -290,6 +290,13 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, TTypeEncoding = 
dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; break; + case Triple::sw_64: + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + break; default: break; } diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp index 833be826f8ae..8141c8ce74af 100644 --- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp @@ -262,6 +262,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) { case Triple::riscv64: return CreateWithABI(EPC); + case Triple::sw_64: + return CreateWithABI(EPC); + case Triple::x86_64: if (TT.getOS() == Triple::OSType::Win32) return CreateWithABI(EPC); diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index a0d81cdf2086..d8520bb9680b 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -158,6 +158,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, return CCMgrT::Create(ES, ErrorHandlerAddress); } + case Triple::sw_64: { + typedef orc::LocalJITCompileCallbackManager CCMgrT; + return CCMgrT::Create(ES, ErrorHandlerAddress); + } + case Triple::x86_64: { if (T.getOS() == Triple::OSType::Win32) { typedef orc::LocalJITCompileCallbackManager CCMgrT; @@ -224,6 +229,11 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) { orc::LocalIndirectStubsManager>(); }; + case Triple::sw_64: + return []() { + return std::make_unique>(); + }; + case Triple::x86_64: if (T.getOS() == Triple::OSType::Win32) { return [](){ diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 7c7c2f000368..da3a19b25501 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -913,6 +913,10 @@ LLJIT::createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES) { S.JTMB->getTargetTriple().getArch() == Triple::ArchType::ppc64le)) Layer->setAutoClaimResponsibilityForObjectSymbols(true); + if (S.JTMB->getTargetTriple().isOSBinFormatELF() && + S.JTMB->getTargetTriple().getArch() == Triple::ArchType::sw_64) + Layer->setAutoClaimResponsibilityForObjectSymbols(true); + // FIXME: Explicit conversion to std::unique_ptr added to silence // errors from some GCC / libstdc++ bots. Remove this conversion (i.e. // just return ObjLinkingLayer) once those bots are upgraded. 
diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index d95a642934f1..0e5dc629ddd8 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -139,6 +139,8 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, case Triple::riscv64: return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); + case Triple::sw_64: + return LocalLazyCallThroughManager::Create(ES, ErrorHandlerAddr); case Triple::x86_64: if (T.getOS() == Triple::OSType::Win32) diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp index 6d568199378a..07c1d14daabd 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -915,6 +915,268 @@ void OrcMips64::writeIndirectStubsBlock(char *StubsBlockWorkingMem, } } +void OrcSw64::writeResolverCode(char *ResolverWorkingMem, + ExecutorAddr ResolverTargetAddress, + ExecutorAddr ReentryFnAddr, + ExecutorAddr ReentryCtxAddr) { + const uint32_t ResolverCode[] = { + // resolver_entry: + 0xfbdefe38, // ldi sp,-456(sp) + 0xac1e0000, // stl v0,0(sp) + 0xae1e0008, // stl a0,8(sp) + 0xae3e0010, // stl a1,16(sp) + 0xae5e0018, // stl a2,24(sp) + 0xae7e0020, // stl a3,32(sp) + 0xae9e0028, // stl a4,40(sp) + 0xaebe0030, // stl a5,48(sp) + 0xad3e0038, // stl s0,56(sp) + 0xad5e0040, // stl s1,64(sp) + 0xad7e0048, // stl s2,72(sp) + 0xad9e0050, // stl s3,80(sp) + 0xadbe0058, // stl s4,88(sp) + 0xadde0060, // stl s5,96(sp) + 0xac3e0068, // stl t0,104(sp) + 0xac5e0070, // stl t1,112(sp) + 0xac7e0078, // stl t2,120(sp) + 0xac9e0080, // stl t3,128(sp) + 0xacbe0088, // stl t4,136(sp) + 0xacde0090, // stl t5,144(sp) + 0xacfe0098, // stl t6,152(sp) + 0xad1e00a0, // stl t7,160(sp) + 0xaede00a8, // stl t8,168(sp) + 0xaefe00b0, // stl t9,176(sp) + 0xaf1e00b8, // stl t10,184(sp) + 0xaf3e00c0, // stl t11,192(sp) + 0xaf7e00c8, // stl t12,200(sp) + 0xadfe00d0, // stl fp,208(sp) + 0xaf5e00d8, // stl ra,216(sp) + + 0xbc5e00e0, // fstd $f2,224(sp) + 0xbc7e00e8, // fstd $f3,232(sp) + 0xbc9e00f0, // fstd $f4,240(sp) + 0xbcbe00f8, // fstd $f5,248(sp) + 0xbcde0100, // fstd $f6,256(sp) + 0xbcfe0108, // fstd $f7,264(sp) + 0xbd1e0110, // fstd $f8,272(sp) + 0xbd3e0118, // fstd $f9,280(sp) + 0xbd5e0120, // fstd $f10,288(sp) + 0xbd7e0128, // fstd $f11,296(sp) + 0xbd9e0130, // fstd $f12,304(sp) + 0xbdbe0138, // fstd $f13,312(sp) + 0xbdde0140, // fstd $f14,320(sp) + 0xbdfe0148, // fstd $f15,328(sp) + 0xbe1e0150, // fstd $f16,336(sp) + 0xbe3e0158, // fstd $f17,344(sp) + 0xbe5e0160, // fstd $f18,352(sp) + 0xbe7e0168, // fstd $f19,360(sp) + 0xbe9e0170, // fstd $f20,368(sp) + 0xbebe0178, // fstd $f21,376(sp) + 0xbede0180, // fstd $f22,384(sp) + 0xbefe0188, // fstd $f23,392(sp) + 0xbf1e0190, // fstd $f24,400(sp) + 0xbf3e0198, // fstd $f25,408(sp) + 0xbf5e01a0, // fstd $f26,416(sp) + 0xbf7e01a8, // fstd $f27,424(sp) + 0xbf9e01b0, // fstd $f28,432(sp) + 0xbfbe01b8, // fstd $f29,440(sp) + 0xbfde01c0, // fstd $f30,448(sp) + + // JIT re-entry ctx addr. 
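+ // The six zero words below are address placeholders; writeResolverCode
+ // patches them with ldih/ldi/sll sequences that materialize the JIT
+ // re-entry context address in $16, sixteen bits at a time.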
+ 0x00000000, // ldih $16,ctxhighest($31) + 0x00000000, // ldi $16,ctxhigher($16) + 0x00000000, // sll $16,16,$16 + 0x00000000, // ldi $16,ctxhi($16) + 0x00000000, // sll $16,16,$16 + 0x00000000, // ldi $16,ctxlo($16) + 0x435a0751, // or ra,ra,a1 + 0xfa31ffe0, // ldi a1,-32(a1) + // JIT re-entry fn addr: + 0x00000000, // ldih $27,reentry($31) + 0x00000000, // ldi $27,reentry($27) + 0x00000000, // sll $27,16,$27 + 0x00000000, // ldi $27,reentryhi($27) + 0x00000000, // sll $27,16,$27 + 0x00000000, // ldi $27,reentrylo($27) + 0x075b0000, // call ra,(t12),6c + 0x43ff075f, // nop + + 0x9fde01c0, // fldd $f30,448(sp) + 0x9fbe01b8, // fldd $f29,440(sp) + 0x9f9e01b0, // fldd $f28,432(sp) + 0x9f7e01a8, // fldd $f27,424(sp) + 0x9f5e01a0, // fldd $f26,416(sp) + 0x9f3e0198, // fldd $f25,408(sp) + 0x9f1e0190, // fldd $f24,400(sp) + 0x9efe0188, // fldd $f23,392(sp) + 0x9ede0180, // fldd $f22,384(sp) + 0x9ebe0178, // fldd $f21,376(sp) + 0x9e9e0170, // fldd $f20,368(sp) + 0x9e7e0168, // fldd $f19,360(sp) + 0x9e5e0160, // fldd $f18,352(sp) + 0x9e3e0158, // fldd $f17,344(sp) + 0x9e1e0150, // fldd $f16,336(sp) + 0x9dfe0148, // fldd $f15,328(sp) + 0x9dde0140, // fldd $f14,320(sp) + 0x9dbe0138, // fldd $f13,312(sp) + 0x9d9e0130, // fldd $f12,304(sp) + 0x9d7e0128, // fldd $f11,296(sp) + 0x9d5e0120, // fldd $f10,288(sp) + 0x9d3e0118, // fldd $f9,280(sp) + 0x9d1e0110, // fldd $f8,272(sp) + 0x9cfe0108, // fldd $f7,264(sp) + 0x9cde0100, // fldd $f6,256(sp) + 0x9cbe00f8, // fldd $f5,248(sp) + 0x9c9e00f0, // fldd $f4,240(sp) + 0x9c7e00e8, // fldd $f3,232(sp) + 0x9c5e00e0, // fldd $f2,224(sp) + + 0x8f5e00d8, // ldl ra,216(sp) + 0x8dfe00d0, // ldl fp,208(sp) + 0x8f7e00c8, // ldl t12,200(sp) + 0x8f3e00c0, // ldl t11,192(sp) + 0x8f1e00b8, // ldl t10,184(sp) + 0x8efe00b0, // ldl t9,176(sp) + 0x8ede00a8, // ldl t8,168(sp) + 0x8d1e00a0, // ldl t7,160(sp) + 0x8cfe0098, // ldl t6,152(sp) + 0x8cde0090, // ldl t5,144(sp) + 0x8cbe0088, // ldl t4,136(sp) + 0x8c9e0080, // ldl t3,128(sp) + 0x8c7e0078, // ldl t2,120(sp) + 0x8c5e0070, // ldl t1,112(sp) + 0x8c3e0068, // ldl t0,104(sp) + 0x8dde0060, // ldl s5,96(sp) + 0x8dbe0058, // ldl s4,88(sp) + 0x8d9e0050, // ldl s3,80(sp) + 0x8d7e0048, // ldl s2,72(sp) + 0x8d5e0040, // ldl s1,64(sp) + 0x8d3e0038, // ldl s0,56(sp) + 0x8ebe0030, // ldl a5,48(sp) + 0x8e9e0028, // ldl a4,40(sp) + 0x8e7e0020, // ldl a3,32(sp) + 0x8e5e0018, // ldl a2,24(sp) + 0x8e3e0010, // ldl a1,16(sp) + 0x8e1e0008, // ldl a0,8(sp) + 0xfbde01c8, // ldi sp,456(sp) + + 0x4339075a, // or t11,t11,ra + 0x4000075b, // or v0,v0,t12 + 0x0ffb0000, // jmp zero,(t12),c4 + }; + const unsigned ReentryFnAddrOffset = 0x108; // JIT re-entry fn addr lui + const unsigned ReentryCtxAddrOffset = 0xe8; // JIT re-entry ctx addr lui + + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + + uint32_t ReentryCtxLDIh = + 0xfe1f0000 | (((ReentryCtxAddr.getValue() >> 48) + + ((ReentryCtxAddr.getValue() >> 47) & 1)) & + 0xFFFF); + uint32_t ReentryCtxLDI = + 0xfa100000 | (((ReentryCtxAddr.getValue() >> 32) + + ((ReentryCtxAddr.getValue() >> 31) & 1)) & + 0xFFFF); + uint32_t ReentryCtxSLL = 0x4a020910; + uint32_t ReentryCtxLDI2 = + 0xfa100000 | (((ReentryCtxAddr.getValue() >> 16) + + ((ReentryCtxAddr.getValue() >> 15) & 1)) & + 0xFFFF); + uint32_t ReentryCtxSLL2 = 0x4a020910; + uint32_t ReentryCtxLDI3 = 0xfa100000 | (ReentryCtxAddr.getValue() & 0xFFFF); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLDIh, + sizeof(ReentryCtxLDIh)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxLDI, + 
sizeof(ReentryCtxLDI)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 8, &ReentryCtxSLL, + sizeof(ReentryCtxSLL)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 12, &ReentryCtxLDI2, + sizeof(ReentryCtxLDI2)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 16, &ReentryCtxSLL2, + sizeof(ReentryCtxSLL2)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 20, &ReentryCtxLDI3, + sizeof(ReentryCtxLDI3)); + + uint32_t ReentryFnLDIh = + 0xff7f0000 | (((ReentryFnAddr.getValue() >> 48) + + ((ReentryFnAddr.getValue() >> 47) & 1)) & + 0xFFFF); + uint32_t ReentryFnLDI = + 0xfb7b0000 | (((ReentryFnAddr.getValue() >> 32) + + ((ReentryFnAddr.getValue() >> 31) & 1)) & + 0xFFFF); + uint32_t ReentryFnSLL = 0x4b62091b; + uint32_t ReentryFnLDI2 = + 0xfb7b0000 | (((ReentryFnAddr.getValue() >> 16) + + ((ReentryFnAddr.getValue() >> 15) & 1)) & + 0xFFFF); + uint32_t ReentryFnSLL2 = 0x4b62091b; + uint32_t ReentryFnLDI3 = 0xfb7b0000 | (ReentryFnAddr.getValue() & 0xFFFF); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLDIh, + sizeof(ReentryFnLDIh)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnLDI, + sizeof(ReentryFnLDI)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 8, &ReentryFnSLL, + sizeof(ReentryFnSLL)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 12, &ReentryFnLDI2, + sizeof(ReentryFnLDI2)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 16, &ReentryFnSLL2, + sizeof(ReentryFnSLL2)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 20, &ReentryFnLDI3, + sizeof(ReentryFnLDI3)); +} + +void OrcSw64::writeTrampolines(char *TrampolineBlockWorkingMem, + ExecutorAddr TrampolineBlockTargetAddress, + ExecutorAddr ResolverAddr, + unsigned NumTrampolines) { + + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); + uint64_t HighestAddr = + (ResolverAddr.getValue() >> 48) + ((ResolverAddr.getValue() >> 47) & 1); + uint64_t HigherAddr = + (ResolverAddr.getValue() >> 32) + ((ResolverAddr.getValue() >> 31) & 1); + uint64_t HiAddr = + (ResolverAddr.getValue() >> 16) + ((ResolverAddr.getValue() >> 15) & 1); + + for (unsigned I = 0; I < NumTrampolines; ++I) { + Trampolines[10 * I + 0] = 0x435a0759; // or ra,ra,t11 + Trampolines[10 * I + 1] = 0xff7f0000 | (HighestAddr & 0xFFFF); + Trampolines[10 * I + 2] = 0xfb7b0000 | (HigherAddr & 0xFFFF); + Trampolines[10 * I + 3] = 0x4b62091b; // sll + Trampolines[10 * I + 4] = 0xfb7b0000 | (HiAddr & 0xFFFF); + Trampolines[10 * I + 5] = 0x4b62091b; // sll2 + Trampolines[10 * I + 6] = 0xfb7b0000 | (ResolverAddr.getValue() & 0xFFFF); + Trampolines[10 * I + 7] = 0x075b0000; // call + Trampolines[10 * I + 8] = 0x43ff075f; // nop + Trampolines[10 * I + 9] = 0x43ff075f; // nop + } +} + +void OrcSw64::writeIndirectStubsBlock(char *StubsBlockWorkingMem, + ExecutorAddr StubsBlockTargetAddress, + ExecutorAddr PointersBlockTargetAddress, + unsigned NumStubs) { + + // Populate the stubs page stubs and mark it executable. 
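+ // Each stub is eight 32-bit words: an ldih/ldi/sll/ldi/sll sequence builds
+ // the upper bits of the matching pointer-table entry's address in $27, an
+ // ldl (with the low 16 bits as its displacement) loads the target through
+ // it, a jmp transfers control, and a trailing nop pads the stub.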
+ uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrAddr = PointersBlockTargetAddress.getValue(); + + for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { + uint64_t HighestAddr = (PtrAddr >> 48) + ((PtrAddr >> 47) & 1); + uint64_t HigherAddr = (PtrAddr >> 32) + ((PtrAddr >> 31) & 1); + uint64_t HiAddr = (PtrAddr >> 16) + ((PtrAddr >> 15) & 1); + Stub[8 * I + 0] = 0xff7f0000 | (HighestAddr & 0xFFFF); // ldih + Stub[8 * I + 1] = 0xfb7b0000 | (HigherAddr & 0xFFFF); // ldi + Stub[8 * I + 2] = 0x4b62091b; // sll + Stub[8 * I + 3] = 0xfb7b0000 | (HiAddr & 0xFFFF); // ldi + Stub[8 * I + 4] = 0x4b62091b; // sll2 + Stub[8 * I + 5] = 0x8f7b0000 | (PtrAddr & 0xFFFF); // ldl + Stub[8 * I + 6] = 0x0ffb0000; // jmp $31,($27),0 + Stub[8 * I + 7] = 0x43ff075f; // nop + } +} + void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem, ExecutorAddr ResolverTargetAddress, ExecutorAddr ReentryFnAddr, diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt index 1278e2f43c3b..79c1fa6a4a04 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMRuntimeDyld RuntimeDyldELF.cpp RuntimeDyldMachO.cpp Targets/RuntimeDyldELFMips.cpp + Targets/RuntimeDyldELFSw64.cpp DEPENDS intrinsics_gen diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index d439b1b4ebfb..66ed10693b9e 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -13,6 +13,7 @@ #include "RuntimeDyldELF.h" #include "RuntimeDyldCheckerImpl.h" #include "Targets/RuntimeDyldELFMips.h" +#include "Targets/RuntimeDyldELFSw64.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" @@ -241,6 +242,8 @@ llvm::RuntimeDyldELF::create(Triple::ArchType Arch, case Triple::mips64: case Triple::mips64el: return std::make_unique(MemMgr, Resolver); + case Triple::sw_64: + return make_unique(MemMgr, Resolver); } } @@ -1878,6 +1881,42 @@ RuntimeDyldELF::processRelocationRef( } else { processSimpleRelocation(SectionID, Offset, RelType, Value); } + } else if (Arch == Triple::sw_64) { + uint32_t r_type = RelType & 0xff; + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); + LLVM_DEBUG(dbgs() << "Resolve Sw64 reloc" << TargetName << "\n"); + if (r_type == ELF::R_SW_64_GPDISP) { + TargetName = "gphi"; + StringMap::iterator i = GOTSymbolOffsets.find(TargetName); + if (i != GOTSymbolOffsets.end()) + RE.SymOffset = i->second; + else { + RE.SymOffset = allocateGOTEntries(1); + GOTSymbolOffsets[TargetName] = RE.SymOffset; + } + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + } else if (RelType == ELF::R_SW_64_BRADDR) { + // This is an Sw64 branch relocation, need to use a stub function. 
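+ // Stub emission for branch relocations is not implemented yet, so abort
+ // rather than silently mis-resolve the branch.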
+ LLVM_DEBUG(dbgs() << "\t\tThis is a Sw64 branch relocation."); + llvm_unreachable(" Sw64 branch relocation not yet supported."); + } else if (r_type == ELF::R_SW_64_LITERAL) { + StringMap::iterator a = GOTSymbolOffsets.find(TargetName); + if (a != GOTSymbolOffsets.end()) + RE.SymOffset = a->second; + else { + RE.SymOffset = allocateGOTEntries(1); + GOTSymbolOffsets[TargetName] = RE.SymOffset; + } + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + } else { + processSimpleRelocation(SectionID, Offset, RelType, Value); + } } else { if (Arch == Triple::x86) { Value.Addend += support::ulittle32_t::ref(computePlaceholderAddress(SectionID, Offset)); @@ -2221,6 +2260,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { case Triple::aarch64_be: case Triple::ppc64: case Triple::ppc64le: + case Triple::sw_64: case Triple::systemz: Result = sizeof(uint64_t); break; @@ -2390,6 +2430,25 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, } GOTSymbolOffsets.clear(); } + if (Arch == Triple::sw_64) { + // To correctly resolve Sw64 GOT relocations, we need a mapping from + // object's sections to GOTs. + for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end(); + SI != SE; ++SI) { + if (SI->relocation_begin() != SI->relocation_end()) { + Expected RelSecOrErr = SI->getRelocatedSection(); + if (!RelSecOrErr) + return make_error( + toString(RelSecOrErr.takeError())); + + section_iterator RelocatedSection = *RelSecOrErr; + ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); + assert(i != SectionMap.end()); + SectionToGOTMap[i->second] = GOTSectionID; + } + } + GOTSymbolOffsets.clear(); + } } // Look for and record the EH frame section. diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index dfdd98cb3a34..13fa4e6ef5ac 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -15,6 +15,8 @@ #include "RuntimeDyldImpl.h" #include "llvm/ADT/DenseMap.h" +#include +using namespace std; using namespace llvm; @@ -60,6 +62,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); + void resolveSW64Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int32_t Addend); + unsigned getMaxStubSize() const override { if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) return 20; // movz; movk; movk; movk; br @@ -75,6 +80,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { return 6; // 2-byte jmp instruction + 32-bit relative address else if (Arch == Triple::systemz) return 16; + else if (Arch == Triple::sw_64) + return 16; else return 0; } diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp new file mode 100644 index 000000000000..81a819abc74f --- /dev/null +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.cpp @@ -0,0 +1,217 @@ +//===-- RuntimeDyldELFSw64.cpp ---- ELF/Sw64 specific code. -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "RuntimeDyldELFSw64.h" +#include "llvm/BinaryFormat/ELF.h" + +#define DEBUG_TYPE "dyld" + +void RuntimeDyldELFSw64::resolveRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + + resolveSw64Relocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, + RE.SymOffset, RE.SectionID); +} + +uint64_t RuntimeDyldELFSw64::evaluateRelocation(const RelocationEntry &RE, + uint64_t Value, + uint64_t Addend) { + const SectionEntry &Section = Sections[RE.SectionID]; + Value = evaluateSw64Relocation(Section, RE.Offset, Value, RE.RelType, Addend, + RE.SymOffset, RE.SectionID); + return Value; +} + +void RuntimeDyldELFSw64::applyRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + applySw64Relocation(Section.getAddressWithOffset(RE.Offset), Value, + RE.RelType); + return; +} + +int64_t RuntimeDyldELFSw64::evaluateSw64Relocation( + const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, + int64_t Addend, uint64_t SymOffset, SID SectionID) { + + LLVM_DEBUG(dbgs() << "evaluateSw64Relocation, LocalAddress: 0x" + << format("%llx", Section.getAddressWithOffset(Offset)) + << " GOTAddr: 0x" + << format("%llx", + getSectionLoadAddress(SectionToGOTMap[SectionID])) + << " FinalAddress: 0x" + << format("%llx", Section.getLoadAddressWithOffset(Offset)) + << " Value: 0x" << format("%llx", Value) << " Type: 0x" + << format("%x", Type) << " Addend: 0x" + << format("%llx", Addend) + << " Offset: " << format("%llx", Offset) + << " SID: " << format("%d", SectionID) + << " SymOffset: " << format("%x", SymOffset) << "\n"); + + switch (Type) { + default: + llvm_unreachable("Not implemented relocation type!"); + break; + case ELF::R_SW_64_GPDISP: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + uint32_t *LocalAddress = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + + uint8_t *LocalGOTAddr = + getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset; + uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, getGOTEntrySize()); + + LLVM_DEBUG(dbgs() << "Debug gpdisp: " + << " GOTAddr: 0x" << format("%llx", GOTAddr) + << " GOTEntry: 0x" << format("%llx", GOTEntry) + << " LocalGOTAddr: 0x" << format("%llx", LocalGOTAddr) + << " LocalAddress: 0x" << format("%llx", LocalAddress) + << "\n"); + if (GOTEntry) + assert(GOTEntry == Value && "GOT entry has two different addresses."); + else + writeBytesUnaligned(Value, LocalGOTAddr, getGOTEntrySize()); + + return (int64_t)GOTAddr + 0x8000 - (int64_t)LocalAddress; + } + case ELF::R_SW_64_LITERAL: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + uint32_t *LocalAddress = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + + uint8_t *LocalGOTAddr = + getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset; + uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, getGOTEntrySize()); + + LLVM_DEBUG(dbgs() << "Debug literal: " + << " GOTAddr: 0x" << format("%llx", GOTAddr) + << " GOTEntry: 0x" << format("%llx", GOTEntry) + << " LocalGOTAddr: 0x" << format("%llx", LocalGOTAddr) + << " LocalAddress: 0x" << format("%llx", LocalAddress) + << "\n"); + + Value += Addend; + if (GOTEntry) + assert(GOTEntry == Value && "GOT entry has two different addresses."); + else + writeBytesUnaligned(Value, LocalGOTAddr, getGOTEntrySize()); + + if (SymOffset > 65536) + report_fatal_error(".got subsegment 
exceeds 64K (literal)!!\n"); + + if ((SymOffset) < 32768) + return (int64_t)(SymOffset - 0x8000); + else + return (int64_t)(0x8000 - SymOffset); + } + case ELF::R_SW_64_GPRELHIGH: { + // Get the higher 16-bits. + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + uint64_t Disp = Value + Addend - (GOTAddr + 0x8000); + if (Disp & 0x8000) + return ((Disp + 0x8000) >> 16) & 0xffff; + else + return (Disp >> 16) & 0xffff; + } + case ELF::R_SW_64_GPRELLOW: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + + return (Value + Addend - (GOTAddr + 0x8000)) & 0xffff; + } + case ELF::R_SW_64_REFQUAD: { + return Value + Addend; + } + case ELF::R_SW_64_SREL32: { + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); + return Value + Addend - FinalAddress; + } + case ELF::R_SW_64_GPREL32: { + uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]); + return Value + Addend - (GOTAddr + 0x7ff0); + } + case ELF::R_SW_64_TPRELHI: + case ELF::R_SW_64_TPRELLO: + report_fatal_error("Current Sw64 JIT does not support TPREL relocs"); + break; + case ELF::R_SW_64_LITERAL_GOT: + case ELF::R_SW_64_HINT: + case ELF::R_SW_64_LITUSE: + return 0; + } + return 0; +} + +void RuntimeDyldELFSw64::applySw64Relocation(uint8_t *TargetPtr, int64_t Value, + uint32_t Type) { + uint32_t Insn = readBytesUnaligned(TargetPtr, 4); + int64_t Disp_hi, Disp_lo; + + switch (Type) { + default: + llvm_unreachable("Unknown relocation type!"); + break; + case ELF::R_SW_64_GPDISP: { + uint32_t Insn1 = readBytesUnaligned(TargetPtr + 4, 4); + if ((Value > 2147483647LL) || (Value < -2147483648LL)) { + llvm::dbgs() << "gpdisp Value=" << Value << "\n"; + report_fatal_error(".got subsegment exceeds 2GB (gpdisp)!!\n"); + } + + Disp_hi = (Value + 0x8000) >> 16; + Disp_lo = Value & 0xffff; + + Insn = (Insn & 0xffff0000) | (Disp_hi & 0x0000ffff); + Insn1 = (Insn1 & 0xffff0000) | (Disp_lo & 0x0000ffff); + + writeBytesUnaligned(Insn, TargetPtr, 4); + writeBytesUnaligned(Insn1, TargetPtr + 4, 4); + break; + } + case ELF::R_SW_64_LITERAL: + Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff); + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_SW_64_LITERAL_GOT: + Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff); + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_SW_64_GPRELHIGH: + case ELF::R_SW_64_GPRELLOW: + Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff); + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + case ELF::R_SW_64_REFQUAD: + writeBytesUnaligned(Value, TargetPtr, 8); + break; + case ELF::R_SW_64_SREL32: + writeBytesUnaligned(Value & 0xffffffff, TargetPtr, 4); + break; + case ELF::R_SW_64_GPREL32: + writeBytesUnaligned(Value & 0xffffffff, TargetPtr, 4); + break; + } +} + +void RuntimeDyldELFSw64::resolveSw64Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend, + uint64_t SymOffset, + SID SectionID) { + uint32_t r_type = Type & 0xff; + + // RelType is used to keep information for which relocation type we are + // applying relocation. 
+ uint32_t RelType = r_type; + int64_t CalculatedValue = evaluateSw64Relocation( + Section, Offset, Value, RelType, Addend, SymOffset, SectionID); + + applySw64Relocation(Section.getAddressWithOffset(Offset), CalculatedValue, + RelType); +} diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h new file mode 100644 index 000000000000..c333dc4bdf85 --- /dev/null +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFSw64.h @@ -0,0 +1,61 @@ +//===-- RuntimeDyldELFSw64.h ---- ELF/Sw64 specific code. -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFSw64_H +#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFSw64_H + +#include "../RuntimeDyldELF.h" +#include + +#define DEBUG_TYPE "dyld" + +namespace llvm { + +class RuntimeDyldELFSw64 : public RuntimeDyldELF { +public: + typedef uint64_t TargetPtrT; + + RuntimeDyldELFSw64(RuntimeDyld::MemoryManager &MM, + JITSymbolResolver &Resolver) + : RuntimeDyldELF(MM, Resolver) {} + + void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override; + +protected: + void resolveSw64Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend, + uint64_t SymOffset, SID SectionID); + + uint64_t GOTOffset = 0; + uint64_t GPOffset_Modify = 0; + +private: + /// A object file specific relocation resolver + /// \param RE The relocation to be resolved + /// \param Value Target symbol address to apply the relocation action + uint64_t evaluateRelocation(const RelocationEntry &RE, uint64_t Value, + uint64_t Addend); + + /// A object file specific relocation resolver + /// \param RE The relocation to be resolved + /// \param Value Target symbol address to apply the relocation action + void applyRelocation(const RelocationEntry &RE, uint64_t Value); + + int64_t evaluateSw64Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend, + uint64_t SymOffset, SID SectionID); + + void applySw64Relocation(uint8_t *TargetPtr, int64_t CalculatedValue, + uint32_t Type); +}; +} // namespace llvm + +#undef DEBUG_TYPE + +#endif diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index ec2620efac38..eec4fb8054e4 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -44,6 +44,7 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsSw64.h" #include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 6a6befdd3054..0c45a7d4ffff 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -1318,6 +1318,8 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, // in a relocation with a null section which is the desired result. case MCSymbolRefExpr::VK_PPC_TOCBASE: return false; + case MCSymbolRefExpr::VK_SW64_GPDISP: + return false; // These VariantKind cause the relocation to refer to something other than // the symbol itself, like a linker generated table. 
Since the address of @@ -1501,6 +1503,21 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, SecA ? cast(SecA->getBeginSymbol()) : nullptr; if (SectionSymbol) SectionSymbol->setUsedInReloc(); + if (TargetObjectWriter->getEMachine() == ELF::EM_SW64) { + const MCFixupKindInfo &FKI = + Asm.getBackend().getFixupKindInfo((MCFixupKind)Fixup.getKind()); + if (strcmp(FKI.Name, "fixup_SW64_GPDISP_HI16") == 0) { + + Addend = 4; + const auto *RenamedSymA = + cast(Asm.getContext().getOrCreateSymbol(".text")); + + RenamedSymA->setUsedInReloc(); + ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, C); + Relocations[&FixupSection].push_back(Rec); + return; + } + } ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, C); Relocations[&FixupSection].push_back(Rec); return; @@ -1511,6 +1528,22 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, if (const MCSymbolELF *R = Renames.lookup(SymA)) RenamedSymA = R; + if (TargetObjectWriter->getEMachine() == ELF::EM_SW64) { + const MCFixupKindInfo &FKI = + Asm.getBackend().getFixupKindInfo((MCFixupKind)Fixup.getKind()); + if (strcmp(FKI.Name, "fixup_SW64_GPDISP_HI16") == 0) { + Addend = 4; + SymA = nullptr; + for (auto it = Asm.symbol_begin(), ie = Asm.symbol_end(); it != ie; + ++it) { + if (it->isInSection() && &(it->getSection()) == Fragment->getParent()) { + RenamedSymA = cast(&*it); + break; + } + } + } + } + if (ViaWeakRef) RenamedSymA->setIsWeakrefUsedInReloc(); else diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 06de70ad2f39..09d41f78889e 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -681,9 +681,14 @@ void MCAsmStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) { if (E->inlineAssignedExpr()) EmitSet = false; if (EmitSet) { - OS << ".set "; - Symbol->print(OS, MAI); - OS << ", "; + if (MAI->hasSw64SetDirective()) { + Symbol->print(OS, MAI); + OS << " = "; + } else { + OS << ".set "; + Symbol->print(OS, MAI); + OS << ", "; + } Value->print(OS, MAI); EmitEOL(); diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index 653ff4e9435a..abdd002f9d84 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -472,6 +472,16 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HI: case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA: case MCSymbolRefExpr::VK_PPC_TLSLD: + case MCSymbolRefExpr::VK_SW64_TLSGD: + case MCSymbolRefExpr::VK_SW64_TLSLDM: + case MCSymbolRefExpr::VK_SW64_GOTDTPREL16: + case MCSymbolRefExpr::VK_SW64_DTPREL_HI16: + case MCSymbolRefExpr::VK_SW64_DTPREL_LO16: + case MCSymbolRefExpr::VK_SW64_DTPREL16: + case MCSymbolRefExpr::VK_SW64_GOTTPREL16: + case MCSymbolRefExpr::VK_SW64_TPREL_HI16: + case MCSymbolRefExpr::VK_SW64_TPREL_LO16: + case MCSymbolRefExpr::VK_SW64_TPREL16: break; } getAssembler().registerSymbol(symRef.getSymbol()); diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index c9ff1865cf91..4864468da236 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -347,6 +347,56 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_PPC_LOCAL: return "local"; case VK_PPC_NOTOC: return "notoc"; case VK_PPC_PCREL_OPT: return "<>"; + case VK_SW64_ELF_LITERAL: + return "ELF_LITERAL"; + case VK_SW64_LITUSE_ADDR: + return "LITUSE_ADDR"; + case VK_SW64_LITUSE_BASE: + return "LITUSE_BASE"; + case VK_SW64_LITUSE_BYTOFF: + return "LITUSE_BYTOFF"; + case VK_SW64_LITUSE_JSR: + return "LITUSE_JSR"; 
+ case VK_SW64_LITUSE_TLSGD: + return "LITUSE_TLSGD"; + case VK_SW64_LITUSE_TLSLDM: + return "LITUSE_TLSLDM"; + case VK_SW64_LITUSE_JSRDIRECT: + return "LITUSE_JSRDIRECT"; + case VK_SW64_GPDISP: + return "GPDISP"; + case VK_SW64_GPDISP_HI16: + return "GPDISP_HI16"; + case VK_SW64_GPDISP_LO16: + return "GPDISP_LO16"; + case VK_SW64_GPREL_HI16: + return "GPREL_HI16"; + case VK_SW64_GPREL_LO16: + return "GPREL_LO16"; + case VK_SW64_GPREL16: + return "GPREL16"; + case VK_SW64_BRSGP: + return "BRSGP"; + case VK_SW64_TLSGD: + return "TLSGD"; + case VK_SW64_TLSLDM: + return "TLSLDM"; + case VK_SW64_GOTDTPREL16: + return "GOTDTPREL16"; + case VK_SW64_DTPREL_HI16: + return "DTPREL_HI16"; + case VK_SW64_DTPREL_LO16: + return "DTPREL_LO16"; + case VK_SW64_DTPREL16: + return "DTPREL16"; + case VK_SW64_GOTTPREL16: + return "GOTTPREL16"; + case VK_SW64_TPREL_HI16: + return "TPREL_HI16"; + case VK_SW64_TPREL_LO16: + return "TPREL_LO16"; + case VK_SW64_TPREL16: + return "TPREL16"; case VK_COFF_IMGREL32: return "IMGREL"; case VK_Hexagon_LO16: return "LO16"; case VK_Hexagon_HI16: return "HI16"; diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index 0b5109e41e71..745ab757839c 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -365,6 +365,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { case Triple::xtensa: FDECFIEncoding = dwarf::DW_EH_PE_sdata4; break; + case Triple::sw_64: + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + break; default: FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; break; diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp index 666252ffcb74..768fa2cb6bb1 100644 --- a/llvm/lib/MC/MCSectionELF.cpp +++ b/llvm/lib/MC/MCSectionELF.cpp @@ -153,6 +153,10 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T, // Print hex value of the flag while we do not have // any standard symbolic representation of the flag. OS << "0x7000001e"; + else if (Type == ELF::SHT_SW64_DWARF) + // Print hex value of the flag while we do not have + // any standard symbolic representation of the flag. 
+ OS << "0x7000001e"; else if (Type == ELF::SHT_LLVM_ODRTAB) OS << "llvm_odrtab"; else if (Type == ELF::SHT_LLVM_LINKER_OPTIONS) diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 0d1862e57371..62c4d41beeb1 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -181,6 +181,13 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, break; } break; + case ELF::EM_SW64: + switch (Type) { +#include "llvm/BinaryFormat/ELFRelocs/Sw64.def" + default: + break; + } + break; default: break; } @@ -233,6 +240,8 @@ uint32_t llvm::object::getELFRelativeRelocationType(uint32_t Machine) { break; case ELF::EM_LOONGARCH: return ELF::R_LARCH_RELATIVE; + case ELF::EM_SW64: + break; default: break; } @@ -276,6 +285,14 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC); STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_STATIC); } + case ELF::EM_SW64: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_SW64_REGINFO); + STRINGIFY_ENUM_CASE(ELF, SHT_SW64_OPTIONS); + STRINGIFY_ENUM_CASE(ELF, SHT_SW64_ABIFLAGS); + STRINGIFY_ENUM_CASE(ELF, SHT_SW64_DWARF); + } + break; default: break; } @@ -498,6 +515,13 @@ std::string ELFFile::getDynamicTagAsString(unsigned Arch, } break; + case ELF::EM_SW64: + switch (Type) { +#define SW64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) +#include "llvm/BinaryFormat/DynamicTags.def" +#undef SW64_DYNAMIC_TAG + } + case ELF::EM_PPC64: switch (Type) { #define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value) diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 0e5036d7dfcc..2f97afd147c8 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -428,6 +428,31 @@ static uint64_t resolveSparc32(uint64_t Type, uint64_t Offset, uint64_t S, return LocData; } +static bool supportsSw64(uint64_t Type) { + switch (Type) { + case ELF::R_SW_64_REFLONG: + case ELF::R_SW_64_REFQUAD: + case ELF::R_SW_64_SREL32: + return true; + default: + return false; + } +} + +static uint64_t resolveSw64(uint64_t Type, uint64_t Offset, uint64_t S, + uint64_t /*LocData*/, int64_t Addend) { + switch (Type) { + case ELF::R_SW_64_REFLONG: + case ELF::R_SW_64_REFQUAD: + return S + Addend; + case ELF::R_SW_64_SREL32: + return (S + Addend) & 0xFFFFFFFF; + default: + llvm_unreachable("Invalid relocation type"); + } + return 0; +} + static bool supportsHexagon(uint64_t Type) { return Type == ELF::R_HEX_32; } @@ -807,6 +832,8 @@ getRelocationResolver(const ObjectFile &Obj) { return {supportsAmdgpu, resolveAmdgpu}; case Triple::riscv64: return {supportsRISCV, resolveRISCV}; + case Triple::sw_64: + return {supportsSw64, resolveSw64}; default: if (isAMDGPU(Obj)) return {supportsAmdgpu, resolveAmdgpu}; diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 87fe7bebf688..7be623c3608d 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -220,6 +220,7 @@ add_llvm_component_library(LLVMSupport StringRef.cpp SuffixTreeNode.cpp SuffixTree.cpp + Sw64TargetParser.cpp SystemUtils.cpp TarWriter.cpp ThreadPool.cpp diff --git a/llvm/lib/Support/Sw64TargetParser.cpp b/llvm/lib/Support/Sw64TargetParser.cpp new file mode 100644 index 000000000000..f31238c8f4b9 --- /dev/null +++ b/llvm/lib/Support/Sw64TargetParser.cpp @@ -0,0 +1,96 @@ +//===-- Sw64TargetParser - Parser for Sw64 features -------*- C++ -*-===// +// +// Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise Sw64 hardware features +// such as FPU/CPU/ARCH and extension names. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Sw64TargetParser.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/TargetParser/Triple.h" +#include + +namespace llvm { +namespace Sw64 { + +struct CPUInfo { + StringLiteral Name; + CPUKind Kind; + unsigned Features; + StringLiteral DefaultMarch; + bool is64Bit() const { return (Features & FK_64BIT); } +}; + +constexpr CPUInfo Sw64CPUInfo[] = { +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) \ + {NAME, CK_##ENUM, FEATURES, DEFAULT_MARCH}, +#include "llvm/Support/Sw64TargetParser.def" +}; + +bool checkTuneCPUKind(CPUKind Kind, bool IsSw64) { + if (Kind == CK_INVALID) + return false; + return Sw64CPUInfo[static_cast(Kind)].is64Bit() == IsSw64; +} + +CPUKind parseARCHKind(StringRef CPU) { + return llvm::StringSwitch(CPU) +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) \ + .Case(DEFAULT_MARCH, CK_##ENUM) +#include "llvm/Support/Sw64TargetParser.def" + .Default(CK_INVALID); +} + +StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsSw64) { + return llvm::StringSwitch(TuneCPU) +#define PROC_ALIAS(NAME, Sw64) .Case(NAME, StringRef(Sw64)) +#include "llvm/Support/Sw64TargetParser.def" + .Default(TuneCPU); +} + +CPUKind parseTuneCPUKind(StringRef TuneCPU, bool IsSw64) { + TuneCPU = resolveTuneCPUAlias(TuneCPU, IsSw64); + + return llvm::StringSwitch(TuneCPU) +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM) +#include "llvm/Support/Sw64TargetParser.def" + .Default(CK_INVALID); +} + +StringRef getMcpuFromMArch(StringRef CPU) { + CPUKind Kind = parseARCHKind(CPU); + return Sw64CPUInfo[static_cast(Kind)].Name; +} + +void fillValidCPUArchList(SmallVectorImpl &Values, bool IsSw64) { + for (const auto &C : Sw64CPUInfo) { + if (C.Kind != CK_INVALID && IsSw64 == C.is64Bit()) + Values.emplace_back(C.Name); + } +} + +void fillValidTuneCPUArchList(SmallVectorImpl &Values, bool IsSw64) { + for (const auto &C : Sw64CPUInfo) { + if (C.Kind != CK_INVALID && IsSw64 == C.is64Bit()) + Values.emplace_back(C.Name); + } + +#define PROC_ALIAS(NAME, Sw64) Values.emplace_back(StringRef(NAME)); +#include "llvm/Support/Sw64TargetParser.def" +} + +CPUKind parseCPUArch(StringRef CPU) { + return llvm::StringSwitch(CPU) +#define SW64_CPU(ENUM, NAME, FEATURES, DEFAULT_MARCH) .Case(NAME, CK_##ENUM) +#include "llvm/Support/Sw64TargetParser.def" + .Default(CK_INVALID); +} + +} // namespace Sw64 +} // namespace llvm diff --git a/llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt b/llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt new file mode 100644 index 000000000000..90d61cd90208 --- /dev/null +++ b/llvm/lib/Target/Sw64/AsmParser/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_component_library(LLVMSw64AsmParser + Sw64AsmParser.cpp + + LINK_COMPONENTS + MC + MCParser + Sw64Desc + Sw64Info + Support + + ADD_TO_COMPONENT + Sw64 + ) diff --git a/llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp b/llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp new file mode 100644 index 000000000000..e3ce6f0a61c0 --- /dev/null +++ b/llvm/lib/Target/Sw64/AsmParser/Sw64AsmParser.cpp @@ -0,0 +1,2005 @@ +//===-- 
Sw64AsmParser.cpp - Parse Sw64 assembly to MCInst instructions ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64ABIFlagsSection.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "Sw64TargetStreamer.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/SubtargetFeature.h" +#include "llvm/TargetParser/Triple.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-asm-parser" + +static const StringRef RelocTable[] = { + "literal", "lituse_addr", "lituse_jsr", "gpdisp", + "gprelhigh", "gprellow", "gprel", "tlsgd", + "tlsldm", "gotdtprel", "dtprelhi", "dtprello", + "gottprel", "tprelhi", "tprello", "tprel"}; + +namespace llvm { + +class MCInstrInfo; + +} // end namespace llvm + +namespace { + +class Sw64AssemblerOptions { +public: + Sw64AssemblerOptions(const FeatureBitset &Features_) : Features(Features_) {} + + Sw64AssemblerOptions(const Sw64AssemblerOptions *Opts) { + ATReg = Opts->getATRegIndex(); + Reorder = Opts->isReorder(); + Macro = Opts->isMacro(); + Features = Opts->getFeatures(); + } + + unsigned getATRegIndex() const { return ATReg; } + bool setATRegIndex(unsigned Reg) { + if (Reg > 31) + return false; + + ATReg = Reg; + return true; + } + + bool isReorder() const { return Reorder; } + void setReorder() { Reorder = true; } + void setNoReorder() { Reorder = false; } + + bool isMacro() const { return Macro; } + void setMacro() { Macro = true; } + void setNoMacro() { Macro = false; } + + const FeatureBitset &getFeatures() const { return Features; } + void setFeatures(const FeatureBitset &Features_) { Features = Features_; } + + // Set of features that are either architecture features or referenced + // by them (e.g.: FeatureNaN2008 implied by FeatureSw6432r6). + // The full table can be found in Sw64GenSubtargetInfo.inc (Sw64FeatureKV[]). + // The reason we need this mask is explained in the selectArch function. + // FIXME: Ideally we would like TableGen to generate this information. 
+ static const FeatureBitset AllArchRelatedMask; + +private: + unsigned ATReg = 1; + bool Reorder = true; + bool Macro = true; + FeatureBitset Features; +}; + +} // end anonymous namespace + +const FeatureBitset Sw64AssemblerOptions::AllArchRelatedMask = { + Sw64::FeatureCIX, Sw64::Featurecore3b, Sw64::Featurecore4, + Sw64::FeatureRelax, Sw64::FeatureEv}; + +namespace { + +class Sw64AsmParser : public MCTargetAsmParser { + Sw64TargetStreamer &getTargetStreamer() { + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast(TS); + } + + Sw64ABIInfo ABI; + SmallVector, 2> AssemblerOptions; + MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a + // nullptr, which indicates that no function is currently + // selected. This usually happens after an '.end func' + // directive. + bool IsLittleEndian; + bool IsPicEnabled; + bool IsCpRestoreSet; + int CpRestoreOffset; + unsigned CpSaveLocation; + // If true, then CpSaveLocation is a register, otherwise it's an offset. + bool CpSaveLocationIsRegister; + + // Map of register aliases created via the .set directive. + StringMap RegisterSets; + +#define GET_ASSEMBLER_HEADER +#include "Sw64GenAsmMatcher.inc" + + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + + // Parse a register as used in CFI directives + bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) override; + + OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) override; + + bool parseParenSuffix(StringRef Name, OperandVector &Operands); + + bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID); + + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + + bool ParseDirective(AsmToken DirectiveID) override; + + OperandMatchResultTy + matchAnyRegisterNameWithoutDollar(OperandVector &Operands, + StringRef Identifier, SMLoc S); + OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, + const AsmToken &Token, + SMLoc S); + OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, + SMLoc S); + OperandMatchResultTy parseAnyRegister(OperandVector &Operands); + OperandMatchResultTy parseMemOperand(OperandVector &Operands); + OperandMatchResultTy parseMemOperands(OperandVector &Operands); + OperandMatchResultTy parseJmpImm(OperandVector &Operands); + + bool searchSymbolAlias(OperandVector &Operands); + + bool parseOperand(OperandVector &, StringRef Mnemonic); + + void ParsingFixupOperands(std::pair reloc); + + enum MacroExpanderResultTy { + MER_NotAMacro, + MER_Success, + MER_Fail, + }; + + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; + + bool loadAndAddSymbolAddress(const MCExpr *SymExpr, unsigned DstReg, + unsigned SrcReg, bool Is32BitSym, SMLoc IDLoc, + MCStreamer &Out, const MCSubtargetInfo *STI); + + void expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad); + + bool reportParseError(Twine ErrorMsg); + + bool parseMemOffset(const MCExpr *&Res, bool isParenExpr); + + bool isEvaluated(const MCExpr *Expr); + bool parseSetArchDirective(); + bool parseDirectiveSet(); + + bool parseSetAtDirective(); + bool parseSetNoAtDirective(); + bool parseSetMacroDirective(); + bool parseSetNoMacroDirective(); + bool parseSetReorderDirective(); + bool parseSetNoReorderDirective(); + + bool 
parseSetAssignment(); + + bool parseFpABIValue(Sw64ABIFlagsSection::FpABIKind &FpABI, + StringRef Directive); + + int matchCPURegisterName(StringRef Symbol); + + int matchFPURegisterName(StringRef Name); + + bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + + // Helper function that checks if the value of a vector index is within the + // boundaries of accepted values for each RegisterKind + // Example: INSERT.B $w0[n], $1 => 16 > n >= 0 + bool validateMSAIndex(int Val, int RegKind); + + // Selects a new architecture by updating the FeatureBits with the necessary + // info including implied dependencies. + // Internally, it clears all the feature bits related to *any* architecture + // and selects the new one using the ToggleFeature functionality of the + // MCSubtargetInfo object that handles implied dependencies. The reason we + // clear all the arch related bits manually is because ToggleFeature only + // clears the features that imply the feature being cleared and not the + // features implied by the feature being cleared. This is easier to see + // with an example: + // -------------------------------------------------- + // | Feature | Implies | + // | -------------------------------------------------| + // | FeatureCIX | | + // | FeatureEV | | + // | FeatureSw6a | | + // | FeatureSw6b | | + // | ... | | + // -------------------------------------------------- + // + // Setting Sw643 is equivalent to set: (FeatureSw643 | FeatureSw642 | + // FeatureSw64GP64 | FeatureSw641) + // Clearing Sw643 is equivalent to clear (FeatureSw643 | FeatureSw644). + void selectArch(StringRef ArchFeature) { + MCSubtargetInfo &STI = copySTI(); + FeatureBitset FeatureBits = STI.getFeatureBits(); + FeatureBits &= ~Sw64AssemblerOptions::AllArchRelatedMask; + STI.setFeatureBits(FeatureBits); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(ArchFeature))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); + } + + void setFeatureBits(uint64_t Feature, StringRef FeatureString) { + if (!(getSTI().getFeatureBits()[Feature])) { + MCSubtargetInfo &STI = copySTI(); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); + } + } + + void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { + if (getSTI().getFeatureBits()[Feature]) { + MCSubtargetInfo &STI = copySTI(); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); + } + } + + void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { + setFeatureBits(Feature, FeatureString); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); + } + + void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { + clearFeatureBits(Feature, FeatureString); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); + } + +public: + MCFixupKind FixupKind; + + enum Sw64MatchResultTy { + Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY, + Match_RequiresDifferentOperands, + Match_RequiresNoZeroRegister, + Match_RequiresSameSrcAndDst, + Match_NoFCCRegisterForCurrentISA, + Match_NonZeroOperandForSync, + Match_NonZeroOperandForMTCX, + Match_RequiresPosSizeRange0_32, + Match_RequiresPosSizeRange33_64, + Match_RequiresPosSizeUImm6, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "Sw64GenAsmMatcher.inc" +#undef GET_OPERAND_DIAGNOSTIC_TYPES + }; + + 
Sw64AsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, sti, MII), + ABI(Sw64ABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), + sti.getCPU(), Options)) { + FixupKind = llvm::FirstTargetFixupKind; + + MCAsmParserExtension::Initialize(parser); + parser.addAliasForDirective(".asciiz", ".asciz"); + parser.addAliasForDirective(".hword", ".2byte"); + parser.addAliasForDirective(".word", ".4byte"); + parser.addAliasForDirective(".dword", ".8byte"); + + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + + // Remember the initial assembler options. The user can not modify these. + AssemblerOptions.push_back( + std::make_unique(getSTI().getFeatureBits())); + + // Create an assembler options environment for the user to modify. + AssemblerOptions.push_back( + std::make_unique(getSTI().getFeatureBits())); + + CurrentFn = nullptr; + + IsPicEnabled = getContext().getObjectFileInfo()->isPositionIndependent(); + + IsCpRestoreSet = false; + CpRestoreOffset = -1; + } + + const Sw64ABIInfo &getABI() const { return ABI; } + + const MCExpr *createTargetUnaryExpr(const MCExpr *E, + AsmToken::TokenKind OperatorToken, + MCContext &Ctx) override { + switch (OperatorToken) { + default: + return nullptr; + case AsmToken::PercentGp_Rel: + return Sw64MCExpr::create(Sw64MCExpr::MEK_ELF_LITERAL, E, Ctx); + case AsmToken::PercentDtprel_Hi: + return Sw64MCExpr::create(Sw64MCExpr::MEK_GPREL_HI16, E, Ctx); + case AsmToken::PercentDtprel_Lo: + return Sw64MCExpr::create(Sw64MCExpr::MEK_GPREL_LO16, E, Ctx); + case AsmToken::PercentGot_Hi: + return Sw64MCExpr::create(Sw64MCExpr::MEK_GPDISP_HI16, E, Ctx); + case AsmToken::PercentGot_Lo: + return Sw64MCExpr::create(Sw64MCExpr::MEK_GPDISP_LO16, E, Ctx); + + case AsmToken::PercentTprel_Hi: + return Sw64MCExpr::create(Sw64MCExpr::MEK_TPREL_HI16, E, Ctx); + case AsmToken::PercentTprel_Lo: + return Sw64MCExpr::create(Sw64MCExpr::MEK_TPREL_LO16, E, Ctx); + } + } +}; + +// Sw64Operand - Instances of this class represent a parsed Sw64 machine +// instruction. +class Sw64Operand : public MCParsedAsmOperand { +public: + // Broad categories of register classes + // The exact class is finalized by the render method. + enum RegKind { + RegKind_GPR = 1, // Sw64 GPR Register + RegKind_FPR = 2, // Sw64 FPR Register + RegKind_TC = 4, // Sw64 Time counter + RegKind_CSR = 8, // Sw64 Control & Status Register + RegKind_FPCR = 16, // Sw64 Floating-point Control Register + // Potentially any (e.g. $1) + RegKind_Numeric = + RegKind_GPR | RegKind_FPR | RegKind_TC | RegKind_CSR | RegKind_FPCR + }; + +private: + enum KindTy { + k_Immediate, // An immediate (possibly involving symbol references) + k_Memory, // Base + Offset Memory Address + k_Register, // A RegKind. + k_RegisterIndex, // A register index in one or more RegKind. 
+ k_Token // A simple token + } Kind; + +public: + Sw64Operand(KindTy K, Sw64AsmParser &Parser) + : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {} + + ~Sw64Operand() override { + switch (Kind) { + case k_Immediate: + break; + case k_Memory: + delete Mem.Base; + break; + case k_Register: + case k_RegisterIndex: + case k_Token: + break; + } + } + +private: + // For diagnostics, and checking the assembler temporary + Sw64AsmParser &AsmParser; + + struct Token { + const char *Data; + unsigned Length; + }; + + struct RegIdxOp { + unsigned Index; // Index into the register class + RegKind Kind; // Bitfield of the kinds it could possibly be + struct Token Tok; // The input token this operand originated from. + const MCRegisterInfo *RegInfo; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + Sw64Operand *Base; + const MCExpr *Off; + }; + + struct RegListOp { + SmallVector *List; + }; + + union { + struct Token Tok; + struct RegIdxOp RegIdx; + struct ImmOp Imm; + struct MemOp Mem; + struct RegListOp RegList; + }; + + SMLoc StartLoc, EndLoc; + + // Internal constructor for register kinds + static std::unique_ptr CreateReg(unsigned Index, StringRef Str, + RegKind RegKind, + const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, + Sw64AsmParser &Parser) { + auto Op = std::make_unique(k_Register, Parser); + Op->RegIdx.Index = Index; + Op->RegIdx.RegInfo = RegInfo; + Op->RegIdx.Kind = RegKind; + Op->RegIdx.Tok.Data = Str.data(); + Op->RegIdx.Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + +public: + // Coerce the register to GPR64 and return the real register for the current + // target. + unsigned getGPRReg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); + return RegIdx.Index; + } + + bool isV256AsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_FPR && + RegIdx.Index <= Sw64::F31 && RegIdx.Index >= Sw64::F0; + } + + void addMemOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createReg(getMemBase()->getGPR64Reg())); + + const MCExpr *Expr = getMemOff(); + addExpr(Inst, Expr); + } + +private: + // Coerce the register to FPR64 and return the real register for the current + // target. + unsigned getFPR64Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_FPR) && "Invalid access!"); + return RegIdx.Index; + } + +public: + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediate when possible. Null MCExpr = 0. + if (!Expr) + Inst.addOperand(MCOperand::createImm(0)); + else if (const MCConstantExpr *CE = dyn_cast(Expr)) + Inst.addOperand(MCOperand::createImm(CE->getValue())); + else + Inst.addOperand(MCOperand::createExpr(Expr)); + } + + void addRegOperands(MCInst &Inst, unsigned N) const { + if (RegIdx.Index > 32) + Inst.addOperand(MCOperand::createReg(getGPRReg())); + else + Inst.addOperand(MCOperand::createReg(getFPR64Reg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCExpr *Expr = getImm(); + addExpr(Inst, Expr); + } + + bool isReg() const override { + // As a special case until we sort out the definition of div/divu, accept + // $0/$zero here so that MCK_ZERO works correctly. 
+ return isGPRAsmReg() || isFPRAsmReg(); + } + + bool isRegIdx() const { return Kind == k_Register; } // Operand.Kind + bool isImm() const override { return Kind == k_Immediate; } + + bool isConstantImm() const { + int64_t Res; + return isImm() && getImm()->evaluateAsAbsolute(Res); + } + + bool isToken() const override { + // Note: It's not possible to pretend that other operand kinds are tokens. + // The matcher emitter checks tokens first. + return Kind == k_Token; + } + + bool isMem() const override { return Kind == k_Memory; } + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const override { + // As a special case until we sort out the definition of div/divu, accept + // $0/$zero here so that MCK_ZERO works correctly. + if (Kind == k_Register && RegIdx.Kind & RegKind_GPR) + return getGPRReg(); // FIXME: GPR64 too + + if (Kind == k_Register && RegIdx.Kind & RegKind_FPR) + return getFPR64Reg(); // FIXME: GPR64 too + + llvm_unreachable("Invalid access!"); + return 0; + } + + const MCExpr *getImm() const { + assert((Kind == k_Immediate) && "Invalid access!"); + return Imm.Val; + } + + int64_t getConstantImm() const { + const MCExpr *Val = getImm(); + int64_t Value = 0; + (void)Val->evaluateAsAbsolute(Value); + return Value; + } + + Sw64Operand *getMemBase() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Base; + } + + const MCExpr *getMemOff() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Off; + } + + int64_t getConstantMemOff() const { + return static_cast(getMemOff())->getValue(); + } + + static std::unique_ptr CreateToken(StringRef Str, SMLoc S, + Sw64AsmParser &Parser) { + auto Op = std::make_unique(k_Token, Parser); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + // Create a numeric register (e.g. $1). The exact register remains + // unresolved until an instruction successfully matches + static std::unique_ptr + createNumericReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, Sw64AsmParser &Parser) { + LLVM_DEBUG(dbgs() << "createNumericReg(" << Index + 65 << ", ...)\n"); + return CreateReg(Index + 65, Str, RegKind_Numeric, RegInfo, S, E, Parser); + } + + // Create a register that is definitely a GPR. + // This is typically only used for named registers such as $gp. + static std::unique_ptr + createGPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, Sw64AsmParser &Parser) { + return CreateReg(Index, Str, RegKind_GPR, RegInfo, S, E, Parser); + } + + // Create a register that is definitely a FPR. + // This is typically only used for named registers such as $f0. 
+ static std::unique_ptr + createFPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, Sw64AsmParser &Parser) { + return CreateReg(Index, Str, RegKind_FPR, RegInfo, S, E, Parser); + } + + static std::unique_ptr + CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, Sw64AsmParser &Parser) { + auto Op = std::make_unique(k_Immediate, Parser); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr + CreateMem(std::unique_ptr Base, const MCExpr *Off, SMLoc S, + SMLoc E, Sw64AsmParser &Parser) { + auto Op = std::make_unique(k_Memory, Parser); + Op->Mem.Base = Base.release(); + Op->Mem.Off = Off; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + bool isGPRAsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_GPR && + RegIdx.Index <= Sw64::R31 && RegIdx.Index >= Sw64::R0; + } + + bool isFPRAsmReg() const { + // AFPR64 is $0-$15 but we handle this in getAFGR64() + return isRegIdx() && RegIdx.Kind & RegKind_FPR && + RegIdx.Index <= Sw64::F31 && RegIdx.Index >= Sw64::F0; + // return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 64 && + // RegIdx.Index >= 33; + } + + // Coerce the register to GPR64 and return the real register for the current + // target. + unsigned getGPR64Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!"); + return RegIdx.Index; + } + + unsigned getFGR64Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_FPR) && "Invalid access!"); + return RegIdx.Index; + } + + void addF4RCAsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR64Reg())); + } + + void addF8RCAsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR64Reg())); + } + + bool isFGRAsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_FPR && RegIdx.Index <= 32; + } + + // getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const override { return StartLoc; } + // getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const override { return EndLoc; } + + void print(raw_ostream &OS) const override { + switch (Kind) { + case k_Immediate: + OS << "Imm<"; + OS << *Imm.Val; + OS << ">"; + break; + case k_Memory: + OS << "Mem<"; + Mem.Base->print(OS); + OS << ", "; + OS << *Mem.Off; + OS << ">"; + break; + case k_Register: + OS << "Reg<" << RegIdx.Kind << ", " + << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">"; + break; + case k_RegisterIndex: + OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ", " + << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">"; + break; + case k_Token: + OS << getToken(); + break; + } + } + + bool isValidForTie(const Sw64Operand &Other) const { + if (Kind != Other.Kind) + return false; + + switch (Kind) { + default: + llvm_unreachable("Unexpected kind"); + return false; + case k_RegisterIndex: { + StringRef Token(RegIdx.Tok.Data, RegIdx.Tok.Length); + StringRef OtherToken(Other.RegIdx.Tok.Data, Other.RegIdx.Tok.Length); + return Token == OtherToken; + } + } + } + + template bool isScaledSImm() const { + if (isConstantImm() && + isShiftedInt(getConstantImm())) + return true; + // Operand can also be a symbol or symbol plus + // offset in case of relocations. 
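+    // For relocatable expressions, apply the same shifted-range check to the
+    // constant addend of the relocation (computed below via evaluateAsRelocatable).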
+ if (Kind != k_Immediate) + return false; + MCValue Res; + bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr); + return Success && isShiftedInt(Res.getConstant()); + } + + template + void addConstantSImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + int64_t Imm = getConstantImm() - Offset; + Imm = SignExtend64(Imm); + Imm += Offset; + Imm += AdjustOffset; + Inst.addOperand(MCOperand::createImm(Imm)); + } + + template + void addConstantUImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + uint64_t Imm = getConstantImm() - Offset; + Imm &= (1ULL << Bits) - 1; + Imm += Offset; + Imm += AdjustOffset; + Inst.addOperand(MCOperand::createImm(Imm)); + } + + template bool isConstantUImmRange() const { + return isConstantImm() && getConstantImm() >= Bottom && + getConstantImm() <= Top; + } + + template bool isScaledUImm() const { + return isConstantImm() && + isShiftedUInt(getConstantImm()); + } + + template bool isConstantSImm() const { + return isConstantImm() && isInt(getConstantImm() - Offset); + } + + template bool isConstantUImm() const { + return isConstantImm() && isUInt(getConstantImm() - Offset); + } + + // Coerce the register to SIMD and return the real register for the current + // target. + unsigned getV256Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_FPR) && "Invalid access!"); + // It doesn't matter which of the MSA128[BHWD] classes we use. They are all + // identical + unsigned ClassID = Sw64::V256LRegClassID; + // RegIdx.Index should be sub 1, or it will be error. such as: $f1 -> $f2 + return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index - 1); + } + + void addV256AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getV256Reg())); + } + + bool isConstantMemOff() const { + return isMem() && isa(getMemOff()); + } + + // Allow relocation operators. + // FIXME: This predicate and others need to look through binary expressions + // and determine whether a Value is a constant or not. + template + bool isMemWithSimmOffset() const { + if (!isMem()) + return false; + if (!getMemBase()->isGPRAsmReg()) + return false; + if (isa(getMemOff()) || + (isConstantMemOff() && + isShiftedInt(getConstantMemOff()))) + return true; + MCValue Res; + bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); + return IsReloc && isShiftedInt(Res.getConstant()); + } + + template bool isSImm() const { + return isConstantImm() ? isInt(getConstantImm()) : isImm(); + } + + template bool isUImm() const { + return isConstantImm() ? isUInt(getConstantImm()) : isImm(); + } + + template bool isAnyImm() const { + return isConstantImm() ? (isInt(getConstantImm()) || + isUInt(getConstantImm())) + : isImm(); + } + +}; // class Sw64Operand + +} // end anonymous namespace + +namespace llvm {} // end namespace llvm + +bool Sw64AsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); + Inst.setLoc(IDLoc); + + if (MCID.mayLoad() || MCID.mayStore()) { + // Check the offset of memory operand, if it is a symbol + // reference or immediate we may have to expand instructions. 
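+    // Operand 1 holds the displacement. Immediates outside the signed 16-bit
+    // range and unresolved symbolic references are rewritten via expandMemInst.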
+ const MCOperandInfo &OpInfo = MCID.operands()[1]; + if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || + (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { + MCOperand &Op = Inst.getOperand(1); + if (Op.isImm()) { + const unsigned Opcode = Inst.getOpcode(); + switch (Opcode) { + default: + break; + } + + int64_t MemOffset = Op.getImm(); + if (MemOffset < -32768 || MemOffset > 32767) { + // Offset can't exceed 16bit value. + expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + return getParser().hasPendingError(); + } + } else if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *SR = + static_cast(Expr); + if (SR->getKind() == MCSymbolRefExpr::VK_None) { + // Expand symbol. + expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + return getParser().hasPendingError(); + } + } else if (!isEvaluated(Expr)) { + expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + return getParser().hasPendingError(); + } + } + } + } // if load/store + static int lockReg = -1; + if (Inst.getOpcode() == Sw64::STQ_C || Inst.getOpcode() == Sw64::STL_C) { + lockReg = Inst.getOperand(0).getReg(); + } + + if (Inst.getOpcode() == Sw64::RD_F) { + if (lockReg != Inst.getOperand(0).getReg() && lockReg != -1) { + Error(IDLoc, "lstX and rd_f must use the same reg!"); + lockReg = -1; + return false; + } + } + + Out.emitInstruction(Inst, *STI); + return true; +} + +// Can the value be represented by a unsigned N-bit value and a shift left? +template static bool isShiftedUIntAtAnyPosition(uint64_t x) { + return x && isUInt(x >> llvm::countr_zero(x)); +} + +OperandMatchResultTy Sw64AsmParser::parseJmpImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseJumpTarget\n"); + + SMLoc S = getLexer().getLoc(); + + // Registers are a valid target and have priority over symbols. + OperandMatchResultTy ResTy = parseAnyRegister(Operands); + if (ResTy != MatchOperand_NoMatch) + return ResTy; + + // Integers and expressions are acceptable + const MCExpr *Expr = nullptr; + if (Parser.parseExpression(Expr)) { + // We have no way of knowing if a symbol was consumed so we must ParseFail + return MatchOperand_ParseFail; + } + Operands.push_back( + Sw64Operand::CreateImm(Expr, S, getLexer().getLoc(), *this)); + return MatchOperand_Success; +} + +OperandMatchResultTy Sw64AsmParser::parseMemOperands(OperandVector &Operands) { + LLVM_DEBUG(dbgs() << "Parsing Memory Operand for store/load\n"); + SMLoc S = getParser().getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); + + const AsmToken &Tok = getParser().getTok(); + switch (Tok.getKind()) { + default: + return MatchOperand_NoMatch; + case AsmToken::EndOfStatement: + // Zero register assumed, add a memory operand with ZERO as its base. + // "Base" will be managed by k_Memory. + auto Base = Sw64Operand::createGPRReg( + 0, "0", getContext().getRegisterInfo(), S, E, *this); + Operands.push_back( + Sw64Operand::CreateMem(std::move(Base), nullptr, S, E, *this)); + return MatchOperand_Success; + } + + return MatchOperand_NoMatch; +} + +void Sw64AsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad) { + // ldl $0,a($gp) Op0 Op1 Op2 + // > + + const MCSymbolRefExpr *SR; + MCInst TempInst; + unsigned ImmOffset, HiOffset, LoOffset; + const MCExpr *ExprOffset; + + // 1st operand is either the source or destination register. 
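+  // Expected operand layout: Op0 = data register, Op1 = offset (immediate or
+  // symbol reference), Op2 = base register.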
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + unsigned RegOpNum = Inst.getOperand(0).getReg(); + + // 3nd operand is the base register. + assert(Inst.getOperand(2).isReg() && "expected register operand kind"); + unsigned BaseRegNum = Inst.getOperand(2).getReg(); + const MCOperand &OffsetOp = Inst.getOperand(1); + + // 2rd operand is either an immediate or expression. + if (OffsetOp.isImm()) { + assert(Inst.getOperand(1).isImm() && "expected immediate operand kind"); + ImmOffset = Inst.getOperand(2).getImm(); + LoOffset = ImmOffset & 0x0000ffff; + HiOffset = (ImmOffset & 0xffff0000) >> 16; + // If msb of LoOffset is 1(negative number) we must increment HiOffset. + if (LoOffset & 0x8000) + HiOffset++; + } else + ExprOffset = Inst.getOperand(1).getExpr(); + // All instructions will have the same location. + TempInst.setLoc(IDLoc); + TempInst.setOpcode(Inst.getOpcode()); + TempInst.addOperand(MCOperand::createReg(RegOpNum)); + if (OffsetOp.isImm()) + TempInst.addOperand(MCOperand::createImm(ImmOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + SR = static_cast(ExprOffset); + + TempInst.addOperand(MCOperand::createExpr(SR)); + } else { + llvm_unreachable("Memory offset is not SymbolRef!"); + } + } + TempInst.addOperand(MCOperand::createReg(BaseRegNum)); + Out.emitInstruction(TempInst, *STI); + // Prepare TempInst for next instruction. + TempInst.clear(); +} + +// Expand a integer division macro. +// +// Notably we don't have to emit a warning when encountering $rt as the $zero +// register, or 0 as an immediate. processInstruction() has already done that. +// +// The destination register can only be $zero when expanding (S)DivIMacro or +// D(S)DivMacro. + +bool Sw64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + + switch (MatchResult) { + case Match_Success: + if (processInstruction(Inst, IDLoc, Out, STI)) + return true; + return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_InvalidTiedOperand: + Error(IDLoc, "operand must match destination register"); + return true; + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = Operands[ErrorInfo]->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + } + llvm_unreachable("Implement any new match types added!"); +} + +int Sw64AsmParser::matchCPURegisterName(StringRef Name) { + int CC; + CC = StringSwitch(Name) + .Cases("v0", "r0", Sw64::R0) + .Cases("t0", "r1", Sw64::R1) + .Cases("t1", "r2", Sw64::R2) + .Cases("t2", "r3", Sw64::R3) + .Cases("t3", "r4", Sw64::R4) + .Cases("t4", "r5", Sw64::R5) + .Cases("t5", "r6", Sw64::R6) + .Cases("t6", "r7", Sw64::R7) + .Cases("t7", "r8", Sw64::R8) + .Cases("s0", "r9", Sw64::R9) + .Cases("s1", "r10", Sw64::R10) + .Cases("s2", "r11", Sw64::R11) + .Cases("s3", "r12", Sw64::R12) + .Cases("s4", "r13", Sw64::R13) + .Cases("s5", "r14", Sw64::R14) + .Cases("fp", "r15", Sw64::R15) + .Cases("a0", "r16", Sw64::R16) + .Cases("a1", "r17", Sw64::R17) + .Cases("a2", "r18", 
Sw64::R18) + .Cases("a3", "r19", Sw64::R19) + .Cases("a4", "r20", Sw64::R20) + .Cases("a5", "r21", Sw64::R21) + .Cases("t8", "r22", Sw64::R22) + .Cases("t9", "r23", Sw64::R23) + .Cases("t10", "r24", Sw64::R24) + .Cases("t11", "r25", Sw64::R25) + .Cases("ra", "r26", Sw64::R26) + .Cases("pv", "r27", Sw64::R27) + .Cases("at", "r28", Sw64::R28) + .Cases("gp", "r29", Sw64::R29) + .Cases("sp", "r30", Sw64::R30) + .Cases("zero", "r31", Sw64::R31) + .Default(-1); + + return CC; +} + +int Sw64AsmParser::matchFPURegisterName(StringRef Name) { + if (Name[0] == 'f') { + StringRef NumString = Name.substr(1); + unsigned IntVal; + if (NumString.getAsInteger(10, IntVal)) + return -1; // This is not an integer. + if (IntVal > 31) // Maximum index for fpu register. + return -1; + return IntVal + 1; + } + return -1; +} + +bool Sw64AsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseOperand\n"); + + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + + if (parseMemOperands(Operands) == MatchOperand_Success) + return false; + + LLVM_DEBUG(dbgs() << ".. Generic Parser\n"); + + switch (getLexer().getKind()) { + case AsmToken::Dollar: { + // Parse the register. + SMLoc S = Parser.getTok().getLoc(); + + // Almost all registers have been parsed by custom parsers. There is only + // one exception to this. $zero (and it's alias $0) will reach this point + // for div, divu, and similar instructions because it is not an operand + // to the instruction definition but an explicit register. Special case + // this situation for now. + if (parseAnyRegister(Operands) != MatchOperand_NoMatch) + return false; + + // Maybe it is a symbol reference. + StringRef Identifier; + if (Parser.parseIdentifier(Identifier)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); + + // Otherwise create a symbol reference. + const MCExpr *Res = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + + Operands.push_back(Sw64Operand::CreateImm(Res, S, E, *this)); + return false; + } + // parse jmp & ret: ($GPRC) + case AsmToken::LParen: { + return parseParenSuffix(Mnemonic, Operands); + } + case AsmToken::Minus: + case AsmToken::Plus: + case AsmToken::String: + case AsmToken::Integer: { + LLVM_DEBUG(dbgs() << ".. generic integer expression\n"); + const MCExpr *IdVal; + SMLoc S = Parser.getTok().getLoc(); // Start location of the operand. 
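+      // Parse the expression first, then scan the raw source text for a
+      // trailing '!reloc' annotation (e.g. '!gpdisp') and wrap the expression
+      // in the matching target operator below.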
+ if (getParser().parseExpression(IdVal)) + return true; + + std::string Reloc; + const MCExpr *Expr; + const char *Mnem = Mnemonic.data(); + AsmToken::TokenKind FirstTokenKind; + MCContext &Ctx = getStreamer().getContext(); + std::string Stxt = S.getPointer(); + size_t a = Stxt.find_first_of('!'); + size_t c = Stxt.find_first_of('\n'); + + if (a != 0 && a < c) { + std::string Reloc1 = Stxt.substr(a + 1, c - a - 1); + size_t b = Reloc1.find_last_of('!'); + + Reloc = Reloc1.substr(0, b); + + if (Reloc == "gpdisp") { + if (strcmp(Mnem, "ldih") == 0) + FirstTokenKind = AsmToken::TokenKind::PercentGot_Hi; + else if (strcmp(Mnem, "ldi") == 0) + FirstTokenKind = AsmToken::TokenKind::PercentGot_Lo; + + Expr = createTargetUnaryExpr(IdVal, FirstTokenKind, Ctx); + } + SMLoc E = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(Sw64Operand::CreateImm(Expr, S, E, *this)); + return false; + } + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(Sw64Operand::CreateImm(IdVal, S, E, *this)); + return false; + } + default: { + LLVM_DEBUG(dbgs() << ".. generic expr expression\n"); + + const MCExpr *Expr; + SMLoc S = Parser.getTok().getLoc(); + if (getParser().parseExpression(Expr)) + return true; + + std::string Reloc; + AsmToken::TokenKind FirstTokenKind; + MCContext &Ctx = getStreamer().getContext(); + std::string Stxt = S.getPointer(); + size_t a = Stxt.find_first_of('!'); + size_t b = Stxt.find_first_of('\n'); + Reloc = Stxt.substr(a + 1, b - a - 1); + + if (a < b) { + if (Reloc == "literal") + FirstTokenKind = AsmToken::TokenKind::PercentGp_Rel; + else if (Reloc == "gprelhigh") + FirstTokenKind = AsmToken::TokenKind::PercentDtprel_Hi; + else if (Reloc == "gprellow") + FirstTokenKind = AsmToken::TokenKind::PercentDtprel_Lo; + else if (Reloc == "tprelhi") + FirstTokenKind = AsmToken::TokenKind::PercentTprel_Hi; + else if (Reloc == "tprello") + FirstTokenKind = AsmToken::TokenKind::PercentTprel_Lo; + + Expr = createTargetUnaryExpr(Expr, FirstTokenKind, Ctx); + } + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(Sw64Operand::CreateImm(Expr, S, E, *this)); + return false; + } + } + return true; +} + +bool Sw64AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success; +} + +OperandMatchResultTy Sw64AsmParser::tryParseRegister(MCRegister &RegNo, + SMLoc &StartLoc, + SMLoc &EndLoc) { + SmallVector, 1> Operands; + OperandMatchResultTy ResTy = parseAnyRegister(Operands); + if (ResTy == MatchOperand_Success) { + assert(Operands.size() == 1); + Sw64Operand &Operand = static_cast(*Operands.front()); + StartLoc = Operand.getStartLoc(); + EndLoc = Operand.getEndLoc(); + + // AFAIK, we only support numeric registers and named GPR's in CFI + // directives. + // Don't worry about eating tokens before failing. Using an unrecognised + // register is a parse error. + if (Operand.isGPRAsmReg()) { + // Resolve to GPR32 or GPR64 appropriately. + RegNo = Operand.getGPRReg(); + } + + return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch + : MatchOperand_Success; + } + + assert(Operands.size() == 0); + return (RegNo == (unsigned)-1) ? 
MatchOperand_NoMatch : MatchOperand_Success; +} + +bool Sw64AsmParser::isEvaluated(const MCExpr *Expr) { + switch (Expr->getKind()) { + case MCExpr::Constant: + return true; + case MCExpr::SymbolRef: + return (cast(Expr)->getKind() != MCSymbolRefExpr::VK_None); + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Expr); + if (!isEvaluated(BE->getLHS())) + return false; + return isEvaluated(BE->getRHS()); + } + case MCExpr::Unary: + return isEvaluated(cast(Expr)->getSubExpr()); + case MCExpr::Target: + return true; + } + return false; +} + +bool Sw64AsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) { + SMLoc S; + + if (isParenExpr) + return getParser().parseParenExprOfDepth(0, Res, S); + return getParser().parseExpression(Res); +} + +OperandMatchResultTy Sw64AsmParser::parseMemOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseMemOperand\n"); + const MCExpr *IdVal = nullptr; + SMLoc S; + bool isParenExpr = false; + OperandMatchResultTy Res = MatchOperand_NoMatch; + // First operand is the offset. + S = Parser.getTok().getLoc(); + + if (getLexer().getKind() == AsmToken::LParen) { + Parser.Lex(); + isParenExpr = true; + } + + if (getLexer().getKind() != AsmToken::Dollar) { + if (parseMemOffset(IdVal, isParenExpr)) + return MatchOperand_ParseFail; + + const AsmToken &Tok = Parser.getTok(); // Get the next token. + if (Tok.isNot(AsmToken::LParen)) { + Sw64Operand &Mnemonic = static_cast(*Operands[0]); + if (Mnemonic.getToken() == "la" || Mnemonic.getToken() == "dla") { + SMLoc E = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(Sw64Operand::CreateImm(IdVal, S, E, *this)); + return MatchOperand_Success; + } + if (Tok.is(AsmToken::EndOfStatement)) { + SMLoc E = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + // Zero register assumed, add a memory operand with ZERO as its base. + // "Base" will be managed by k_Memory. + auto Base = Sw64Operand::createGPRReg( + 0, "0", getContext().getRegisterInfo(), S, E, *this); + Operands.push_back( + Sw64Operand::CreateMem(std::move(Base), IdVal, S, E, *this)); + return MatchOperand_Success; + } + + MCBinaryExpr::Opcode Opcode; + // GAS and LLVM treat comparison operators different. GAS will generate -1 + // or 0, while LLVM will generate 0 or 1. Since a comparsion operator is + // highly unlikely to be found in a memory offset expression, we don't + // handle them. 
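+      // Fold a trailing arithmetic or logical operator (e.g. 'sym+8' or
+      // 'sym<<2') into the offset expression before the base register is parsed.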
+ switch (Tok.getKind()) { + case AsmToken::Plus: + Opcode = MCBinaryExpr::Add; + Parser.Lex(); + break; + case AsmToken::Minus: + Opcode = MCBinaryExpr::Sub; + Parser.Lex(); + break; + case AsmToken::Star: + Opcode = MCBinaryExpr::Mul; + Parser.Lex(); + break; + case AsmToken::Pipe: + Opcode = MCBinaryExpr::Or; + Parser.Lex(); + break; + case AsmToken::Amp: + Opcode = MCBinaryExpr::And; + Parser.Lex(); + break; + case AsmToken::LessLess: + Opcode = MCBinaryExpr::Shl; + Parser.Lex(); + break; + case AsmToken::GreaterGreater: + Opcode = MCBinaryExpr::LShr; + Parser.Lex(); + break; + case AsmToken::Caret: + Opcode = MCBinaryExpr::Xor; + Parser.Lex(); + break; + case AsmToken::Slash: + Opcode = MCBinaryExpr::Div; + Parser.Lex(); + break; + case AsmToken::Percent: + Opcode = MCBinaryExpr::Mod; + Parser.Lex(); + break; + default: + Error(Parser.getTok().getLoc(), "'(' or expression expected"); + return MatchOperand_ParseFail; + } + const MCExpr *NextExpr; + if (getParser().parseExpression(NextExpr)) + return MatchOperand_ParseFail; + IdVal = MCBinaryExpr::create(Opcode, IdVal, NextExpr, getContext()); + } + + Parser.Lex(); // Eat the '(' token. + } + + Res = parseAnyRegister(Operands); + if (Res != MatchOperand_Success) + return Res; + + if (Parser.getTok().isNot(AsmToken::RParen)) { + Error(Parser.getTok().getLoc(), "')' expected"); + return MatchOperand_ParseFail; + } + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Parser.Lex(); // Eat the ')' token. + + if (!IdVal) + IdVal = MCConstantExpr::create(0, getContext()); + + // Replace the register operand with the memory operand. + std::unique_ptr op( + static_cast(Operands.back().release())); + // Remove the register from the operands. + // "op" will be managed by k_Memory. + Operands.pop_back(); + + // Add the memory operand. + if (const MCBinaryExpr *BE = dyn_cast(IdVal)) { + int64_t Imm; + if (IdVal->evaluateAsAbsolute(Imm)) + IdVal = MCConstantExpr::create(Imm, getContext()); + else if (BE->getLHS()->getKind() != MCExpr::SymbolRef) + IdVal = MCBinaryExpr::create(BE->getOpcode(), BE->getRHS(), BE->getLHS(), + getContext()); + } + + Operands.push_back(Sw64Operand::CreateMem(std::move(op), IdVal, S, E, *this)); + return MatchOperand_Success; +} + +bool Sw64AsmParser::searchSymbolAlias(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCSymbol *Sym = getContext().lookupSymbol(Parser.getTok().getIdentifier()); + if (!Sym) + return false; + + SMLoc S = Parser.getTok().getLoc(); + if (Sym->isVariable()) { + const MCExpr *Expr = Sym->getVariableValue(); + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast(Expr); + StringRef DefSymbol = Ref->getSymbol().getName(); + if (DefSymbol.startswith("$")) { + OperandMatchResultTy ResTy = + matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); + return true; + } + if (ResTy == MatchOperand_ParseFail) + llvm_unreachable("Should never ParseFail"); + } + } + } else if (Sym->isUnset()) { + // If symbol is unset, it might be created in the `parseSetAssignment` + // routine as an alias for a numeric register name. + // Lookup in the aliases list. 
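+    // For example, after '.set r1,$1' the name 'r1' resolves here to the
+    // token recorded for register $1.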
+ auto Entry = RegisterSets.find(Sym->getName()); + if (Entry != RegisterSets.end()) { + OperandMatchResultTy ResTy = + matchAnyRegisterWithoutDollar(Operands, Entry->getValue(), S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); + return true; + } + } + } + + return false; +} + +OperandMatchResultTy Sw64AsmParser::matchAnyRegisterNameWithoutDollar( + OperandVector &Operands, StringRef Identifier, SMLoc S) { + int Index = matchCPURegisterName(Identifier); + if (Index != -1) { + Operands.push_back(Sw64Operand::createGPRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + Index = matchFPURegisterName(Identifier); + if (Index != -1) { + Operands.push_back(Sw64Operand::createFPRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +OperandMatchResultTy +Sw64AsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, + const AsmToken &Token, SMLoc S) { + if (Token.is(AsmToken::Identifier)) { + LLVM_DEBUG(dbgs() << ".. identifier\n"); + StringRef Identifier = Token.getIdentifier(); + OperandMatchResultTy ResTy = + matchAnyRegisterNameWithoutDollar(Operands, Identifier, S); + return ResTy; + } else if (Token.is(AsmToken::Integer)) { + LLVM_DEBUG(dbgs() << ".. integer\n"); + int64_t RegNum = Token.getIntVal(); + Operands.push_back(Sw64Operand::createNumericReg( + RegNum, Token.getString(), getContext().getRegisterInfo(), S, + Token.getLoc(), *this)); + return MatchOperand_Success; + } + + LLVM_DEBUG(dbgs() << Token.getKind() << "\n"); + + return MatchOperand_NoMatch; +} + +OperandMatchResultTy +Sw64AsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) { + auto Token = getLexer().peekTok(false); + return matchAnyRegisterWithoutDollar(Operands, Token, S); +} + +OperandMatchResultTy Sw64AsmParser::parseAnyRegister(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseAnyRegister\n"); + + auto Token = Parser.getTok(); + + SMLoc S = Token.getLoc(); + + if (Token.isNot(AsmToken::Dollar)) { + LLVM_DEBUG(dbgs() << ".. !$ -> try sym aliasing\n"); + if (Token.is(AsmToken::Identifier)) { + if (searchSymbolAlias(Operands)) + return MatchOperand_Success; + } + LLVM_DEBUG(dbgs() << ".. !symalias -> NoMatch\n"); + return MatchOperand_NoMatch; + } + LLVM_DEBUG(dbgs() << ".. 
$\n"); + + OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); // $ + Parser.Lex(); // identifier + } + return ResTy; +} + +bool Sw64AsmParser::parseParenSuffix(StringRef Name, OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + if (getLexer().is(AsmToken::LParen)) { + Operands.push_back( + Sw64Operand::CreateToken("(", getLexer().getLoc(), *this)); + Parser.Lex(); + if (Name == "ret") { + Operands.push_back( + Sw64Operand::CreateToken("$26)", getLexer().getLoc(), *this)); + Parser.Lex(); // eat "$" + Parser.Lex(); // eat "26" + Parser.Lex(); // eat ")" + } else { + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + if (Parser.getTok().isNot(AsmToken::RParen)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token, expected ')'"); + } + Operands.push_back( + Sw64Operand::CreateToken(")", getLexer().getLoc(), *this)); + Parser.Lex(); + } + } + return false; +} + +bool Sw64AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "ParseInstruction\n"); + + std::pair RelocOperands; + // We have reached first instruction, module directive are now forbidden. + // getTargetStreamer().forbidModuleDirective(); + + // Check if we have valid mnemonic + if (!mnemonicIsValid(Name, 0)) { + return Error(NameLoc, "unknown instruction"); + } + // First operand in MCInst is instruction mnemonic. + Operands.push_back(Sw64Operand::CreateToken(Name, NameLoc, *this)); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + // Parse and remember the operand. + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + // Parse parenthesis suffixes before we iterate + if (getLexer().is(AsmToken::LParen) && parseParenSuffix(Name, Operands)) + return true; + } + } + while (Parser.getTok().is(AsmToken::Exclaim)) { + if (false) { + LLVM_DEBUG(dbgs() << ".. Skip Parse " << Name << " Relocation Symbol\n"); + Parser.Lex(); // Eat ! + Parser.Lex(); // Eat reloction symbol. + } else { + LLVM_DEBUG(dbgs() << ".. Parse \"!"); + Parser.Lex(); // Eat ! + + if (Parser.getTok().is(AsmToken::Identifier)) { + // Parse Relocation Symbol ,Add Rel Kind Here ! + StringRef Identifier = Parser.getTok().getIdentifier(); + LLVM_DEBUG(dbgs() << Identifier << "\"\n"); + RelocOperands.first = Identifier; + } + if (Parser.getTok().is(AsmToken::Integer)) { + int64_t RelNum = Parser.getTok().getIntVal(); + LLVM_DEBUG(dbgs() << RelNum << "\"\n"); + RelocOperands.second = RelNum; + } + ParsingFixupOperands(RelocOperands); + Parser.Lex(); // Eat reloction symbol. + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +// FIXME: Given that these have the same name, these should both be +// consistent on affecting the Parser. 
+bool Sw64AsmParser::reportParseError(Twine ErrorMsg) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, ErrorMsg); +} + +bool Sw64AsmParser::parseSetNoAtDirective() { + MCAsmParser &Parser = getParser(); + // Line should look like: ".set noat". + + // Set the $at register to $0. + AssemblerOptions.back()->setATRegIndex(0); + + Parser.Lex(); // Eat "noat". + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetAtDirective() { + // Line can be: ".set at", which sets $at to $1 + // or ".set at=$reg", which sets $at to $reg. + MCAsmParser &Parser = getParser(); + Parser.Lex(); // Eat "at". + + if (getLexer().is(AsmToken::EndOfStatement)) { + // No register was specified, so we set $at to $1. + AssemblerOptions.back()->setATRegIndex(1); + + Parser.Lex(); // Consume the EndOfStatement. + return false; + } + + if (getLexer().isNot(AsmToken::Equal)) { + reportParseError("unexpected token, expected equals sign"); + return false; + } + Parser.Lex(); // Eat "=". + + if (getLexer().isNot(AsmToken::Dollar)) { + if (getLexer().is(AsmToken::EndOfStatement)) { + reportParseError("no register specified"); + return false; + } else { + reportParseError("unexpected token, expected dollar sign '$'"); + return false; + } + } + Parser.Lex(); // Eat "$". + + // Find out what "reg" is. + unsigned AtRegNo; + const AsmToken &Reg = Parser.getTok(); + if (Reg.is(AsmToken::Identifier)) { + AtRegNo = matchCPURegisterName(Reg.getIdentifier()); + } else if (Reg.is(AsmToken::Integer)) { + AtRegNo = Reg.getIntVal(); + } else { + reportParseError("unexpected token, expected identifier or integer"); + return false; + } + + // Check if $reg is a valid register. If it is, set $at to $reg. + if (!AssemblerOptions.back()->setATRegIndex(AtRegNo)) { + reportParseError("invalid register"); + return false; + } + Parser.Lex(); // Eat "reg". + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetReorderDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + AssemblerOptions.back()->setReorder(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetNoReorderDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + AssemblerOptions.back()->setNoReorder(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetMacroDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + // If this is not the end of the statement, report an error. 
+ if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + AssemblerOptions.back()->setMacro(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetNoMacroDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + if (AssemblerOptions.back()->isReorder()) { + reportParseError("`noreorder' must be set before `nomacro'"); + return false; + } + AssemblerOptions.back()->setNoMacro(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool Sw64AsmParser::parseSetAssignment() { + StringRef Name; + const MCExpr *Value; + MCAsmParser &Parser = getParser(); + + if (Parser.parseIdentifier(Name)) + return reportParseError("expected identifier after .set"); + + if (getLexer().isNot(AsmToken::Comma)) + return reportParseError("unexpected token, expected comma"); + Lex(); // Eat comma + + if (getLexer().is(AsmToken::Dollar) && + getLexer().peekTok().is(AsmToken::Integer)) { + // Parse assignment of a numeric register: + // .set r1,$1 + Parser.Lex(); // Eat $. + RegisterSets[Name] = Parser.getTok(); + Parser.Lex(); // Eat identifier. + getContext().getOrCreateSymbol(Name); + } else if (!Parser.parseExpression(Value)) { + // Parse assignment of an expression including + // symbolic registers: + // .set $tmp, $BB0-$BB1 + // .set r2, $f2 + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + Sym->setVariableValue(Value); + } else { + return reportParseError("expected valid expression after comma"); + } + + return false; +} + +bool Sw64AsmParser::parseSetArchDirective() { + MCAsmParser &Parser = getParser(); + + StringRef Arch; + if (Parser.parseIdentifier(Arch)) + return reportParseError("expected arch identifier"); + + StringRef ArchFeatureName = StringSwitch(Arch) + .Case("sw_64", "sw_64") + .Case("core3b", "core3b") + .Case("core4", "core4") + .Default(""); + + if (ArchFeatureName.empty()) + return reportParseError("unsupported architecture"); + + selectArch(ArchFeatureName); + return false; +} + +bool Sw64AsmParser::parseDirectiveSet() { + const AsmToken &Tok = getParser().getTok(); + StringRef IdVal = Tok.getString(); + + if (IdVal == "noat") + return parseSetNoAtDirective(); + if (IdVal == "at") + return parseSetAtDirective(); + if (IdVal == "arch") + return parseSetArchDirective(); + + if (Tok.getString() == "reorder") { + return parseSetReorderDirective(); + } + if (Tok.getString() == "noreorder") { + return parseSetNoReorderDirective(); + } + if (Tok.getString() == "macro") { + return parseSetMacroDirective(); + } + if (Tok.getString() == "nomacro") { + return parseSetNoMacroDirective(); + } + // TODO: temp write + if (Tok.getString() == "volatile") { + return parseSetNoMacroDirective(); + } + // It is just an identifier, look for an assignment. + return parseSetAssignment(); +} + +bool Sw64AsmParser::ParseDirective(AsmToken DirectiveID) { + // This returns false if this function recognizes the directive + // regardless of whether it is successfully handles or reports an + // error. Otherwise it returns true to give the generic parser a + // chance at recognizing it. + + MCAsmParser &Parser = getParser(); + StringRef IDVal = DirectiveID.getString(); + + if (IDVal == ".ent") { + // Ignore this directive for now. 
+ Parser.Lex(); + return false; + } + + if (IDVal == ".end") { + // Ignore this directive for now. + Parser.Lex(); + return false; + } + + if (IDVal == ".frame") { + // Ignore this directive for now. + Parser.eatToEndOfStatement(); + return false; + } + + if (IDVal == ".set") { + parseDirectiveSet(); + return false; + } + + if (IDVal == ".mask" || IDVal == ".fmask") { + // Ignore this directive for now. + Parser.eatToEndOfStatement(); + return false; + } + if (IDVal == ".arch") { + // Ignore this directive for now. + parseSetArchDirective(); + Parser.eatToEndOfStatement(); + return false; + } + if (IDVal == ".word") { + // Ignore this directive for now. + Parser.eatToEndOfStatement(); + } + return true; +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64AsmParser() { + RegisterMCAsmParser X(getTheSw64Target()); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "Sw64GenAsmMatcher.inc" + +bool Sw64AsmParser::mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) { + // Find the appropriate table for this asm variant. + const MatchEntry *Start, *End; + switch (VariantID) { + default: + llvm_unreachable("invalid variant!"); + case 0: + Start = std::begin(MatchTable0); + End = std::end(MatchTable0); + break; + } + // Search the table. + auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode()); + return MnemonicRange.first != MnemonicRange.second; +} + +unsigned Sw64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + Sw64Operand &Op = static_cast(AsmOp); + int64_t ExpectedVal; + + switch (Kind) { + default: + return Match_InvalidOperand; + } + + if (!Op.isReg()) + return Match_InvalidOperand; + + if (Op.getReg() == ExpectedVal) + return Match_Success; + return Match_InvalidOperand; +} + +void Sw64AsmParser::ParsingFixupOperands(std::pair reloc) { + for (auto i : RelocTable) { + if (reloc.first.startswith(i)) + FixupKind = + StringSwitch(i) + .Case("literal", (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL) + .Case("literal_got", + (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL_GOT) + .Case("lituse_addr", (MCFixupKind)Sw64::fixup_SW64_LITUSE) + .Case("lituse_jsr", (MCFixupKind)Sw64::fixup_SW64_HINT) + .Case("gpdisp", (MCFixupKind)Sw64::fixup_SW64_GPDISP) + .Case("gprelhigh", (MCFixupKind)Sw64::fixup_SW64_GPDISP_HI16) + .Case("gprellow", (MCFixupKind)Sw64::fixup_SW64_GPDISP_LO16) + .Case("gprel", (MCFixupKind)Sw64::fixup_SW64_GPREL16) + .Case("tlsgd", (MCFixupKind)Sw64::fixup_SW64_TLSGD) + .Case("tlsldm", (MCFixupKind)Sw64::fixup_SW64_TLSLDM) + .Case("gotdtprel", (MCFixupKind)Sw64::fixup_SW64_GOTDTPREL16) + .Case("dtprelhi", (MCFixupKind)Sw64::fixup_SW64_DTPREL_HI16) + .Case("dtprello", (MCFixupKind)Sw64::fixup_SW64_DTPREL_LO16) + .Case("gottprel", (MCFixupKind)Sw64::fixup_SW64_GOTTPREL16) + .Case("tprelhi", (MCFixupKind)Sw64::fixup_SW64_TPREL_HI16) + .Case("tprello", (MCFixupKind)Sw64::fixup_SW64_TPREL_LO16) + .Case("tprel", (MCFixupKind)Sw64::fixup_SW64_TPREL16) + .Default(llvm::FirstTargetFixupKind); + } +} diff --git a/llvm/lib/Target/Sw64/CMakeLists.txt b/llvm/lib/Target/Sw64/CMakeLists.txt new file mode 100644 index 000000000000..11598fbb8104 --- /dev/null +++ b/llvm/lib/Target/Sw64/CMakeLists.txt @@ -0,0 +1,64 @@ +add_llvm_component_group(Sw64) + +set(LLVM_TARGET_DEFINITIONS Sw64.td) + +tablegen(LLVM Sw64GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM Sw64GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM Sw64GenCallingConv.inc -gen-callingconv) +tablegen(LLVM Sw64GenDAGISel.inc -gen-dag-isel) 
+tablegen(LLVM Sw64GenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM Sw64GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM Sw64GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM Sw64GenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM Sw64GenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM Sw64GenMCPseudoLowering.inc -gen-pseudo-lowering) + +add_public_tablegen_target(Sw64CommonTableGen) + +add_llvm_target(Sw64CodeGen + Sw64AsmPrinter.cpp + Sw64FrameLowering.cpp + Sw64LLRP.cpp + Sw64BranchSelector.cpp + Sw64InstrInfo.cpp + Sw64ISelDAGToDAG.cpp + Sw64ISelLowering.cpp + Sw64MCInstLower.cpp + Sw64MachineFunctionInfo.cpp + Sw64MacroFusion.cpp + Sw64RegisterInfo.cpp + Sw64Subtarget.cpp + Sw64TargetMachine.cpp + Sw64TargetObjectFile.cpp + Sw64SelectionDAGInfo.cpp + Sw64ExpandPseudo.cpp + Sw64ExpandPseudo2.cpp + Sw64PreLegalizerCombiner.cpp + Sw64CombineLS.cpp + Sw64IEEEConstraint.cpp + Sw64TargetTransformInfo.cpp + + LINK_COMPONENTS + Analysis + AsmPrinter + CodeGen + Core + MC + SelectionDAG + Support + Target + TransformUtils + Sw64AsmPrinter + Sw64Desc + Sw64Info + GlobalISel + + ADD_TO_COMPONENT + Sw64 + ) + +add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) +add_subdirectory(Disassembler) +add_subdirectory(TargetInfo) +add_subdirectory(AsmParser) diff --git a/llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt b/llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt new file mode 100644 index 000000000000..123e27b07e2b --- /dev/null +++ b/llvm/lib/Target/Sw64/Disassembler/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_component_library(LLVMSw64Disassembler + Sw64Disassembler.cpp + + LINK_COMPONENTS + MCDisassembler + Sw64Info + Support + + ADD_TO_COMPONENT + Sw64 + ) diff --git a/llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp b/llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp new file mode 100644 index 000000000000..9141e7172323 --- /dev/null +++ b/llvm/lib/Target/Sw64/Disassembler/Sw64Disassembler.cpp @@ -0,0 +1,390 @@ +//===-- Sw64Disassembler.cpp - Disassembler for Sw64 --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64Disassembler class. 
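+// It decodes Sw64 machine code bytes into MCInst objects for consumers such
+// as llvm-objdump.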
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/Sw64MCTargetDesc.h"
+#include "TargetInfo/Sw64TargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDecoderOps.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "Sw64-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class Sw64Disassembler : public MCDisassembler {
+
+public:
+  Sw64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+      : MCDisassembler(STI, Ctx) {}
+  ~Sw64Disassembler() {}
+
+  DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+                              ArrayRef<uint8_t> Bytes, uint64_t Address,
+                              raw_ostream &CStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createSw64Disassembler(const Target &T,
+                                              const MCSubtargetInfo &STI,
+                                              MCContext &Ctx) {
+  return new Sw64Disassembler(STI, Ctx);
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64Disassembler() {
+  // Register the disassembler for each target.
+  TargetRegistry::RegisterMCDisassembler(getTheSw64Target(),
+                                         createSw64Disassembler);
+}
+
+static const unsigned GPRDecoderTable[] = {
+    Sw64::R0,  Sw64::R1,  Sw64::R2,  Sw64::R3,  Sw64::R4,  Sw64::R5,  Sw64::R6,
+    Sw64::R7,  Sw64::R8,  Sw64::R9,  Sw64::R10, Sw64::R11, Sw64::R12, Sw64::R13,
+    Sw64::R14, Sw64::R15, Sw64::R16, Sw64::R17, Sw64::R18, Sw64::R19, Sw64::R20,
+    Sw64::R21, Sw64::R22, Sw64::R23, Sw64::R24, Sw64::R25, Sw64::R26, Sw64::R27,
+    Sw64::R28, Sw64::R29, Sw64::R30, Sw64::R31};
+
+// This instruction does not have a working decoder, and needs to be
+// fixed. This "fixme" function was introduced to keep the backend compiling
+// while making changes to tablegen code.
+static DecodeStatus DecodeFIXMEInstruction(MCInst &Inst, uint64_t RegNo,
+                                           uint64_t Address,
+                                           const MCDisassembler *Decoder) {
+  return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+                                            uint64_t Address,
+                                            const MCDisassembler *Decoder) {
+  if (RegNo >= std::size(GPRDecoderTable))
+    return MCDisassembler::Fail;
+
+  // We must define our own mapping from RegNo to register identifier.
+  // Accessing index RegNo in the register class will work in the case that
+  // registers were added in ascending order, but not in general.
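+  // GPRDecoderTable maps the 5-bit register encoding directly to the
+  // Sw64::R0..R31 enum values, so the index equals the encoding.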
+ unsigned Reg = GPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static const unsigned FPRDecoderTable[] = { + Sw64::F0, Sw64::F1, Sw64::F2, Sw64::F3, Sw64::F4, Sw64::F5, Sw64::F6, + Sw64::F7, Sw64::F8, Sw64::F9, Sw64::F10, Sw64::F11, Sw64::F12, Sw64::F13, + Sw64::F14, Sw64::F15, Sw64::F16, Sw64::F17, Sw64::F18, Sw64::F19, Sw64::F20, + Sw64::F21, Sw64::F22, Sw64::F23, Sw64::F24, Sw64::F25, Sw64::F26, Sw64::F27, + Sw64::F28, Sw64::F29, Sw64::F30, Sw64::F31}; + +static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeV256LRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPRC_loRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 32) { + return MCDisassembler::Fail; + } + unsigned Reg = FPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + Inst.addOperand(MCOperand::createImm(Imm)); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, + const void *Decoder) { + if (Imm == 0) + return MCDisassembler::Fail; + return decodeUImmOperand(Inst, Imm, Address, Decoder); +} + +template +static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, const void *Decoder) { + assert(isUInt(Imm) && "Invalid immediate"); + // Sign-extend the number in the bottom N bits of Imm + Inst.addOperand(MCOperand::createImm(SignExtend64(Imm))); + return MCDisassembler::Success; +} + +template +static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm, + int64_t Address, + const void *Decoder) { + if (Imm == 0) + return MCDisassembler::Fail; + return decodeSImmOperand(Inst, Imm, Address, Decoder); +} + +static DecodeStatus decodeFloatCopyInstruction(uint32_t func, MCInst &MI, + uint32_t Insn, uint64_t Address, + const void *Decoder) { + switch (func) { + default: + return MCDisassembler::Fail; + case 0x30: + MI.setOpcode(Sw64::CPYSS); + break; + case 0x31: + MI.setOpcode(Sw64::CPYSNS); + break; + case 0x32: + MI.setOpcode(Sw64::CPYSES); + break; + } + uint32_t RegOp1 = Insn << 6 >> 27; // Inst {25-21} Reg operand 1 + uint32_t RegOp2 = Insn << 11 >> 27; // Inst [20-16] 
Reg operand 2 + uint32_t RegOp3 = Insn & 0x1F; // Inst [4-0 ] Reg operand 3 + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp3])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp1])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp2])); + return MCDisassembler::Success; +} + +static DecodeStatus decodeFloatInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t func = (Insn & 0x1FE0) >> 5; + switch ((func & 0xF0) >> 4) { + default: + return MCDisassembler::Fail; + case 0x3: + return decodeFloatCopyInstruction(func, MI, Insn, Address, Decoder); + } +} + +static DecodeStatus decodeFloatSelectInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t func = (Insn & 0xFC00) >> 10; + switch (func) { + default: + return MCDisassembler::Fail; + case 0x10: + MI.setOpcode(Sw64::FSELEQS); + break; + case 0x11: + MI.setOpcode(Sw64::FSELNES); + break; + case 0x12: + MI.setOpcode(Sw64::FSELLTS); + break; + case 0x13: + MI.setOpcode(Sw64::FSELLES); + break; + case 0x14: + MI.setOpcode(Sw64::FSELGTS); + break; + case 0x15: + MI.setOpcode(Sw64::FSELGES); + break; + } + uint32_t RegOp1 = Insn << 6 >> 27; // Inst {25-21} Reg operand 1 + uint32_t RegOp2 = Insn << 11 >> 27; // Inst [20-16] Reg operand 2 + uint32_t RegOp3 = (Insn & 0x3E0) >> 5; // Inst [4-0 ] Reg operand 3 + uint32_t RegOp4 = Insn & 0x1F; // Inst [4-0 ] Reg operand 3 + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp4])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp3])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp2])); + MI.addOperand(MCOperand::createReg(FPRDecoderTable[RegOp1])); + return MCDisassembler::Success; +} + +static DecodeStatus decodePostLSInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t func = (Insn & 0xFC00) >> 12; + bool isFloat = false; + bool isStore = false; + switch (func) { + default: + return MCDisassembler::Fail; + case 0x0: + MI.setOpcode(Sw64::LDBU_A); + break; + case 0x1: + MI.setOpcode(Sw64::LDHU_A); + break; + case 0x2: + MI.setOpcode(Sw64::LDW_A); + break; + case 0x3: + MI.setOpcode(Sw64::LDL_A); + break; + case 0x4: + MI.setOpcode(Sw64::LDS_A); + isFloat = true; + break; + case 0x5: + MI.setOpcode(Sw64::LDD_A); + isFloat = true; + break; + case 0x6: + MI.setOpcode(Sw64::STB_A); + break; + case 0x7: + MI.setOpcode(Sw64::STH_A); + break; + case 0x8: + MI.setOpcode(Sw64::STW_A); + break; + case 0x9: + MI.setOpcode(Sw64::STL_A); + break; + case 0xA: + MI.setOpcode(Sw64::STS_A); + isFloat = true; + isStore = true; + break; + case 0xB: + MI.setOpcode(Sw64::STD_A); + isFloat = true; + isStore = true; + break; + } + uint32_t RegOp1 = Insn << 6 >> 27; // Inst {25-21} Reg operand 1 + uint32_t RegOp2 = Insn << 11 >> 27; // Inst [20-16] Reg operand 2 + unsigned RegOp3 = Insn & 0xFFF; // Inst [11-0 ] Reg operand 3 + uint32_t RegOp4 = Insn << 11 >> 27; + MI.addOperand((isFloat && !isStore) + ? MCOperand::createReg(FPRDecoderTable[RegOp1]) + : MCOperand::createReg(GPRDecoderTable[RegOp1])); + MI.addOperand((isFloat && isStore) + ? 
MCOperand::createReg(FPRDecoderTable[RegOp4]) + : MCOperand::createReg(GPRDecoderTable[RegOp4])); + MI.addOperand(MCOperand::createReg(GPRDecoderTable[RegOp2])); + MI.addOperand(MCOperand::createImm(RegOp3)); + return MCDisassembler::Success; +} + +static DecodeStatus decodeBarrierInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t func = Insn & 0xFFFF; + switch (func) { + default: + return MCDisassembler::Fail; + case 0x00: + MI.setOpcode(Sw64::MB); + break; + case 0x01: + MI.setOpcode(Sw64::IMEMB); + break; + case 0x02: + MI.setOpcode(Sw64::WMEMB); + break; + } + return MCDisassembler::Success; +} + +static DecodeStatus decodeConlictInstruction(MCInst &MI, uint32_t Insn, + uint64_t Address, + const void *Decoder) { + uint32_t Opcode = Insn >> 26; + switch (Opcode) { + default: + return MCDisassembler::Fail; + case 0x06: + return decodeBarrierInstruction(MI, Insn, Address, Decoder); + case 0x18: + return decodeFloatInstruction(MI, Insn, Address, Decoder); + case 0x19: + return decodeFloatSelectInstruction(MI, Insn, Address, Decoder); + case 0x1E: + return decodePostLSInstruction(MI, Insn, Address, Decoder); + } +} + +#include "Sw64GenDisassemblerTables.inc" + +DecodeStatus Sw64Disassembler::getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const { + // TODO: This will need modification when supporting instruction set + // extensions with instructions > 32-bits (up to 176 bits wide). + uint32_t Insn; + DecodeStatus Result; + + if (Bytes.size() < 4) { + Size = 0; + return MCDisassembler::Fail; + } + Insn = support::endian::read32le(Bytes.data()); + LLVM_DEBUG(dbgs() << "Trying Decode Conflict Instruction :\n"); + Result = decodeConlictInstruction(Instr, Insn, Address, this); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + LLVM_DEBUG(dbgs() << "Trying Sw64 table :\n"); + Result = decodeInstruction(DecoderTable32, Instr, Insn, Address, this, STI); + Size = 4; + + return Result; +} diff --git a/llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt b/llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt new file mode 100644 index 000000000000..b07b33f37e09 --- /dev/null +++ b/llvm/lib/Target/Sw64/InstPrinter/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_component_library(LLVMSw64AsmPrinter + Sw64InstPrinter.cpp + + LINK_COMPONENTS + MC + Support + + ADD_TO_COMPONENT + Sw64 + ) diff --git a/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp new file mode 100644 index 000000000000..74ae067f9f9e --- /dev/null +++ b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.cpp @@ -0,0 +1,148 @@ +//===-- Sw64InstPrinter.cpp - Convert Sw64 MCInst to assembly syntax ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an Sw64 MCInst to a .s file. 
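getInstruction above fetches four little-endian bytes, first tries the hand-written decodeConlictInstruction path (opcodes whose encodings collide in the generated tables), and only then falls back to the TableGen'd DecoderTable32. A minimal stand-alone sketch of the byte fetch and bit-field extraction those helpers perform; readLE32 and splitFields are hypothetical names, and the field positions simply mirror the in-line comments above (opcode in bits 31-26, register operands in bits 25-21 and 20-16):

#include <cstdint>

// Assemble a 32-bit instruction word from four little-endian bytes,
// equivalent to the support::endian::read32le call in getInstruction.
static inline uint32_t readLE32(const uint8_t *Bytes) {
  return uint32_t(Bytes[0]) | uint32_t(Bytes[1]) << 8 |
         uint32_t(Bytes[2]) << 16 | uint32_t(Bytes[3]) << 24;
}

struct Sw64Fields {
  unsigned Opcode; // bits 31-26, switched on in decodeConlictInstruction
  unsigned Ra;     // bits 25-21, same value as (Insn << 6) >> 27 above
  unsigned Rb;     // bits 20-16, same value as (Insn << 11) >> 27 above
};

static inline Sw64Fields splitFields(uint32_t Insn) {
  return {Insn >> 26, (Insn >> 21) & 0x1F, (Insn >> 16) & 0x1F};
}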
+// +//===----------------------------------------------------------------------===// + +#include "Sw64InstPrinter.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#include "Sw64GenAsmWriter.inc" + +void Sw64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { + OS << StringRef(getRegisterName(Reg)).lower(); +} + +void Sw64InstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, const MCSubtargetInfo &STI, + raw_ostream &OS) { + printInstruction(MI, Address, OS); + if (!Annot.empty()) { + OS << "\t" << Annot; + } else + printAnnotation(OS, Annot); +} + +void Sw64InstPrinter::printInlineJT(const MCInst *MI, int opNum, + raw_ostream &O) { + report_fatal_error("can't handle InlineJT"); +} + +void Sw64InstPrinter::printInlineJT32(const MCInst *MI, int opNum, + raw_ostream &O) { + report_fatal_error("can't handle InlineJT32"); +} + +void Sw64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + printRegName(O, Op.getReg()); + return; + } + + if (Op.isImm()) { + if (Op.getImm() > 65535) { + O << formatHex(Op.getImm()); + return; + } + O << Op.getImm(); + return; + } + + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI, true); +} + +void Sw64InstPrinter::printMemoryArg(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + + if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::Target) { + const Sw64MCExpr *Sw64Expr = cast(Expr); + + switch (Sw64Expr->getKind()) { + default: + break; + case Sw64MCExpr::MEK_GPDISP_HI16: + case Sw64MCExpr::MEK_GPDISP_LO16: + case Sw64MCExpr::MEK_GPDISP: + O << "0"; + return; + } + } + } + printOperand(MI, OpNo, O); +} + +void Sw64InstPrinter::printMemOperand(const MCInst *MI, int opNum, + raw_ostream &O) { + // Load/Store memory operands -- imm($reg) + + if (MI->getOperand(opNum).isImm() && MI->getOperand(opNum + 1).isReg()) { + printOperand(MI, opNum, O); + O << "("; + printOperand(MI, opNum + 1, O); + O << ")"; + } else { + printOperand(MI, opNum + 1, O); + O << "("; + printOperand(MI, opNum, O); + O << ")"; + } +} + +template +void Sw64InstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) { + const MCOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) { + uint64_t Imm = MO.getImm(); + Imm -= Offset; + Imm &= (1 << Bits) - 1; + Imm += Offset; + if (MI->getOpcode() == Sw64::VLOGZZ) + O << format("%x", Imm); + else + O << formatImm(Imm); + return; + } + + printOperand(MI, opNum, O); +} + +// Only for Instruction VLOG +void Sw64InstPrinter::printHexImm(const MCInst *MI, int opNum, raw_ostream &O) { + const MCOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) { + uint64_t Imm = MO.getImm(); + O << format("%x", ((Imm >> 4) & 0xf)) << format("%x", (Imm & 0xf)); + return; + } + + printOperand(MI, opNum, O); +} diff --git a/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h new file mode 100644 index 000000000000..8d721ac01ac3 --- /dev/null +++ 
b/llvm/lib/Target/Sw64/InstPrinter/Sw64InstPrinter.h @@ -0,0 +1,57 @@ +//== Sw64InstPrinter.h - Convert Sw64 MCInst to assembly syntax -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the Sw64InstPrinter class, +// which is used to print Sw64 MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_INSTPRINTER_SW64INSTPRINTER_H +#define LLVM_LIB_TARGET_SW64_INSTPRINTER_SW64INSTPRINTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class Sw64InstPrinter : public MCInstPrinter { +public: + Sw64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + // Autogenerated by tblgen. + std::pair getMnemonic(const MCInst *MI) override; + void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); + static const char *getRegisterName(MCRegister Reg); + + void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &OS) override; + void printMemoryArg(const MCInst *MI, unsigned OpNo, raw_ostream &O); + +private: + void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O); + void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, + raw_ostream &O) { + printOperand(MI, OpNum, O); + } + void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); + + void printHexImm(const MCInst *MI, int opNum, raw_ostream &O); + + template + void printUImm(const MCInst *MI, int opNum, raw_ostream &O); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_INSTPRINTER_SW64INSTPRINTER_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt new file mode 100644 index 000000000000..69169bf24b07 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,22 @@ +add_llvm_component_library(LLVMSw64Desc + Sw64ABIInfo.cpp + Sw64ABIFlagsSection.cpp + Sw64AsmBackend.cpp + Sw64ELFObjectWriter.cpp + Sw64ELFStreamer.cpp + Sw64MCAsmInfo.cpp + Sw64MCCodeEmitter.cpp + Sw64MCExpr.cpp + Sw64MCTargetDesc.cpp + Sw64OptionRecord.cpp + Sw64TargetStreamer.cpp + + LINK_COMPONENTS + MC + Sw64Info + Sw64AsmPrinter + Support + + ADD_TO_COMPONENT + Sw64 +) diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp new file mode 100644 index 000000000000..0f714e724bcd --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.cpp @@ -0,0 +1,31 @@ +//===- Sw64ABIFlagsSection.cpp - Sw64 ELF ABI Flags Section ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
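printUImm in Sw64InstPrinter.cpp above prints an immediate by reducing it modulo 2^Bits relative to Offset and shifting it back into range. A stand-alone model of that bias-and-mask, with a hypothetical name and a couple of worked values:

#include <cstdint>

// Mirror of the arithmetic in printUImm<Bits, Offset>: subtract the bias,
// keep the low Bits bits, then add the bias back.
static uint64_t wrapUImm(uint64_t Imm, unsigned Bits, uint64_t Offset) {
  return ((Imm - Offset) & ((1ULL << Bits) - 1)) + Offset;
}
// wrapUImm(300, 8, 0) == 44   (300 mod 256)
// wrapUImm(5, 3, 1)   == 5    (already within [1, 1 + 8))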
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64ABIFlagsSection.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Sw64ABIFlags.h" + +using namespace llvm; + +uint8_t Sw64ABIFlagsSection::getFpABIValue() { + llvm_unreachable("unexpected fp abi value"); +} + +StringRef Sw64ABIFlagsSection::getFpABIString(FpABIKind Value) { + llvm_unreachable("unsupported fp abi value"); +} +namespace llvm { + +MCStreamer &operator<<(MCStreamer &OS, Sw64ABIFlagsSection &ABIFlagsSection) { + return OS; +} + +} // end namespace llvm diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h new file mode 100644 index 000000000000..058c47f58d44 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIFlagsSection.h @@ -0,0 +1,127 @@ +//===- Sw64ABIFlagsSection.h - Sw64 ELF ABI Flags Section -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIFLAGSSECTION_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIFLAGSSECTION_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Sw64ABIFlags.h" +#include + +namespace llvm { + +class MCStreamer; + +struct Sw64ABIFlagsSection { + // Internal representation of the fp_abi related values used in .module. + enum class FpABIKind { ANY, XX, S32, S64, SOFT }; + + // Version of flags structure. + uint16_t Version = 0; + // The level of the ISA: 1-5, 32, 64. + uint8_t ISALevel = 0; + // The revision of ISA: 0 for SW64 V and below, 1-n otherwise. + uint8_t ISARevision = 0; + // The size of general purpose registers. + Sw64::AFL_REG GPRSize = Sw64::AFL_REG_NONE; + // The size of co-processor 1 registers. + Sw64::AFL_REG CPR1Size = Sw64::AFL_REG_NONE; + // The size of co-processor 2 registers. + Sw64::AFL_REG CPR2Size = Sw64::AFL_REG_NONE; + // Processor-specific extension. + Sw64::AFL_EXT ISAExtension = Sw64::AFL_EXT_NONE; + // Mask of ASEs used. + uint32_t ASESet = 0; + + bool OddSPReg = false; + +protected: + // The floating-point ABI. + FpABIKind FpABI = FpABIKind::ANY; + +public: + Sw64ABIFlagsSection() = default; + + uint16_t getVersionValue() { return (uint16_t)Version; } + uint8_t getISALevelValue() { return (uint8_t)ISALevel; } + uint8_t getISARevisionValue() { return (uint8_t)ISARevision; } + uint8_t getGPRSizeValue() { return (uint8_t)GPRSize; } + uint8_t getCPR1SizeValue(); + uint8_t getCPR2SizeValue() { return (uint8_t)CPR2Size; } + uint8_t getFpABIValue(); + uint32_t getISAExtensionValue() { return (uint32_t)ISAExtension; } + uint32_t getASESetValue() { return (uint32_t)ASESet; } + + uint32_t getFlags1Value() { + uint32_t Value = 0; + + if (OddSPReg) + Value |= (uint32_t)Sw64::AFL_FLAGS1_ODDSPREG; + + return Value; + } + + uint32_t getFlags2Value() { return 0; } + + FpABIKind getFpABI() { return FpABI; } + void setFpABI(FpABIKind Value) { + FpABI = Value; + } + + StringRef getFpABIString(FpABIKind Value); + + template + void setGPRSizeFromPredicates(const PredicateLibrary &P) { + GPRSize = P.isGP64bit() ? 
Sw64::AFL_REG_64 : Sw64::AFL_REG_32; + } + + template + void setCPR1SizeFromPredicates(const PredicateLibrary &P) { + if (P.useSoftFloat()) + CPR1Size = Sw64::AFL_REG_NONE; + else if (P.hasMSA()) + CPR1Size = Sw64::AFL_REG_128; + else + CPR1Size = P.isFP64bit() ? Sw64::AFL_REG_64 : Sw64::AFL_REG_32; + } + + template + void setISAExtensionFromPredicates(const PredicateLibrary &P) { + if (P.hasCnSw64()) + ISAExtension = Sw64::AFL_EXT_OCTEON; + else + ISAExtension = Sw64::AFL_EXT_NONE; + } + + template + void setFpAbiFromPredicates(const PredicateLibrary &P) { + FpABI = FpABIKind::ANY; + if (P.useSoftFloat()) + FpABI = FpABIKind::SOFT; + + if (P.isABI_S64()) + FpABI = FpABIKind::S64; + } + + template + void setAllFromPredicates(const PredicateLibrary &P) { + setGPRSizeFromPredicates(P); + setCPR1SizeFromPredicates(P); + setISAExtensionFromPredicates(P); + setFpAbiFromPredicates(P); + OddSPReg = P.useOddSPReg(); + } +}; + +MCStreamer &operator<<(MCStreamer &OS, Sw64ABIFlagsSection &ABIFlagsSection); + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIFLAGSSECTION_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp new file mode 100644 index 000000000000..ea5b1f585883 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.cpp @@ -0,0 +1,29 @@ +//===---- Sw64ABIInfo.cpp - Information about SW64 ABI's ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64ABIInfo.h" +#include "Sw64RegisterInfo.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCTargetOptions.h" + +using namespace llvm; +Sw64ABIInfo Sw64ABIInfo::computeTargetABI(const Triple &TT, StringRef CPU, + const MCTargetOptions &Options) { + if (Options.getABIName().startswith("n64")) + return Sw64ABIInfo::S64(); + + assert(Options.getABIName().empty() && "Unknown ABI option for SW64"); + + if (TT.isSw64()) + return Sw64ABIInfo::S64(); + else + assert(!TT.isSw64() && "sw_64 ABI is not appoint 64 bit."); + return Sw64ABIInfo::S64(); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h new file mode 100644 index 000000000000..ae758ca8d6f3 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ABIInfo.h @@ -0,0 +1,77 @@ +//===---- Sw64ABIInfo.h - Information about SW64 ABI's --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIINFO_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ABIINFO_H + +#include "llvm/IR/CallingConv.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/TargetParser/Triple.h" + +namespace llvm { + +template class ArrayRef; +class MCTargetOptions; +class StringRef; +class TargetRegisterClass; + +class Sw64ABIInfo { +public: + enum class ABI { Unknown, S64 }; + +protected: + ABI ThisABI; + +public: + Sw64ABIInfo(ABI ThisABI) : ThisABI(ThisABI) {} + + static Sw64ABIInfo Unknown() { return Sw64ABIInfo(ABI::Unknown); } + static Sw64ABIInfo S64() { return Sw64ABIInfo(ABI::S64); } + static Sw64ABIInfo computeTargetABI(const Triple &TT, StringRef CPU, + const MCTargetOptions &Options); + + bool IsKnown() const { return ThisABI != ABI::Unknown; } + bool IsS64() const { return ThisABI == ABI::S64; } + ABI GetEnumValue() const { return ThisABI; } + + /// The registers to use for byval arguments. + ArrayRef GetByValArgRegs() const; + + /// The registers to use for the variable argument list. + ArrayRef GetVarArgRegs() const; + + /// Obtain the size of the area allocated by the callee for arguments. + /// CallingConv::FastCall affects the value for S32. + unsigned GetCalleeAllocdArgSizeInBytes(CallingConv::ID CC) const; + + /// Ordering of ABI's + /// Sw64GenSubtargetInfo.inc will use this to resolve conflicts when given + /// multiple ABI options. + bool operator<(const Sw64ABIInfo Other) const { + return ThisABI < Other.GetEnumValue(); + } + + unsigned GetStackPtr() const; + unsigned GetFramePtr() const; + unsigned GetBasePtr() const; + unsigned GetGlobalPtr() const; + unsigned GetNullPtr() const; + unsigned GetZeroReg() const; + unsigned GetPtrAdduOp() const; + unsigned GetPtrAddiuOp() const; + unsigned GetPtrSubuOp() const; + unsigned GetPtrAndOp() const; + unsigned GetGPRMoveOp() const; + inline bool ArePtrs64bit() const { return IsS64(); } + inline bool AreGprs64bit() const { return IsS64(); } + + unsigned GetEhDataReg(unsigned I) const; +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp new file mode 100644 index 000000000000..c1cf8243b30d --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.cpp @@ -0,0 +1,317 @@ +//===-- Sw64AsmBackend.cpp - Sw64 Asm Backend ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64AsmBackend class. 
+// +//===----------------------------------------------------------------------===// +// + +#include "MCTargetDesc/Sw64AsmBackend.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "MCTargetDesc/Sw64FixupKinds.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// Prepare value for the target space for it +static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + MCContext &Ctx) { + + unsigned Kind = Fixup.getKind(); + switch (Kind) { + default: + return 0; + case Sw64::fixup_SW64_32: + case Sw64::fixup_SW64_64: + case FK_Data_4: + case FK_Data_8: + case Sw64::fixup_SW64_GPREL32: + case Sw64::fixup_SW64_LITUSE: + case Sw64::fixup_SW64_GPREL_HI16: + case Sw64::fixup_SW64_GPREL_LO16: + case Sw64::fixup_SW64_GPREL16: + case Sw64::fixup_SW64_TLSGD: + case Sw64::fixup_SW64_TLSLDM: + case Sw64::fixup_SW64_DTPMOD64: + case Sw64::fixup_SW64_GOTDTPREL16: + case Sw64::fixup_SW64_DTPREL64: + case Sw64::fixup_SW64_DTPREL_HI16: + case Sw64::fixup_SW64_DTPREL_LO16: + case Sw64::fixup_SW64_DTPREL16: + case Sw64::fixup_SW64_GOTTPREL16: + case Sw64::fixup_SW64_TPREL64: + case Sw64::fixup_SW64_TPREL_HI16: + case Sw64::fixup_SW64_TPREL_LO16: + case Sw64::fixup_SW64_TPREL16: + break; + case Sw64::fixup_SW64_23_PCREL_S2: + // So far we are only using this type for branches. + // For branches we start 1 instruction after the branch + // so the displacement will be one instruction size less. + Value -= 4; + // The displacement is then divided by 4 to give us an 18 bit + // address range. + Value >>= 2; + break; + case Sw64::fixup_SW64_BRSGP: + // So far we are only using this type for jumps. + // The displacement is then divided by 4 to give us an 28 bit + // address range. + Value >>= 2; + break; + case Sw64::fixup_SW64_ELF_LITERAL: + Value &= 0xffff; + break; + case Sw64::fixup_SW64_ELF_LITERAL_GOT: + Value = ((Value + 0x8000) >> 16) & 0xffff; + break; + } + return Value; +} + +std::unique_ptr +Sw64AsmBackend::createObjectTargetWriter() const { + return createSw64ELFObjectWriter(TheTriple, IsS32); +} + +/// ApplyFixup - Apply the Value for given Fixup into the provided +/// data fragment, at the offset specified by the fixup and following the +/// fixup kind as appropriate. +void Sw64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved, + const MCSubtargetInfo *STI) const { + MCFixupKind Kind = Fixup.getKind(); + MCContext &Ctx = Asm.getContext(); + Value = adjustFixupValue(Fixup, Value, Ctx); + + if (!Value) + return; // Doesn't change encoding. 
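As a worked example of the branch handling in adjustFixupValue above: for fixup_SW64_23_PCREL_S2 the backend subtracts one instruction (the displacement is measured from the slot after the branch) and then drops the two low bits, so a target 0x40 bytes past the fixup is encoded as (0x40 - 4) >> 2 = 0xF word slots. A small sketch of just that step, under those assumptions:

#include <cstdint>

// Hypothetical stand-alone mirror of the fixup_SW64_23_PCREL_S2 adjustment.
static uint64_t adjustBranch21(uint64_t ByteDisplacement) {
  // Skip the implicit next instruction, then count 4-byte words.
  return (ByteDisplacement - 4) >> 2;
}
// adjustBranch21(0x40) == 0xF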
+ + // Where do we start in the object + unsigned Offset = Fixup.getOffset(); + // Number of bytes we need to fixup + unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8; + // Used to point to big endian bytes + unsigned FullSize; + + switch ((unsigned)Kind) { + case Sw64::fixup_SW64_32: + FullSize = 4; + break; + case Sw64::fixup_SW64_64: + FullSize = 8; + break; + default: + FullSize = 4; + break; + } + + // Grab current value, if any, from bits. + uint64_t CurVal = 0; + + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = Endian == support::little ? i : (FullSize - 1 - i); + CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i * 8); + } + + uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); + CurVal |= Value & Mask; + + // Write out the fixed up bytes back to the code/data bits. + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = Endian == support::little ? i : (FullSize - 1 - i); + Data[Offset + Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff); + } +} + +std::optional Sw64AsmBackend::getFixupKind(StringRef Name) const { + return StringSwitch>(Name) + .Case("R_SW_64_REFLONG", (MCFixupKind)Sw64::fixup_SW64_32) + .Case("R_SW_64_REFQUAD", (MCFixupKind)Sw64::fixup_SW64_64) + .Case("R_SW_64_REFQUAD", (MCFixupKind)Sw64::fixup_SW64_CTOR) + .Case("R_SW_64_GPREL32", (MCFixupKind)Sw64::fixup_SW64_GPREL32) + .Case("R_SW_64_LITERAL", (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL) + .Case("R_SW_64_LITUSE", (MCFixupKind)Sw64::fixup_SW64_LITUSE) + .Case("R_SW_64_GPDISP", (MCFixupKind)Sw64::fixup_SW64_GPDISP) + .Case("R_SW_64_BRADDR", (MCFixupKind)Sw64::fixup_SW64_23_PCREL_S2) + .Case("R_SW_64_HINT", (MCFixupKind)Sw64::fixup_SW64_HINT) + .Case("R_SW_64_SREL16", (MCFixupKind)Sw64::fixup_SW64_16_PCREL) + .Case("R_SW_64_SREL32", (MCFixupKind)Sw64::fixup_SW64_32_PCREL) + .Case("R_SW_64_SREL64", (MCFixupKind)Sw64::fixup_SW64_64_PCREL) + .Case("R_SW_64_GPRELHIGH", (MCFixupKind)Sw64::fixup_SW64_GPREL_HI16) + .Case("R_SW_64_GPRELLOW", (MCFixupKind)Sw64::fixup_SW64_GPREL_LO16) + .Case("R_SW_64_GPREL16", (MCFixupKind)Sw64::fixup_SW64_GPREL16) + .Case("R_SW_64_BRSGP", (MCFixupKind)Sw64::fixup_SW64_BRSGP) + .Case("R_SW_64_TLSGD", (MCFixupKind)Sw64::fixup_SW64_TLSGD) + .Case("R_SW_64_TLSLDM", (MCFixupKind)Sw64::fixup_SW64_TLSLDM) + .Case("R_SW_64_DTPMOD64", (MCFixupKind)Sw64::fixup_SW64_DTPMOD64) + .Case("R_SW_64_GOTDTPREL", (MCFixupKind)Sw64::fixup_SW64_GOTDTPREL16) + .Case("R_SW_64_DTPREL64", (MCFixupKind)Sw64::fixup_SW64_DTPREL64) + .Case("R_SW_64_DTPRELHI", (MCFixupKind)Sw64::fixup_SW64_DTPREL_HI16) + .Case("R_SW_64_DTPRELLO", (MCFixupKind)Sw64::fixup_SW64_DTPREL_LO16) + .Case("R_SW_64_DTPREL16", (MCFixupKind)Sw64::fixup_SW64_DTPREL16) + .Case("R_SW_64_GOTTPREL", (MCFixupKind)Sw64::fixup_SW64_GOTTPREL16) + .Case("R_SW_64_TPREL64", (MCFixupKind)Sw64::fixup_SW64_TPREL64) + .Case("R_SW_64_TPRELHI", (MCFixupKind)Sw64::fixup_SW64_TPREL_HI16) + .Case("R_SW_64_TPRELLO", (MCFixupKind)Sw64::fixup_SW64_TPREL_LO16) + .Case("R_SW_64_TPREL16", (MCFixupKind)Sw64::fixup_SW64_TPREL16) + .Case("R_SW_64_LITERAL_GOT", + (MCFixupKind)Sw64::fixup_SW64_ELF_LITERAL_GOT) + .Default(MCAsmBackend::getFixupKind(Name)); +} + +const MCFixupKindInfo & +Sw64AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo LittleEndianInfos[] = { + // This table *must* be in same the order of fixup_* kinds in + // Sw64FixupKinds.h. 
+ // name offset bits flags + {"fixup_SW64_NONE", 0, 0, 0}, + {"fixup_SW64_32", 0, 32, 0}, + {"fixup_SW64_64", 0, 64, 0}, + {"fixup_SW64_CTOR", 0, 64, 0}, + {"fixup_SW64_GPREL32", 0, 32, 0}, + {"fixup_SW64_ELF_LITERAL", 0, 16, 0}, + {"fixup_SW64_LITUSE", 0, 32, 0}, + {"fixup_SW64_GPDISP", 0, 16, 0}, + {"fixup_SW64_GPDISP_HI16", 0, 16, 0}, + {"fixup_SW64_GPDISP_LO16", 0, 16, 0}, + {"fixup_SW64_23_PCREL_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_SW64_HINT", 0, 14, 0}, + {"fixup_SW64_16_PCREL", 0, 16, 0}, + {"fixup_SW64_32_PCREL", 0, 32, 0}, + {"fixup_SW64_64_PCREL", 0, 64, 0}, + {"fixup_SW64_GPREL_HI16", 0, 16, 0}, + {"fixup_SW64_GPREL_LO16", 0, 16, 0}, + {"fixup_SW64_GPREL16", 0, 16, 0}, + {"fixup_SW64_BRSGP", 0, 21, 0}, + {"fixup_SW64_TLSGD", 0, 16, 0}, + {"fixup_SW64_TLSLDM", 0, 16, 0}, + {"fixup_SW64_DTPMOD64", 0, 64, 0}, + {"fixup_SW64_GOTDTPREL16", 0, 16, 0}, + {"fixup_SW64_DTPREL64", 0, 64, 0}, + {"fixup_SW64_DTPREL_HI16", 0, 16, 0}, + {"fixup_SW64_DTPREL_LO16", 0, 16, 0}, + {"fixup_SW64_DTPREL16", 0, 16, 0}, + {"fixup_SW64_GOTTPREL16", 0, 16, 0}, + {"fixup_SW64_TPREL64", 0, 64, 0}, + {"fixup_SW64_TPREL_HI16", 0, 16, 0}, + {"fixup_SW64_TPREL_LO16", 0, 16, 0}, + {"fixup_SW64_TPREL16", 0, 16, 0}, + {"fixup_SW64_ELF_LITERAL_GOT", 0, 16, 0}, + {"fixup_SW64_LITERAL_BASE", 0, 16, 0}, + {"fixup_SW64_LITUSE_JSRDIRECT", 0, 16, 0}}; + + static_assert(std::size(LittleEndianInfos) == Sw64::NumTargetFixupKinds, + "Not all SW64 little endian fixup kinds added!"); + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + + if (Endian == support::little) + return LittleEndianInfos[Kind - FirstTargetFixupKind]; + else + llvm_unreachable("sw_64 is not appoint litter endian."); +} + +/// WriteNopData - Write an (optimal) nop sequence of Count bytes +/// to the given output. If the target cannot generate such a sequence, +/// it should return an error. +/// +/// \return - True on success. +bool Sw64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const { + // If the count is not 4-byte aligned, we must be writing data into the text + // section (otherwise we have unaligned instructions, and thus have far + // bigger problems), so just write zeros instead. + OS.write_zeros(Count % 4); + + // We are properly aligned, so write NOPs as requested. + Count /= 4; + for (uint64_t i = 0; i != Count; ++i) + support::endian::write(OS, 0x43ff075f, support::little); + return true; +} + +bool Sw64AsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { + const unsigned FixupKind = Fixup.getKind(); + switch (FixupKind) { + default: + return false; + // All these relocations require special processing + // at linking time. Delegate this work to a linker. 
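writeNopData above pads any misaligned tail with zero bytes and then fills the rest with the canonical Sw64 NOP word 0x43ff075f, one per remaining 4-byte slot. A minimal stand-alone sketch of that policy (hypothetical helper, little-endian byte order as in the backend):

#include <cstdint>
#include <vector>

static std::vector<uint8_t> makeNopPadding(uint64_t Count) {
  std::vector<uint8_t> Out(Count % 4, 0);     // unaligned remainder -> zeros
  for (uint64_t I = 0; I != Count / 4; ++I)   // aligned part -> NOP words
    for (unsigned B = 0; B != 4; ++B)
      Out.push_back(uint8_t(0x43ff075fu >> (8 * B)));
  return Out;
}
// makeNopPadding(10) yields 2 zero bytes followed by 2 NOP words (10 bytes total).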
+ case Sw64::fixup_SW64_32: + case Sw64::fixup_SW64_64: + case Sw64::fixup_SW64_CTOR: + case Sw64::fixup_SW64_GPREL32: + case Sw64::fixup_SW64_ELF_LITERAL: + case Sw64::fixup_SW64_LITUSE: + case Sw64::fixup_SW64_GPDISP: + case Sw64::fixup_SW64_GPDISP_HI16: + case Sw64::fixup_SW64_HINT: + case Sw64::fixup_SW64_16_PCREL: + case Sw64::fixup_SW64_32_PCREL: + case Sw64::fixup_SW64_64_PCREL: + case Sw64::fixup_SW64_GPREL_HI16: + case Sw64::fixup_SW64_GPREL_LO16: + case Sw64::fixup_SW64_GPREL16: + case Sw64::fixup_SW64_BRSGP: + case Sw64::fixup_SW64_TLSGD: + case Sw64::fixup_SW64_TLSLDM: + case Sw64::fixup_SW64_DTPMOD64: + case Sw64::fixup_SW64_GOTDTPREL16: + case Sw64::fixup_SW64_DTPREL64: + case Sw64::fixup_SW64_DTPREL_HI16: + case Sw64::fixup_SW64_DTPREL_LO16: + case Sw64::fixup_SW64_DTPREL16: + case Sw64::fixup_SW64_GOTTPREL16: + case Sw64::fixup_SW64_TPREL64: + case Sw64::fixup_SW64_TPREL_HI16: + case Sw64::fixup_SW64_TPREL_LO16: + case Sw64::fixup_SW64_TPREL16: + case Sw64::fixup_SW64_ELF_LITERAL_GOT: + return true; + case Sw64::fixup_SW64_23_PCREL_S2: + return false; + } +} + +MCAsmBackend *llvm::createSw64AsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options) { + Sw64ABIInfo ABI = Sw64ABIInfo::computeTargetABI(STI.getTargetTriple(), + STI.getCPU(), Options); + return new Sw64AsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), + ABI.IsS64()); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h new file mode 100644 index 000000000000..3f8bb0cf391b --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64AsmBackend.h @@ -0,0 +1,96 @@ +//===-- Sw64AsmBackend.h - Sw64 Asm Backend ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64AsmBackend class. +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ASMBACKEND_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ASMBACKEND_H + +#include "MCTargetDesc/Sw64FixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/TargetParser/Triple.h" + +namespace llvm { + +class MCAssembler; +struct MCFixupKindInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSymbolELF; +class Target; + +class Sw64AsmBackend : public MCAsmBackend { + Triple TheTriple; + bool IsS32; + +public: + Sw64AsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, + StringRef CPU, bool S32) + : MCAsmBackend(support::little), TheTriple(TT), IsS32(S32) {} + + std::unique_ptr + createObjectTargetWriter() const override; + + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved, + const MCSubtargetInfo *STI) const override; + + std::optional getFixupKind(StringRef Name) const override; + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + + unsigned getNumFixupKinds() const override { + return Sw64::NumTargetFixupKinds; + } + + /// @name Target Relaxation Interfaces + /// @{ + + /// MayNeedRelaxation - Check whether the given instruction may need + /// relaxation. + /// + /// \param Inst - The instruction to test. 
+ bool mayNeedRelaxation(const MCInst &Inst, + const MCSubtargetInfo &STI) const override { + return false; + } + + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + // FIXME. + llvm_unreachable("RelaxInstruction() unimplemented"); + return false; + } + + /// RelaxInstruction - Relax the instruction in the given fragment + /// to the next wider instruction. + /// + /// \param Inst - The instruction to relax, which may be the same + /// as the output. + /// \param [out] Res On return, the relaxed instruction. + + /// @} + + bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const override; + + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; + +}; // class Sw64AsmBackend + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h new file mode 100644 index 000000000000..822d043816c2 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64BaseInfo.h @@ -0,0 +1,146 @@ +//===-- Sw64BaseInfo.h - Top level definitions for SW64 MC ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the Sw64 target useful for the compiler back-end and the MC libraries. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64BASEINFO_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64BASEINFO_H + +#include "Sw64FixupKinds.h" +#include "Sw64MCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + +/// Sw64II - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace Sw64II { +/// Target Operand Flag enum. +enum TOF { + //===------------------------------------------------------------------===// + // Sw64 Specific MachineOperand flags. + MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + MO_LITERAL, // LITERAL + MO_GPDISP, + MO_GPDISP_HI, + MO_GPDISP_LO, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. + MO_GOT_CALL, + + /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HI, + MO_ABS_LO, + MO_ABS_HILO, + MO_ABS_LI, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + MO_GPREL_HI, + MO_GPREL_LO, + + /// MO_TLSGD - Represents the offset into the global offset table at which + // the module ID and TSL block offset reside during execution (General + // Dynamic TLS). 
+ MO_TLSGD, + + /// MO_TLSLDM - Represents the offset into the global offset table at which + // the module ID and TSL block offset reside during execution (Local + // Dynamic TLS). + MO_TLSLDM, + MO_DTPREL_HI, + MO_DTPREL_LO, + + /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial + // Exec TLS). + MO_GOTTPREL, + + /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from + // the thread pointer (Local Exec TLS). + MO_TPREL_HI, + MO_TPREL_LO, + + // S32/64 Flags. + MO_GPOFF_HI, + MO_GPOFF_LO, + MO_GOT_DISP, + MO_GOT_PAGE, + MO_GOT_OFST, + + /// MO_HIGHER/HIGHEST - Represents the highest or higher half word of a + /// 64-bit symbol address. + MO_HIGHER, + MO_HIGHEST, + + /// MO_GOT_HI16/LO16, MO_CALL_HI16/LO16 - Relocations used for large GOTs. + MO_GOT_HI16, + MO_GOT_LO16, + MO_CALL_HI16, + MO_CALL_LO16, + + /// Helper operand used to generate R_SW64_JALR + MO_JALR, + + // LITERAL_GOT + MO_LITERAL_GOT, + + MO_HINT, + MO_LITERAL_BASE = 0x40, + MO_LITUSE = 0x80 // LITERAL +}; + +enum { + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for + // Sw64 instructions. + // + + // Pseudo - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// FrmR - This form is for instructions of the format R. + FrmR = 1, + /// FrmI - This form is for instructions of the format I. + FrmI = 2, + /// FrmJ - This form is for instructions of the format J. + FrmJ = 3, + /// FrmFR - This form is for instructions of the format FR. + FrmFR = 4, + /// FrmFI - This form is for instructions of the format FI. + FrmFI = 5, + /// FrmOther - This form is for instructions that have no specific format. + FrmOther = 6, + FormMask = 15, + /// IsCTI - Instruction is a Control Transfer Instruction. + IsCTI = 1 << 4, + /// HasForbiddenSlot - Instruction has a forbidden slot. + HasForbiddenSlot = 1 << 5, + /// IsPCRelativeLoad - A Load instruction with implicit source register + /// ($pc) with explicit offset and destination register + IsPCRelativeLoad = 1 << 6, + /// HasFCCRegOperand - Instruction uses an $fcc register. + HasFCCRegOperand = 1 << 7 +}; +} // namespace Sw64II +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp new file mode 100644 index 000000000000..3db5c0ab442c --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFObjectWriter.cpp @@ -0,0 +1,463 @@ +//===-- Sw64ELFObjectWriter.cpp - Sw64 ELF Writer -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64FixupKinds.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include + +#define DEBUG_TYPE "sw_64-elf-object-writer" + +using namespace llvm; + +namespace { + +// Holds additional information needed by the relocation ordering algorithm. +struct Sw64RelocationEntry { + const ELFRelocationEntry R; // < The relocation. + bool Matched = false; // < Is this relocation part of a match. + + Sw64RelocationEntry(const ELFRelocationEntry &R) : R(R) {} + + void print(raw_ostream &Out) const { + R.print(Out); + Out << ", Matched=" << Matched; + } +}; + +#ifndef NDEBUG +raw_ostream &operator<<(raw_ostream &OS, const Sw64RelocationEntry &RHS) { + RHS.print(OS); + return OS; +} +#endif + +class Sw64ELFObjectWriter : public MCELFObjectTargetWriter { +public: + Sw64ELFObjectWriter(uint8_t OSABI, bool HasRelocationAddend, bool Is64); + + ~Sw64ELFObjectWriter() override = default; + + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; + bool needsRelocateWithSymbol(const MCSymbol &Sym, + unsigned Type) const override; + void sortRelocs(const MCAssembler &Asm, + std::vector &Relocs) override; +}; + +// The possible results of the Predicate function used by find_best. +enum FindBestPredicateResult { + FindBest_NoMatch = 0, // < The current element is not a match. + FindBest_Match, // < The current element is a match but better ones are + // possible. + FindBest_PerfectMatch, // < The current element is an unbeatable match. +}; + +} // end anonymous namespace + +// Copy elements in the range [First, Last) to d1 when the predicate is true or +// d2 when the predicate is false. This is essentially both std::copy_if and +// std::remove_copy_if combined into a single pass. +template +static std::pair copy_if_else(InputIt First, InputIt Last, + OutputIt1 d1, OutputIt2 d2, + UnaryPredicate Predicate) { + for (InputIt I = First; I != Last; ++I) { + if (Predicate(*I)) { + *d1 = *I; + d1++; + } else { + *d2 = *I; + d2++; + } + } + + return std::make_pair(d1, d2); +} + +// Find the best match in the range [First, Last). +// +// An element matches when Predicate(X) returns FindBest_Match or +// FindBest_PerfectMatch. A value of FindBest_PerfectMatch also terminates +// the search. BetterThan(A, B) is a comparator that returns true when A is a +// better match than B. The return value is the position of the best match. +// +// This is similar to std::find_if but finds the best of multiple possible +// matches. 
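copy_if_else above is a single-pass combination of std::copy_if and std::remove_copy_if: elements for which the predicate returns true go to the first output iterator, the rest to the second. The relocation sorter further below uses it to peel the R_SW_64_DUMMY_LITUSE entries off into a separate list. A tiny self-contained usage example with plain ints (it relies on the copy_if_else template defined above):

#include <iterator>
#include <vector>

// Split a sequence into evens (predicate true -> d1) and odds (d2) in one pass.
static void splitEvensOdds(const std::vector<int> &In, std::vector<int> &Evens,
                           std::vector<int> &Odds) {
  copy_if_else(In.begin(), In.end(), std::back_inserter(Evens),
               std::back_inserter(Odds), [](int V) { return V % 2 == 0; });
}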
+template +static InputIt find_best(InputIt First, InputIt Last, + UnaryPredicate Predicate) { + InputIt Best = Last; + + for (InputIt I = First; I != Last; ++I) { + unsigned Matched = Predicate(*I); + if (Matched != FindBest_NoMatch) { + LLVM_DEBUG(dbgs() << std::distance(First, I) << " is a match ("; + I->print(dbgs()); dbgs() << ")\n"); + if (Best == Last) { + LLVM_DEBUG(dbgs() << ".. and it beats the last one\n"); + Best = I; + } + } + if (Matched == FindBest_PerfectMatch) { + LLVM_DEBUG(dbgs() << ".. and it is unbeatable\n"); + break; + } + } + + return Best; +} + +#ifndef NDEBUG +// Print all the relocations. +template +static void dumpRelocs(const char *Prefix, const Container &Relocs) { + for (const auto &R : Relocs) { + dbgs() << Prefix; + R.print(dbgs()); + dbgs() << "\n"; + } +} +#endif + +Sw64ELFObjectWriter::Sw64ELFObjectWriter(uint8_t OSABI, + bool HasRelocationAddend, bool Is64) + : MCELFObjectTargetWriter(Is64, OSABI, ELF::EM_SW64, HasRelocationAddend) {} + +unsigned Sw64ELFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + // Determine the type of the relocation. + unsigned Kind = (unsigned)Fixup.getKind(); + switch (Kind) { + case Sw64::fixup_SW64_NONE: + return ELF::R_SW_64_NONE; + case FK_Data_1: + case FK_Data_2: + Ctx.reportError(Fixup.getLoc(), + "SW64 does not support one byte relocations"); + return ELF::R_SW_64_NONE; + case FK_Data_4: + if (Fixup.getValue()->getKind() == MCExpr::Binary) + return ELF::R_SW_64_SREL32; // .cfi_startproc + else + return ELF::R_SW_64_REFLONG; // R_SW_64_32 + break; + case FK_Data_8: // .8byte ($.str) + if (IsPCRel) + return ELF::R_SW_64_SREL64; + else + return ELF::R_SW_64_REFQUAD; // R_SW_64_64 + break; + case Sw64::fixup_SW64_32: + return ELF::R_SW_64_REFLONG; + break; + case Sw64::fixup_SW64_64: + case Sw64::fixup_SW64_CTOR: + return ELF::R_SW_64_REFQUAD; + break; + case Sw64::fixup_SW64_GPREL32: + return ELF::R_SW_64_GPREL32; + break; + + case Sw64::fixup_SW64_ELF_LITERAL: + return ELF::R_SW_64_LITERAL; + break; + case Sw64::fixup_SW64_LITUSE: + return ELF::R_SW_64_LITUSE; + break; + case Sw64::fixup_SW64_LITERAL_BASE: + return ELF::R_SW_64_DUMMY_LITERAL; + break; + case Sw64::fixup_SW64_LITUSE_JSRDIRECT: + return ELF::R_SW_64_DUMMY_LITUSE; + break; + case Sw64::fixup_SW64_GPDISP: + return ELF::R_SW_64_GPDISP; + break; + case Sw64::fixup_SW64_GPDISP_HI16: + return ELF::R_SW_64_GPDISP; + break; + case Sw64::fixup_SW64_GPDISP_LO16: + return ELF::R_SW_64_GPDISP; + break; + case Sw64::fixup_SW64_23_PCREL_S2: + return ELF::R_SW_64_BRADDR; + break; + case Sw64::fixup_SW64_HINT: + return ELF::R_SW_64_HINT; + break; + case Sw64::fixup_SW64_16_PCREL: + return ELF::R_SW_64_SREL16; + break; + case Sw64::fixup_SW64_32_PCREL: + return ELF::R_SW_64_SREL32; + break; + case Sw64::fixup_SW64_64_PCREL: + return ELF::R_SW_64_SREL64; + break; + case Sw64::fixup_SW64_GPREL_HI16: + return ELF::R_SW_64_GPRELHIGH; + break; + case Sw64::fixup_SW64_GPREL_LO16: + return ELF::R_SW_64_GPRELLOW; + break; + case Sw64::fixup_SW64_GPREL16: + return ELF::R_SW_64_GPREL16; + break; + case Sw64::fixup_SW64_BRSGP: + return ELF::R_SW_64_BRSGP; + break; + case Sw64::fixup_SW64_TLSGD: + return ELF::R_SW_64_TLSGD; + break; + case Sw64::fixup_SW64_TLSLDM: + return ELF::R_SW_64_TLSLDM; + break; + case Sw64::fixup_SW64_DTPMOD64: + return ELF::R_SW_64_DTPMOD64; + break; + case Sw64::fixup_SW64_GOTDTPREL16: + return ELF::R_SW_64_GOTDTPREL; + break; + case Sw64::fixup_SW64_DTPREL64: + return 
ELF::R_SW_64_DTPREL64; + break; + case Sw64::fixup_SW64_DTPREL_HI16: + return ELF::R_SW_64_DTPRELHI; + break; + case Sw64::fixup_SW64_DTPREL_LO16: + return ELF::R_SW_64_DTPRELLO; + break; + case Sw64::fixup_SW64_DTPREL16: + return ELF::R_SW_64_DTPREL16; + break; + case Sw64::fixup_SW64_GOTTPREL16: + return ELF::R_SW_64_GOTTPREL; + break; + case Sw64::fixup_SW64_TPREL64: + return ELF::R_SW_64_TPREL64; + break; + case Sw64::fixup_SW64_TPREL_HI16: + return ELF::R_SW_64_TPRELHI; + break; + case Sw64::fixup_SW64_TPREL_LO16: + return ELF::R_SW_64_TPRELLO; + break; + case Sw64::fixup_SW64_TPREL16: + return ELF::R_SW_64_TPREL16; + break; + case Sw64::fixup_SW64_ELF_LITERAL_GOT: + return ELF::R_SW_64_LITERAL_GOT; + break; + } + llvm_unreachable("invalid fixup kind!"); +} + +// Determine whether a relocation (X) matches the one given in R. +// +// A relocation matches if: +// - It's type matches that of a corresponding low part. This is provided in +// MatchingType for efficiency. +// - It's based on the same symbol. +// - It's offset of greater or equal to that of the one given in R. +// It should be noted that this rule assumes the programmer does not use +// offsets that exceed the alignment of the symbol. The carry-bit will be +// incorrect if this is not true. +// +// A matching relocation is unbeatable if: +// - It is not already involved in a match. +// - It's offset is exactly that of the one given in R. +static FindBestPredicateResult isMatchingReloc(const Sw64RelocationEntry &X, + const ELFRelocationEntry &R, + unsigned MatchingType) { + if (X.R.Type == MatchingType && X.R.OriginalSymbol == R.OriginalSymbol) { + if (!X.Matched && X.R.OriginalAddend == R.OriginalAddend) + return FindBest_PerfectMatch; + } + return FindBest_NoMatch; +} + +// Rewrite Reloc Target And Type +static ELFRelocationEntry RewriteTypeReloc(const ELFRelocationEntry R, + const MCSymbolELF *RenamedSymA) { + ELFRelocationEntry Entry = R; + switch (R.Type) { + default: + break; + case ELF::R_SW_64_DUMMY_LITUSE: + Entry.Type = ELF::R_SW_64_LITUSE; + Entry.Symbol = RenamedSymA; + Entry.Addend = 0x3; + break; + case ELF::R_SW_64_DUMMY_LITERAL: + Entry.Type = ELF::R_SW_64_LITERAL; + break; + case ELF::R_SW_64_GPDISP: + Entry.Symbol = RenamedSymA; + Entry.Addend = 0x4; + break; + } + return Entry; +} + +void Sw64ELFObjectWriter::sortRelocs(const MCAssembler &Asm, + std::vector &Relocs) { + if (Relocs.size() < 2) + return; + + MCContext &Ctx = Asm.getContext(); + std::list Sorted; + std::list Remainder; + std::list Orig; + const auto *RenamedSymA = cast(Ctx.getOrCreateSymbol(".text")); + + LLVM_DEBUG(dumpRelocs("R: ", Relocs)); + + // Sort relocations by the address they are applied to. + llvm::sort(Relocs, + [](const ELFRelocationEntry &A, const ELFRelocationEntry &B) { + return A.Offset < B.Offset; + }); + + // copy all reloc entry into remainder, except lituse. + // all lituse will be insert literal->next later. + copy_if_else(Relocs.begin(), Relocs.end(), std::back_inserter(Remainder), + std::back_inserter(Sorted), [](const ELFRelocationEntry &Reloc) { + return Reloc.Type == ELF::R_SW_64_DUMMY_LITUSE; + }); + + // Separate the movable relocations (AHL relocations using the high bits) from + // the immobile relocations (everything else). This does not preserve high/low + // matches that already existed in the input. 
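The loop that follows takes each R_SW_64_DUMMY_LITUSE held back in Remainder, locates its partner with isMatchingReloc, and splices it in right behind that literal, so every LITUSE ends up immediately after the LITERAL it qualifies. A minimal model of the matching rule, using hypothetical simplified types:

// A LITUSE pairs with the first not-yet-matched DUMMY_LITERAL that names the
// same symbol and carries the same original addend (see isMatchingReloc above).
struct MiniReloc {
  unsigned Type;
  int Sym;
  long Addend;
  bool Matched = false;
};

static bool pairsWith(const MiniReloc &Literal, const MiniReloc &Use,
                      unsigned LiteralType) {
  return Literal.Type == LiteralType && !Literal.Matched &&
         Literal.Sym == Use.Sym && Literal.Addend == Use.Addend;
}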
+ for (auto &R : Remainder) { + LLVM_DEBUG(dbgs() << "Matching: " << R << "\n"); + + auto InsertionPoint = find_best( + Sorted.begin(), Sorted.end(), [&R](const Sw64RelocationEntry &X) { + return isMatchingReloc(X, R, ELF::R_SW_64_DUMMY_LITERAL); + }); + + if (InsertionPoint != Sorted.end()) { + // if lit_use and literal correctly matched, + // InsertPoint is the reloc entry next to the literal + InsertionPoint->Matched = true; + InsertionPoint = std::next(InsertionPoint, 1); + } + Sorted.insert(InsertionPoint, R)->Matched = true; + } + assert(Relocs.size() == Sorted.size() && "Some relocs were not consumed"); + + // Overwrite the original vector with the sorted elements. The caller expects + // them in reverse order. + unsigned CopyTo = 0; + for (const auto &R : reverse(Sorted)) { + ELFRelocationEntry Entry = RewriteTypeReloc(R.R, RenamedSymA); + Relocs[CopyTo++] = Entry; + } +} + +bool Sw64ELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, + unsigned Type) const { + if (!isUInt<8>(Type)) + return needsRelocateWithSymbol(Sym, Type & 0xff) || + needsRelocateWithSymbol(Sym, (Type >> 8) & 0xff) || + needsRelocateWithSymbol(Sym, (Type >> 16) & 0xff); + + switch (Type) { + default: + errs() << Type << "\n"; + llvm_unreachable("Unexpected relocation"); + return true; + + // This relocation doesn't affect the section data. + case ELF::R_SW_64_NONE: + return false; + // On REL ABI's (e.g. S32), these relocations form pairs. The pairing is done + // by the static linker by matching the symbol and offset. + // We only see one relocation at a time but it's still safe to relocate with + // the section so long as both relocations make the same decision. + // + // Some older linkers may require the symbol for particular cases. Such cases + // are not supported yet but can be added as required. 
+ case ELF::R_SW_64_REFLONG: + case ELF::R_SW_64_REFQUAD: + case ELF::R_SW_64_GPREL32: + case ELF::R_SW_64_LITERAL: + case ELF::R_SW_64_DUMMY_LITERAL: + case ELF::R_SW_64_DUMMY_LITUSE: + case ELF::R_SW_64_LITUSE: + case ELF::R_SW_64_BRADDR: + case ELF::R_SW_64_HINT: + case ELF::R_SW_64_SREL16: + case ELF::R_SW_64_SREL32: + case ELF::R_SW_64_SREL64: + case ELF::R_SW_64_GPRELHIGH: + case ELF::R_SW_64_GPRELLOW: + case ELF::R_SW_64_GPREL16: + case ELF::R_SW_64_COPY: + case ELF::R_SW_64_GLOB_DAT: + case ELF::R_SW_64_JMP_SLOT: + case ELF::R_SW_64_RELATIVE: + case ELF::R_SW_64_BRSGP: + case ELF::R_SW_64_TLSGD: + case ELF::R_SW_64_TLSLDM: + case ELF::R_SW_64_DTPMOD64: + case ELF::R_SW_64_GOTDTPREL: + case ELF::R_SW_64_DTPREL64: + case ELF::R_SW_64_DTPRELHI: + case ELF::R_SW_64_DTPRELLO: + case ELF::R_SW_64_DTPREL16: + case ELF::R_SW_64_GOTTPREL: + case ELF::R_SW_64_TPREL64: + case ELF::R_SW_64_TPRELHI: + case ELF::R_SW_64_TPRELLO: + case ELF::R_SW_64_TPREL16: + case ELF::R_SW_64_NUM: + case ELF::R_SW_64_LITERAL_GOT: + case ELF::R_SW_64_PC32: + case ELF::R_SW_64_EH: + return false; + + case ELF::R_SW_64_GPDISP: + return true; + } +} + +std::unique_ptr +llvm::createSw64ELFObjectWriter(const Triple &TT, bool IsS32) { + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); + bool IsS64 = true; + bool HasRelocationAddend = TT.isArch64Bit(); + return std::make_unique(OSABI, HasRelocationAddend, + IsS64); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp new file mode 100644 index 000000000000..2d5271da7c7d --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.cpp @@ -0,0 +1,108 @@ +//===-------- Sw64ELFStreamer.cpp - ELF Object Output ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
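needsRelocateWithSymbol above also accepts composite values in which several 8-bit relocation types are packed into one word; in that case it answers true if any of the packed bytes would need the symbol. An illustrative stand-alone equivalent of that unpacking (hypothetical helper, assuming at most three packed types as in the code above):

// OR together the per-byte answers for a packed relocation-type word.
static bool anyByteNeedsSymbol(unsigned PackedType,
                               bool (*NeedsSym)(unsigned SingleType)) {
  return NeedsSym(PackedType & 0xff) || NeedsSym((PackedType >> 8) & 0xff) ||
         NeedsSym((PackedType >> 16) & 0xff);
}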
+// +//===----------------------------------------------------------------------===// + +#include "Sw64ELFStreamer.h" +#include "Sw64OptionRecord.h" +#include "Sw64TargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +Sw64ELFStreamer::Sw64ELFStreamer(MCContext &Context, + std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter) + : MCELFStreamer(Context, std::move(MAB), std::move(OW), + std::move(Emitter)) { + RegInfoRecord = new Sw64RegInfoRecord(this, Context); + Sw64OptionRecords.push_back( + std::unique_ptr(RegInfoRecord)); +} + +void Sw64ELFStreamer::emitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { + MCELFStreamer::emitInstruction(Inst, STI); + + MCContext &Context = getContext(); + const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo(); + + for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) { + const MCOperand &Op = Inst.getOperand(OpIndex); + + if (!Op.isReg()) + continue; + + unsigned Reg = Op.getReg(); + RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo); + } + + createPendingLabelRelocs(); +} + +void Sw64ELFStreamer::emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { + Frame.Begin = getContext().createTempSymbol(); + MCELFStreamer::emitLabel(Frame.Begin); +} + +MCSymbol *Sw64ELFStreamer::emitCFILabel() { + MCSymbol *Label = getContext().createTempSymbol("cfi", true); + MCELFStreamer::emitLabel(Label); + return Label; +} + +void Sw64ELFStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { + Frame.End = getContext().createTempSymbol(); + MCELFStreamer::emitLabel(Frame.End); +} + +void Sw64ELFStreamer::createPendingLabelRelocs() { Labels.clear(); } + +void Sw64ELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { + MCELFStreamer::emitLabel(Symbol); + Labels.push_back(Symbol); +} + +void Sw64ELFStreamer::switchSection(MCSection *Section, + const MCExpr *Subsection) { + MCELFStreamer::switchSection(Section, Subsection); + Labels.clear(); +} + +void Sw64ELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size, + SMLoc Loc) { + MCELFStreamer::emitValueImpl(Value, Size, Loc); + Labels.clear(); +} + +void Sw64ELFStreamer::emitIntValue(uint64_t Value, unsigned Size) { + MCELFStreamer::emitIntValue(Value, Size); + Labels.clear(); +} + +void Sw64ELFStreamer::EmitSw64OptionRecords() { + for (const auto &I : Sw64OptionRecords) + I->EmitSw64OptionRecord(); +} + +MCELFStreamer *llvm::createSw64ELFStreamer( + MCContext &Context, std::unique_ptr MAB, + std::unique_ptr OW, std::unique_ptr Emitter, + bool RelaxAll) { + return new Sw64ELFStreamer(Context, std::move(MAB), std::move(OW), + std::move(Emitter)); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h new file mode 100644 index 000000000000..73a1d382a4c6 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64ELFStreamer.h @@ -0,0 +1,83 @@ +//===- Sw64ELFStreamer.h - ELF Object Output --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This is a custom MCELFStreamer which allows us to insert some hooks before +// emitting data into an actual object file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ELFSTREAMER_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ELFSTREAMER_H + +#include "Sw64OptionRecord.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCELFStreamer.h" +#include + +namespace llvm { + +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCSubtargetInfo; +struct MCDwarfFrameInfo; + +class Sw64ELFStreamer : public MCELFStreamer { + SmallVector, 8> Sw64OptionRecords; + Sw64RegInfoRecord *RegInfoRecord; + SmallVector Labels; + +public: + Sw64ELFStreamer(MCContext &Context, std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter); + + // Overriding this function allows us to add arbitrary behaviour before the + // Inst is actually emitted. For example, we can inspect the operands and + // gather sufficient information that allows us to reason about the register + // usage for the translation unit. + void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + + // Overriding this function allows us to record all labels that should be + // marked as microSW64. Based on this data marking is done in + // EmitInstruction. + void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; + + // Overriding this function allows us to dismiss all labels that are + // candidates for marking as microSW64 when .section directive is processed. + void switchSection(MCSection *Section, + const MCExpr *Subsection = nullptr) override; + + // Overriding these functions allows us to dismiss all labels that are + // candidates for marking as microSW64 when .word/.long/.4byte etc + // directives are emitted. + void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; + void emitIntValue(uint64_t Value, unsigned Size) override; + + // Overriding these functions allows us to avoid recording of these labels + // in EmitLabel and later marking them as microSW64. + void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; + void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; + MCSymbol *emitCFILabel() override; + + // Emits all the option records stored up until the point it's called. + void EmitSw64OptionRecords(); + + // Mark labels as microSW64, if necessary for the subtarget. + void createPendingLabelRelocs(); +}; + +MCELFStreamer *createSw64ELFStreamer(MCContext &Context, + std::unique_ptr MAB, + std::unique_ptr OW, + std::unique_ptr Emitter, + bool RelaxAll); +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64ELFSTREAMER_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h new file mode 100644 index 000000000000..ae378ac175bf --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64FixupKinds.h @@ -0,0 +1,174 @@ +//===-- Sw64FixupKinds.h - Sw64 Specific Fixup Entries ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
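// Editorial sketch, not part of the patch: fixup_SW64_23_PCREL_S2 declared
// below is the 21-bit, word-scaled branch displacement that becomes
// R_SW_64_BRADDR. Assuming the usual Alpha-style convention that the
// displacement is measured from the instruction following the branch, the
// field value can be computed as follows (the helper name is illustrative).
#include <cassert>
#include <cstdint>
static uint32_t encodeBranchDisp21(uint64_t FixupAddr, uint64_t TargetAddr) {
  int64_t Disp = (int64_t)(TargetAddr - (FixupAddr + 4)) >> 2; // word-scaled
  assert(Disp >= -(1 << 20) && Disp < (1 << 20) && "branch target out of range");
  return (uint32_t)Disp & 0x1FFFFF;                            // low 21 bits
}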
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64FIXUPKINDS_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace Sw64 { +// Although most of the current fixup types reflect a unique relocation +// one can have multiple fixup types for a given relocation and thus need +// to be uniquely named. +// +// This table *must* be in the same order of +// MCFixupKindInfo Infos[Sw64::NumTargetFixupKinds] +// in Sw64AsmBackend.cpp. +// +enum Fixups { + // Branch fixups resulting in R_SW64_NONE. + fixup_SW64_NONE = FirstTargetFixupKind, + + // A 32 bit reference to a symbol. + // resulting in R_SW_64_REFLONG. + fixup_SW64_32, + + // A 64 bit reference to a symbol. + // resulting in - R_SW_64_REFQUAD. + fixup_SW64_64, + + // A 64 bit reference to a symbol. + // resulting in - R_SW_64_REFQUAD. + fixup_SW64_CTOR, + + // A 32 bit GP relative offset. This is just like REFLONG except + // that when the value is used the value of the gp register will be + // added in. + // resulting in - R_SW_64_GPREL32. + fixup_SW64_GPREL32, + + // Used for an instruction that refers to memory off the GP register + // resulting in - R_SW_64_LITERAL. + fixup_SW64_ELF_LITERAL, + // This reloc only appears immediately following an ELF_LITERAL reloc. + // It identifies a use of the literal. The symbol index is special: + // 1 means the literal address is in the base register of a memory + // format instruction; 2 means the literal address is in the byte + // offset register of a byte-manipulation instruction; 3 means the + // literal address is in the target register of a jsr instruction. + // This does not actually do any relocation. + // resulting in - R_SW_64_LITUSE. + fixup_SW64_LITUSE, + + // Load the gp register. This is always used for a ldih instruction + // which loads the upper 16 bits of the gp register. The symbol + // index of the GPDISP instruction is an offset in bytes to the lda + // instruction that loads the lower 16 bits. The value to use for + // the relocation is the difference between the GP value and the + // current location; the load will always be done against a register + // holding the current address. + // resulting in - R_SW_64_GPDISP. + fixup_SW64_GPDISP, + fixup_SW64_GPDISP_HI16, + fixup_SW64_GPDISP_LO16, + + // A 21 bit branch. + // resulting in - R_SW_64_BRADDR. + fixup_SW64_23_PCREL_S2, + // A hint for a jump to a register. + // resulting in - R_SW_64_HINT. + fixup_SW64_HINT, + + // 16 bit PC relative offset. + // resulting in - R_SW_64_SREL16. + fixup_SW64_16_PCREL, + + // 32 bit PC relative offset. + // resulting in - R_SW_64_SREL32. + fixup_SW64_32_PCREL, + + // 64 bit PC relative offset. + // resulting in - R_SW_64_SREL64. + fixup_SW64_64_PCREL, + + // The high 16 bits of the displacement from GP to the target + // resulting in - R_SW_64_GPRELHIGH. + fixup_SW64_GPREL_HI16, + + // The low 16 bits of the displacement from GP to the target + // resulting in - R_SW_64_GPRELLOW. + fixup_SW64_GPREL_LO16, + + // A 16-bit displacement from the GP to the target + // resulting in - R_SW_64_GPREL16. + fixup_SW64_GPREL16, + // A 21 bit branch that adjusts for gp loads + // resulting in - R_SW_64_BRSGP. + fixup_SW64_BRSGP, + + // Creates a tls_index for the symbol in the got. + // resulting in - R_SW_64_TLSGD. + fixup_SW64_TLSGD, + + // Creates a tls_index for the (current) module in the got. + // resulting in - R_SW_64_TLSLDM. 
+ fixup_SW64_TLSLDM, + + // A dynamic relocation for a DTP module entry. + // resulting in - R_SW_64_DTPMOD64. + fixup_SW64_DTPMOD64, + + // Creates a 64-bit offset in the got for the displacement from DTP to the + // target. + // resulting in - R_SW_64_GOTDTPREL. + fixup_SW64_GOTDTPREL16, + + // A dynamic relocation for a displacement from DTP to the target. + // resulting in - R_SW_64_DTPREL64. + fixup_SW64_DTPREL64, + + // The high 16 bits of the displacement from DTP to the target. + // resulting in - R_SW_64_DTPRELHI. + fixup_SW64_DTPREL_HI16, + // The low 16 bits of the displacement from DTP to the target. + // resulting in - R_SW_64_DTPRELLO. + fixup_SW64_DTPREL_LO16, + + // A 16-bit displacement from DTP to the target. + // resulting in - R_SW_64_DTPREL16 + fixup_SW64_DTPREL16, + + // Creates a 64-bit offset in the got for the displacement from TP to the + // target. + // resulting in - R_SW_64_GOTTPREL + fixup_SW64_GOTTPREL16, + + // A dynamic relocation for a displacement from TP to the target. + // resulting in - R_SW_64_TPREL64 + fixup_SW64_TPREL64, + + // The high 16 bits of the displacement from TP to the target. + // resulting in - R_SW_64_TPRELHI + fixup_SW64_TPREL_HI16, + + // The low 16 bits of the displacement from TP to the target. + // resulting in - R_SW_64_TPRELLO + fixup_SW64_TPREL_LO16, + + // A 16-bit displacement from TP to the target. + // resulting in - R_SW_64_TPREL16 + fixup_SW64_TPREL16, + + // Used for an instruction that refers to memory off the GP register + // together with literal, expand call range to 32 bits offset + // resulting in - R_SW_64_LITERAL_GOT + fixup_SW64_ELF_LITERAL_GOT, + + // TODO: for literal sorting reloc + fixup_SW64_LITERAL_BASE, + fixup_SW64_LITUSE_JSRDIRECT, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} // namespace Sw64 +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp new file mode 100644 index 000000000000..bdbd6d0bdf54 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.cpp @@ -0,0 +1,42 @@ +//===-- Sw64MCAsmInfo.cpp - Sw64 asm properties -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the Sw64MCAsmInfo properties. 
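// Editorial sketch, not part of the patch: the *_HI16/*_LO16 fixup pairs
// declared above (GPDISP, GPREL, DTPREL, TPREL) split a displacement into two
// 16-bit halves where the low half is sign-extended when applied, so the high
// half must compensate. A standalone restatement, assuming the displacement
// fits in 32 bits; the helper name is illustrative.
#include <cstdint>
static void splitHiLo16(int32_t Disp, int16_t &Hi, int16_t &Lo) {
  Lo = (int16_t)Disp;                            // low 16 bits, sign-extended at use
  Hi = (int16_t)(((int64_t)Disp - Lo) >> 16);    // absorbs the borrow from a negative Lo
  // Invariant: (int32_t)Hi * 65536 + Lo == Disp
}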
+// +//===----------------------------------------------------------------------===// + +#include "Sw64MCAsmInfo.h" +#include "llvm/TargetParser/Triple.h" + +using namespace llvm; + +void Sw64MCAsmInfo::anchor() {} + +Sw64MCAsmInfo::Sw64MCAsmInfo(const Triple &TheTriple, + const MCTargetOptions &Options) { + IsLittleEndian = TheTriple.isLittleEndian(); + assert(IsLittleEndian == true && "sw_64 machine is litter endian!"); + + CodePointerSize = CalleeSaveStackSlotSize = 8; + + PrivateGlobalPrefix = ".L"; + AlignmentIsInBytes = false; + Data16bitsDirective = "\t.2byte\t"; + Data32bitsDirective = "\t.4byte\t"; + Data64bitsDirective = "\t.8byte\t"; + WeakRefDirective = "\t.weak\t"; + CommentString = "#"; + // For chang assemble directer ".set LA, LB" to "LA = LB" + HasSw64SetDirective = true; + UsesELFSectionDirectiveForBSS = true; + SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::DwarfCFI; + DwarfRegNumForCFI = true; + UseIntegratedAssembler = true; +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h new file mode 100644 index 000000000000..f7809419ecb8 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCAsmInfo.h @@ -0,0 +1,32 @@ +//===-- Sw64MCAsmInfo.h - Sw64 Asm Info ------------------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the Sw64MCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCASMINFO_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCASMINFO_H + +#include "llvm/MC/MCAsmInfoELF.h" + +namespace llvm { +class Triple; + +class Sw64MCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit Sw64MCAsmInfo(const Triple &TheTriple, + const MCTargetOptions &Options); +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp new file mode 100644 index 000000000000..2e56da22b398 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.cpp @@ -0,0 +1,451 @@ +//===-- Sw64MCCodeEmitter.cpp - Convert Sw64 Code to Machine Code ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64MCCodeEmitter class. 
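// Editorial sketch, not part of the patch: EmitInstruction in the code emitter
// below walks the encoded word from the least significant byte upward, so a
// 4-byte Sw64 instruction is serialized little-endian. Shown standalone with
// an illustrative name.
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
static void emitLE32(uint32_t Word, llvm::raw_ostream &OS) {
  for (unsigned I = 0; I < 4; ++I)
    OS << (char)((Word >> (I * 8)) & 0xff); // byte 0 (LSB) first
}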
+// +//===----------------------------------------------------------------------===// + +#include "Sw64MCCodeEmitter.h" +#include "MCTargetDesc/Sw64FixupKinds.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +#define GET_INSTRMAP_INFO +#include "Sw64GenInstrInfo.inc" +#undef GET_INSTRMAP_INFO + +namespace llvm { + +MCCodeEmitter *createSw64MCCodeEmitterEB(const MCInstrInfo &MCII, + MCContext &Ctx) { + return new Sw64MCCodeEmitter(MCII, Ctx, false); +} + +MCCodeEmitter *createSw64MCCodeEmitterEL(const MCInstrInfo &MCII, + MCContext &Ctx) { + return new Sw64MCCodeEmitter(MCII, Ctx, true); +} + +} // end namespace llvm + +MCInst Sw64MCCodeEmitter::LowerCompactBranch(MCInst TmpInst) const { + // > + // ==> > + + MCInst TI; + unsigned int Size = TmpInst.getNumOperands(); + // for test op is or not a imm + // as "bsr $RA,disp" will be convert to " bsr disp" will be an error + TI.setOpcode(TmpInst.getOpcode()); + if (TmpInst.getOperand(0).isImm()) + for (unsigned int i = 0; i < Size; i++) { + if (i == 0) + continue; + TI.addOperand(TmpInst.getOperand(i)); + } + else { + return TmpInst; + } + + return TI; +} + +void Sw64MCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; +} + +void Sw64MCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size, + const MCSubtargetInfo &STI, + raw_ostream &OS) const { + // Output the instruction encoding in little endian byte order. + // Little-endian byte ordering: + // sw_64: 4 | 3 | 2 | 1 + for (unsigned i = 0; i < Size; ++i) { + unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8; + EmitByte((Val >> Shift) & 0xff, OS); + } +} + +/// encodeInstruction - Emit the instruction. +/// Size the instruction with Desc.getSize(). +void Sw64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + // Non-pseudo instructions that get changed for direct object + // only based on operand values. + // If this list of instructions get much longer we will move + // the check to a function call. Until then, this is more efficient. + MCInst TmpInst = MI; + + switch (MI.getOpcode()) { + // If shift amount is >= 32 it the inst needs to be lowered further + case Sw64::BEQ: + case Sw64::BGE: + case Sw64::BGT: + case Sw64::BLBC: + case Sw64::BLBS: + case Sw64::BLE: + case Sw64::BLT: + case Sw64::BNE: + case Sw64::BR: + case Sw64::BSR: + case Sw64::FBEQ: + case Sw64::FBGE: + case Sw64::FBGT: + case Sw64::FBLE: + case Sw64::FBLT: + case Sw64::FBNE: + break; + case Sw64::ALTENT: + return; + } + + uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + + const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode()); + + // Get byte count of instruction + unsigned Size = Desc.getSize(); + if (!Size) + llvm_unreachable("Desc.getSize() returns 0"); + + EmitInstruction(Binary, Size, STI, OS); +} + +/// getBranchTargetOpValue - Return binary encoding of the branch +/// target operand. 
If the machine operand requires relocation, +/// [(store F4RC:$RA, (Sw64_gprello tglobaladdr:$DISP, +/// GPRC:$RB))], s_ild_lo>; +/// record the relocation and return zero. +unsigned +Sw64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + + // If the destination is an immediate, divide by 4. + if (MO.isImm()) + return MO.getImm() >> 2; + + assert(MO.isExpr() && + "getBranchTargetOpValue expects only expressions or immediates"); + + const MCExpr *FixupExpression = MO.getExpr(); + + Fixups.push_back(MCFixup::create(0, FixupExpression, + MCFixupKind(Sw64::fixup_SW64_23_PCREL_S2))); + return 0; +} + +/// getJumpTargetOpValue - Return binary encoding of the jump +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned +Sw64MCCodeEmitter::getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) + return MO.getImm() >> 2; + + assert(MO.isExpr() && + "getJumpTargetOpValue expects only expressions or an immediate"); + + const MCExpr *FixupExpression = MO.getExpr(); + + Fixups.push_back(MCFixup::create(0, FixupExpression, + MCFixupKind(Sw64::fixup_SW64_23_PCREL_S2))); + return 0; +} + +static MCOperand createLituse(MCContext *Ctx) { + const MCSymbol *Sym = Ctx->getOrCreateSymbol(".text"); + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, *Ctx); + + return MCOperand::createExpr( + Sw64MCExpr::create(Sw64MCExpr::MEK_LITUSE_JSR, Expr, *Ctx)); +} + +unsigned Sw64MCCodeEmitter::getExprOpValue(const MCExpr *Expr, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + MCExpr::ExprKind Kind = Expr->getKind(); + if (Kind == MCExpr::Constant) { + return cast(Expr)->getValue(); + } + + if (Kind == MCExpr::Binary) { + unsigned Res = + getExprOpValue(cast(Expr)->getLHS(), Fixups, STI); + Res += getExprOpValue(cast(Expr)->getRHS(), Fixups, STI); + return Res; + } + + if (Kind == MCExpr::Target) { + const Sw64MCExpr *Sw64Expr = cast(Expr); + + Sw64::Fixups FixupKind = Sw64::Fixups(0); + switch (Sw64Expr->getKind()) { + default: + llvm_unreachable("Unknown fixup kind!"); + break; + case Sw64MCExpr::MEK_LITUSE_BASE: + FixupKind = Sw64::fixup_SW64_LITERAL_BASE; + break; + case Sw64MCExpr::MEK_LITUSE_JSRDIRECT: + FixupKind = Sw64::fixup_SW64_LITUSE_JSRDIRECT; + Fixups.push_back( + MCFixup::create(0, Sw64Expr, MCFixupKind(Sw64::fixup_SW64_HINT))); + break; + case Sw64MCExpr::MEK_ELF_LITERAL: + FixupKind = Sw64::fixup_SW64_ELF_LITERAL; + break; + case Sw64MCExpr::MEK_LITUSE_ADDR: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_LITUSE_BYTOFF: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_LITUSE_JSR: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_LITUSE_TLSGD: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_LITUSE_TLSLDM: + FixupKind = Sw64::fixup_SW64_LITUSE; + break; + case Sw64MCExpr::MEK_HINT: + FixupKind = Sw64::fixup_SW64_HINT; + break; + case Sw64MCExpr::MEK_GPDISP: + FixupKind = Sw64::fixup_SW64_GPDISP; + break; + case Sw64MCExpr::MEK_GPDISP_HI16: + FixupKind = Sw64::fixup_SW64_GPDISP_HI16; + break; + case Sw64MCExpr::MEK_GPDISP_LO16: + return 0; + case Sw64MCExpr::MEK_GPREL_HI16: + FixupKind = Sw64::fixup_SW64_GPREL_HI16; + break; + case 
Sw64MCExpr::MEK_GPREL_LO16: + FixupKind = Sw64::fixup_SW64_GPREL_LO16; + break; + case Sw64MCExpr::MEK_GPREL16: + FixupKind = Sw64::fixup_SW64_GPREL16; + break; + case Sw64MCExpr::MEK_BRSGP: + FixupKind = Sw64::fixup_SW64_BRSGP; + break; + case Sw64MCExpr::MEK_TLSGD: + FixupKind = Sw64::fixup_SW64_TLSGD; + break; + case Sw64MCExpr::MEK_TLSLDM: + FixupKind = Sw64::fixup_SW64_TLSLDM; + break; + case Sw64MCExpr::MEK_GOTDTPREL16: + FixupKind = Sw64::fixup_SW64_GOTDTPREL16; + break; + case Sw64MCExpr::MEK_DTPREL_HI16: + FixupKind = Sw64::fixup_SW64_DTPREL_HI16; + break; + case Sw64MCExpr::MEK_DTPREL_LO16: + FixupKind = Sw64::fixup_SW64_DTPREL_LO16; + break; + case Sw64MCExpr::MEK_DTPREL16: + FixupKind = Sw64::fixup_SW64_DTPREL16; + break; + case Sw64MCExpr::MEK_GOTTPREL16: + FixupKind = Sw64::fixup_SW64_GOTTPREL16; + break; + case Sw64MCExpr::MEK_TPREL_HI16: + FixupKind = Sw64::fixup_SW64_TPREL_HI16; + break; + case Sw64MCExpr::MEK_TPREL_LO16: + FixupKind = Sw64::fixup_SW64_TPREL_LO16; + break; + case Sw64MCExpr::MEK_TPREL16: + FixupKind = Sw64::fixup_SW64_TPREL16; + break; + case Sw64MCExpr::MEK_ELF_LITERAL_GOT: + FixupKind = Sw64::fixup_SW64_ELF_LITERAL_GOT; + break; + } // switch + + Fixups.push_back(MCFixup::create(0, Sw64Expr, MCFixupKind(FixupKind))); + return 0; + } + + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned +Sw64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); + return RegNo; + } else if (MO.isImm()) { + return static_cast(MO.getImm()); + } else if (MO.isDFPImm()) { + return static_cast(bit_cast(MO.getDFPImm())); + } + + // beq op1 op2 + // to + // beq opc op1 op2 + if (MCII.get(MI.getOpcode()).isBranch() && MI.getNumOperands() == 3) { + // for beq/bne/fbeq .... + return getBranchTargetOpValue(MI, 2, Fixups, STI); + } else if (MCII.get(MI.getOpcode()).isBranch() && MI.getNumOperands() == 2) { + // for br/bsr + return getJumpTargetOpValue(MI, 1, Fixups, STI); + } + + // MO must be an Expr. + assert(MO.isExpr()); + return getExprOpValue(MO.getExpr(), Fixups, STI); +} + +/// Return binary encoding of memory related operand. +/// If the offset operand requires relocation, record the relocation. +template +unsigned Sw64MCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + unsigned RegBits; // Base register is encoded in bits 20-16. + unsigned OffBits; // offset is encoded in bits 15-0. + + if (MI.getOperand(OpNo).isImm()) { // vload + RegBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI) << 16; + OffBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); + } else { // vstore + RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 16; + OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI); + } + + // Apply the scale factor if there is one. 
+ // OffBits >>= ShiftAmount; + + return (OffBits & 0xFFFF) | RegBits; +} + +// FIXME: should be called getMSBEncoding +unsigned +Sw64MCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + assert(MI.getOperand(OpNo - 1).isImm()); + assert(MI.getOperand(OpNo).isImm()); + unsigned Position = + getMachineOpValue(MI, MI.getOperand(OpNo - 1), Fixups, STI); + unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); + + return Position + Size - 1; +} + +unsigned Sw64MCCodeEmitter::getUImm4AndValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + assert(MI.getOperand(OpNo).isImm()); + const MCOperand &MO = MI.getOperand(OpNo); + unsigned Value = MO.getImm(); + switch (Value) { + case 128: + return 0x0; + case 1: + return 0x1; + case 2: + return 0x2; + case 3: + return 0x3; + case 4: + return 0x4; + case 7: + return 0x5; + case 8: + return 0x6; + case 15: + return 0x7; + case 16: + return 0x8; + case 31: + return 0x9; + case 32: + return 0xa; + case 63: + return 0xb; + case 64: + return 0xc; + case 255: + return 0xd; + case 32768: + return 0xe; + case 65535: + return 0xf; + } + llvm_unreachable("Unexpected value"); +} + +unsigned +Sw64MCCodeEmitter::getRegisterListOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + unsigned res = 0; + + // Register list operand is always first operand of instruction and it is + // placed before memory operand (register + imm). + + for (unsigned I = OpNo, E = MI.getNumOperands() - 2; I < E; ++I) { + unsigned Reg = MI.getOperand(I).getReg(); + unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg); + if (RegNo != 31) + res++; + else + res |= 0x10; + } + return res; +} + +unsigned +Sw64MCCodeEmitter::getRegisterListOpValue16(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return (MI.getNumOperands() - 4); +} + +#include "Sw64GenMCCodeEmitter.inc" diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h new file mode 100644 index 000000000000..56539f35c2ee --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCCodeEmitter.h @@ -0,0 +1,111 @@ +//===- Sw64MCCodeEmitter.h - Convert Sw64 Code to Machine Code --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64MCCodeEmitter class. 
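// Editorial sketch, not part of the patch: getMemEncoding in the code emitter
// above packs a memory operand as a 16-bit displacement in bits [15:0] and the
// base register number in bits [20:16]. A standalone restatement with an
// illustrative name and an explicit 5-bit register mask.
#include <cstdint>
static uint32_t packMemOperand(unsigned BaseReg, int64_t Offset) {
  return ((BaseReg & 0x1Fu) << 16) | (uint32_t)(Offset & 0xFFFF);
}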
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCCODEEMITTER_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCCODEEMITTER_H + +#include "llvm/MC/MCCodeEmitter.h" +#include + +namespace llvm { + +class MCContext; +class MCExpr; +class MCFixup; +class MCInst; +class MCInstrInfo; +class MCOperand; +class MCSubtargetInfo; +class raw_ostream; + +class Sw64MCCodeEmitter : public MCCodeEmitter { + const MCInstrInfo &MCII; + MCContext &Ctx; + bool IsLittleEndian; + +public: + Sw64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) + : MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} + Sw64MCCodeEmitter(const Sw64MCCodeEmitter &) = delete; + Sw64MCCodeEmitter &operator=(const Sw64MCCodeEmitter &) = delete; + ~Sw64MCCodeEmitter() override = default; + + void EmitByte(unsigned char C, raw_ostream &OS) const; + + void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI, + raw_ostream &OS) const; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + // getJumpTargetOpValue - Return binary encoding of the jump + // target operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + // getBranchTargetOpValue - Return binary encoding of the branch + // target operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + // getMachineOpValue - Return binary encoding of operand. If the machin + // operand requires relocation, record the relocation and return zero. 
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getMSAMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + template + unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getUImm4AndValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getRegisterListOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + unsigned getRegisterListOpValue16(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + +private: + MCInst LowerCompactBranch(MCInst TempInst) const; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCCODEEMITTER_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp new file mode 100644 index 000000000000..a1f2c430646d --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.cpp @@ -0,0 +1,176 @@ +//===-- Sw64MCExpr.cpp - Sw64 specific MC expression classes --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64MCExpr.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "sw64mcexpr" + +const Sw64MCExpr *Sw64MCExpr::create(Sw64MCExpr::Sw64ExprKind Kind, + const MCExpr *Expr, MCContext &Ctx) { + return new (Ctx) Sw64MCExpr(Kind, Expr); +} + +void Sw64MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + int64_t AbsVal; + // FIXME: the end "(" need match + if (Expr->evaluateAsAbsolute(AbsVal)) + OS << AbsVal; + else + Expr->print(OS, MAI, true); +} + +bool Sw64MCExpr::evaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout, + const MCFixup *Fixup) const { + if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup)) + return false; + + if (Res.getRefKind() != MCSymbolRefExpr::VK_None) + return false; + + // evaluateAsAbsolute() and evaluateAsValue() require that we evaluate the + // %hi/%lo/etc. here. Fixup is a null pointer when either of these is the + // caller. + if (Res.isAbsolute() && Fixup == nullptr) { + int64_t AbsVal = Res.getConstant(); + switch (Kind) { + case MEK_None: + llvm_unreachable("MEK_None is invalid"); + case MEK_DTPREL16: + // MEK_DTPREL is used for marking TLS DIEExpr only + // and contains a regular sub-expression. + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); + case MEK_ELF_LITERAL: /* !literal relocation. */ + case MEK_LITUSE_ADDR: /* !lituse_addr relocation. 
*/ + case MEK_LITUSE_BASE: /* !lituse_base relocation. */ + case MEK_LITUSE_BYTOFF: /* !lituse_bytoff relocation. */ + case MEK_LITUSE_JSR: /* !lituse_jsr relocation. */ + case MEK_LITUSE_TLSGD: /* !lituse_tlsgd relocation. */ + case MEK_LITUSE_TLSLDM: /* !lituse_tlsldm relocation. */ + case MEK_LITUSE_JSRDIRECT: /* !lituse_jsrdirect relocation. */ + case MEK_GPDISP: /* !gpdisp relocation. */ + case MEK_GPDISP_HI16: + case MEK_GPDISP_LO16: + case MEK_GPREL_HI16: /* !gprelhigh relocation. */ + case MEK_GPREL_LO16: /* !gprellow relocation. */ + case MEK_GPREL16: /* !gprel relocation. */ + case MEK_BRSGP: /* !samegp relocation. */ + case MEK_TLSGD: /* !tlsgd relocation. */ + case MEK_TLSLDM: /* !tlsldm relocation. */ + case MEK_GOTDTPREL16: /* !gotdtprel relocation. */ + case MEK_DTPREL_HI16: /* !dtprelhi relocation. */ + case MEK_DTPREL_LO16: /* !dtprello relocation. */ + case MEK_GOTTPREL16: /* !gottprel relocation. */ + case MEK_TPREL_HI16: /* !tprelhi relocation. */ + case MEK_TPREL_LO16: /* !tprello relocation. */ + case MEK_TPREL16: /* !tprel relocation. */ + case MEK_ELF_LITERAL_GOT: /* !literal_got relocation. */ + return false; + } + Res = MCValue::get(AbsVal); + return true; + } + // We want to defer it for relocatable expressions since the constant is + // applied to the whole symbol value. + // + // The value of getKind() that is given to MCValue is only intended to aid + // debugging when inspecting MCValue objects. It shouldn't be relied upon + // for decision making. + Res = + MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); + + return true; +} + +void Sw64MCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); + break; + case MCExpr::Constant: + break; + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + case MCExpr::SymbolRef: { + // We're known to be under a TLS fixup, so any symbol should be + // modified. There should be only one. + const MCSymbolRefExpr &SymRef = *cast(Expr); + cast(SymRef.getSymbol()).setType(ELF::STT_TLS); + break; + } + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); + break; + } +} + +// For lituse relocation, we don't need to change symbol type +// to tls. +void Sw64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + switch (getKind()) { + case MEK_None: + llvm_unreachable("MEK_None and MEK_Special are invalid"); + break; + case MEK_GPDISP: + case MEK_LITUSE_BASE: /* !lituse_base relocation. */ + case MEK_LITUSE_JSRDIRECT: /* !lituse_jsrdirect relocation. */ + case MEK_GPDISP_HI16: + case MEK_GPDISP_LO16: + case MEK_ELF_LITERAL: + case MEK_ELF_LITERAL_GOT: + case MEK_GPREL_HI16: + case MEK_GPREL_LO16: + case MEK_GPREL16: + case MEK_BRSGP: + // If we do have nested target-specific expressions, they will be in + // a consecutive chain. + if (const Sw64MCExpr *E = dyn_cast(getSubExpr())) + E->fixELFSymbolsInTLSFixups(Asm); + break; + case MEK_DTPREL16: + case MEK_LITUSE_ADDR: /* !lituse_addr relocation. */ + case MEK_LITUSE_BYTOFF: /* !lituse_bytoff relocation. */ + case MEK_LITUSE_JSR: /* !lituse_jsr relocation. */ + case MEK_LITUSE_TLSGD: /* !lituse_tlsgd relocation. 
*/ + case MEK_LITUSE_TLSLDM: /* !lituse_tlsldm relocation. */ + case MEK_TLSGD: /* !tlsgd relocation. */ + case MEK_TLSLDM: /* !tlsldm relocation. */ + case MEK_GOTDTPREL16: /* !gotdtprel relocation. */ + case MEK_DTPREL_HI16: /* !dtprelhi relocation. */ + case MEK_DTPREL_LO16: /* !dtprello relocation. */ + case MEK_GOTTPREL16: /* !gottprel relocation. */ + case MEK_TPREL_HI16: /* !tprelhi relocation. */ + case MEK_TPREL_LO16: /* !tprello relocation. */ + case MEK_TPREL16: /* !tprel relocation. */ + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); + break; + } +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h new file mode 100644 index 000000000000..a83efa56ff1a --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCExpr.h @@ -0,0 +1,97 @@ +//===- Sw64MCExpr.h - Sw64 specific MC expression classes -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCEXPR_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCEXPR_H + +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" + +namespace llvm { +class Sw64MCExpr : public MCTargetExpr { +public: + // for linker relax, add complex relocation + // exprkind here + enum Sw64ExprKind { + // use for relax + MEK_HINT = 0x100, + MEK_LITERAL = 0x200, + MEK_LITUSE = 0x400, + + // do complex relocation + MEK_LITUSE_BASE = MEK_LITERAL | MEK_LITUSE, + MEK_LITUSE_JSRDIRECT = MEK_HINT | MEK_LITUSE, + + // None + MEK_None = 0x000, + + // final reloc + MEK_ELF_LITERAL, /* !literal relocation. */ + MEK_ELF_LITERAL_GOT, /* !literal_got relocation */ + MEK_LITUSE_ADDR, /* !lituse_addr relocation. */ + MEK_LITUSE_BYTOFF, /* !lituse_bytoff relocation. */ + MEK_LITUSE_JSR, /* !lituse_jsr relocation. */ + MEK_LITUSE_TLSGD, /* !lituse_tlsgd relocation. */ + MEK_LITUSE_TLSLDM, /* !lituse_tlsldm relocation. */ + MEK_GPDISP, /* !gpdisp relocation. */ + MEK_GPDISP_HI16, + MEK_GPDISP_LO16, + MEK_GPREL_HI16, /* !gprelhigh relocation. */ + MEK_GPREL_LO16, /* !gprellow relocation. */ + MEK_GPREL16, /* !gprel relocation. */ + MEK_BRSGP, /* !samegp relocation. */ + MEK_TLSGD, /* !tlsgd relocation. */ + MEK_TLSLDM, /* !tlsldm relocation. */ + MEK_GOTDTPREL16, /* !gotdtprel relocation. */ + MEK_DTPREL_HI16, /* !dtprelhi relocation. */ + MEK_DTPREL_LO16, /* !dtprello relocation. */ + MEK_DTPREL16, /* !dtprel relocation. */ + MEK_GOTTPREL16, /* !gottprel relocation. */ + MEK_TPREL_HI16, /* !tprelhi relocation. */ + MEK_TPREL_LO16, /* !tprello relocation. */ + MEK_TPREL16, /* !tprel relocation. */ + }; + +private: + const Sw64ExprKind Kind; + const MCExpr *Expr; + + explicit Sw64MCExpr(Sw64ExprKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} + +public: + static const Sw64MCExpr *create(Sw64ExprKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + // Get the kind of this expression. + Sw64ExprKind getKind() const { return Kind; } + + // Get the child of this expression. 
+ const MCExpr *getSubExpr() const { return Expr; } + + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCEXPR_H diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp new file mode 100644 index 000000000000..d07dc3ff582d --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.cpp @@ -0,0 +1,189 @@ +//===-- Sw64MCTargetDesc.cpp - Sw64 Target Descriptions -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Sw64 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "Sw64MCTargetDesc.h" +#include "InstPrinter/Sw64InstPrinter.h" +#include "Sw64AsmBackend.h" +#include "Sw64ELFStreamer.h" +#include "Sw64MCAsmInfo.h" +#include "Sw64TargetStreamer.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/TargetParser/Triple.h" + +using namespace llvm; +namespace llvm { + +class MCInstrInfo; + +} // end namespace llvm +#define GET_INSTRINFO_MC_DESC +#include "Sw64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "Sw64GenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "Sw64GenRegisterInfo.inc" + +/// Select the Sw64 CPU for the given triple and cpu name. 
+/// FIXME: Merge with the copy in Sw64Subtarget.cpp +StringRef SW64_MC::selectSw64CPU(const Triple &TT, StringRef CPU) { + return CPU = "sw_64"; +} + +static MCInstrInfo *createSw64MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSw64MCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createSw64MCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSw64MCRegisterInfo(X, Sw64::R26); + return X; +} + +static MCSubtargetInfo *createSw64MCSubtargetInfo(const Triple &TT, + StringRef CPU, StringRef FS) { + CPU = SW64_MC::selectSw64CPU(TT, CPU); + return createSw64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); +} + +static MCAsmInfo *createSw64MCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT, + const MCTargetOptions &Options) { + MCAsmInfo *MAI = new Sw64MCAsmInfo(TT, Options); + + unsigned SP = MRI.getDwarfRegNum(Sw64::R30, true); + MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCInstPrinter *createSw64MCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new Sw64InstPrinter(MAI, MII, MRI); +} + +static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter, + bool RelaxAll) { + MCStreamer *S; + S = createSw64ELFStreamer(Context, std::move(MAB), std::move(OW), + std::move(Emitter), RelaxAll); + return S; +} + +static MCTargetStreamer *createSw64AsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new Sw64TargetAsmStreamer(S, OS); +} + +static MCTargetStreamer *createSw64NullTargetStreamer(MCStreamer &S) { + return new Sw64TargetStreamer(S); +} + +static MCTargetStreamer * +createSw64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new Sw64TargetELFStreamer(S, STI); +} + +namespace { + +class Sw64MCInstrAnalysis : public MCInstrAnalysis { +public: + Sw64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} + + bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, + uint64_t &Target) const override { + unsigned NumOps = Inst.getNumOperands(); + if (NumOps == 0) + return false; + if (Inst.getOpcode() == Sw64::JSR || Inst.getOpcode() == Sw64::JSR) { + Target = Inst.getOperand(NumOps - 1).getImm() != 0 + ? Inst.getOperand(NumOps - 2).getImm() + : Addr + 4; + return true; + } + switch (Info->get(Inst.getOpcode()).operands()[NumOps - 1].OperandType) { + default: + return false; + case MCOI::OPERAND_PCREL: + Target = Addr + Inst.getOperand(NumOps - 1).getImm() * 4 + 4; + return true; + } + } +}; +} // namespace + +static MCInstrAnalysis *createSw64MCInstrAnalysis(const MCInstrInfo *Info) { + return new Sw64MCInstrAnalysis(Info); +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64TargetMC() { + Target *T = &getTheSw64Target(); + + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createSw64MCAsmInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createSw64MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createSw64MCRegisterInfo); + + // Register the elf streamer. + TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); + + // Register the asm target streamer. 
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createSw64AsmTargetStreamer); + + TargetRegistry::RegisterNullTargetStreamer(*T, createSw64NullTargetStreamer); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createSw64MCSubtargetInfo); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(*T, createSw64MCInstrAnalysis); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createSw64MCInstPrinter); + + TargetRegistry::RegisterObjectTargetStreamer(*T, + createSw64ObjectTargetStreamer); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(*T, createSw64AsmBackend); + + // Register the MC Code Emitter + TargetRegistry::RegisterMCCodeEmitter(*T, createSw64MCCodeEmitterEL); +} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h new file mode 100644 index 000000000000..4ab9d2fff507 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64MCTargetDesc.h @@ -0,0 +1,66 @@ +//===-- Sw64MCTargetDesc.h - Sw64 Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Sw64 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCTARGETDESC_H +#define LLVM_LIB_TARGET_SW64_MCTARGETDESC_SW64MCTARGETDESC_H + +#include "llvm/Support/DataTypes.h" + +#include + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectTargetWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class MCTargetOptions; +class StringRef; +class Target; +class Triple; +class raw_ostream; +class raw_pwrite_stream; + +Target &getTheSw64Target(); + +MCCodeEmitter *createSw64MCCodeEmitterEL(const MCInstrInfo &MCII, + MCContext &Ctx); + +MCAsmBackend *createSw64AsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options); + +std::unique_ptr +createSw64ELFObjectWriter(const Triple &TT, bool IsS32); + +namespace SW64_MC { +StringRef selectSw64CPU(const Triple &TT, StringRef CPU); +} + +} // namespace llvm + +// Defines symbolic names for Sw64 registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "Sw64GenRegisterInfo.inc" + +// Defines symbolic names for the Sw64 instructions. +#define GET_INSTRINFO_ENUM +#include "Sw64GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "Sw64GenSubtargetInfo.inc" + +#endif diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp new file mode 100644 index 000000000000..07bddfbacfb7 --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64OptionRecord.cpp @@ -0,0 +1,32 @@ +//===- Sw64OptionRecord.cpp - Abstraction for storing information ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
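// Editorial sketch, not part of the patch: the PC-relative case in
// Sw64MCInstrAnalysis::evaluateBranch above resolves a branch target as the
// address of the following instruction plus the word-scaled immediate. Shown
// standalone with an illustrative name.
#include <cstdint>
static uint64_t pcRelBranchTarget(uint64_t InstAddr, int64_t Imm) {
  return InstAddr + 4 + (uint64_t)(Imm * 4); // next PC plus 4 * displacement
}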
+// +//===----------------------------------------------------------------------===// + +#include "Sw64OptionRecord.h" +#include "Sw64ABIInfo.h" +#include "Sw64ELFStreamer.h" +#include "Sw64TargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSectionELF.h" +#include + +using namespace llvm; + +void Sw64RegInfoRecord::EmitSw64OptionRecord() { + + // We need to distinguish between S64 and the rest because at the moment + // we don't emit .Sw64.options for other ELFs other than S64. + // Since .reginfo has the same information as .Sw64.options (ODK_REGINFO), + // we can use the same abstraction (Sw64RegInfoRecord class) to handle both. +} + +void Sw64RegInfoRecord::SetPhysRegUsed(unsigned Reg, + const MCRegisterInfo *MCRegInfo) {} diff --git a/llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp new file mode 100644 index 000000000000..19cdbc7d0c5a --- /dev/null +++ b/llvm/lib/Target/Sw64/MCTargetDesc/Sw64TargetStreamer.cpp @@ -0,0 +1,388 @@ +//===-- Sw64TargetStreamer.cpp - Sw64 Target Streamer Methods -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Sw64 specific target streamer methods. +// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetStreamer.h" +#include "InstPrinter/Sw64InstPrinter.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "Sw64ELFStreamer.h" +#include "Sw64MCExpr.h" +#include "Sw64MCTargetDesc.h" +#include "Sw64TargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +namespace llvm { +struct Sw64InstrTable { + MCInstrDesc Insts[4445]; + MCOperandInfo OperandInfo[3026]; + MCPhysReg ImplicitOps[130]; +}; +extern const Sw64InstrTable Sw64Descs; +} // end namespace llvm + +namespace { +static cl::opt RoundSectionSizes( + "sw_64-round-section-sizes", cl::init(false), + cl::desc("Round section sizes up to the section alignment"), cl::Hidden); +} // end anonymous namespace + +Sw64TargetStreamer::Sw64TargetStreamer(MCStreamer &S) + : MCTargetStreamer(S), ModuleDirectiveAllowed(true) { + GPRInfoSet = FPRInfoSet = FrameInfoSet = false; +} +void Sw64TargetStreamer::emitDirectiveSetReorder() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveSetNoReorder() {} +void Sw64TargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveSetNoAt() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitDirectiveEnd(StringRef Name) {} +void 
Sw64TargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {} +void Sw64TargetStreamer::emitDirectiveNaN2008() {} +void Sw64TargetStreamer::emitDirectiveNaNLegacy() {} +void Sw64TargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); } +void Sw64TargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) {} + +void Sw64TargetStreamer::emitDirectiveSetCore3b() {} +void Sw64TargetStreamer::emitDirectiveSetCore4() {} + +void Sw64TargetAsmStreamer::emitDirectiveSetCore3b() { + OS << "\t.arch= \t core3b\n"; + forbidModuleDirective(); +} +void Sw64TargetAsmStreamer::emitDirectiveSetCore4() { + OS << "\t.arch= \t core4\n"; + forbidModuleDirective(); +} + +void Sw64TargetStreamer::emitDirectiveSetArch(StringRef Arch) { + forbidModuleDirective(); +} + +void Sw64TargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) {} + +Sw64TargetAsmStreamer::Sw64TargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS) + : Sw64TargetStreamer(S), OS(OS) {} + +void Sw64TargetAsmStreamer::emitDirectiveSetReorder() { + Sw64TargetStreamer::emitDirectiveSetReorder(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetNoReorder() { + forbidModuleDirective(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetMacro() { + Sw64TargetStreamer::emitDirectiveSetMacro(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetNoMacro() { + Sw64TargetStreamer::emitDirectiveSetNoMacro(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetAt() { + Sw64TargetStreamer::emitDirectiveSetAt(); +} + +void Sw64TargetAsmStreamer::emitDirectiveSetNoAt() { + Sw64TargetStreamer::emitDirectiveSetNoAt(); +} + +void Sw64TargetAsmStreamer::emitDirectiveEnd(StringRef Name) { + OS << "\t.end\t" << Name << '\n'; +} + +void Sw64TargetAsmStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { + OS << "\t.ent\t" << Symbol.getName() << '\n'; +} + +void Sw64TargetAsmStreamer::emitDirectiveNaN2008() { OS << "\t.nan\t2008\n"; } + +void Sw64TargetAsmStreamer::emitDirectiveNaNLegacy() { + OS << "\t.nan\tlegacy\n"; +} + +void Sw64TargetAsmStreamer::emitDirectiveInsn() { + Sw64TargetStreamer::emitDirectiveInsn(); + OS << "\t.insn\n"; +} + +void Sw64TargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) { + OS << "\t.frame\t$" + << StringRef(Sw64InstPrinter::getRegisterName(StackReg)).lower() << "," + << StackSize << ",$" + << StringRef(Sw64InstPrinter::getRegisterName(ReturnReg)).lower() << '\n'; +} + +void Sw64TargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { + OS << "\t.set arch=" << Arch << "\n"; + Sw64TargetStreamer::emitDirectiveSetArch(Arch); +} + +// This part is for ELF object output. +Sw64TargetELFStreamer::Sw64TargetELFStreamer(MCStreamer &S, + const MCSubtargetInfo &STI) + : Sw64TargetStreamer(S), STI(STI) { + MCAssembler &MCA = getStreamer().getAssembler(); + + // It's possible that MCObjectFileInfo isn't fully initialized at this point + // due to an initialization order problem where LLVMTargetMachine creates the + // target streamer before TargetLoweringObjectFile calls + // InitializeMCObjectFileInfo. There doesn't seem to be a single place that + // covers all cases so this statement covers most cases and direct object + // emission must call setPic() once MCObjectFileInfo has been initialized. The + // cases we don't handle here are covered by Sw64AsmPrinter. + Pic = MCA.getContext().getObjectFileInfo()->isPositionIndependent(); + + // Set the header flags that we can in the constructor. + // FIXME: This is a fairly terrible hack. 
We set the rest + // of these in the destructor. The problem here is two-fold: + // + // a: Some of the eflags can be set/reset by directives. + // b: There aren't any usage paths that initialize the ABI + // pointer until after we initialize either an assembler + // or the target machine. + // We can fix this by making the target streamer construct + // the ABI, but this is fraught with wide ranging dependency + // issues as well. + unsigned EFlags = MCA.getELFHeaderEFlags(); + + // FIXME: Fix a dependency issue by instantiating the ABI object to some + // default based off the triple. The triple doesn't describe the target + // fully, but any external user of the API that uses the MCTargetStreamer + // would otherwise crash on assertion failure. + + ABI = Sw64ABIInfo(Sw64ABIInfo::S64()); + + MCA.setELFHeaderEFlags(EFlags); +} + +void Sw64TargetELFStreamer::emitLabel(MCSymbol *S) { + auto *Symbol = cast(S); + getStreamer().getAssembler().registerSymbol(*Symbol); + uint8_t Type = Symbol->getType(); + if (Type != ELF::STT_FUNC) + return; +} + +void Sw64TargetELFStreamer::finish() { + MCAssembler &MCA = getStreamer().getAssembler(); + const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo(); + + // .bss, .text and .data are always at least 16-byte aligned. + MCSection &TextSection = *OFI.getTextSection(); + MCA.registerSection(TextSection); + MCSection &DataSection = *OFI.getDataSection(); + MCA.registerSection(DataSection); + MCSection &BSSSection = *OFI.getBSSSection(); + MCA.registerSection(BSSSection); + + TextSection.ensureMinAlignment(Align(16)); + DataSection.ensureMinAlignment(Align(16)); + BSSSection.ensureMinAlignment(Align(16)); + + if (RoundSectionSizes) { + // Make sections sizes a multiple of the alignment. This is useful for + // verifying the output of IAS against the output of other assemblers but + // it's not necessary to produce a correct object and increases section + // size. + MCStreamer &OS = getStreamer(); + for (MCSection &S : MCA) { + MCSectionELF &Section = static_cast(S); + + Align Alignment = Section.getAlign(); + OS.switchSection(&Section); + if (Section.useCodeAlign()) + OS.emitCodeAlignment(Alignment, &STI, Alignment.value()); + else + OS.emitValueToAlignment(Alignment, 0, 1, Alignment.value()); + } + } + + // Update e_header flags. See the FIXME and comment above in + // the constructor for a full rundown on this. + unsigned EFlags = MCA.getELFHeaderEFlags(); + + if (Pic) + EFlags |= ELF::EF_SW64_PIC | ELF::EF_SW64_CPIC; + + MCA.setELFHeaderEFlags(EFlags); + + // Emit all the option records. + // At the moment we are only emitting .Sw64.options (ODK_REGINFO) and + // .reginfo. 
+ Sw64ELFStreamer &MEF = static_cast(Streamer); + MEF.EmitSw64OptionRecords(); +} + +MCELFStreamer &Sw64TargetELFStreamer::getStreamer() { + return static_cast(Streamer); +} + +void Sw64TargetELFStreamer::emitDirectiveSetNoReorder() { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags |= ELF::EF_SW64_NOREORDER; + MCA.setELFHeaderEFlags(Flags); + forbidModuleDirective(); +} + +void Sw64TargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { + GPRInfoSet = FPRInfoSet = FrameInfoSet = false; + + // .ent also acts like an implicit '.type symbol, STT_FUNC' + static_cast(Symbol).setType(ELF::STT_FUNC); +} + +void Sw64TargetELFStreamer::emitDirectiveNaN2008() { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags |= ELF::EF_SW64_NAN2008; + MCA.setELFHeaderEFlags(Flags); +} + +void Sw64TargetELFStreamer::emitDirectiveNaNLegacy() { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags &= ~ELF::EF_SW64_NAN2008; + MCA.setELFHeaderEFlags(Flags); +} + +void Sw64TargetELFStreamer::emitDirectiveInsn() { + Sw64TargetStreamer::emitDirectiveInsn(); + Sw64ELFStreamer &MEF = static_cast(Streamer); + MEF.createPendingLabelRelocs(); +} + +void Sw64TargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg_) { + MCContext &Context = getStreamer().getAssembler().getContext(); + const MCRegisterInfo *RegInfo = Context.getRegisterInfo(); + + FrameInfoSet = true; + FrameReg = RegInfo->getEncodingValue(StackReg); + FrameOffset = StackSize; + ReturnReg = RegInfo->getEncodingValue(ReturnReg_); +} + +static const char *getRelType(const MCExpr *Expr, const MCSubtargetInfo &STI) { + const Sw64MCExpr *Sw64Expr = cast(Expr); + static int curgpdist = 0; + switch (Sw64Expr->getKind()) { + default: + return ""; + case Sw64MCExpr::MEK_GPDISP_HI16: + case Sw64MCExpr::MEK_GPDISP_LO16: + case Sw64MCExpr::MEK_GPDISP: { + std::string a = + std::string("!gpdisp!") + std::to_string((curgpdist) / 2 + 1); + curgpdist++; + return strdup(a.c_str()); + } + case Sw64MCExpr::MEK_ELF_LITERAL: + return "!literal"; + case Sw64MCExpr::MEK_LITUSE_ADDR: /* !lituse_addr relocation. */ + return "!lituse_addr"; + case Sw64MCExpr::MEK_LITUSE_BASE: /* !lituse_base relocation. */ + return "!literal"; + case Sw64MCExpr::MEK_LITUSE_BYTOFF: /* !lituse_bytoff relocation. */ + return "!lituse_bytoff"; + case Sw64MCExpr::MEK_LITUSE_JSR: /* !lituse_jsr relocation. */ + return "!lituse_jsr"; + case Sw64MCExpr::MEK_LITUSE_TLSGD: /* !lituse_tlsgd relocation. */ + return "!lituse_tlsgd"; + case Sw64MCExpr::MEK_LITUSE_TLSLDM: /* !lituse_tlsldm relocation. */ + return "!lituse_tlsldm"; + // case Sw64MCExpr::MEK_LITUSE_JSRDIRECT: /* !lituse_jsrdirect relocation. + // */ + // return "!lituse_jsrdirect"; + case Sw64MCExpr::MEK_GPREL_HI16: /* !gprelhigh relocation. */ + return "!gprelhigh"; + case Sw64MCExpr::MEK_GPREL_LO16: /* !gprellow relocation. */ + return "!gprellow"; + case Sw64MCExpr::MEK_GPREL16: /* !gprel relocation. */ + return "!gprel"; + case Sw64MCExpr::MEK_BRSGP: /* !samegp relocation. */ + return "!samegp"; + case Sw64MCExpr::MEK_TLSGD: /* !tlsgd relocation. */ + return "!tlsgd"; + case Sw64MCExpr::MEK_TLSLDM: /* !tlsldm relocation. */ + return "!tlsldm"; + case Sw64MCExpr::MEK_GOTDTPREL16: /* !gotdtprel relocation. */ + return "!gotdtprel"; + case Sw64MCExpr::MEK_DTPREL_HI16: /* !dtprelhi relocation. 
*/ + return "!dtprelhi"; + case Sw64MCExpr::MEK_DTPREL_LO16: /* !dtprello relocation. */ + return "!dtprello"; + case Sw64MCExpr::MEK_DTPREL16: /* !dtprel relocation. */ + return "!dtprel"; + case Sw64MCExpr::MEK_GOTTPREL16: /* !gottprel relocation. */ + return "!gottprel"; + case Sw64MCExpr::MEK_TPREL_HI16: /* !tprelhi relocation. */ + return "!tprelhi"; + case Sw64MCExpr::MEK_TPREL_LO16: /* !tprello relocation. */ + return "!tprello"; + case Sw64MCExpr::MEK_TPREL16: /* !tprel relocation. */ + return "!tprel"; + case Sw64MCExpr::MEK_ELF_LITERAL_GOT: /* !literal_got relocation. */ + return "!literal_got"; + } +} + +static void printRelocInst(MCInstPrinter &InstPrinter, const MCInst &Inst, + raw_ostream &OS, const MCSubtargetInfo &STI, + uint64_t Address) { + MCOperand Op = Inst.getOperand(1); + if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::Target) { + const char *RelName = getRelType(Expr, STI); + InstPrinter.printInst(&Inst, Address, RelName, STI, OS); + return; + } + } + InstPrinter.printInst(&Inst, Address, "", STI, OS); +} + +void Sw64TargetStreamer::prettyPrintAsm(MCInstPrinter &InstPrinter, + uint64_t Address, const MCInst &Inst, + const MCSubtargetInfo &STI, + raw_ostream &OS) { + const MCInstrDesc &MCID = + Sw64Descs.Insts[Sw64::INSTRUCTION_LIST_END - 1 - Inst.getOpcode()]; + // while moving mayload flags for ldi/ldih + // adding opcode determine here + if (MCID.mayLoad() || MCID.mayStore() || Inst.getOpcode() == Sw64::LDAH || + Inst.getOpcode() == Sw64::LDA) { + printRelocInst(InstPrinter, Inst, OS, STI, Address); + return; + } + InstPrinter.printInst(&Inst, Address, "", STI, OS); +} diff --git a/llvm/lib/Target/Sw64/README.txt b/llvm/lib/Target/Sw64/README.txt new file mode 100644 index 000000000000..b69205b49b6c --- /dev/null +++ b/llvm/lib/Target/Sw64/README.txt @@ -0,0 +1,7 @@ +To-do +----- + +* Instruction encodings +* Tailcalls +* Investigate loop alignment +* Add builtins diff --git a/llvm/lib/Target/Sw64/Sw64.h b/llvm/lib/Target/Sw64/Sw64.h new file mode 100644 index 000000000000..1d2d3f05bb4f --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64.h @@ -0,0 +1,56 @@ +//===-- Sw64.h - Top-level interface for Sw64 representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Sw64 back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64_H +#define LLVM_LIB_TARGET_SW64_SW64_H + +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +namespace Sw64 { +// These describe LDAx +static const int IMM_LOW = -32768; +static const int IMM_HIGH = 32767; +static const int IMM_MULT = 65536; +} // namespace Sw64 + +class FunctionPass; +class ModulePass; +class TargetMachine; +class Sw64TargetMachine; +class formatted_raw_ostream; + +FunctionPass *createSw64ISelDag(Sw64TargetMachine &TM, + CodeGenOpt::Level OptLevel); + +FunctionPass *createSw64LLRPPass(Sw64TargetMachine &tm); +FunctionPass *createSw64BranchSelectionPass(); +FunctionPass *createSw64BranchSelection(); +FunctionPass *createSw64PreLegalizeCombiner(); // for fmad +FunctionPass *createSw64ExpandPseudoPass(); +FunctionPass *createSw64ExpandPseudo2Pass(); +FunctionPass *createSw64CombineLSPass(); +FunctionPass *createSw64IEEEConstraintPass(); + +bool LowerSw64MachineOperandToMCOperand(const MachineOperand &MO, + MCOperand &MCOp, const AsmPrinter &AP); + +void initializeSw64BranchSelectionPass(PassRegistry &); +void initializeSw64PreLegalizerCombinerPass(PassRegistry &); // for fmad +void initializeSw64DAGToDAGISelPass(PassRegistry &); +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/Sw64.td b/llvm/lib/Target/Sw64/Sw64.td new file mode 100644 index 000000000000..fba48fc8115f --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64.td @@ -0,0 +1,154 @@ +//===- Sw64.td - Describe the Sw64 Target Machine --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +// Get the target-independent interfaces which we are implementing... 
+
+include "llvm/Target/Target.td"
+
+// Sw64 is little endian.
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
+                                  "Enable CIX extensions">;
+
+// argument, type, value, help text
+
+def Featurecore3b : SubtargetFeature<"core3b", "Sw64ArchVersion", "core3b",
+                                     "Enable core3b Feature">;
+
+def Featurecore4 : SubtargetFeature<"core4", "Sw64ArchVersion", "core4",
+                                    "Enable core4 Feature">;
+
+def FeatureRelax : SubtargetFeature<"relax", "relax", "true",
+                                    "Enable relax ld attribute">;
+
+def FeatureEv : SubtargetFeature<"swEv", "Ev", "true",
+                                 "Enable Sw6a Feature test">;
+foreach i = {1-14, 22-25} in
+  def FeatureReserve#i : SubtargetFeature<"reserve-r"#i, "ReserveRegister["#i#"]", "true",
+                                          "Reserve "#i#", making it unavailable "
+                                          "as a GPR">;
+
+def FeatureOptMul : SubtargetFeature<"swOptMul", "Sw64OptMul", "true",
+                                     "Enable Sw6b optimize mul">;
+
+def Featureintarith : SubtargetFeature<"swIntArith", "Sw64EnableIntAri", "true",
+                                       "Enable core4 integer arithmetic instructions">;
+def Featureintshift : SubtargetFeature<"swIntShift", "Sw64EnableIntShift", "true",
+                                       "Enable core4 integer shift instructions">;
+def Featurebyteinst : SubtargetFeature<"swByteInst", "Sw64EnableByteInst", "true",
+                                       "Enable core4 byte manipulation instructions">;
+def Featurefloatarith : SubtargetFeature<"swFloatArith", "Sw64EnableFloatAri", "true",
+                                         "Enable core4 float arithmetic instructions">;
+def Featurefloatround : SubtargetFeature<"swFloatRound", "Sw64EnableFloatRound", "true",
+                                         "Enable core4 float round instructions">;
+def Featurepostinc : SubtargetFeature<"swPostInc", "Sw64EnablePostInc", "true",
+                                      "Enable core4 post-inc load and store instructions">;
+def Featurecrcinst : SubtargetFeature<"swCrcInst", "Sw64EnableCrcInst", "true",
+                                      "Enable core4 crc32 instructions">;
+
+def FeatureSIMD : SubtargetFeature<"simd", "HasSIMD", "true",
+                                   "Sw64 SIMD Instruction">;
+
+//***********************
+// Subtarget Support test
+//***********************
+def HasMieee : Predicate<"MF->getSubtarget<Sw64Subtarget>().hasMieee()">,
+               AssemblerPredicate<(all_of FeatureCIX)>;
+
+def HasCore3b : Predicate<"Subtarget->hasCore3b()">,
+                AssemblerPredicate<(all_of Featurecore3b)>;
+
+def HasCore4 : Predicate<"Subtarget->hasCore4()">,
+               AssemblerPredicate<(all_of Featurecore4)>;
+
+def enRelax : Predicate<"Subtarget->enRelax()">,
+              AssemblerPredicate<(all_of FeatureRelax)>;
+
+def HasEv : Predicate<"Subtarget->hasEv()">,
+            AssemblerPredicate<(all_of FeatureEv)>;
+
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "Sw64RegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Description
+//===----------------------------------------------------------------------===//
+
+include "Sw64CallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Base Schedule Description
+//===----------------------------------------------------------------------===//
+
+include "Sw64Schedule.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include
"Sw64InstrInfo.td" + + +//===----------------------------------------------------------------------===// +// MicroArchitechural Schedule Descriptions +//===----------------------------------------------------------------------===// + +include "Sw64SchedCore3.td" +include "Sw64SchedCore4.td" +include "Sw64SchedCore3SIMD.td" + +def Sw64InstrInfo : InstrInfo { +} + +//===----------------------------------------------------------------------===// +// Sw64 Processor Definitions +//===----------------------------------------------------------------------===// + +//*********************** +// Sw processor test +//*********************** + +class Proc Features> + : ProcessorModel; + +def : Proc<"sw_64", []>; +def : Proc<"sw6a", [Featurecore3b]>; +def : Proc<"sw6b", [Featurecore3b]>; +def : Proc<"sw4d", [Featurecore3b]>; +def : Proc<"sw8a", [Featurecore3b, Featurecore4]>; + +//===----------------------------------------------------------------------===// +// The Sw64 Target +//===----------------------------------------------------------------------===// +def Sw64AsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +def Sw64AsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +def Sw64 : Target { + // Pull in Instruction Info: + let InstructionSet = Sw64InstrInfo; + let AssemblyWriters = [Sw64AsmWriter]; + let AssemblyParsers = [Sw64AsmParser]; +} diff --git a/llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp b/llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp new file mode 100644 index 000000000000..36168e986db4 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64AsmPrinter.cpp @@ -0,0 +1,308 @@ +//===-- Sw64AsmPrinter.cpp - Sw64 LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the XAS-format Sw64 assembly language. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstPrinter/Sw64InstPrinter.h"
+#include "MCTargetDesc/Sw64BaseInfo.h"
+#include "Sw64.h"
+#include "Sw64InstrInfo.h"
+#include "Sw64MCInstLower.h"
+#include "Sw64Subtarget.h"
+#include "Sw64TargetMachine.h"
+#include "Sw64TargetStreamer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include
+#include
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+class Sw64AsmPrinter : public AsmPrinter {
+  Sw64MCInstLower MCInstLowering;
+  Sw64TargetStreamer &getTargetStreamer();
+  /// InConstantPool - Maintain state when emitting a sequence of constant
+  /// pool entries so we can properly mark them as data regions.
+  bool InConstantPool = false;
+
+public:
+  explicit Sw64AsmPrinter(TargetMachine &TM,
+                          std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {}
+
+  StringRef getPassName() const override { return "Sw64 Assembly Printer"; }
+
+  void printOp(const MachineOperand &MO, raw_ostream &O);
+  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &O) override;
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+                             const char *ExtraCode, raw_ostream &O) override;
+
+  void emitFunctionEntryLabel() override;
+  void emitInstruction(const MachineInstr *MI) override;
+  void emitFunctionBodyStart() override;
+  void emitFunctionBodyEnd() override;
+  void emitStartOfAsmFile(Module &M) override;
+  bool isBlockOnlyReachableByFallthrough(
+      const MachineBasicBlock *MBB) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+                                   const MachineInstr *MI);
+  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
+    return LowerSw64MachineOperandToMCOperand(MO, MCOp, *this);
+  }
+};
+} // end of anonymous namespace
+
+bool Sw64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+
+  // Initialize TargetLoweringObjectFile.
+  AsmPrinter::runOnMachineFunction(MF);
+  return true;
+}
+
+bool Sw64AsmPrinter::isBlockOnlyReachableByFallthrough(
+    const MachineBasicBlock *MBB) const {
+  // The predecessor has to be immediately before this block.
+  const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+  // If the predecessor is a switch statement, assume a jump table
+  // implementation, so it is not a fall through.
+  if (const BasicBlock *bb = Pred->getBasicBlock())
+    if (isa<SwitchInst>(bb->getTerminator()))
+      return false;
+
+  // If this is a landing pad, it isn't a fall through.
If it has no preds, + // then nothing falls through to it. + if (MBB->isEHPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Otherwise, check the last instruction. + // Check if the last terminator is an unconditional branch. + MachineBasicBlock::const_iterator I = Pred->end(); + while (I != Pred->begin() && !(--I)->isTerminator()) + ; + return false; + // return !I->isBarrier(); + // ; +} + +Sw64TargetStreamer &Sw64AsmPrinter::getTargetStreamer() { + return static_cast(*OutStreamer->getTargetStreamer()); +} + +//===----------------------------------------------------------------------===// +// Frame and Set directives +//===----------------------------------------------------------------------===// +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void Sw64AsmPrinter::emitFunctionBodyStart() { + MCInstLowering.Initialize(&MF->getContext()); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void Sw64AsmPrinter::emitFunctionBodyEnd() { + // Emit function end directives + Sw64TargetStreamer &TS = getTargetStreamer(); + + // There are instruction for this macros, but they must + // always be at the function end, and we can't emit and + // break with BB logic. + TS.emitDirectiveSetAt(); + TS.emitDirectiveSetMacro(); + TS.emitDirectiveSetReorder(); + + TS.emitDirectiveEnd(CurrentFnSym->getName()); + // Make sure to terminate any constant pools that were at the end + // of the function. 
+ if (!InConstantPool) + return; + InConstantPool = false; + OutStreamer->emitDataRegion(MCDR_DataRegionEnd); +} + +void Sw64AsmPrinter::emitFunctionEntryLabel() { + Sw64TargetStreamer &TS = getTargetStreamer(); + + TS.emitDirectiveEnt(*CurrentFnSym); + OutStreamer->emitLabel(CurrentFnSym); +} + +void Sw64AsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + + if (MO.isReg()) { + assert(Register::isPhysicalRegister(MO.getReg()) && "Not physreg??"); + O << Sw64InstPrinter::getRegisterName(MO.getReg()); + } else if (MO.isImm()) { + O << MO.getImm(); + } else { + printOp(MO, O); + } +} +void Sw64AsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << Sw64InstPrinter::getRegisterName(MO.getReg()); + return; + + case MachineOperand::MO_Immediate: + assert(0 && "printOp() does not handle immediate values"); + return; + + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(O, MAI); + return; + + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + << MO.getIndex(); + return; + + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + return; + + case MachineOperand::MO_GlobalAddress: + getSymbol(MO.getGlobal())->print(O, MAI); + return; + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' + << MO.getIndex(); + return; + + default: + O << ""; + return; + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +bool Sw64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + // Print the operand if there is no operand modifier. + if (!ExtraCode || !ExtraCode[0]) { + printOperand(MI, OpNo, O); + return false; + } + if (ExtraCode && ExtraCode[0]) + if (ExtraCode[1] != 0) + return true; + + switch (ExtraCode[0]) { + default: + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); + case 'r': + printOperand(MI, OpNo, O); + return false; + } + // Otherwise fallback on the default implementation. + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); +} + +void Sw64AsmPrinter::emitStartOfAsmFile(Module &M) { + if (OutStreamer->hasRawTextSupport()) { + OutStreamer->emitRawText(StringRef("\t.set noreorder")); + OutStreamer->emitRawText(StringRef("\t.set volatile")); + OutStreamer->emitRawText(StringRef("\t.set noat")); + OutStreamer->emitRawText(StringRef("\t.set nomacro")); + } +} + +bool Sw64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + const char *ExtraCode, + raw_ostream &O) { + assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); + + const MachineOperand &BaseMO = MI->getOperand(OpNum); + + assert(BaseMO.isReg() && + "Unexpected base pointer for inline asm memory operand."); + + if (ExtraCode && ExtraCode[0]) { + return true; // Unknown modifier. 
+  }
+
+  O << "0(" << Sw64InstPrinter::getRegisterName(BaseMO.getReg()) << ")";
+
+  return false;
+}
+
+#include "Sw64GenMCPseudoLowering.inc"
+
+void Sw64AsmPrinter::emitInstruction(const MachineInstr *MI) {
+  if (MI->isDebugValue())
+    return;
+  SmallString<128> Str;
+  raw_svector_ostream O(Str);
+
+  if (emitPseudoExpansionLowering(*OutStreamer, MI))
+    return;
+
+  if (MI->getOpcode() == Sw64::STQ_C || MI->getOpcode() == Sw64::STL_C)
+    OutStreamer->emitCodeAlignment(Align(8), &getSubtargetInfo());
+
+  MCInst TmpInst;
+  MCInstLowering.Lower(MI, TmpInst);
+
+  EmitToStreamer(*OutStreamer, TmpInst);
+}
+
+// Force static initialization.
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64AsmPrinter() {
+  RegisterAsmPrinter<Sw64AsmPrinter> X(getTheSw64Target());
+}
diff --git a/llvm/lib/Target/Sw64/Sw64BranchSelector.cpp b/llvm/lib/Target/Sw64/Sw64BranchSelector.cpp
new file mode 100644
index 000000000000..cd1c3c4c3ce8
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64BranchSelector.cpp
@@ -0,0 +1,81 @@
+//===-- Sw64BranchSelector.cpp - Convert Pseudo branches ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace Pseudo COND_BRANCH_* with their appropriate real branch
+// Simplified version of the PPC Branch Selector
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sw64.h"
+#include "Sw64InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include
+#include
+#include
+#include
+#include
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sw_64-branch-expansion"
+
+namespace {
+class Sw64BranchSelection : public MachineFunctionPass {
+public:
+  static char ID;
+
+  Sw64BranchSelection() : MachineFunctionPass(ID) {
+    initializeSw64BranchSelectionPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Sw64 Branch Expansion Pass";
+  }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+};
+} // end of anonymous namespace
+
+char Sw64BranchSelection::ID = 0;
+
+INITIALIZE_PASS(Sw64BranchSelection, DEBUG_TYPE,
+                "Expand out of range branch instructions and fix forbidden"
+                " slot hazards",
+                false, false)
+
+/// Returns a pass that expands out-of-range branch instructions.
+FunctionPass *llvm::createSw64BranchSelection() {
+  return new Sw64BranchSelection();
+}
+
+bool Sw64BranchSelection::runOnMachineFunction(MachineFunction &F) {
+
+  return true;
+}
diff --git a/llvm/lib/Target/Sw64/Sw64CallingConv.td b/llvm/lib/Target/Sw64/Sw64CallingConv.td
new file mode 100644
index 000000000000..7b0275c8c9fb
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64CallingConv.td
@@ -0,0 +1,72 @@
+//===- Sw64CallingConv.td - Calling Conventions for Sw64 -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the Sw64 architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+    : CCIf<!strconcat("static_cast<const Sw64Subtarget&>"
+                      "(State.getMachineFunction().getSubtarget()).",
+                      F), A>;
+
+//===----------------------------------------------------------------------===//
+// Sw64 Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+def RetCC_Sw64 : CallingConv<[
+  // i64 is returned in register R0
+  // R1 is an llvm extension, I don't know what gcc does
+  CCIfType<[i64], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+  // f32 / f64 are returned in F0/F1
+  CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>,
+
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f32, v4f64],
+           CCAssignToReg<[V0, V1]>>,
+
+  CCIfSubtarget<"hasSIMD()",
+                CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f32, v4f64],
+                         CCAssignToReg<[F0, F1]>>>
+]>;
+
+// In soft-mode, register R16+R17, instead of R0+R1, is used to return a long
+// double value.
+def RetCC_F128Soft_Sw64 : CallingConv<[
+  CCIfType<[i64], CCAssignToReg<[R16, R17]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// Sw64 Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CC_Sw64 : CallingConv<[
+  // The first 6 arguments are passed in registers, whether integer or
+  // floating-point
+
+  CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21],
+                                          [F16, F17, F18, F19, F20, F21]>>,
+
+  CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21],
+                                               [R16, R17, R18, R19, R20, R21]>>,
+
+  CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f64, v4f32],
+           CCAssignToRegWithShadow<[V16, V17, V18, V19, V20, V21],
+                                   [R16, R17, R18, R19, R20, R21]>>,
+
+  // Stack slots are 8 bytes in size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>, + + CCIfSubtarget<"hasSIMD()", + CCIfType<[v32i8, v16i16, v8i32, v4i64, v4f32, v4f64], + CCAssignToReg<[F16, F17, F18, F19, F20, F21]>>> +]>; + +// CalleeSavedRegs +def CSR_I64 : CalleeSavedRegs<(add (sequence "R%u", 9, 14), R15, R26)>; + +def CSR_F64 : CalleeSavedRegs<(add CSR_I64, (sequence "F%u", 2, 9))>; diff --git a/llvm/lib/Target/Sw64/Sw64CombineLS.cpp b/llvm/lib/Target/Sw64/Sw64CombineLS.cpp new file mode 100644 index 000000000000..fbf63b69f7ab --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64CombineLS.cpp @@ -0,0 +1,63 @@ +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64FrameLowering.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "sw_64-combineLS" + +using namespace llvm; + +namespace llvm { + +struct Sw64CombineLS : public MachineFunctionPass { + /// Target machine description which we query for reg. names, data + /// layout, etc. + static char ID; + Sw64CombineLS() : MachineFunctionPass(ID) {} + + StringRef getPassName() const { return "Sw64 Combine Load Store insn"; } + + bool runOnMachineFunction(MachineFunction &F) { + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + NMBBI++; + for (; NMBBI != MBB.end(); MBBI++, NMBBI++) { + + MachineInstr &MI = *MBBI, &NMI = *NMBBI; + DebugLoc DL = MI.getDebugLoc(); + const MCInstrDesc &MCID = NMI.getDesc(); + + if (MI.getOpcode() == Sw64::LDA && + (MCID.mayLoad() || MCID.mayStore())) { + LLVM_DEBUG(dbgs() << "combining Load/Store instr\n"; MI.dump(); + dbgs() << "\n"; NMI.dump(); dbgs() << "\n"); + + if (MI.getOperand(0).getReg() == NMI.getOperand(2).getReg() && + NMI.getOperand(2).getReg() != Sw64::R30) { + BuildMI(MBB, MBBI, DL, MCID) + .add(NMI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MI.getOperand(0)); + NMI.eraseFromParent(); + MI.eraseFromParent(); + } + } + } + } + return true; + } +}; +char Sw64CombineLS::ID = 0; +} // end namespace llvm + +FunctionPass *llvm::createSw64CombineLSPass() { return new Sw64CombineLS(); } diff --git a/llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp b/llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp new file mode 100644 index 000000000000..42a71f72e4a9 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ExpandPseudo.cpp @@ -0,0 +1,1141 @@ +//===-- Sw64ExpandPseudoInsts.cpp - Expand pseudo instructions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling, if-conversion, and other late +// optimizations. This pass should be run after register allocation but before +// the post-regalloc scheduling pass. +// +// This is currently only used for expanding atomic pseudos after register +// allocation. We do this to avoid the fast register allocator introducing +// spills between ll and sc. These stores cause some other implementations to +// abort the atomic RMW sequence. 
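+//
+// Roughly, a 32-bit atomic read-modify-write is expected to expand into an
+// LL/SC retry loop of the following shape (pre-core4 subtargets additionally
+// bracket the store with wr_f/rd_f):
+//
+//   loop:  memb
+//          lldw    old, 0(ptr)        # load-locked
+//          <binop> old, incr, new
+//          lstw    new, 0(ptr)        # store-conditional, new = 0 on failure
+//          beq     new, loop          # retry until the store succeeds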
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64InstrInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-pseudo" +namespace llvm { +extern const MCInstrDesc Sw64Insts[]; +} + +namespace { +class Sw64ExpandPseudo : public MachineFunctionPass { +public: + static char ID; + Sw64ExpandPseudo() : MachineFunctionPass(ID) {} + + const Sw64InstrInfo *TII; + const Sw64Subtarget *STI; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "Sw64 pseudo instruction expansion pass"; + } + +private: + bool expandAtomicCmpSwap(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned Size); + bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandAtomicBinOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, unsigned Size); + bool expandAtomicBinOpSubword(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandCurGpdisp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + bool expandLoadAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLoadCPAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLdihInstPair(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned FlagsHi, unsigned SecondOpcode, + unsigned FlagsLo = Sw64II::MO_GPREL_LO, + unsigned srcReg = Sw64::R29); + + bool expandLoadGotAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB); + + bool expandMBB(MachineBasicBlock &MBB); + bool expandIntReduceSum(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandFPReduceSum(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); +}; +char Sw64ExpandPseudo::ID = 0; +} // namespace + +bool Sw64ExpandPseudo::expandAtomicCmpSwapSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + DebugLoc DL = I->getDebugLoc(); + + unsigned LL, SC, BEQ; + unsigned BIC, BIS; + unsigned EXTL, INSL, MASKL; + unsigned mask; + BIS = Sw64::BISr; + BIC = Sw64::BICi; + BEQ = Sw64::BEQ; + LL = Sw64 ::LDQ_L; + SC = Sw64::STQ_C; + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register OldVal = I->getOperand(2).getReg(); + Register NewVal = I->getOperand(3).getReg(); + // add + Register Reg_bic = I->getOperand(4).getReg(); + Register Reg_ins = I->getOperand(5).getReg(); + Register LockVal = I->getOperand(6).getReg(); + Register Reg_cmp = I->getOperand(7).getReg(); + 
Register Reg_mas = I->getOperand(8).getReg(); + switch (I->getOpcode()) { + case Sw64::ATOMIC_CMP_SWAP_I8_POSTRA: + mask = 1; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_CMP_SWAP_I16_POSTRA: + mask = 3; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + default: + llvm_unreachable("Unknown pseudo atomic!"); + } + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(exitMBB); + loopMBB->normalizeSuccProbs(); + + // memb + BuildMI(loopMBB, DL, TII->get(Sw64::MB)); + + // bic + BuildMI(loopMBB, DL, TII->get(BIC), Reg_bic).addReg(Ptr).addImm(7); + + // inslh + BuildMI(loopMBB, DL, TII->get(INSL), Reg_ins).addReg(NewVal).addReg(Ptr); + + // lldl + BuildMI(loopMBB, DL, TII->get(LL), LockVal).addImm(0).addReg(Reg_bic); + + // extlh + BuildMI(loopMBB, DL, TII->get(EXTL), Dest).addReg(LockVal).addReg(Ptr); + + // cmpeq + // zapnot + BuildMI(loopMBB, DL, TII->get(Sw64::ZAPNOTi), OldVal) + .addReg(OldVal) + .addImm(mask); + BuildMI(loopMBB, DL, TII->get(Sw64::ZAPNOTi), Dest).addReg(Dest).addImm(mask); + BuildMI(loopMBB, DL, TII->get(Sw64::CMPEQr), Reg_cmp) + .addReg(OldVal) + .addReg(Dest); + + if (STI->hasCore4()) + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); + else + // wr_f + BuildMI(loopMBB, DL, TII->get(Sw64::WR_F)).addReg(Reg_cmp); + + // masklh + BuildMI(loopMBB, DL, TII->get(MASKL), Reg_mas).addReg(LockVal).addReg(Ptr); + + // bis + BuildMI(loopMBB, DL, TII->get(BIS), Reg_ins).addReg(Reg_mas).addReg(Reg_ins); + + // lstw + BuildMI(loopMBB, DL, TII->get(SC)).addReg(Reg_ins).addImm(0).addReg(Reg_bic); + + if (!STI->hasCore4()) + // rd_f + BuildMI(loopMBB, DL, TII->get(Sw64::RD_F)).addReg(Reg_ins); + + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); + + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Reg_ins).addMBB(loopMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); // The instruction is gone now. 
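+  // The loop and exit blocks were created after register allocation, so
+  // their live-in lists have to be recomputed by hand below.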
+ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loopMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + return true; +} + +bool Sw64ExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, + unsigned Size) { + MachineFunction *MF = BB.getParent(); + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC; + unsigned BEQ = Sw64::BEQ; + + if (Size == 4) { + LL = Sw64 ::LDL_L; + SC = Sw64::STL_C; + } else { + LL = Sw64::LDQ_L; + SC = Sw64::STQ_C; + } + + Register Dest = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register OldVal = I->getOperand(2).getReg(); + Register NewVal = I->getOperand(3).getReg(); + Register Scratch = I->getOperand(4).getReg(); + // add + Register Reg_cmp = I->getOperand(5).getReg(); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loop1MBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), &BB, + std::next(MachineBasicBlock::iterator(I)), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB.addSuccessor(loop1MBB, BranchProbability::getOne()); + + loop1MBB->addSuccessor(loop1MBB); + loop1MBB->addSuccessor(exitMBB); + loop1MBB->normalizeSuccProbs(); + + // memb + BuildMI(loop1MBB, DL, TII->get(Sw64::MB)); + + // ldi + BuildMI(loop1MBB, DL, TII->get(Sw64::LDA), Ptr).addImm(0).addReg(Ptr); + + // lldw + BuildMI(loop1MBB, DL, TII->get(LL), Dest).addImm(0).addReg(Ptr); + + // zapnot + if (Size == 4) { + BuildMI(loop1MBB, DL, TII->get(Sw64::ZAPNOTi), OldVal) + .addReg(OldVal) + .addImm(15); + BuildMI(loop1MBB, DL, TII->get(Sw64::ZAPNOTi), Dest) + .addReg(Dest) + .addImm(15); + } + + // cmpeq + BuildMI(loop1MBB, DL, TII->get(Sw64::CMPEQr)) + .addReg(Reg_cmp) + .addReg(OldVal) + .addReg(Dest); + + if (STI->hasCore4()) + // beq + BuildMI(loop1MBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); + else + // wr_f + BuildMI(loop1MBB, DL, TII->get(Sw64::WR_F)).addReg(Reg_cmp); + + // mov + BuildMI(loop1MBB, DL, TII->get(Sw64::BISr), Scratch) + .addReg(NewVal) + .addReg(NewVal); + + // lstw + BuildMI(loop1MBB, DL, TII->get(SC)).addReg(Scratch).addImm(0).addReg(Ptr); + + if (!STI->hasCore4()) + // rd_f + BuildMI(loop1MBB, DL, TII->get(Sw64::RD_F)).addReg(Scratch); + + // beq + BuildMI(loop1MBB, DL, TII->get(BEQ)).addReg(Reg_cmp).addMBB(exitMBB); + + BuildMI(loop1MBB, DL, TII->get(BEQ)).addReg(Scratch).addMBB(loop1MBB); + + NMBBI = BB.end(); + I->eraseFromParent(); // The instruction is gone now. 
+ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + return true; +} + +bool Sw64ExpandPseudo::expandAtomicBinOpSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC, ZERO, BEQ; + unsigned EXTL, INSL, MASKL; + + unsigned WR_F, RD_F, LDA, BIS, BIC; + WR_F = Sw64::WR_F; + RD_F = Sw64::RD_F; + LDA = Sw64::LDA; + BIS = Sw64::BISr; + BIC = Sw64::BICi; + LL = Sw64::LDQ_L; + SC = Sw64::STQ_C; + ZERO = Sw64::R31; + BEQ = Sw64::BEQ; + + Register OldVal = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Incr = I->getOperand(2).getReg(); + Register StoreVal = I->getOperand(3).getReg(); + // add + Register LockVal = I->getOperand(4).getReg(); + Register Reg_bic = I->getOperand(5).getReg(); + Register cmpres = I->getOperand(6).getReg(); + + unsigned Opcode = 0; + switch (I->getOpcode()) { + case Sw64::ATOMIC_LOAD_ADD_I8_POSTRA: + Opcode = Sw64::ADDLr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_SUB_I8_POSTRA: + Opcode = Sw64::SUBLr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_AND_I8_POSTRA: + Opcode = Sw64::ANDr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_OR_I8_POSTRA: + Opcode = Sw64::BISr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_XOR_I8_POSTRA: + Opcode = Sw64::XORr; + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_SWAP_I8_POSTRA: + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_ADD_I16_POSTRA: + Opcode = Sw64::ADDQr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_SUB_I16_POSTRA: + Opcode = Sw64::SUBQr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_AND_I16_POSTRA: + Opcode = Sw64::ANDr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_OR_I16_POSTRA: + Opcode = Sw64::BISr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_XOR_I16_POSTRA: + Opcode = Sw64::XORr; + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_SWAP_I16_POSTRA: + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + case Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I8_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I8_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I8_POSTRA: + EXTL = Sw64::EXTLBr; + INSL = Sw64::INSLBr; + MASKL = Sw64::MASKLBr; + break; + case Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I16_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I16_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I16_POSTRA: + EXTL = Sw64::EXTLHr; + INSL = Sw64::INSLHr; + MASKL = Sw64::MASKLHr; + break; + default: + llvm_unreachable("Unknown pseudo atomic!"); + } + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It 
= ++BB.getIterator(); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(exitMBB); + loopMBB->normalizeSuccProbs(); + + // memb + BuildMI(loopMBB, DL, TII->get(Sw64::MB)); + + // bic + BuildMI(loopMBB, DL, TII->get(BIC), Reg_bic).addReg(Ptr).addImm(7); + + // lldl + BuildMI(loopMBB, DL, TII->get(LL), LockVal).addImm(0).addReg(Reg_bic); + + // ldi + BuildMI(loopMBB, DL, TII->get(LDA), StoreVal).addImm(1).addReg(ZERO); + + if (!STI->hasCore4()) + // wr_f + BuildMI(loopMBB, DL, TII->get(WR_F)).addReg(StoreVal); + + // extlh + BuildMI(loopMBB, DL, TII->get(EXTL), OldVal).addReg(LockVal).addReg(Ptr); + + BuildMI(loopMBB, DL, TII->get(EXTL), OldVal).addReg(LockVal).addReg(Ptr); + + // BinOpcode + // Use a tmp reg since the src and dst reg of ORNOT op shall not be the same + // one for unknown reason. + switch (I->getOpcode()) { + case Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? OldVal : + // Incr + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_MAX_I8_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, OldVal, Incr, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : + // OldVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_MIN_I8_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, Incr, OldVal, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_NAND_I8_POSTRA: + // and OldVal, Incr, andres + // ornot andres, 0, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) + .addReg(Sw64::R31) + .addReg(cmpres); + break; + case Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? 
OldVal : + // Incr + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_MAX_I16_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, OldVal, Incr, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : + // OldVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_MIN_I16_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, Incr, OldVal, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_NAND_I16_POSTRA: + // and OldVal, Incr, andres + // ornot andres, 0, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) + .addReg(Sw64::R31) + .addReg(cmpres); + break; + default: + if (Opcode) { + BuildMI(loopMBB, DL, TII->get(Opcode), StoreVal) + .addReg(OldVal) + .addReg(Incr); + } else { + BuildMI(loopMBB, DL, TII->get(Sw64::BISr), StoreVal) + .addReg(Incr) + .addReg(Incr); + } + } + + // inslh + BuildMI(loopMBB, DL, TII->get(INSL), StoreVal).addReg(StoreVal).addReg(Ptr); + + // masklh + BuildMI(loopMBB, DL, TII->get(MASKL), LockVal).addReg(LockVal).addReg(Ptr); + + // bis + BuildMI(loopMBB, DL, TII->get(BIS), LockVal).addReg(LockVal).addReg(StoreVal); + + // lstl + BuildMI(loopMBB, DL, TII->get(SC)).addReg(LockVal).addImm(0).addReg(Reg_bic); + + if (!STI->hasCore4()) + // rd_f + BuildMI(loopMBB, DL, TII->get(RD_F)).addReg(LockVal); + + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(LockVal).addMBB(loopMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); // The instruction is gone now. 
+ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loopMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + return true; +} + +bool Sw64ExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, + unsigned Size) { + MachineFunction *MF = BB.getParent(); + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC; + unsigned LDA = Sw64::LDA; + unsigned ZERO = Sw64::R31; + unsigned BEQ = Sw64::BEQ; + + if (Size == 4) { + LL = Sw64::LDL_L; + SC = Sw64::STL_C; + } else { + LL = Sw64::LDQ_L; + SC = Sw64::STQ_C; + } + + Register OldVal = I->getOperand(0).getReg(); + Register Ptr = I->getOperand(1).getReg(); + Register Incr = I->getOperand(2).getReg(); + Register StoreVal = I->getOperand(3).getReg(); + Register Scratch1 = I->getOperand(4).getReg(); + Register cmpres = I->getOperand(5).getReg(); + + unsigned Opcode = 0; + switch (I->getOpcode()) { + case Sw64::ATOMIC_LOAD_ADD_I32_POSTRA: + Opcode = Sw64::ADDLr; + break; + case Sw64::ATOMIC_LOAD_SUB_I32_POSTRA: + Opcode = Sw64::SUBLr; + break; + case Sw64::ATOMIC_LOAD_AND_I32_POSTRA: + Opcode = Sw64::ANDr; + break; + case Sw64::ATOMIC_LOAD_OR_I32_POSTRA: + Opcode = Sw64::BISr; + break; + case Sw64::ATOMIC_LOAD_XOR_I32_POSTRA: + Opcode = Sw64::XORr; + break; + case Sw64::ATOMIC_SWAP_I32_POSTRA: + break; + case Sw64::ATOMIC_LOAD_ADD_I64_POSTRA: + Opcode = Sw64::ADDQr; + break; + case Sw64::ATOMIC_LOAD_SUB_I64_POSTRA: + Opcode = Sw64::SUBQr; + break; + case Sw64::ATOMIC_LOAD_AND_I64_POSTRA: + Opcode = Sw64::ANDr; + break; + case Sw64::ATOMIC_LOAD_OR_I64_POSTRA: + Opcode = Sw64::BISr; + break; + case Sw64::ATOMIC_LOAD_XOR_I64_POSTRA: + Opcode = Sw64::XORr; + break; + case Sw64::ATOMIC_SWAP_I64_POSTRA: + break; + case Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I32_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I32_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I32_POSTRA: + + case Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I64_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I64_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I64_POSTRA: + break; + default: + llvm_unreachable("Unknown pseudo atomic!"); + } + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(loopMBB); + loopMBB->addSuccessor(exitMBB); + loopMBB->normalizeSuccProbs(); + + // memb + BuildMI(loopMBB, DL, TII->get(Sw64::MB)); + + // ldi + BuildMI(loopMBB, DL, TII->get(Sw64::LDA), Ptr).addImm(0).addReg(Ptr); + + // lldw + BuildMI(loopMBB, DL, TII->get(LL), OldVal).addImm(0).addReg(Ptr); + + // ldi + BuildMI(loopMBB, DL, TII->get(LDA), Scratch1).addImm(1).addReg(ZERO); + + if (!STI->hasCore4()) + // wr_f + BuildMI(loopMBB, DL, TII->get(Sw64::WR_F)).addReg(Scratch1); + + // BinOpcode + + // Use a tmp reg since the src and dst reg of ORNOT op shall not be the same + // one for unknown reason. 
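+  // In the NAND cases below, cmpres serves as that temporary: it receives
+  // OldVal & Incr, and the following ORNOTr against the zero register R31
+  // yields its complement, i.e. StoreVal = ~(OldVal & Incr).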
+ switch (I->getOpcode()) { + case Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? OldVal : + // Incr + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_MAX_I64_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, OldVal, Incr, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? Incr : + // OldVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_MIN_I64_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, Incr, OldVal, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_NAND_I64_POSTRA: + // and OldVal, Incr, cmpres + // ornot cmpres, 0, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) + .addReg(Sw64::R31) + .addReg(cmpres); + break; + case Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, OldVal, Incr, StoreVal -- StoreVal = cmpres == 0 ? OldVal : + // Incr + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_MAX_I32_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, OldVal, Incr, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(OldVal) + .addReg(Incr); + break; + case Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA: + // cmpult OldVal, Incr, cmpres -- cmpres = OldVal < Incr ? 1 : 0 + // seleq cmpres, Incr, OldVal, StoreVal -- StoreVal = cmpres == 0 ? 
Incr : + // OldVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPULTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_MIN_I32_POSTRA: + // cmplt OldVal, Incr, cmpres + // seleq cmpres, Incr, OldVal, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::CMPLTr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::SELEQr), StoreVal) + .addReg(cmpres) + .addReg(Incr) + .addReg(OldVal); + break; + case Sw64::ATOMIC_LOAD_NAND_I32_POSTRA: + // and OldVal, Incr, cmpres + // ornot cmpres, 0, StoreVal + BuildMI(loopMBB, DL, TII->get(Sw64::ANDr), cmpres) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Sw64::ORNOTr), StoreVal) + .addReg(Sw64::R31) + .addReg(cmpres); + break; + default: + if (Opcode) { + BuildMI(loopMBB, DL, TII->get(Opcode), StoreVal) + .addReg(OldVal) + .addReg(Incr); + } else { + BuildMI(loopMBB, DL, TII->get(Sw64::BISr), StoreVal) + .addReg(Incr) + .addReg(Incr); + } + } + + // lstw + BuildMI(loopMBB, DL, TII->get(SC)).addReg(StoreVal).addImm(0).addReg(Ptr); + + if (!STI->hasCore4()) + // rd_f + BuildMI(loopMBB, DL, TII->get(Sw64::RD_F)).addReg(StoreVal); + + // beq + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(StoreVal).addMBB(loopMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); // The instruction is gone now. + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loopMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + return true; +} + +bool Sw64ExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB) { + + bool Modified = false; + + switch (MBBI->getOpcode()) { + case Sw64::ATOMIC_CMP_SWAP_I32_POSTRA: + return expandAtomicCmpSwap(MBB, MBBI, NMBB, 4); + case Sw64::ATOMIC_CMP_SWAP_I64_POSTRA: + return expandAtomicCmpSwap(MBB, MBBI, NMBB, 8); + + case Sw64::ATOMIC_CMP_SWAP_I8_POSTRA: + case Sw64::ATOMIC_CMP_SWAP_I16_POSTRA: + return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB); + + case Sw64::ATOMIC_SWAP_I8_POSTRA: + case Sw64::ATOMIC_SWAP_I16_POSTRA: + case Sw64::ATOMIC_LOAD_ADD_I8_POSTRA: + case Sw64::ATOMIC_LOAD_ADD_I16_POSTRA: + case Sw64::ATOMIC_LOAD_SUB_I8_POSTRA: + case Sw64::ATOMIC_LOAD_SUB_I16_POSTRA: + case Sw64::ATOMIC_LOAD_AND_I8_POSTRA: + case Sw64::ATOMIC_LOAD_AND_I16_POSTRA: + case Sw64::ATOMIC_LOAD_OR_I8_POSTRA: + case Sw64::ATOMIC_LOAD_OR_I16_POSTRA: + case Sw64::ATOMIC_LOAD_XOR_I8_POSTRA: + case Sw64::ATOMIC_LOAD_XOR_I16_POSTRA: + + case Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I16_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I16_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I16_POSTRA: + case Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I8_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I8_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I8_POSTRA: + return expandAtomicBinOpSubword(MBB, MBBI, NMBB); + + case Sw64::ATOMIC_LOAD_ADD_I32_POSTRA: + case Sw64::ATOMIC_LOAD_SUB_I32_POSTRA: + case Sw64::ATOMIC_LOAD_AND_I32_POSTRA: + case Sw64::ATOMIC_LOAD_OR_I32_POSTRA: + case Sw64::ATOMIC_LOAD_XOR_I32_POSTRA: + case Sw64::ATOMIC_SWAP_I32_POSTRA: + case Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I32_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I32_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I32_POSTRA: + return expandAtomicBinOp(MBB, MBBI, NMBB, 4); + + case Sw64::ATOMIC_LOAD_ADD_I64_POSTRA: + 
case Sw64::ATOMIC_LOAD_SUB_I64_POSTRA: + case Sw64::ATOMIC_LOAD_AND_I64_POSTRA: + case Sw64::ATOMIC_LOAD_OR_I64_POSTRA: + case Sw64::ATOMIC_LOAD_XOR_I64_POSTRA: + case Sw64::ATOMIC_SWAP_I64_POSTRA: + case Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA: + case Sw64::ATOMIC_LOAD_MAX_I64_POSTRA: + case Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA: + case Sw64::ATOMIC_LOAD_MIN_I64_POSTRA: + case Sw64::ATOMIC_LOAD_NAND_I64_POSTRA: + return expandAtomicBinOp(MBB, MBBI, NMBB, 8); + case Sw64::MOVProgPCGp: + case Sw64::MOVaddrPCGp: + return expandCurGpdisp(MBB, MBBI); + case Sw64::LOADlitSym: + case Sw64::LOADlit: + return expandLoadGotAddress(MBB, MBBI, NMBB); + case Sw64::LOADconstant: + return expandLoadCPAddress(MBB, MBBI, NMBB); + case Sw64::MOVaddrCP: + case Sw64::MOVaddrBA: + case Sw64::MOVaddrGP: + case Sw64::MOVaddrEXT: + case Sw64::MOVaddrJT: + return expandLoadAddress(MBB, MBBI, NMBB); + default: + return Modified; + } +} + +bool Sw64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool Sw64ExpandPseudo::expandCurGpdisp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + MachineOperand addr = MI.getOperand(0); + MachineOperand dstReg = MI.getOperand(2); + + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), Sw64::R29) + .addGlobalAddress(addr.getGlobal(), 0, Sw64II::MO_GPDISP_HI) + .add(dstReg); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDA), Sw64::R29) + .addGlobalAddress(addr.getGlobal(), 0, Sw64II::MO_GPDISP_LO) + .addReg(Sw64::R29); + + MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo::expandLoadCPAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, + Sw64::LDL); +} + +bool Sw64ExpandPseudo::expandLoadGotAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + LLVM_DEBUG(dbgs() << "expand Loadlit LoadlitSym" << *MBBI); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + unsigned DestReg = MI.getOperand(0).getReg(); + const MachineOperand &Symbol = MI.getOperand(1); + + MachineFunction *MF = MBB.getParent(); + switch (MF->getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model for lowering"); + case CodeModel::Small: { + if (Symbol.isSymbol()) + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + else + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + break; + } + + case CodeModel::Medium: { + if (Symbol.isSymbol()) { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(DestReg); + } else { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL) + .addReg(DestReg); + } + break; + } + } + 
MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo::expandLoadAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, + Sw64::LDA); +} + +bool Sw64ExpandPseudo::expandLdihInstPair(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned FlagsHi, + unsigned SecondOpcode, + unsigned FlagsLo, unsigned srcReg) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + unsigned DestReg = MI.getOperand(0).getReg(); + const MachineOperand &Symbol = MI.getOperand(1); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .add(Symbol) + .addReg(srcReg); + MachineInstrBuilder MIB1 = + BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg) + .add(Symbol) + .addReg(DestReg); + + MachineInstr *tmpInst = MIB.getInstr(); + MachineInstr *tmpInst1 = MIB1.getInstr(); + + MachineOperand &SymbolHi = tmpInst->getOperand(1); + MachineOperand &SymbolLo = tmpInst1->getOperand(1); + + SymbolHi.addTargetFlag(FlagsHi); + SymbolLo.addTargetFlag(FlagsLo); + + MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + STI = &static_cast(MF.getSubtarget()); + TII = STI->getInstrInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; + ++MFI) + Modified |= expandMBB(*MFI); + + if (Modified) + MF.RenumberBlocks(); + + return Modified; +} + +/// createSw64ExpandPseudoPass - returns an instance of the pseudo instruction +/// expansion pass. +FunctionPass *llvm::createSw64ExpandPseudoPass() { + return new Sw64ExpandPseudo(); +} diff --git a/llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp b/llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp new file mode 100644 index 000000000000..550c2f52036f --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ExpandPseudo2.cpp @@ -0,0 +1,334 @@ +//===-- Sw64ExpandPseudoInsts.cpp - Expand pseudo instructions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling, if-conversion, and other late +// optimizations. This pass should be run after register allocation but before +// the post-regalloc scheduling pass. +// +// This is currently only used for expanding atomic pseudos after register +// allocation. We do this to avoid the fast register allocator introducing +// spills between ll and sc. These stores cause some other implementations to +// abort the atomic RMW sequence. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64InstrInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-pseudo2" +namespace llvm { +extern const MCInstrDesc Sw64Insts[]; +} + +static cl::opt + ExpandPre("expand-presched", + cl::desc("Expand pseudo Inst before PostRA schedule"), + cl::init(true), cl::Hidden); + +namespace { +class Sw64ExpandPseudo2 : public MachineFunctionPass { +public: + static char ID; + Sw64ExpandPseudo2() : MachineFunctionPass(ID) {} + + const Sw64InstrInfo *TII; + const Sw64Subtarget *STI; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "Sw64 pseudo instruction expansion pass2"; + } + +private: + bool expandPseudoCall(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLoadAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLoadCPAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandLdihInstPair(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned FlagsHi, unsigned SecondOpcode, + unsigned FlagsLo = Sw64II::MO_GPREL_LO, + unsigned srcReg = Sw64::R29); + + bool expandLoadGotAddress(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB); + + bool expandMBB(MachineBasicBlock &MBB); +}; +char Sw64ExpandPseudo2::ID = 0; +} // namespace + +bool Sw64ExpandPseudo2::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB) { + bool Modified = false; + + if (ExpandPre) { + switch (MBBI->getOpcode()) { + case Sw64::LOADlitSym: + case Sw64::LOADlit: + return expandLoadGotAddress(MBB, MBBI, NMBB); + case Sw64::LOADconstant: + return expandLoadCPAddress(MBB, MBBI, NMBB); + case Sw64::MOVaddrCP: + case Sw64::MOVaddrBA: + case Sw64::MOVaddrGP: + case Sw64::MOVaddrEXT: + case Sw64::MOVaddrJT: + return expandLoadAddress(MBB, MBBI, NMBB); + case Sw64::PseudoCall: + return expandPseudoCall(MBB, MBBI, NMBB); + default: + return Modified; + } + } else { + switch (MBBI->getOpcode()) { + case Sw64::PseudoCall: + return expandPseudoCall(MBB, MBBI, NMBB); + default: + return Modified; + } + } +} + +bool Sw64ExpandPseudo2::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool Sw64ExpandPseudo2::expandLoadCPAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, + Sw64::LDL); +} + +bool 
Sw64ExpandPseudo2::expandLoadAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + return expandLdihInstPair(MBB, MBBI, NextMBBI, Sw64II::MO_GPREL_HI, + Sw64::LDA); +} + +bool Sw64ExpandPseudo2::expandLdihInstPair( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi, + unsigned SecondOpcode, unsigned FlagsLo, unsigned srcReg) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + unsigned DestReg = MI.getOperand(0).getReg(); + const MachineOperand &Symbol = MI.getOperand(1); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .add(Symbol) + .addReg(srcReg); + MachineInstrBuilder MIB1 = + BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg) + .add(Symbol) + .addReg(DestReg); + + MachineInstr *tmpInst = MIB.getInstr(); + MachineInstr *tmpInst1 = MIB1.getInstr(); + + MachineOperand &SymbolHi = tmpInst->getOperand(1); + MachineOperand &SymbolLo = tmpInst1->getOperand(1); + + SymbolHi.addTargetFlag(FlagsHi); + SymbolLo.addTargetFlag(FlagsLo); + + MI.eraseFromParent(); + return true; +} + +// while expanding call, we can choose adding lituse +// for linker relax or not. Adding flags for sortRelocs +bool Sw64ExpandPseudo2::expandPseudoCall( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + LLVM_DEBUG(dbgs() << "expand PseudoCall" << *MBBI); + + MachineFunction *MF = MBB.getParent(); + const auto &STI = MF->getSubtarget(); + const Sw64FrameLowering *SFL = STI.getFrameLowering(); + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + unsigned Lflags = 0; // load flags + unsigned Cflags = 0; // Call flags + + MachineOperand Symbol = MI.getOperand(0); + switch (MF->getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model for lowering"); + case CodeModel::Small: { + if (Symbol.isGlobal()) { + int64_t Offs = Symbol.getOffset(); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), Offs, + Lflags | Sw64II::MO_LITERAL | + Sw64II::MO_LITERAL_BASE) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) + .addReg(Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), 0, + Cflags | Sw64II::MO_HINT | Sw64II::MO_LITUSE); + } else if (Symbol.isSymbol()) { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + const Sw64TargetLowering *STL = STI.getTargetLowering(); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) + .addReg(Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName()); + } + break; + } + + case CodeModel::Medium: { + if (Symbol.isGlobal()) { + int64_t Offs = Symbol.getOffset(); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), Offs, Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), Offs, + Lflags | Sw64II::MO_LITERAL | + Sw64II::MO_LITERAL_BASE) + .addReg(Sw64::R27); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) + .addReg(Sw64::R27) + .addGlobalAddress(Symbol.getGlobal(), 0, + Cflags | Sw64II::MO_HINT | Sw64II::MO_LITUSE); + } else if (Symbol.isSymbol()) { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, 
DL, TII->get(Sw64::LDL), Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(Sw64::R27); + const Sw64TargetLowering *STL = STI.getTargetLowering(); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::JSR), Sw64::R26) + .addReg(Sw64::R27) + .addExternalSymbol(Symbol.getSymbolName()); + } + break; + } + } + + MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo2::expandLoadGotAddress( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + LLVM_DEBUG(dbgs() << "expand Loadlit LoadlitSym" << *MBBI); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + unsigned DestReg = MI.getOperand(0).getReg(); + const MachineOperand &Symbol = MI.getOperand(1); + + MachineFunction *MF = MBB.getParent(); + switch (MF->getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model for lowering"); + case CodeModel::Small: { + if (Symbol.isSymbol()) + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + else + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL) + .addReg(Sw64::R29); + break; + } + + case CodeModel::Medium: { + if (Symbol.isSymbol()) { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addExternalSymbol(Symbol.getSymbolName(), Sw64II::MO_LITERAL) + .addReg(DestReg); + } else { + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDAH), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL_GOT) + .addReg(Sw64::R29); + BuildMI(MBB, MBBI, DL, TII->get(Sw64::LDL), DestReg) + .addDisp(Symbol, 0, Sw64II::MO_LITERAL) + .addReg(DestReg); + } + break; + } + } + MI.eraseFromParent(); + return true; +} + +bool Sw64ExpandPseudo2::runOnMachineFunction(MachineFunction &MF) { + STI = &static_cast(MF.getSubtarget()); + TII = STI->getInstrInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; + ++MFI) + Modified |= expandMBB(*MFI); + + if (Modified) + MF.RenumberBlocks(); + + return Modified; +} + +/// createSw64ExpandPseudoPass - returns an instance of the pseudo instruction +/// expansion pass. +FunctionPass *llvm::createSw64ExpandPseudo2Pass() { + return new Sw64ExpandPseudo2(); +} diff --git a/llvm/lib/Target/Sw64/Sw64FrameLowering.cpp b/llvm/lib/Target/Sw64/Sw64FrameLowering.cpp new file mode 100644 index 000000000000..9030d8ba99c2 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64FrameLowering.cpp @@ -0,0 +1,456 @@ +//=====- Sw64FrameLowering.cpp - Sw64 Frame Information ------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of TargetFrameLowering class. 
+// +//===----------------------------------------------------------------------===// +#include "Sw64FrameLowering.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64InstrInfo.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetOptions.h" +#include // std::sort + +using namespace llvm; + +cl::opt Sw64PG("pg", cl::desc("Support the pg"), cl::init(false)); + +static long getUpper16(long l) { + long y = l / Sw64::IMM_MULT; + if (l % Sw64::IMM_MULT > Sw64::IMM_HIGH) + ++y; + else if (l % Sw64::IMM_MULT < Sw64::IMM_LOW) + --y; + return y; +} + +static long getLower16(long l) { + long h = getUpper16(l); + return l - h * Sw64::IMM_MULT; +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +// +bool Sw64FrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || + TRI->hasStackRealignment(MF); +} + +// hasReservedCallFrame - Under normal circumstances, when a frame pointer is +// not required, we reserve argument space for call sites in the function +// immediately on entry to the current function. This eliminates the need for +// add/sub sp brackets around call sites. Returns true if the call frame is +// included as part of the stack frame. +bool Sw64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + return !MF.getFrameInfo().hasVarSizedObjects(); +} + +bool Sw64FrameLowering::isLeafProc(MachineFunction &MF) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + return !MRI.isPhysRegUsed(Sw64::R29); +} + +bool Sw64FrameLowering::hasBP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); +} + +void Sw64FrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + + MachineBasicBlock::iterator MBBI = MBB.begin(); // Prolog goes in entry BB + MachineFrameInfo &MFI = MF.getFrameInfo(); + + const Sw64InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + const Sw64RegisterInfo &RegInfo = *static_cast( + MF.getSubtarget().getRegisterInfo()); + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + + // First, compute final stack size. + uint64_t StackSize = MFI.getStackSize(); + + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + + MBB.addLiveIn(Sw64::R27); + int curgpdist = STI.getCurgpdist(); + // Handle GOT offset + // Now sw_64 won't emit this unless it is necessary. 
+ // While it is also useful for DebugInfo test. + if (!isLeafProc(MF)) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::MOVProgPCGp)) + .addGlobalAddress(&(MF.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R27); + + BuildMI(MBB, MBBI, dl, TII.get(Sw64::ALTENT)) + .addGlobalAddress(&(MF.getFunction())); + } + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI.adjustsStack()) + return; + + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, MBBI, dl, TII.get(Sw64::NOP)); + } + if (Sw64PG) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDL), Sw64::R28) + .addExternalSymbol("_mcount") + .addReg(Sw64::R29); + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, MBBI, dl, TII.get(Sw64::NOP)); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::JSR)) + .addReg(Sw64::R28) + .addReg(Sw64::R28) + .addExternalSymbol("_mcount"); + if (!Sw64DeleteNop) + BuildMI(MBB, MBBI, dl, TII.get(Sw64::NOP)); + } else + BuildMI(MBB, MBBI, dl, TII.get(Sw64::JSR)) + .addReg(Sw64::R28) + .addReg(Sw64::R28) + .addExternalSymbol("_mcount"); + } + + unsigned Align = getStackAlignment(); + StackSize = (StackSize + Align - 1) / Align * Align; + + // Update frame info to pretend that this is part of the stack... + MFI.setStackSize(StackSize); + + // adjust stack pointer: r30 -= numbytes + int AdjustStackSize = -StackSize; + if (AdjustStackSize >= Sw64::IMM_LOW) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) + .addImm(AdjustStackSize) + .addReg(Sw64::R30); + } else if (getUpper16(AdjustStackSize) >= Sw64::IMM_LOW) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDAH), Sw64::R30) + .addImm(getUpper16(AdjustStackSize)) + .addReg(Sw64::R30); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) + .addImm(getLower16(AdjustStackSize)) + .addReg(Sw64::R30); + } else { + report_fatal_error("Too big a stack frame at " + Twine(-AdjustStackSize)); + } + + // emit ".cfi_def_cfa_offset StackSize" + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::cfiDefCfaOffset(nullptr, -AdjustStackSize)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + std::vector &CSI = MFI.getCalleeSavedInfo(); + + if (!CSI.empty()) { + // Find the instruction past the last instruction that saves a + // callee-saved register to the stack. + for (unsigned i = 0; i < CSI.size(); ++i) + ++MBBI; + + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. + for (std::vector::const_iterator I = CSI.begin(), + E = CSI.end(); + I != E; ++I) { + int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + unsigned DReg = MRI->getDwarfRegNum(Reg, true); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DReg, Offset)); + + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } + + // if framepointer enabled, set it to point to the stack pointer. 
+ // Now if we need to, save the old FP and set the new + if (hasFP(MF)) { + // This must be the last instr in the prolog + BuildMI(MBB, MBBI, dl, TII.get(Sw64::BISr), Sw64::R15) + .addReg(Sw64::R30) + .addReg(Sw64::R30); + + // emit ".cfi_def_cfa_register $fp" + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(Sw64::R15, true))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + if (RegInfo.hasStackRealignment(MF)) { + // ldi -MaxAlign + // and -MaxAlign for sp + Register VR = MF.getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + + assert((Log2(MFI.getMaxAlign()) < 16) && + "Function's alignment size requirement is not supported."); + int64_t MaxAlign = -(int64_t)MFI.getMaxAlign().value(); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), VR) + .addImm(MaxAlign) + .addReg(Sw64::R31); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::ANDr), Sw64::R30) + .addReg(Sw64::R30) + .addReg(VR); + + if (hasBP(MF)) + // mov $sp, $14 + BuildMI(MBB, MBBI, dl, TII.get(Sw64::BISr), Sw64::R14) + .addReg(Sw64::R30) + .addReg(Sw64::R30); + } + } +} + +void Sw64FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + + MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const Sw64InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + DebugLoc dl = MBBI->getDebugLoc(); + + assert((MBBI->getOpcode() == Sw64::PseudoRet) && + "Can only insert epilog into returning blocks"); + + // Get the number of bytes allocated from the FrameInfo... + uint64_t StackSize = MFI.getStackSize(); + // now if we need to, restore the old FP + if (hasFP(MF)) { + // Find the first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) { + --I; + } + + // copy the FP into the SP (discards allocas) + BuildMI(MBB, I, dl, TII.get(Sw64::BISr), Sw64::R30) + .addReg(Sw64::R15) + .addReg(Sw64::R15); + } + + if (StackSize != 0) { + if (StackSize <= Sw64::IMM_HIGH) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) + .addImm(StackSize) + .addReg(Sw64::R30); + } else if (getUpper16(StackSize) <= Sw64::IMM_HIGH) { + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDAH), Sw64::R30) + .addImm(getUpper16(StackSize)) + .addReg(Sw64::R30); + BuildMI(MBB, MBBI, dl, TII.get(Sw64::LDA), Sw64::R30) + .addImm(getLower16(StackSize)) + .addReg(Sw64::R30); + } else { + report_fatal_error("Too big a stack frame at " + Twine(StackSize)); + } + } +} + +StackOffset +Sw64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + if (MFI.isFixedObjectIndex(FI)) + FrameReg = hasFP(MF) ? Sw64::R15 : Sw64::R30; + else + FrameReg = hasBP(MF) ? Sw64::R14 : Sw64::R30; + + return StackOffset::getFixed(MFI.getObjectOffset(FI) + MFI.getStackSize() - + getOffsetOfLocalArea() + + MFI.getOffsetAdjustment()); +} + +// TODO: must be rewrite. 
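// A minimal sketch of what the spill loop below typically produces for two
// callee-saved GPRs (the frame indices and the store mnemonic are
// illustrative; the real opcode is chosen by storeRegToStackSlot from the
// register class of each saved register):
//   stl $26, <fi#0>($sp)
//   stl $9,  <fi#1>($sp)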
+bool Sw64FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return true; + + const TargetInstrInfo &TII = *STI.getInstrInfo(); + + DebugLoc DL; + if (MI != MBB.end() && !MI->isDebugInstr()) + DL = MI->getDebugLoc(); + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + MBB.addLiveIn(Reg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC, TRI, + Register()); + } + return true; +} + +bool Sw64FrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo(); + bool AtStart = MI == MBB.begin(); + MachineBasicBlock::iterator BeforeI = MI; + if (!AtStart) + --BeforeI; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI, + Register()); + assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert multiple + // instructions. + if (AtStart) + MI = MBB.begin(); + else { + MI = BeforeI; + ++MI; + } + } + return true; +} + +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +MachineBasicBlock::iterator Sw64FrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + + const Sw64InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + + if (!hasReservedCallFrame(MF)) { + // Turn the adjcallstackdown instruction into 'ldi sp,-sp' and the + // adjcallstackup instruction into 'ldi sp,sp' + MachineInstr &Old = *I; + // FIXME: temporary modify the old value is: Old.getOperand(0).getImm(); + uint64_t Amount = Old.getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount + Align - 1) / Align * Align; + + MachineInstr *New; + if (Old.getOpcode() == Sw64::ADJUSTSTACKDOWN) { + New = BuildMI(MF, Old.getDebugLoc(), TII.get(Sw64::LDA), Sw64::R30) + .addImm(-Amount) + .addReg(Sw64::R30); + } else { + assert(Old.getOpcode() == Sw64::ADJUSTSTACKUP); + New = BuildMI(MF, Old.getDebugLoc(), TII.get(Sw64::LDA), Sw64::R30) + .addImm(Amount) + .addReg(Sw64::R30); + } + // Replace the pseudo instruction with a new instruction... + MBB.insert(I, New); + } + } + + return MBB.erase(I); +} + +/// Mark \p Reg and all registers aliasing it in the bitset. +static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, + unsigned Reg) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + SavedRegs.set(*AI); +} + +// TODO: must be rewrite. +void Sw64FrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + // Mark $fp as used if function has dedicated frame pointer. 
+ if (hasFP(MF)) + setAliasRegs(MF, SavedRegs, Sw64::R15); + if (hasBP(MF)) + setAliasRegs(MF, SavedRegs, Sw64::R14); + + // Set scavenging frame index if necessary. + uint64_t MaxSPOffset = estimateStackSize(MF); + + // If there is a variable sized object on the stack, the estimation cannot + // account for it. + if (isIntN(16, MaxSPOffset) && !MF.getFrameInfo().hasVarSizedObjects()) + return; +} + +// Estimate the size of the stack, including the incoming arguments. We need to +// account for register spills, local objects, reserved call frame and incoming +// arguments. This is required to determine the largest possible positive offset +// from $sp so that it can be determined if an emergency spill slot for stack +// addresses is required. +uint64_t Sw64FrameLowering::estimateStackSize(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + + int64_t Size = 0; + + // Iterate over fixed sized objects which are incoming arguments. + for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) + if (MFI.getObjectOffset(I) > 0) + Size += MFI.getObjectSize(I); + + // Conservatively assume all callee-saved registers will be saved. + for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { + unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); + Size = alignTo(Size + RegSize, RegSize); + } + + // Get the size of the rest of the frame objects and any possible reserved + // call frame, accounting for alignment. + return Size + MFI.estimateStackSize(MF); +} + +void Sw64FrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + const Sw64RegisterInfo *RegInfo = + MF.getSubtarget().getRegisterInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &Sw64::GPRCRegClass; + if (!isInt<16>(MFI.estimateStackSize(MF))) { + int RegScavFI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC), + RegInfo->getSpillAlign(*RC), false); + RS->addScavengingFrameIndex(RegScavFI); + } + assert(RS && "requiresRegisterScavenging failed"); +} diff --git a/llvm/lib/Target/Sw64/Sw64FrameLowering.h b/llvm/lib/Target/Sw64/Sw64FrameLowering.h new file mode 100644 index 000000000000..ef0613b44618 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64FrameLowering.h @@ -0,0 +1,82 @@ +//===-- Sw64FrameLowering.h - Frame info for Sw64 Target ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains Sw64 frame information that doesn't fit anywhere else +// cleanly... +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64FRAMELOWERING_H +#define LLVM_LIB_TARGET_SW64_SW64FRAMELOWERING_H + +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class Sw64Subtarget; + +class Sw64FrameLowering : public TargetFrameLowering { + +protected: + const Sw64Subtarget &STI; + +public: + explicit Sw64FrameLowering(const Sw64Subtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0), + STI(sti) { + // Do nothing + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
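  /// As a rough sketch (using the opcodes emitPrologue in
  /// Sw64FrameLowering.cpp above builds, not literal assembler output), a
  /// non-leaf function with a large frame gets:
  ///   MOVProgPCGp / ALTENT                  ; materialize the GP
  ///   LDAH $sp, upper16(-StackSize)($sp)
  ///   LDA  $sp, lower16(-StackSize)($sp)
  ///   CFI_INSTRUCTION def_cfa_offset StackSize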
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + ArrayRef CSI, + const TargetRegisterInfo *TRI) const override; + + bool + restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + MutableArrayRef CSI, + const TargetRegisterInfo *TRI) const override; + + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; + + bool hasFP(const MachineFunction &MF) const override; + bool hasReservedCallFrame(const MachineFunction &MF) const override; + + bool hasBP(const MachineFunction &MF) const; + +private: + void emitMieee(MachineFunction &MF) const; + + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; + + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + + //! Stack slot size (4 bytes) + static int stackSlotSize() { return 4; } + + // Returns true if MF is a leaf procedure. + bool isLeafProc(MachineFunction &MF) const; + +protected: + uint64_t estimateStackSize(const MachineFunction &MF) const; +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp b/llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp new file mode 100644 index 000000000000..6689f7c256d3 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64IEEEConstraint.cpp @@ -0,0 +1,138 @@ +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64FrameLowering.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "sw_64-ieee-contrain" + +using namespace llvm; + +namespace llvm { + +struct Sw64IEEEConstraint : public MachineFunctionPass { + /// Target machine description which we query for reg. names, data + /// layout, etc. 
+ static char ID; + Sw64IEEEConstraint() : MachineFunctionPass(ID) {} + + StringRef getPassName() const { return "Sw64 Add IEEE Contrain"; } + + bool runOnMachineFunction(MachineFunction &F); +}; +char Sw64IEEEConstraint::ID = 0; +} // end namespace llvm + +static bool isNeedIEEEConstraint(unsigned opcode) { + switch (opcode) { + case Sw64::ADDS: + case Sw64::SUBS: + case Sw64::MULS: + case Sw64::DIVS: + case Sw64::FMAS: + case Sw64::FMSS: + case Sw64::FNMAS: + case Sw64::FNMSS: + case Sw64::ADDD: + case Sw64::SUBD: + case Sw64::MULD: + case Sw64::DIVD: + case Sw64::FMAD: + case Sw64::FMSD: + case Sw64::FNMAD: + case Sw64::FNMSD: + case Sw64::CVTQS: + case Sw64::CVTQT: + case Sw64::CVTTQ: + case Sw64::CVTTS: + case Sw64::CVTST: + case Sw64::FCVTWL: + case Sw64::FCVTLW: + case Sw64::VADDS: + case Sw64::VADDD: + case Sw64::VSUBS: + case Sw64::VSUBD: + case Sw64::VMULS: + case Sw64::VMULD: + case Sw64::VDIVS: + case Sw64::VDIVD: + case Sw64::VSQRTS: + case Sw64::VSQRTD: + case Sw64::SQRTSS: + case Sw64::SQRTSD: + case Sw64::CMPTEQ: + case Sw64::CMPTLE: + case Sw64::CMPTLT: + case Sw64::CMPTUN: + case Sw64::VFCMPEQ: + case Sw64::VFCMPLE: + case Sw64::VFCMPLT: + case Sw64::VFCMPUN: + case Sw64::VMAS: + case Sw64::VMAD: + case Sw64::VMSS: + case Sw64::VMSD: + case Sw64::VNMAS: + case Sw64::VNMAD: + case Sw64::VNMSS: + case Sw64::VNMSD: + case Sw64::FSELEQS: + case Sw64::FSELNES: + case Sw64::FSELLTS: + case Sw64::FSELLES: + case Sw64::FSELGTS: + case Sw64::FSELGES: + case Sw64::FSELEQD: + case Sw64::FSELNED: + case Sw64::FSELLTD: + case Sw64::FSELLED: + case Sw64::FSELGTD: + case Sw64::FSELGED: + case Sw64::FCTTDL_G: + case Sw64::FCTTDL_P: + case Sw64::FCTTDL_N: + case Sw64::FCTTDL: + return true; + } + return false; +} + +bool Sw64IEEEConstraint::runOnMachineFunction(MachineFunction &F) { + const Sw64Subtarget &ST = F.getSubtarget(); + if (ST.hasCore4()) + return false; + + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock &MBB = *FI; + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + NMBBI++; + for (; MBBI != MBB.end(); MBBI++) { + if (isNeedIEEEConstraint(MBBI->getOpcode())) { + MachineOperand &MO = MBBI->getOperand(0); + if (MO.isEarlyClobber()) { + LLVM_DEBUG(dbgs() << "getting is EarlyClobber Flag" + << MO.isEarlyClobber() << "\n"; + MBBI->dump()); + continue; + } + + MO.setIsEarlyClobber(); + LLVM_DEBUG(dbgs() << "setting is EarlyClobber Flag" + << MBBI->getOperand(0).isEarlyClobber() << "\n"; + MBBI->dump()); + } + } + } + return true; +} + +FunctionPass *llvm::createSw64IEEEConstraintPass() { + return new Sw64IEEEConstraint(); +} diff --git a/llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp b/llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp new file mode 100644 index 000000000000..d684a9aa25d7 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ISelDAGToDAG.cpp @@ -0,0 +1,1016 @@ +//===-- Sw64ISelDAGToDAG.cpp - Sw64 pattern matching inst selector ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pattern matching instruction selector for Sw64, +// converting from a legalized dag to a Sw64 dag. 
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "sw_64-isel" +#define PASS_NAME "Sw64 DAG->DAG Pattern Instruction Selection" + +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64Subtarget.h" +#include "Sw64TargetMachine.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +#include +using namespace llvm; + +namespace { + +//===--------------------------------------------------------------------===// +/// Sw64DAGToDAGISel - Sw64 specific code to select Sw64 machine +/// instructions for SelectionDAG operations. +class Sw64DAGToDAGISel : public SelectionDAGISel { + const Sw64Subtarget *Subtarget; + + static const int64_t IMM_LOW = -32768; + static const int64_t IMM_HIGH = 32767; + static const int64_t IMM_MULT = 65536; + static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT; + static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT; + + static int64_t get_ldah16(int64_t x) { + int64_t y = x / IMM_MULT; + if (x % IMM_MULT > IMM_HIGH) + ++y; + if (x % IMM_MULT < IMM_LOW) + --y; + return y; + } + + static int64_t get_lda16(int64_t x) { return x - get_ldah16(x) * IMM_MULT; } + + /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot + /// instruction (if not, return 0). Note that this code accepts partial + /// zap masks. For example (and LHS, 1) is a valid zap, as long we know + /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are + /// in checking mode. If LHS is null, we assume that the mask has already + /// been validated before. + uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const { + uint64_t BitsToCheck = 0; + unsigned Result = 0; + for (unsigned i = 0; i != 8; ++i) { + if (((Constant >> 8 * i) & 0xFF) == 0) { + // nothing to do. + } else { + Result |= 1 << i; + if (((Constant >> 8 * i) & 0xFF) == 0xFF) { + // If the entire byte is set, zapnot the byte. + } else if (LHS.getNode() == 0) { + // Otherwise, if the mask was previously validated, we know its okay + // to zapnot this entire byte even though all the bits aren't set. + } else { + // Otherwise we don't know that the it's okay to zapnot this entire + // byte. Only do this iff we can prove that the missing bits are + // already null, so the bytezap doesn't need to really null them. + BitsToCheck |= ~Constant & (0xFFULL << 8 * i); + } + } + } + + // If there are missing bits in a byte (for example, X & 0xEF00), check to + // see if the missing bits (0x1000) are already known zero if not, the zap + // isn't okay to do, as it won't clear all the required bits. 
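      // Illustrative values (not taken from the patch): Constant = 0x0000FF00
      // yields Result = 0b00000010 with nothing in BitsToCheck, while
      // Constant = 0x0000EF00 sets the same Result bit but records 0x1000 in
      // BitsToCheck, so the zap is only usable if bit 12 of LHS is already
      // known to be zero.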
+ if (BitsToCheck && !CurDAG->MaskedValueIsZero( + LHS, APInt(LHS.getValueSizeInBits(), BitsToCheck))) + return 0; + + return Result; + } + + static uint64_t get_zapImm(uint64_t x) { + unsigned build = 0; + for (int i = 0; i != 8; ++i) { + if ((x & 0x00FF) == 0x00FF) + build |= 1 << i; + else if ((x & 0x00FF) != 0) + return 0; + x >>= 8; + } + return build; + } + + static uint64_t getNearPower2(uint64_t x) { + if (!x) + return 0; + unsigned at = __builtin_clzll(x); + uint64_t complow = 1ULL << (63 - at); + uint64_t comphigh = complow << 1; + if (x - complow <= comphigh - x) + return complow; + else + return comphigh; + } + + static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) { + uint64_t y = getNearPower2(x); + if (swap) + return (y - x) == r; + else + return (x - y) == r; + } + +public: + static char ID; + + Sw64DAGToDAGISel() = delete; + + explicit Sw64DAGToDAGISel(Sw64TargetMachine &TM, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(ID, TM, OptLevel), Subtarget(nullptr) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + /// getI64Imm - Return a target constant with the specified value, of type + /// i64. + inline SDValue getI64Imm(int64_t Imm, const SDLoc &dl) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i64); + } + + inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { + return CurDAG->getTargetConstant(Imm, dl, MVT::i32); + } + + static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm); + // Select - Convert the specified operand from a target-independent to a + // target-specific node if it hasn't already been changed. + void Select(SDNode *N) override; + StringRef getPassName() const override { + return "Sw64 DAG->DAG Pattern Instruction Selection"; + } + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector &OutOps) override; + + template + bool SelectAddSubImm(SDValue N, SDValue &Imm) { + return SelectAddSubImm(N, VT, Imm); + } + + bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; + bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, + unsigned OffsetBits, + unsigned ShiftAmount) const; + bool selectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset) const; + bool selectAddrRegImm16(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + /// abs64 - absolute value of a 64-bit int. Not all environments support + /// "abs" on whatever their name for the 64-bit int type is. The absolute + /// value of the largest negative number is undefined, as with "abs". + inline int64_t abs64(int64_t x) { return (x < 0) ? -x : x; } + +// Include the pieces autogenerated from the target description. +#include "Sw64GenDAGISel.inc" + +private: + /// getTargetMachine - Return a reference to the TargetMachine, casted + /// to the target-specific type. + const Sw64TargetMachine &getTargetMachine() { + return static_cast(TM); + } + + bool SelectAddSubImm(SDValue N, MVT VT, SDValue &Imm); + bool SelectComplexImm(SDValue N, SDValue &Imm); + + SDNode *getGlobalBaseReg(); + SDNode *getGlobalRetAddr(); + void SelectCALL(SDNode *Op); + bool tryIndexedLoad(SDNode *N); + bool tryIndexedStore(SDNode *N); + bool selectSExti32(SDValue N, SDValue &Val); + bool selectZExti32(SDValue N, SDValue &Val); + + /// Select constant vector splats. 
+ bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; + /// Select constant vector splats whose value fits in a given integer. + bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const; + /// Select constant vector splats whose value fits in a uimm8. + bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; + + bool selectVSplatSimm8(SDValue N, SDValue &Imm) const; + bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + bool selectIntAddrSImm16(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + bool selectIntAddrSImm12(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + bool SelectAddrFI(SDValue Addr, SDValue &Base); +}; +} // end anonymous namespace +char Sw64DAGToDAGISel::ID = 0; + +INITIALIZE_PASS(Sw64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) + +/// getGlobalBaseReg - Output the instructions required to put the +/// GOT address into a register. +/// +SDNode *Sw64DAGToDAGISel::getGlobalBaseReg() { + unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF); + return CurDAG + ->getRegister(GlobalBaseReg, + getTargetLowering()->getPointerTy(CurDAG->getDataLayout())) + .getNode(); +} + +/// getGlobalRetAddr - Grab the return address. +/// +SDNode *Sw64DAGToDAGISel::getGlobalRetAddr() { + unsigned GlobalRetAddr = Subtarget->getInstrInfo()->getGlobalRetAddr(MF); + return CurDAG + ->getRegister(GlobalRetAddr, + getTargetLowering()->getPointerTy(CurDAG->getDataLayout())) + .getNode(); +} + +bool Sw64DAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { + if (auto FIN = dyn_cast(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + return true; + } + + return false; +} + +// Select - Convert the specified operand from a target-independent to a +// target-specific node if it hasn't already been changed. +void Sw64DAGToDAGISel::Select(SDNode *N) { + + // Dump information about the Node being selected + LLVM_DEBUG(errs() << "Selecting: "; N->dump(CurDAG); errs() << "\n"); + + // If we have a custom node, we already have selected! + if (N->isMachineOpcode()) { + LLVM_DEBUG(errs() << "== "; N->dump(CurDAG); errs() << "\n"); + return; + } + SDLoc dl(N); + switch (N->getOpcode()) { + default: + break; + case ISD::LOAD: + if (tryIndexedLoad(N)) + return; + // Other cases are autogenerated. + break; + case ISD::STORE: + if (tryIndexedStore(N)) + return; + // Other cases are autogenerated. + break; + case Sw64ISD::CALL: + SelectCALL(N); + if (N->use_empty()) // Don't delete EntryToken, etc. 
+ CurDAG->RemoveDeadNode(N); + return; + case ISD::FrameIndex: { + assert(N->getValueType(0) == MVT::i64); + int FI = cast(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); + if (N->hasOneUse()) { + N->setDebugLoc((*(N->use_begin()))->getDebugLoc()); + CurDAG->SelectNodeTo(N, Sw64::LDA, MVT::i64, TFI, + CurDAG->getTargetConstant(0, dl, MVT::i64)); + return; + } + ReplaceNode( + N, CurDAG->getMachineNode(Sw64::LDA, dl, MVT::i64, TFI, + CurDAG->getTargetConstant(0, dl, MVT::i64))); + return; + } + case ISD::GLOBAL_OFFSET_TABLE: + ReplaceNode(N, getGlobalBaseReg()); + return; + case Sw64ISD::GlobalRetAddr: + ReplaceNode(N, getGlobalRetAddr()); + return; + + case Sw64ISD::DivCall: { + SDValue Chain = CurDAG->getEntryNode(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R24, N1, SDValue(0, 0)); + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R25, N2, Chain.getValue(1)); + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R27, N0, Chain.getValue(1)); + SDNode *CNode = CurDAG->getMachineNode(Sw64::PseudoCallDiv, dl, MVT::Other, + MVT::Glue, Chain, Chain.getValue(1)); + Chain = CurDAG->getCopyFromReg(Chain, dl, Sw64::R27, MVT::i64, + SDValue(CNode, 1)); + ReplaceNode(N, + CurDAG->getMachineNode(Sw64::BISr, dl, MVT::i64, Chain, Chain)); + return; + } + + case ISD::READCYCLECOUNTER: { + SDValue Chain = N->getOperand(0); + ReplaceNode( + N, CurDAG->getMachineNode(Sw64::RPCC, dl, MVT::i64, MVT::Other, Chain)); + return; + } + + case ISD::Constant: { + auto ConstNode = cast(N); + if (ConstNode->isZero()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + Sw64::R31, MVT::i64); + ReplaceUses(SDValue(N, 0), Result); + return; + } + uint64_t uval = cast(N)->getZExtValue(); + int64_t Imm = ConstNode->getSExtValue(); + int64_t val = Imm; + int32_t val32 = (int32_t)val; + if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT && + val >= IMM_LOW + IMM_LOW * IMM_MULT) + break; //(LDAH (LDA)) + if ((uval >> 32) == 0 && // empty upper bits + val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT) + break; //(zext (LDAH (LDA))) + // Else use the constant pool + + ConstantInt *C = + ConstantInt::get(Type::getInt64Ty(*CurDAG->getContext()), uval); + SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64); + SDNode *Load = + CurDAG->getMachineNode(Sw64::LOADconstant, dl, MVT::i64, CPI); + ReplaceNode(N, Load); + + return; + } + case ISD::TargetConstantFP: + case ISD::ConstantFP: { + ConstantFPSDNode *CN = cast(N); + bool isDouble = N->getValueType(0) == MVT::f64; + EVT T = isDouble ? MVT::f64 : MVT::f32; + if (CN->getValueAPF().isPosZero()) { + ReplaceNode( + N, CurDAG->getMachineNode(isDouble ? Sw64::CPYSD : Sw64::CPYSS, dl, T, + CurDAG->getRegister(Sw64::F31, T), + CurDAG->getRegister(Sw64::F31, T))); + return; + } else if (CN->getValueAPF().isNegZero()) { + ReplaceNode( + N, CurDAG->getMachineNode(isDouble ? 
Sw64::CPYSND : Sw64::CPYSNS, dl, + T, CurDAG->getRegister(Sw64::F31, T), + CurDAG->getRegister(Sw64::F31, T))); + return; + } else { + report_fatal_error("Unhandled FP constant type"); + } + break; + } + + case ISD::SETCC: + if (N->getSimpleValueType(0).SimpleTy == MVT::v4i64) + break; + if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) { + ISD::CondCode CC = cast(N->getOperand(2))->get(); + + unsigned Opc = Sw64::WTF; + bool rev = false; + bool inv = false; + bool ordonly = false; + if (Sw64Mieee) { + switch (CC) { + default: + LLVM_DEBUG(N->dump(CurDAG)); + llvm_unreachable("Unknown FP comparison!"); + case ISD::SETEQ: + case ISD::SETOEQ: + case ISD::SETUEQ: + Opc = Sw64::CMPTEQ; + break; + case ISD::SETLT: + case ISD::SETOLT: + case ISD::SETULT: + Opc = Sw64::CMPTLT; + break; + case ISD::SETLE: + case ISD::SETOLE: + case ISD::SETULE: + Opc = Sw64::CMPTLE; + break; + case ISD::SETGT: + case ISD::SETOGT: + case ISD::SETUGT: + Opc = Sw64::CMPTLT; + rev = true; + break; + case ISD::SETGE: + case ISD::SETOGE: + case ISD::SETUGE: + Opc = Sw64::CMPTLE; + rev = true; + break; + case ISD::SETNE: + case ISD::SETONE: + case ISD::SETUNE: + Opc = Sw64::CMPTEQ; + inv = true; + break; + case ISD::SETO: + Opc = Sw64::CMPTUN; + inv = true; + ordonly = true; + break; + case ISD::SETUO: + Opc = Sw64::CMPTUN; + ordonly = true; + break; + }; + + /* + unordered: + FCMPUN $f1, $f2, $f3 + FCMPxx $f1, $f2, $f3 + FSELNE $f3, $f3, $f4, $f4 + + ordered: + FCMPUN $f1, $f2, $f3 + FCMPxx $f1, $f2, $f3 + FSELEQ $f3, $f4, $f31, $f4 + + SETO/SETUO: + FCMPxx $f1, $f2, $f3 + */ + bool ordered = true; + switch (CC) { + case ISD::SETUEQ: + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + ordered = false; + break; + default: + break; + } + SDValue opr0 = N->getOperand(rev ? 1 : 0); + SDValue opr1 = N->getOperand(rev ? 0 : 1); + SDNode *cmpu = + CurDAG->getMachineNode(Sw64::CMPTUN, dl, MVT::f64, opr0, opr1); + SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, opr0, opr1); + if (inv) + cmp = CurDAG->getMachineNode( + Sw64::CMPTEQ, dl, MVT::f64, SDValue(cmp, 0), + CurDAG->getRegister(Sw64::F31, MVT::f64)); + + SDNode *sel = NULL; + if (ordonly) + sel = cmp; + else if (ordered) + sel = CurDAG->getMachineNode(Sw64::FSELEQD, dl, MVT::f64, + CurDAG->getRegister(Sw64::F31, MVT::f64), + SDValue(cmp, 0), SDValue(cmpu, 0)); + else + sel = CurDAG->getMachineNode(Sw64::FSELNED, dl, MVT::f64, + SDValue(cmp, 0), SDValue(cmpu, 0), + SDValue(cmpu, 0)); + + MVT VT = N->getSimpleValueType(0).SimpleTy == MVT::v4i64 ? 
MVT::v4i64 + : MVT::i64; + SDNode *LD = + CurDAG->getMachineNode(Sw64::FTOIT, dl, VT, SDValue(sel, 0)); + + ReplaceNode(N, CurDAG->getMachineNode( + Sw64::CMPULTr, dl, VT, + CurDAG->getRegister(Sw64::R31, VT), SDValue(LD, 0))); + return; + } else { + switch (CC) { + default: + LLVM_DEBUG(N->dump(CurDAG)); + llvm_unreachable("Unknown FP comparison!"); + case ISD::SETEQ: + case ISD::SETOEQ: + case ISD::SETUEQ: + Opc = Sw64::CMPTEQ; + break; + case ISD::SETLT: + case ISD::SETOLT: + case ISD::SETULT: + Opc = Sw64::CMPTLT; + break; + case ISD::SETLE: + case ISD::SETOLE: + case ISD::SETULE: + Opc = Sw64::CMPTLE; + break; + case ISD::SETGT: + case ISD::SETOGT: + case ISD::SETUGT: + Opc = Sw64::CMPTLT; + rev = true; + break; + case ISD::SETGE: + case ISD::SETOGE: + case ISD::SETUGE: + Opc = Sw64::CMPTLE; + rev = true; + break; + case ISD::SETNE: + case ISD::SETONE: + case ISD::SETUNE: + Opc = Sw64::CMPTEQ; + inv = true; + break; + case ISD::SETO: + Opc = Sw64::CMPTUN; + inv = true; + break; + case ISD::SETUO: + Opc = Sw64::CMPTUN; + break; + }; + SDValue tmp1 = N->getOperand(rev ? 1 : 0); + SDValue tmp2 = N->getOperand(rev ? 0 : 1); + SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2); + if (inv) + cmp = CurDAG->getMachineNode( + Sw64::CMPTEQ, dl, MVT::f64, SDValue(cmp, 0), + CurDAG->getRegister(Sw64::F31, MVT::f64)); + switch (CC) { + case ISD::SETUEQ: + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: { + SDNode *cmp2 = + CurDAG->getMachineNode(Sw64::CMPTUN, dl, MVT::f64, tmp1, tmp2); + cmp = CurDAG->getMachineNode(Sw64::ADDD, dl, MVT::f64, + SDValue(cmp2, 0), SDValue(cmp, 0)); + break; + } + default: + break; + } + SDNode *LD = + CurDAG->getMachineNode(Sw64::FTOIT, dl, MVT::i64, SDValue(cmp, 0)); + + ReplaceNode( + N, CurDAG->getMachineNode(Sw64::CMPULTr, dl, MVT::i64, + CurDAG->getRegister(Sw64::R31, MVT::i64), + SDValue(LD, 0))); + return; + } + } + break; + case ISD::AND: { + ConstantSDNode *SC = NULL; + ConstantSDNode *MC = NULL; + if (N->getOperand(0).getOpcode() == ISD::SRL && + (MC = dyn_cast(N->getOperand(1))) && + (SC = dyn_cast(N->getOperand(0).getOperand(1)))) { + uint64_t sval = SC->getZExtValue(); + uint64_t mval = MC->getZExtValue(); + // If the result is a zap, let the autogened stuff handle it. + if (get_zapImm(N->getOperand(0), mval)) + break; + // given mask X, and shift S, we want to see if there is any zap in the + // mask if we play around with the botton S bits + uint64_t dontcare = (~0ULL) >> (64 - sval); + uint64_t mask = mval << sval; + + if (get_zapImm(mask | dontcare)) + mask = mask | dontcare; + + if (get_zapImm(mask)) { + SDValue Z = + SDValue(CurDAG->getMachineNode(Sw64::ZAPNOTi, dl, MVT::i64, + N->getOperand(0).getOperand(0), + getI64Imm(get_zapImm(mask), dl)), + 0); + ReplaceNode(N, CurDAG->getMachineNode(Sw64::SRLi, dl, MVT::i64, Z, + getI64Imm(sval, dl))); + return; + } + } + break; + } + case ISD::BUILD_VECTOR: { + + BuildVectorSDNode *BVN = cast(N); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT ViaVecTy; + + if (!Subtarget->hasSIMD() || !BVN->getValueType(0).is256BitVector()) + return; + + if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, 8, false)) + break; + } + } + // Select the default instruction + SelectCode(N); +} + +void Sw64DAGToDAGISel::SelectCALL(SDNode *N) { + // TODO: add flag stuff to prevent nondeturministic breakage! 
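  // In outline (a sketch; operand order and glue wiring follow the node
  // builders below, and the mnemonics are illustrative): a GPRelLo callee
  // reloads the GP into $r29 and is reached with a direct BSR, anything else
  // is moved into $r27 and called indirectly:
  //   bsr $r26, callee            ; GP-relative target
  //   jsr $r26, ($r27)            ; all other callees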
+ + SDValue Chain = N->getOperand(0); + SDValue Addr = N->getOperand(1); + SDValue InFlag = N->getOperand(N->getNumOperands() - 1); + SDLoc dl(N); + if (Addr.getOpcode() == Sw64ISD::GPRelLo) { + SDValue GOT = SDValue(getGlobalBaseReg(), 0); + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R29, GOT, InFlag); + InFlag = Chain.getValue(1); + Chain = SDValue(CurDAG->getMachineNode(Sw64::BSR, dl, MVT::Other, MVT::Glue, + Addr.getOperand(0), Chain, InFlag), + 0); + } else { + Chain = CurDAG->getCopyToReg(Chain, dl, Sw64::R27, Addr, InFlag); + InFlag = Chain.getValue(1); + SDValue Ops[] = {Chain, CurDAG->getRegister(Sw64::R27, MVT::i64), + N->getOperand(2), InFlag}; + Chain = SDValue( + CurDAG->getMachineNode(Sw64::JSR, dl, MVT::Other, MVT::Glue, Ops), 0); + } + InFlag = Chain.getValue(1); + + ReplaceUses(SDValue(N, 0), Chain); + ReplaceUses(SDValue(N, 1), InFlag); +} + +/// Match frameindex +bool Sw64DAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + EVT ValTy = Addr.getValueType(); + + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); + return true; + } + return false; +} + +/// Match frameindex+offset and frameindex|offset +bool Sw64DAGToDAGISel::selectAddrFrameIndexOffset( + SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits, + unsigned ShiftAmount = 0) const { + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) { + EVT ValTy = Addr.getValueType(); + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = + dyn_cast(Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else { + Base = Addr.getOperand(0); + // If base is a FI, additional offset calculation is done in + // eliminateFrameIndex, otherwise we need to check the alignment + const Align Alignment(1ULL << ShiftAmount); + if (!isAligned(Alignment, CN->getZExtValue())) + return false; + } + + Offset = + CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), ValTy); + return true; + } + } + return false; +} + +bool Sw64DAGToDAGISel::selectAddrRegImm9(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9)) + return true; + + return false; +} + +bool Sw64DAGToDAGISel::selectAddrRegImm16(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 16)) + return true; + + return false; +} + +bool Sw64DAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { + SDValue Base, Offset; + + switch (ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + // We need to make sure that this one operand does not end up in XZR, thus + // require the address to be in a PointerRegClass register. 
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); + SDLoc dl(Op); + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); + SDValue NewOp = + SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + Op.getValueType(), Op, RC), + 0); + OutOps.push_back(NewOp); + return false; + } + return true; +} + +bool Sw64DAGToDAGISel::tryIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM != ISD::POST_INC) + return false; + SDLoc dl(N); + MVT VT = LD->getMemoryVT().getSimpleVT(); + bool isFloat = false; + unsigned Opcode = 0; + switch (VT.SimpleTy) { + case MVT::i8: + Opcode = Sw64::LDBU_A; + break; + case MVT::i16: + Opcode = Sw64::LDHU_A; + break; + case MVT::i32: + Opcode = Sw64::LDW_A; + break; + case MVT::i64: + Opcode = Sw64::LDL_A; + break; + case MVT::f32: + Opcode = Sw64::LDS_A; + isFloat = true; + break; + case MVT::f64: + Opcode = Sw64::LDD_A; + isFloat = true; + break; + default: + return false; + } + SDValue Offset = LD->getOffset(); + int64_t Inc = cast(Offset.getNode())->getSExtValue(); + ReplaceNode( + N, CurDAG->getMachineNode(Opcode, SDLoc(N), isFloat ? VT : MVT::i64, + MVT::i64, MVT::Other, LD->getBasePtr(), + CurDAG->getTargetConstant(Inc, dl, MVT::i64), + LD->getChain())); + return true; +} + +bool Sw64DAGToDAGISel::tryIndexedStore(SDNode *N) { + StoreSDNode *ST = cast(N); + ISD::MemIndexedMode AM = ST->getAddressingMode(); + if (AM != ISD::POST_INC) + return false; + SDLoc dl(N); + MVT VT = ST->getMemoryVT().getSimpleVT(); + unsigned Opcode = 0; + switch (VT.SimpleTy) { + case MVT::i8: + Opcode = Sw64::STB_A; + break; + case MVT::i16: + Opcode = Sw64::STH_A; + break; + case MVT::i32: + Opcode = Sw64::STW_A; + break; + case MVT::i64: + Opcode = Sw64::STL_A; + break; + case MVT::f32: + Opcode = Sw64::STS_A; + break; + case MVT::f64: + Opcode = Sw64::STD_A; + break; + default: + return false; + } + MachineMemOperand *MemOp = ST->getMemOperand(); + SDValue From[2] = {SDValue(ST, 0), SDValue(ST, 1)}; + SDValue To[2]; + int64_t Inc = cast(ST->getOffset().getNode())->getSExtValue(); + SDValue Ops[] = {ST->getValue(), ST->getBasePtr(), + CurDAG->getTargetConstant(Inc, dl, MVT::i64), + ST->getChain()}; + MachineSDNode *S = + CurDAG->getMachineNode(Opcode, dl, MVT::i64, MVT::Other, Ops); + CurDAG->setNodeMemRefs(S, {MemOp}); + To[0] = SDValue(S, 0); + To[1] = SDValue(S, 1); + ReplaceUses(From, To, 2); + CurDAG->RemoveDeadNode(ST); + return true; +} + +/// ComplexPattern used on Sw64InstrInfo +/// Used on Sw64 Load/Store instructions +bool Sw64DAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); + return true; +} + +// Select constant vector splats. 
+// +// Returns true and sets Imm if: +// * MSA is enabled +// * N is a ISD::BUILD_VECTOR representing a constant splat +bool Sw64DAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, + unsigned MinSizeInBits) const { + BuildVectorSDNode *Node = dyn_cast(N); + + if (!Node) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + MinSizeInBits, false)) + return false; + + Imm = SplatValue; + + return true; +} + +bool Sw64DAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + + if ((Signed && ImmValue.isSignedIntN(ImmBitSize)) || + (!Signed && ImmValue.isIntN(ImmBitSize))) { + Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy); + return true; + } + } + + return false; +} + +// Select constant vector splats. +bool Sw64DAGToDAGISel::selectVSplatSimm8(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, true, 8); +} + +bool Sw64DAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 8); +} + +bool Sw64DAGToDAGISel::selectIntAddrSImm16(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool Sw64DAGToDAGISel::selectIntAddrSImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool Sw64DAGToDAGISel::SelectAddSubImm(SDValue N, MVT VT, SDValue &Imm) { + if (auto CNode = dyn_cast(N)) { + const int64_t ImmVal = CNode->getSExtValue(); + SDLoc DL(N); + + switch (VT.SimpleTy) { + case MVT::i8: + // Can always select i8s, no shift, mask the immediate value to + // deal with sign-extended value from lowering. + if (!isUInt<8>(ImmVal)) + return false; + Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i64); + return true; + case MVT::i16: + // i16 values get sign-extended to 32-bits during lowering. + Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i64); + return true; + break; + case MVT::i32: + case MVT::i64: + return false; + break; + default: + break; + } + } + + return false; +} + +bool Sw64DAGToDAGISel::SelectComplexImm(SDValue N, SDValue &Imm) { + if (auto CNode = dyn_cast(N)) { + const int64_t ImmVal = CNode->getSExtValue(); + SDLoc DL(N); + if (!isUInt<5>(ImmVal)) + return false; + Imm = CurDAG->getTargetConstant(ImmVal & 0x1F, DL, MVT::i64); + return true; + } + return false; +} + +/// createSw64ISelDag - This pass converts a legalized DAG into a +/// Sw64-specific DAG, ready for instruction scheduling. 
+/// +FunctionPass *llvm::createSw64ISelDag(Sw64TargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new Sw64DAGToDAGISel(TM, OptLevel); +} + +bool Sw64DAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { + if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast(N.getOperand(1))->getVT() == MVT::i32) { + Val = N.getOperand(0); + return true; + } + MVT VT = N.getSimpleValueType(); + if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { + Val = N; + return true; + } + + return false; +} + +bool Sw64DAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { + if (N.getOpcode() == ISD::AND) { + auto *C = dyn_cast(N.getOperand(1)); + if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { + Val = N.getOperand(0); + return true; + } + } + MVT VT = N.getSimpleValueType(); + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); + if (CurDAG->MaskedValueIsZero(N, Mask)) { + Val = N; + return true; + } + + return false; +} diff --git a/llvm/lib/Target/Sw64/Sw64ISelLowering.cpp b/llvm/lib/Target/Sw64/Sw64ISelLowering.cpp new file mode 100644 index 000000000000..2cccb036374e --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ISelLowering.cpp @@ -0,0 +1,3984 @@ +//===-- Sw64ISelLowering.cpp - Sw64 DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "Sw64ISelLowering.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64Subtarget.h" +#include "Sw64TargetMachine.h" +#include "Sw64TargetObjectFile.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSw64.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-lower" + +/// AddLiveIn - This helper function adds the specified physical register to the +/// MachineFunction as a live in value. It also creates a corresponding virtual +/// register for it. 
+static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, + const TargetRegisterClass *RC) { + assert(RC->contains(PReg) && "Not the correct regclass!"); + Register VReg = MF.getRegInfo().createVirtualRegister(RC); + MF.getRegInfo().addLiveIn(PReg, VReg); + return VReg; +} + +const char *Sw64TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((Sw64ISD::NodeType)Opcode) { + default: + return 0; + case Sw64ISD::CVTQT_: + return "Sw64::CVTQT_"; + case Sw64ISD::CVTQS_: + return "Sw64::CVTQS_"; + case Sw64ISD::CVTTQ_: + return "Sw64::CVTTQ_"; + case Sw64ISD::CVTST_: + return "Sw64::CVTST_"; + case Sw64ISD::CVTTS_: + return "Sw64::CVTTS_"; + case Sw64ISD::JmpLink: + return "Sw64::JmpLink"; + case Sw64ISD::Ret: + return "Sw64::Ret"; + case Sw64ISD::TPRelLo: + return "Sw64::TPRelLo"; + case Sw64ISD::TPRelHi: + return "Sw64::TPRelHi"; + case Sw64ISD::SysCall: + return "Sw64::SysCall"; + case Sw64ISD::LDAWC: + return "Sw64::Sw64_LDAWC"; + + case Sw64ISD::TLSGD: + return "Sw64::TLSGD"; + case Sw64ISD::DTPRelLo: + return "Sw64::DTPRelLo"; + case Sw64ISD::DTPRelHi: + return "Sw64::DTPRelHi"; + case Sw64ISD::TLSLDM: + return "Sw64::TLSLDM"; + case Sw64ISD::RelGottp: + return "Sw64::RelGottp"; + case Sw64ISD::GPRelHi: + return "Sw64::GPRelHi"; + case Sw64ISD::GPRelLo: + return "Sw64::GPRelLo"; + case Sw64ISD::RelLit: + return "Sw64::RelLit"; + case Sw64ISD::GlobalRetAddr: + return "Sw64::GlobalRetAddr"; + case Sw64ISD::CALL: + return "Sw64::CALL"; + case Sw64ISD::DivCall: + return "Sw64::DivCall"; + case Sw64ISD::RET_FLAG: + return "Sw64::RET_FLAG"; + case Sw64ISD::COND_BRANCH_I: + return "Sw64::COND_BRANCH_I"; + case Sw64ISD::COND_BRANCH_F: + return "Sw64::COND_BRANCH_F"; + case Sw64ISD::MEMBARRIER: + return "Sw64ISD::MEMBARRIER"; + + case Sw64ISD::GPRel: + return "Sw64ISD::GPRel"; + case Sw64ISD::TPRel: + return "Sw64ISD::TPRel"; + case Sw64ISD::DTPRel: + return "Sw64ISD::DTPRel"; + case Sw64ISD::LDIH: + return "Sw64ISD::LDIH"; + case Sw64ISD::LDI: + return "Sw64ISD::LDI"; + + case Sw64ISD::Z_S_FILLCS: + return "Sw64ISD::Z_S_FILLCS"; + case Sw64ISD::Z_S_FILLDE: + return "Sw64ISD::Z_S_FILLDE"; + case Sw64ISD::Z_FILLDE: + return "Sw64ISD::Z_FILLDE"; + case Sw64ISD::Z_FILLDE_E: + return "Sw64ISD::Z_FILLDE_E"; + case Sw64ISD::Z_FILLCS: + return "Sw64ISD::Z_FILLCS"; + case Sw64ISD::Z_FILLCS_E: + return "Sw64ISD::Z_FILLCS_E"; + case Sw64ISD::Z_E_FILLCS: + return "Sw64ISD::Z_E_FILLCS"; + case Sw64ISD::Z_E_FILLDE: + return "Sw64ISD::Z_E_FILLDE"; + case Sw64ISD::Z_FLUSHD: + return "Sw64ISD::Z_FLUSHD"; + + case Sw64ISD::FRECS: + return "Sw64ISD::FRECS"; + case Sw64ISD::FRECD: + return "Sw64ISD::FRECD"; + case Sw64ISD::SBT: + return "Sw64ISD::SBT"; + case Sw64ISD::REVBH: + return "Sw64ISD::REVBH"; + case Sw64ISD::REVBW: + return "Sw64ISD::REVBW"; + + case Sw64ISD::ROLW: + return "Sw64ISD::ROLW"; + case Sw64ISD::CRC32B: + return "Sw64ISD::CRC32B"; + case Sw64ISD::CRC32H: + return "Sw64ISD::CRC32H"; + case Sw64ISD::CRC32W: + return "Sw64ISD::CRC32W"; + case Sw64ISD::CRC32L: + return "Sw64ISD::CRC32L"; + case Sw64ISD::CRC32CB: + return "Sw64ISD::CRC32CB"; + case Sw64ISD::CRC32CH: + return "Sw64ISD::CRC32CH"; + case Sw64ISD::CRC32CW: + return "Sw64ISD::CRC32CW"; + case Sw64ISD::CRC32CL: + return "Sw64ISD::CRC32CL"; + + case Sw64ISD::VLDWE: + return "Sw64ISD::VLDWE"; + case Sw64ISD::VLDSE: + return "Sw64ISD::VLDSE"; + case Sw64ISD::VLDDE: + return "Sw64ISD::VLDDE"; + + case Sw64ISD::VNOR: + return "Sw64ISD::VNOR"; + case Sw64ISD::VEQV: + return "Sw64ISD::VEQV"; + case Sw64ISD::VORNOT: + return 
"Sw64ISD::VORNOT"; + case Sw64ISD::VSHF: + return "Sw64ISD::VSHF"; + case Sw64ISD::SHF: + return "Sw64ISD::SHF"; + case Sw64ISD::ILVEV: + return "Sw64ISD::ILVEV"; + case Sw64ISD::ILVOD: + return "Sw64ISD::ILVOD"; + case Sw64ISD::ILVL: + return "Sw64ISD::ILVL"; + case Sw64ISD::ILVR: + return "Sw64ISD::ILVR"; + case Sw64ISD::PCKEV: + return "Sw64ISD::PCKEV"; + case Sw64ISD::PCKOD: + return "Sw64ISD::PCKOD"; + case Sw64ISD::VMAX: + return "Sw64ISD::VMAX"; + case Sw64ISD::VMIN: + return "Sw64ISD::VMIN"; + case Sw64ISD::VUMAX: + return "Sw64ISD::VUMAX"; + case Sw64ISD::VUMIN: + return "Sw64ISD::VUMIN"; + case Sw64ISD::VFREC: + return "Sw64ISD::VFREC"; + case Sw64ISD::VFCMPEQ: + return "Sw64ISD::VFCMPEQ"; + case Sw64ISD::VFCMPLE: + return "Sw64ISD::VFCMPLE"; + case Sw64ISD::VFCMPLT: + return "Sw64ISD::VFCMPLT"; + case Sw64ISD::VFCMPUN: + return "Sw64ISD::VFCMPUN"; + case Sw64ISD::VFCVTSD: + return "Sw64ISD::VFCVTSD"; + case Sw64ISD::VFCVTDS: + return "Sw64ISD::VFCVTDS"; + case Sw64ISD::VFCVTLS: + return "Sw64ISD::VFCVTLS"; + case Sw64ISD::VFCVTLD: + return "Sw64ISD::VFCVTLD"; + case Sw64ISD::VFCVTSH: + return "Sw64ISD::VFCVTSH"; + case Sw64ISD::VFCVTHS: + return "Sw64ISD::VFCVTHS"; + case Sw64ISD::VFCVTDL: + return "Sw64ISD::VFCVTDL"; + case Sw64ISD::VFCVTDLG: + return "Sw64ISD::VFCVTDLG"; + case Sw64ISD::VFCVTDLP: + return "Sw64ISD::VFCVTDLP"; + case Sw64ISD::VFCVTDLZ: + return "Sw64ISD::VFCVTDLZ"; + case Sw64ISD::VFCVTDLN: + return "Sw64ISD::VFCVTDLN"; + case Sw64ISD::VFRIS: + return "Sw64ISD::VFRIS"; + case Sw64ISD::VFRISG: + return "Sw64ISD::VFRISG"; + case Sw64ISD::VFRISP: + return "Sw64ISD::VFRISP"; + case Sw64ISD::VFRISZ: + return "Sw64ISD::VFRISZ"; + case Sw64ISD::VFRISN: + return "Sw64ISD::VFRISN"; + case Sw64ISD::VFRID: + return "Sw64ISD::VFRID"; + case Sw64ISD::VFRIDG: + return "Sw64ISD::VFRIDG"; + case Sw64ISD::VFRIDP: + return "Sw64ISD::VFRIDP"; + case Sw64ISD::VFRIDZ: + return "Sw64ISD::VFRIDZ"; + case Sw64ISD::VFRIDN: + return "Sw64ISD::VFRIDN"; + case Sw64ISD::VMAXF: + return "Sw64ISD::VMAXF"; + case Sw64ISD::VMINF: + return "Sw64ISD::VMINF"; + case Sw64ISD::VCPYB: + return "Sw64ISD::VCPYB"; + case Sw64ISD::VCPYH: + return "Sw64ISD::VCPYH"; + + case Sw64ISD::VCON_W: + return "Sw64ISD::VCON_W"; + case Sw64ISD::VCON_S: + return "Sw64ISD::VCON_S"; + case Sw64ISD::VCON_D: + return "Sw64ISD::VCON_D"; + + case Sw64ISD::INSVE: + return "Sw64ISD::INSVE"; + case Sw64ISD::VCOPYF: + return "Sw64ISD::VCOPYF"; + case Sw64ISD::V8SLL: + return "Sw64ISD::V8SLL"; + case Sw64ISD::V8SLLi: + return "Sw64ISD::V8SLLi"; + case Sw64ISD::V8SRL: + return "Sw64ISD::V8SRL"; + case Sw64ISD::V8SRLi: + return "Sw64ISD::V8SRLi"; + case Sw64ISD::VROTR: + return "Sw64ISD::VROTR"; + case Sw64ISD::VROTRi: + return "Sw64ISD::VROTRi"; + case Sw64ISD::V8SRA: + return "Sw64ISD::V8SRA"; + case Sw64ISD::V8SRAi: + return "Sw64ISD::V8SRAi"; + case Sw64ISD::VROLB: + return "Sw64ISD::VROLB"; + case Sw64ISD::VROLBi: + return "Sw64ISD::VROLBi"; + case Sw64ISD::VROLH: + return "Sw64ISD::VROLH"; + case Sw64ISD::VROLHi: + return "Sw64ISD::VROLHi"; + case Sw64ISD::VROLL: + return "Sw64ISD::VROLL"; + case Sw64ISD::VROLLi: + return "Sw64ISD::VROLLi"; + case Sw64ISD::VCTPOP: + return "Sw64ISD::VCTPOP"; + case Sw64ISD::VCTLZ: + return "Sw64ISD::VCTLZ"; + + case Sw64ISD::VLOG: + return "Sw64ISD::VLOG"; + case Sw64ISD::VSETGE: + return "Sw64ISD::VSETGE"; + + case Sw64ISD::VSELEQW: + return "Sw64ISD::VSELEQW"; + case Sw64ISD::VSELLTW: + return "Sw64ISD::VSELLTW"; + case Sw64ISD::VSELLEW: + return "Sw64ISD::VSELLEW"; + case 
Sw64ISD::VSELLBCW: + return "Sw64ISD::VSELLBCW"; + + case Sw64ISD::VFCMOVEQ: + return "Sw64ISD::VFCMOVEQ"; + case Sw64ISD::VFCMOVLE: + return "Sw64ISD::VFCMOVLE"; + case Sw64ISD::VFCMOVLT: + return "Sw64ISD::VFCMOVLT"; + + case Sw64ISD::VECT_VUCADDW: + return "Sw64ISD::VECT_VUCADDW"; + case Sw64ISD::VECT_VUCADDH: + return "Sw64ISD::VECT_VUCADDH"; + case Sw64ISD::VECT_VUCADDB: + return "Sw64ISD::VECT_VUCADDB"; + case Sw64ISD::VECT_VUCSUBW: + return "Sw64ISD::VECT_VUCSUBW"; + case Sw64ISD::VECT_VUCSUBH: + return "Sw64ISD::VECT_VUCSUBH"; + case Sw64ISD::VECT_VUCSUBB: + return "Sw64ISD::VECT_VUCSUBB"; + + case Sw64ISD::VECREDUCE_FADD: + return "Sw64ISD::VECREDUCE_FADD"; + case Sw64ISD::VSHL_BY_SCALAR: + return "Sw64ISD::VSHL_BY_SCALAR"; + case Sw64ISD::VSRL_BY_SCALAR: + return "Sw64ISD::VSRL_BY_SCALAR"; + case Sw64ISD::VSRA_BY_SCALAR: + return "Sw64ISD::VSRA_BY_SCALAR"; + case Sw64ISD::VEXTRACT_SEXT_ELT: + return "Sw64ISD::VEXTRACT_SEXT_ELT"; + case Sw64ISD::VBROADCAST: + return "Sw64ISD::VBROADCAST"; + case Sw64ISD::VBROADCAST_LD: + return "Sw64ISD::VBROADCAST_LD"; + case Sw64ISD::VTRUNCST: + return "Sw64ISD::VTRUNCST"; + } + + return nullptr; +} + +Sw64TargetLowering::Sw64TargetLowering(const TargetMachine &TM, + const Sw64Subtarget &Subtarget) + : TargetLowering(TM), TM(TM), Subtarget(Subtarget) { + if (Subtarget.hasSIMD()) { + // Expand all truncating stores and extending loads. + for (MVT VT0 : MVT::vector_valuetypes()) { + for (MVT VT1 : MVT::vector_valuetypes()) { + setTruncStoreAction(VT0, VT1, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); + } + } + } + + // Set up the TargetLowering object. + // I am having problems with shr n i8 1 + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + addRegisterClass(MVT::i64, &Sw64::GPRCRegClass); + addRegisterClass(MVT::f64, &Sw64::F8RCRegClass); + addRegisterClass(MVT::f32, &Sw64::F4RCRegClass); + // We want to custom lower some of our intrinsics. 
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // Loads + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + } + + setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i8, Expand); // ldbu + setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i16, Expand); // ldhu + setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i32, Expand); // ldwu + + if (Subtarget.hasCore4() && Subtarget.enablePostInc()) { + for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64}) { + setIndexedLoadAction(ISD::POST_INC, VT, Legal); + setIndexedStoreAction(ISD::POST_INC, VT, Legal); + } + } + + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + + for (MVT VT : MVT::fp_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + } + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + + // Sw64 wants to turn select_cc of INT/FP into sel/fsel when possible. 
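+  // Expanding SELECT_CC lets the generic legalizer rewrite it as a SETCC
+  // feeding a SELECT, which the sel/fsel-style patterns can then match.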
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + if (Subtarget.hasCore4() && Subtarget.enableFloatCmov()) { + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + } else { + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + } + + setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom); + + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + setOperationAction(ISD::CTTZ, MVT::i64, Expand); + setOperationAction(ISD::CTLZ, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::UDIV, MVT::i128, Custom); + setOperationAction(ISD::SDIV, MVT::i128, Custom); + setOperationAction(ISD::UREM, MVT::i128, Custom); + setOperationAction(ISD::SREM, MVT::i128, Custom); + + if (!Subtarget.hasCore4() || !Subtarget.enableIntAri()) { + setOperationAction(ISD::SREM, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + setOperationAction(ISD::SDIV, MVT::i64, Custom); + setOperationAction(ISD::UDIV, MVT::i64, Custom); + } + + if (Subtarget.hasCore4() && Subtarget.enableByteInst()) { + setOperationAction(ISD::BSWAP, MVT::i64, Legal); + setOperationAction(ISD::BSWAP, MVT::i32, Legal); + setOperationAction(ISD::BSWAP, MVT::i16, Legal); + } else { + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + } + + if (Subtarget.hasCore4() && Subtarget.enableFloatRound()) { + for (MVT Ty : {MVT::f32, MVT::f64}) { + setOperationAction(ISD::FFLOOR, Ty, Legal); + setOperationAction(ISD::FNEARBYINT, Ty, Legal); + setOperationAction(ISD::FCEIL, Ty, Legal); + setOperationAction(ISD::FTRUNC, Ty, Legal); + setOperationAction(ISD::FROUND, Ty, Legal); + } + } + + setOperationAction(ISD::ADDC, MVT::i64, Expand); + setOperationAction(ISD::ADDE, MVT::i64, Expand); + setOperationAction(ISD::SUBC, MVT::i64, Expand); + setOperationAction(ISD::SUBE, MVT::i64, Expand); + + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + + setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); + + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // We don't support sin/cos/sqrt/pow + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + + setOperationAction(ISD::FSQRT, MVT::f64, Legal); + setOperationAction(ISD::FSQRT, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); + + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + + // We have fused 
multiply-addition for f32 and f64 but not f128. + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMA, MVT::f128, Expand); + + setOperationAction(ISD::SETCC, MVT::f32, Promote); + + setOperationAction(ISD::BITCAST, MVT::f32, Promote); + // Not implemented yet. + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + // We want to legalize GlobalAddress and ConstantPool and + // ExternalSymbols nodes into the appropriate instructions to + // materialize the address. + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i32, Custom); + + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i8, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i8, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i16, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i16, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom); + + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FADD, MVT::i128, Custom); + setStackPointerRegisterToSaveRestore(Sw64::R30); + + if (Subtarget.hasSIMD() || Subtarget.hasCore4()) { + // We want to custom lower some of our intrinsics. 
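+    // INTRINSIC_W_CHAIN is required for the chained load-style builtins
+    // (e.g. builtin_sw64_load below); INTRINSIC_VOID covers intrinsics that
+    // are invoked only for their side effects.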
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, + Custom); // for builtin_sw64_load + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + } + + if (Subtarget.hasSIMD()) { + addSIMDIntType(MVT::v32i8, &Sw64::V256LRegClass); + addSIMDIntType(MVT::v16i16, &Sw64::V256LRegClass); + addSIMDIntType(MVT::v8i32, &Sw64::V256LRegClass); + addSIMDIntType(MVT::v4i64, &Sw64::V256LRegClass); + addSIMDFloatType(MVT::v4f32, &Sw64::V256LRegClass); + addSIMDFloatType(MVT::v4f64, &Sw64::V256LRegClass); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine(ISD::XOR); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i16, Expand); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i8, Expand); + + setOperationAction(ISD::SETCC, MVT::v8i32, Legal); + setOperationAction(ISD::SETCC, MVT::v4i64, Expand); + setOperationAction(ISD::SETCC, MVT::v4f32, Legal); + setOperationAction(ISD::SETCC, MVT::v4f64, Expand); + + if (Subtarget.hasCore4()) + for (auto VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64}) { + addRegisterClass(VT, &Sw64::V256LRegClass); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Expand); + } + else { + addRegisterClass(MVT::v8i32, &Sw64::V256LRegClass); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SRA, MVT::v8i32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i32, Custom); + } + } + + setOperationAction(ISD::FNEG, MVT::v4f32, Legal); + setOperationAction(ISD::FNEG, MVT::v4f64, Legal); + + setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); + + if (Subtarget.hasCore4() && Subtarget.enableIntShift()) { + setOperationAction(ISD::ROTR, MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Custom); + } else { + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); + } + if (Subtarget.hasCore4() && Subtarget.enableFloatAri()) { + setOperationAction(ISD::FDIV, MVT::f32, Legal); + setOperationAction(ISD::FDIV, MVT::f64, Legal); + } + + // return R + setLibcallName(RTLIB::OEQ_F128, "_OtsEqlX"); + setLibcallName(RTLIB::UNE_F128, "_OtsNeqX"); + setLibcallName(RTLIB::UO_F128, "_OtsNeqX"); + setLibcallName(RTLIB::OLE_F128, "_OtsLeqX"); + setLibcallName(RTLIB::OLT_F128, "_OtsLssX"); + setLibcallName(RTLIB::OGE_F128, "_OtsGeqX"); + setLibcallName(RTLIB::OGT_F128, "_OtsGtrX"); + // return R16+R17 + setLibcallName(RTLIB::FPEXT_F64_F128, "_OtsConvertFloatTX"); + setLibcallName(RTLIB::FPEXT_F32_F128, "_OtsConvertFloatTX"); + setLibcallName(RTLIB::UINTTOFP_I64_F128, "_OtsCvtQUX"); + setLibcallName(RTLIB::UINTTOFP_I32_F128, "_OtsCvtQUX"); + setLibcallName(RTLIB::SINTTOFP_I32_F128, "_OtsCvtQX"); + setLibcallName(RTLIB::SINTTOFP_I64_F128, "_OtsCvtQX"); + // add round return R + setLibcallName(RTLIB::FPTOSINT_F128_I64, "_OtsCvtXQ"); + setLibcallName(RTLIB::FPTOUINT_F128_I64, "_OtsCvtXQ"); + setLibcallName(RTLIB::FPROUND_F128_F64, 
"_OtsConvertFloatXT"); + setLibcallName(RTLIB::FPROUND_F128_F32, "_OtsConvertFloatXT"); + // add round return R16+R17 + setLibcallName(RTLIB::ADD_F128, "_OtsAddX"); + setLibcallName(RTLIB::SUB_F128, "_OtsSubX"); + setLibcallName(RTLIB::MUL_F128, "_OtsMulX"); + setLibcallName(RTLIB::DIV_F128, "_OtsDivX"); + setOperationAction(ISD::CTPOP, MVT::i32, Promote); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + + setMinStackArgumentAlignment(Align(32)); + setMinFunctionAlignment(Align(8)); + setTargetDAGCombine(ISD::MUL); + + computeRegisterProperties(Subtarget.getRegisterInfo()); + MaxStoresPerMemsetOptSize = 16; + MaxStoresPerMemset = 16; + MaxStoresPerMemcpy = 4; + MaxStoresPerMemcpyOptSize = 4; +} + +bool Sw64TargetLowering::generateFMAsInMachineCombiner( + EVT VT, CodeGenOpt::Level OptLevel) const { + return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector(); +} + +EVT Sw64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { + // Refer to other. + if (!VT.isVector()) + return MVT::i64; + + return VT.changeVectorElementTypeToInteger(); +} + +#include "Sw64GenCallingConv.inc" + +static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + + return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); +} + +static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), + Flags); +} + +static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flag) { + + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), + N->getOffset(), Flags); +} + +// This function returns true if CallSym is a long double emulation routine. +static bool isF128SoftLibCall_void(const char *CallSym) { + const char *const LibCalls[] = { + "_OtsAddX", "_OtsConvertFloatTX", "_OtsCvtQUX", "_OtsCvtQX", + "_OtsDivX", "_OtsMulX", "_OtsSubX"}; + + // Check that LibCalls is sorted betically. + auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; + assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); + + return std::binary_search(std::begin(LibCalls), std::end(LibCalls), CallSym, + Comp); +} + +// This function returns true if CallSym is a long double emulation routine. +static bool isF128SoftLibCall_round(const char *CallSym) { + const char *const LibCalls[] = { + "_OtsAddX", "_OtsConvertFloatTX", "_OtsConvertFloatXT", + "_OtsCvtXQ", "_OtsDivX", "_OtsMulX", + "_OtsSubX"}; + + // Check that LibCalls is sorted betically. + auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; + assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); + + return std::binary_search(std::begin(LibCalls), std::end(LibCalls), CallSym, + Comp); +} + +// Enable SIMD support for the given integer type and Register class. +void Sw64TargetLowering::addSIMDIntType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + // for vfcmpxxs + setTruncStoreAction(MVT::v4i64, MVT::v4i32, Custom); + + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); + + setOperationAction(ISD::ROTL, Ty, Custom); + setOperationAction(ISD::ROTR, Ty, Expand); + setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::AND, Ty, Legal); + setOperationAction(ISD::MUL, Ty, Legal); + setOperationAction(ISD::OR, Ty, Legal); + setOperationAction(ISD::SDIV, Ty, Legal); + setOperationAction(ISD::SREM, Ty, Legal); + setOperationAction(ISD::SUB, Ty, Legal); + setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::UREM, Ty, Legal); + setOperationAction(ISD::UMAX, Ty, Legal); + setOperationAction(ISD::UMIN, Ty, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); + setOperationAction(ISD::XOR, Ty, Legal); + + setOperationAction(ISD::VECREDUCE_ADD, Ty, Legal); + + if (Ty == MVT::v8i32 || Ty == MVT::v4i64) { + setOperationAction(ISD::FP_TO_SINT, Ty, Legal); + setOperationAction(ISD::FP_TO_UINT, Ty, Legal); + setOperationAction(ISD::SINT_TO_FP, Ty, Legal); + setOperationAction(ISD::UINT_TO_FP, Ty, Legal); + } + setCondCodeAction(ISD::SETNE, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); +} + +// Enable SIMD support for the given floating-point type and Register class. +void Sw64TargetLowering::addSIMDFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + + setOperationAction(ISD::FCOPYSIGN, Ty, Legal); + + if (Ty != MVT::v16f16) { + setOperationAction(ISD::FABS, Ty, Expand); + setOperationAction(ISD::FADD, Ty, Legal); + setOperationAction(ISD::FDIV, Ty, Legal); + setOperationAction(ISD::FEXP2, Ty, Legal); + setOperationAction(ISD::FLOG2, Ty, Legal); + setOperationAction(ISD::FMA, Ty, Legal); + setOperationAction(ISD::FMUL, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + setOperationAction(ISD::FSQRT, Ty, Legal); + setOperationAction(ISD::FSUB, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETO, Ty, Custom); + setCondCodeAction(ISD::SETOGE, Ty, Expand); + setCondCodeAction(ISD::SETOGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); + } +} + +// Fold zero extensions into Sw64ISD::VEXTRACT_[SZ]EXT_ELT +// +// Performs the following transformations: +// - Changes Sw64ISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its +// sign/zero-extension is completely overwritten by the new one performed by +// the ISD::AND. 
+// - Removes redundant zero extensions performed by an ISD::AND.
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const Sw64Subtarget &Subtarget) {
+  return SDValue();
+}
+
+// Perform combines where ISD::OR is the root node.
+//
+// Performs the following transformations:
+// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
+//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
+//   vector type.
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+                                TargetLowering::DAGCombinerInfo &DCI,
+                                const Sw64Subtarget &Subtarget) {
+  return SDValue();
+}
+
+static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
+                                               SelectionDAG &DAG,
+                                               const Sw64Subtarget &Subtarget) {
+  unsigned MaxSteps = 4;
+  SmallVector<APInt, 16> WorkStack(1, C);
+  unsigned Steps = 0;
+  unsigned BitWidth = C.getBitWidth();
+
+  while (!WorkStack.empty()) {
+    APInt Val = WorkStack.pop_back_val();
+
+    if (Val == 0 || Val == 1)
+      continue;
+
+    if (Steps >= MaxSteps)
+      return false;
+
+    if (Val.isPowerOf2()) {
+      ++Steps;
+      continue;
+    }
+
+    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
+    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
+                                  : APInt(BitWidth, 1) << Val.ceilLogBase2();
+    if ((Val - Floor).ule(Ceil - Val)) {
+      WorkStack.push_back(Floor);
+      WorkStack.push_back(Val - Floor);
+    } else {
+      WorkStack.push_back(Ceil);
+      WorkStack.push_back(Ceil - Val);
+    }
+
+    ++Steps;
+  }
+  // If the value being multiplied is not supported natively, we have to pay
+  // an additional legalization cost, conservatively assume an increase in the
+  // cost of 3 instructions per step. The values for this heuristic were
+  // determined experimentally.
+  unsigned RegisterSize = DAG.getTargetLoweringInfo()
+                              .getRegisterType(*DAG.getContext(), VT)
+                              .getSizeInBits();
+  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
+  if (Steps > 27)
+    return false;
+
+  return true;
+}
+
+static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
+                            EVT ShiftTy, SelectionDAG &DAG) {
+  // Return 0.
+  if (C == 0)
+    return DAG.getConstant(0, DL, VT);
+
+  // Return x.
+  if (C == 1)
+    return X;
+
+  // If c is power of 2, return (shl x, log2(c)).
+  if (C.isPowerOf2())
+    return DAG.getNode(ISD::SHL, DL, VT, X,
+                       DAG.getConstant(C.logBase2(), DL, ShiftTy));
+
+  unsigned BitWidth = C.getBitWidth();
+  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
+  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0)
+                              : APInt(BitWidth, 1) << C.ceilLogBase2();
+
+  // If |c - floor_c| <= |c - ceil_c|,
+  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
+  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
+  if ((C - Floor).ule(Ceil - C)) {
+    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
+    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
+    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
+  }
+
+  // If |c - floor_c| > |c - ceil_c|,
+  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
+ SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); + SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); + return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); +} + +static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + EVT VT = N->getValueType(0); + + if (Subtarget.enOptMul()) + if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) + if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( + C->getAPIntValue(), VT, DAG, Subtarget)) + return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, + MVT::i64, DAG); + + return SDValue(N, 0); +} + +static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { + return SDValue(); +} + +static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { + return SDValue(); +} + +static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +SDValue Sw64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDValue Val; + + switch (N->getOpcode()) { + case ISD::AND: + Val = performANDCombine(N, DAG, DCI, Subtarget); + break; + case ISD::OR: + Val = performORCombine(N, DAG, DCI, Subtarget); + break; + case ISD::MUL: + return performMULCombine(N, DAG, DCI, Subtarget); + case ISD::SHL: + Val = performSHLCombine(N, DAG, DCI, Subtarget); + break; + case ISD::SRA: + return performSRACombine(N, DAG, DCI, Subtarget); + case ISD::SRL: + return performSRLCombine(N, DAG, DCI, Subtarget); + case ISD::VSELECT: + return performVSELECTCombine(N, DAG); + case ISD::XOR: + Val = performXORCombine(N, DAG, Subtarget); + break; + case ISD::SETCC: + Val = performSETCCCombine(N, DAG); + break; + } + + if (Val.getNode()) { + LLVM_DEBUG(dbgs() << "\nSw64 DAG Combine:\n"; + N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n"; + Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n"); + return Val; + } + + return Sw64TargetLowering::PerformDAGCombineV(N, DCI); +} + +/// ------------------------- scaler ------------------------------ /// + +static SDValue performDivRemCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSELECTCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performANDCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performORCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performADDCombineV(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const Sw64Subtarget &Subtarget) { + return SDValue(); +} + +static SDValue performSHLCombineV(SDNode *N, SelectionDAG &DAG, + 
                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const Sw64Subtarget &Subtarget) {
+  return SDValue();
+}
+
+SDValue Sw64TargetLowering::PerformDAGCombineV(SDNode *N,
+                                               DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  unsigned Opc = N->getOpcode();
+
+  switch (Opc) {
+  default:
+    break;
+  case ISD::SDIVREM:
+  case ISD::UDIVREM:
+    return performDivRemCombineV(N, DAG, DCI, Subtarget);
+  case ISD::SELECT:
+    return performSELECTCombineV(N, DAG, DCI, Subtarget);
+  case ISD::AND:
+    return performANDCombineV(N, DAG, DCI, Subtarget);
+  case ISD::OR:
+    return performORCombineV(N, DAG, DCI, Subtarget);
+  case ISD::ADD:
+    return performADDCombineV(N, DAG, DCI, Subtarget);
+  case ISD::SHL:
+    return performSHLCombineV(N, DAG, DCI, Subtarget);
+  }
+
+  return SDValue();
+}
+
+SDValue Sw64TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                                      SmallVectorImpl<SDValue> &InVals) const {
+
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc &dl = CLI.DL;
+  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  bool &isTailCall = CLI.IsTailCall;
+  CallingConv::ID CallConv = CLI.CallConv;
+  bool isVarArg = CLI.IsVarArg;
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  // Sw64 target does not yet support tail call optimization.
+  isTailCall = false;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+                 *DAG.getContext());
+
+  CCInfo.AnalyzeCallOperands(Outs, CC_Sw64);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getStackSize();
+  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+  SDValue StackPtr;
+  RegsToPass.push_back(std::make_pair((unsigned)Sw64::R27, Callee));
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+
+    SDValue Arg = OutVals[i];
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default:
+      assert(0 && "Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    }
+    // Arguments that can be passed on register must be kept at RegsToPass
+    // vector
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else {
+      assert(VA.isMemLoc());
+
+      if (StackPtr.getNode() == 0)
+        StackPtr = DAG.getCopyFromReg(Chain, dl, Sw64::R30, MVT::i64);
+
+      SDValue PtrOff =
+          DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr,
+                      DAG.getIntPtrConstant(VA.getLocMemOffset(), dl));
+
+      MemOpChains.push_back(
+          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
+    }
+  }
+  const ExternalSymbolSDNode *ES =
+      dyn_cast_or_null<ExternalSymbolSDNode>(Callee.getNode());
+  if (ES && isF128SoftLibCall_round(ES->getSymbol())) {
+    RegsToPass.push_back(std::make_pair(((unsigned)Sw64::R16) + ArgLocs.size(),
+                                        DAG.getConstant(2, dl, MVT::i64)));
+  }
+
+  // FIXME: Fix the error for clang-repl.
+
+  // Transform all store nodes into one single node because all store nodes are
+  // independent of each other.
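+  // A single TokenFactor joins the independent store chains, so the call
+  // only has to depend on one chain value rather than on each store.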
+
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag is
+  // necessary since all emitted instructions must be stuck together.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  // Fix the error for clang-repl.
+  // Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  if (!isTailCall) {
+    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+    assert(Mask && "Missing call preserved mask for calling convention");
+    Ops.push_back(DAG.getRegisterMask(Mask));
+  }
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+  Chain = DAG.getNode(Sw64ISD::JmpLink, dl, NodeTys, Ops);
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  Chain = DAG.getCALLSEQ_END(
+      Chain,
+      DAG.getConstant(NumBytes, dl, getPointerTy(DAG.getDataLayout()), true),
+      DAG.getConstant(0, dl, getPointerTy(DAG.getDataLayout()), true), InFlag,
+      dl);
+  InFlag = Chain.getValue(1);
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+                         InVals, CLI.Callee.getNode(), CLI.RetTy);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue Sw64TargetLowering::LowerCallResult(
+    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc &dl, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals, const SDNode *CallNode,
+    const Type *RetTy) const {
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+                 *DAG.getContext());
+  const ExternalSymbolSDNode *ES =
+      dyn_cast_or_null<ExternalSymbolSDNode>(CallNode);
+
+  if (ES && isF128SoftLibCall_void(ES->getSymbol()))
+    CCInfo.AnalyzeCallResult(Ins, RetCC_F128Soft_Sw64);
+  else
+    CCInfo.AnalyzeCallResult(Ins, RetCC_Sw64);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+
+    Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag)
+                .getValue(1);
+
+    SDValue RetValue = Chain.getValue(0);
+    InFlag = Chain.getValue(2);
+
+    // If this is an 8/16/32-bit value, it is really passed promoted to 64
+    // bits. Insert an assert[sz]ext to capture this, then truncate to the
+    // right size.
+
+    if (VA.getLocInfo() == CCValAssign::SExt)
+      RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+                             DAG.getValueType(VA.getValVT()));
+    else if (VA.getLocInfo() == CCValAssign::ZExt)
+      RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+                             DAG.getValueType(VA.getValVT()));
+
+    if (VA.getLocInfo() != CCValAssign::Full)
+      RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+    InVals.push_back(RetValue);
+  }
+
+  return Chain;
+}
+
+SDValue Sw64TargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  Sw64MachineFunctionInfo *FuncInfo = MF.getInfo<Sw64MachineFunctionInfo>();
+
+  unsigned args_int[] = {Sw64::R16, Sw64::R17, Sw64::R18,
+                         Sw64::R19, Sw64::R20, Sw64::R21};
+  unsigned args_float[] = {Sw64::F16, Sw64::F17, Sw64::F18,
+                           Sw64::F19, Sw64::F20, Sw64::F21};
+  unsigned args_vector[] = {Sw64::V16, Sw64::V17, Sw64::V18,
+                            Sw64::V19, Sw64::V20, Sw64::V21};
+
+  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+    SDValue argt;
+    EVT ObjectVT = Ins[ArgNo].VT;
+    SDValue ArgVal;
+    if (ArgNo < 6) {
+      switch (ObjectVT.getSimpleVT().SimpleTy) {
+      default:
+        assert(false && "Invalid value type!");
+      case MVT::f64:
+        args_float[ArgNo] =
+            AddLiveIn(MF, args_float[ArgNo], &Sw64::F8RCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+        break;
+      case MVT::f32:
+        args_float[ArgNo] =
+            AddLiveIn(MF, args_float[ArgNo], &Sw64::F4RCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
+        break;
+      case MVT::i64:
+        args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo], &Sw64::GPRCRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64);
+        break;
+      case MVT::v32i8:
+      case MVT::v16i16:
+      case MVT::v8i32:
+      case MVT::v4i64:
+      case MVT::v4f32:
+      case MVT::v4f64:
+        args_vector[ArgNo] =
+            AddLiveIn(MF, args_vector[ArgNo], &Sw64::V256LRegClass);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_vector[ArgNo], ObjectVT);
+        break;
+      }
+    } else { // more args
+      // Create the frame index object for this incoming parameter...
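+      // The first six arguments arrive in registers, so stack argument N
+      // lives in the fixed 8-byte slot at offset 8 * (N - 6) from the
+      // incoming stack pointer.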
+      int FI = MFI.CreateFixedObject(8, 8 * (ArgNo - 6), true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter
+      SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
+    }
+    InVals.push_back(ArgVal);
+  }
+
+  // If the function takes a variable number of arguments, copy all regs to
+  // the stack.
+  if (isVarArg) {
+    FuncInfo->setVarArgsOffset(Ins.size() * 8);
+    std::vector<SDValue> LS;
+    for (int i = 0; i < 6; ++i) {
+      if (Register::isPhysicalRegister(args_int[i]))
+        args_int[i] = AddLiveIn(MF, args_int[i], &Sw64::GPRCRegClass);
+      SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
+      int FI = MFI.CreateFixedObject(8, -8 * (6 - i), true);
+      if (i == 0)
+        FuncInfo->setVarArgsBase(FI);
+      SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo()));
+      if (Register::isPhysicalRegister(args_float[i]))
+        args_float[i] = AddLiveIn(MF, args_float[i], &Sw64::F8RCRegClass);
+      argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
+      FI = MFI.CreateFixedObject(8, -8 * (12 - i), true);
+      SDFI = DAG.getFrameIndex(FI, MVT::i64);
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo()));
+    }
+    // Set up a token factor with all the stack traffic
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LS);
+  }
+
+  return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+bool Sw64TargetLowering::CanLowerReturn(
+    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+  return CCInfo.CheckReturn(Outs, RetCC_Sw64);
+}
+
+SDValue
+Sw64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+                                bool isVarArg,
+                                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                const SmallVectorImpl<SDValue> &OutVals,
+                                const SDLoc &dl, SelectionDAG &DAG) const {
+
+  SDValue Copy = DAG.getCopyToReg(
+      Chain, dl, Sw64::R26, DAG.getNode(Sw64ISD::GlobalRetAddr, dl, MVT::i64),
+      SDValue());
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+
+  SDValue Flag;
+  unsigned outSize = Outs.size();
+  SmallVector<unsigned, 8> ArgReg(outSize);
+  for (unsigned j = 0, r = 0, f = 0, v = 0; j != outSize; j++) {
+    EVT ArgVT = Outs[j].VT;
+    switch (ArgVT.getSimpleVT().SimpleTy) {
+    default:
+      if (ArgVT.isInteger())
+        ArgReg[j] = Sw64::R0 + r++;
+      else
+        ArgReg[j] = Sw64::F0 + f++;
+      Copy =
+          DAG.getCopyToReg(Copy, dl, ArgReg[j], OutVals[j], Copy.getValue(1));
+
+      if (ArgVT.isInteger())
+        RetOps.push_back(DAG.getRegister(ArgReg[j], MVT::i64));
+      else
+        RetOps.push_back(DAG.getRegister(ArgReg[j], ArgVT.getSimpleVT()));
+      break;
+
+    case MVT::v32i8:
+    case MVT::v16i16:
+    case MVT::v8i32:
+    case MVT::v4i64:
+    case MVT::v4f32:
+    case MVT::v4f64:
+      ArgReg[j] = Sw64::V0 + v++;
+      Copy =
+          DAG.getCopyToReg(Copy, dl, ArgReg[j], OutVals[j], Copy.getValue(1));
+      RetOps.push_back(DAG.getRegister(ArgReg[j], ArgVT.getSimpleVT()));
+      break;
+    }
+  }
+
+  RetOps[0] = Copy;
+  RetOps.push_back(Copy.getValue(1));
+  return DAG.getNode(Sw64ISD::Ret, dl, MVT::Other, RetOps);
+}
+
+void Sw64TargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
+                                    SelectionDAG &DAG) const {
+
+  SDLoc dl(N);
+  Chain = N->getOperand(0);
+  SDValue VAListP = N->getOperand(1);
+  const Value *VAListS =
cast(N->getOperand(2))->getValue(); + unsigned Align = cast(N->getOperand(3))->getZExtValue(); + Align = std::max(Align,8u); + + SDValue Base = + DAG.getLoad(MVT::i64, dl, Chain, VAListP, MachinePointerInfo(VAListS)); + SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, + DAG.getConstant(8, dl, MVT::i64)); + SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), + Tmp, MachinePointerInfo(), MVT::i32); + DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); + if (N->getValueType(0).isFloatingPoint()) { + // if fp && Offset < 6*8, then subtract 6*8 from DataPtr + SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr, + DAG.getConstant(8 * 6, dl, MVT::i64)); + SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset, + DAG.getConstant(8 * 6, dl, MVT::i64), ISD::SETLT); + DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr); + } + SDValue NewOffset = DAG.getNode( + ISD::ADD, dl, MVT::i64, Offset, + DAG.getConstant(Align, dl, MVT::i64)); + Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, + MachinePointerInfo(), MVT::i32); +} + +/// LowerOperation - Provide custom lowering hooks for some operations. +SDValue Sw64TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + switch (Op.getOpcode()) { + default: + llvm_unreachable("Wasn't expecting to be able to lower this!"); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_VOID: + return LowerINTRINSIC_VOID(Op, DAG); + case ISD::SRL_PARTS: + return LowerSRL_PARTS(Op, DAG); + case ISD::SRA_PARTS: + return LowerSRA_PARTS(Op, DAG); + case ISD::SHL_PARTS: + return LowerSHL_PARTS(Op, DAG); + case ISD::SINT_TO_FP: + return LowerSINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: + return LowerFP_TO_SINT(Op, DAG); + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + return LowerFP_TO_INT_SAT(Op, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); + case ISD::BlockAddress: + return LowerBlockAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::ExternalSymbol: + return LowerExternalSymbol(Op, DAG); + case ISD::ATOMIC_FENCE: + return LowerATOMIC_FENCE(Op, DAG); + case ISD::ATOMIC_LOAD: + return LowerATOMIC_LOAD(Op, DAG); + case ISD::ATOMIC_STORE: + return LowerATOMIC_STORE(Op, DAG); + case ISD::OR: + return LowerOR(Op, DAG); + case ISD::UREM: + case ISD::SREM: + return LowerSUREM(Op, DAG); + // fall through + case ISD::SDIV: + case ISD::UDIV: + return LowerSUDIV(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case ISD::VACOPY: + return LowerVACOPY(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::RETURNADDR: + return DAG.getNode(Sw64ISD::GlobalRetAddr, dl, MVT::i64); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG); + case ISD::PREFETCH: + return LowerPREFETCH(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + return LowerVectorShift(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SETCC: + return LowerSETCC(Op, DAG); + case 
ISD::STORE:
+    return LowerSTORE(Op, DAG);
+  }
+
+  return SDValue();
+}
+
+SDValue Sw64TargetLowering::LowerVectorShift(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  // Look for cases where a vector shift can use the *_BY_SCALAR form.
+  // SDValue Op0 = Op.getOperand(0);
+  // SDValue Op1 = Op.getOperand(1);
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+
+  // See whether the shift vector is a splat represented as BUILD_VECTOR.
+  switch (Op.getOpcode()) {
+  default:
+    llvm_unreachable("unexpected vector opcode");
+  case ISD::ROTL:
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                       DAG.getConstant(Intrinsic::sw64_vrol, DL, MVT::i64),
+                       Op.getOperand(0), Op.getOperand(1));
+  case ISD::SHL:
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                       DAG.getConstant(Intrinsic::sw64_vsll, DL, MVT::i64),
+                       Op.getOperand(0), Op.getOperand(1));
+  case ISD::SRL:
+  case ISD::SRA:
+    unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::sw64_vsra
+                                                : Intrinsic::sw64_vsrl;
+
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                       DAG.getConstant(Opc, DL, MVT::i64), Op.getOperand(0),
+                       Op.getOperand(1));
+  }
+
+  // Otherwise just treat the current form as legal.
+  return Op;
+}
+
+// Lower Operand specifics
+SDValue Sw64TargetLowering::LowerJumpTable(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64:: begin lowJumpTable----\n");
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  // FIXME there isn't really any debug info here
+  SDLoc dl(Op);
+  return getAddr(JT, DAG);
+}
+
+SDValue Sw64TargetLowering::LowerConstantPool(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64:: begin lowConstantPool----\n");
+  SDLoc dl(Op);
+  SDLoc DL(Op);
+  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+  // FIXME there isn't really any debug info here
+  return getAddr(N, DAG);
+}
+
+SDValue Sw64TargetLowering::LowerBlockAddress(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64:: begin lowBlockAddress----\n");
+  SDLoc dl(Op);
+  SDLoc DL(Op);
+
+  BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
+  return getAddr(BA, DAG);
+}
+
+SDValue Sw64TargetLowering::LowerGlobalAddress(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64:: begin lowGlobalAddress----\n");
+  SDLoc dl(Op);
+  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *GV = GSDN->getGlobal();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, GSDN->getOffset());
+  // FIXME there isn't really any debug info here
+  if (GV->hasLocalLinkage()) {
+    return getAddr(GSDN, DAG);
+  } else
+    return DAG.getNode(Sw64ISD::RelLit, dl, MVT::i64, GA,
+                       DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
+}
+
+template <class NodeTy>
+SDValue Sw64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Sw64TargetLowering:: getAddr");
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  SDLoc DL(N);
+
+  switch (getTargetMachine().getCodeModel()) {
+  default:
+    report_fatal_error("Unsupported code model for lowering");
+  case CodeModel::Small:
+  case CodeModel::Medium: {
+    SDValue Hi = getTargetNode(N, DL, Ty, DAG, Sw64II::MO_GPREL_HI);
+    SDValue Lo = getTargetNode(N, DL, Ty, DAG, Sw64II::MO_GPREL_LO);
+    SDValue MNHi = DAG.getNode(Sw64ISD::LDIH, DL, Ty, Hi);
+    return DAG.getNode(Sw64ISD::LDI, DL, Ty, MNHi, Lo);
+  }
+  }
+}
+
+SDValue Sw64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+
+  // If the relocation model is PIC, use the General Dynamic TLS Model or
+  // Local Dynamic TLS model, otherwise use the Initial Exec or
+  // Local Exec TLS Model.
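+  //
+  // General and local dynamic call __tls_get_addr with a TLSGD/TLSLDM
+  // relocated argument; initial exec loads the tp-relative offset through a
+  // GOTTPREL entry, and local exec materializes it with TPREL_HI/TPREL_LO.
+  // Both of the exec models add the offset to the thread-pointer value
+  // produced by the SysCall node emitted below.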
+ + GlobalAddressSDNode *GSDN = cast(Op); + if (DAG.getTarget().useEmulatedTLS()) + return LowerToTLSEmulatedModel(GSDN, DAG); + + SDLoc dl(Op); + const GlobalValue *GV = GSDN->getGlobal(); + + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + TLSModel::Model model = getTargetMachine().getTLSModel(GV); + + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { + // General Dynamic == tlsgd + // LocalDynamic == tlsldm + // GA == TGA + SDValue Argument; + if (model == TLSModel::GeneralDynamic) { + SDValue Addr = + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Sw64II::MO_TLSGD); + Argument = + SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, Addr, + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)), + 0); + } else { + SDValue Addr = + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Sw64II::MO_TLSLDM); + Argument = + SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, Addr, + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)), + 0); + } + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Argument; + Entry.Ty = PtrTy; + Args.push_back(Entry); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args)); + std::pair CallResult = LowerCallTo(CLI); + + SDValue Ret = CallResult.first; + if (model != TLSModel::LocalDynamic) + return Ret; + + SDValue DTPHi = DAG.getTargetGlobalAddress( + GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_DTPREL_HI); + SDValue DTPLo = DAG.getTargetGlobalAddress( + GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_DTPREL_LO); + + SDValue Hi = + SDValue(DAG.getMachineNode(Sw64::LDAH, dl, MVT::i64, DTPHi, Ret), 0); + return SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, DTPLo, Hi), 0); + } + + if (model == TLSModel::InitialExec) { + // Initial Exec TLS Model //gottprel + SDValue Gp = DAG.getGLOBAL_OFFSET_TABLE(MVT::i64); + SDValue Addr = + DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Sw64II::MO_GOTTPREL); + SDValue RelDisp = + SDValue(DAG.getMachineNode(Sw64::LDL, dl, MVT::i64, Addr, Gp), 0); + SDValue SysCall = DAG.getNode(Sw64ISD::SysCall, dl, MVT::i64, + DAG.getConstant(0x9e, dl, MVT::i64)); + return SDValue( + DAG.getMachineNode(Sw64::ADDQr, dl, MVT::i64, RelDisp, SysCall), 0); + } else { + // Local Exec TLS Model //tprelHi tprelLo + assert(model == TLSModel::LocalExec); + SDValue SysCall = DAG.getNode(Sw64ISD::SysCall, dl, MVT::i64, + DAG.getConstant(0x9e, dl, MVT::i64)); + SDValue TPHi = DAG.getTargetGlobalAddress( + GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_TPREL_HI); + SDValue TPLo = DAG.getTargetGlobalAddress( + GV, dl, MVT::i64, GSDN->getOffset(), Sw64II::MO_TPREL_LO); + SDValue Hi = + SDValue(DAG.getMachineNode(Sw64::LDAH, dl, MVT::i64, TPHi, SysCall), 0); + return SDValue(DAG.getMachineNode(Sw64::LDA, dl, MVT::i64, TPLo, Hi), 0); + } +} + +static bool isCrossINSMask(ArrayRef M, EVT VT) { + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned i = 0; i < NumElts; i++) { + unsigned idx = i / 2; + if (M[i] < 0) + return false; + if (M[i] != idx && (M[i] - NumElts) != idx) + return false; + } + return true; +} + +static SDValue GenerateVectorShuffle(SDValue Op, EVT VT, SelectionDAG &DAG, + SDLoc dl) { + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + ArrayRef ShuffleMask = SVN->getMask(); + if (ShuffleMask.size() > 8) + return SDValue(); + + unsigned NewMask; + if 
(VT == MVT::v8i32) { + for (int i = (ShuffleMask.size() - 1); i >= 0; i--) { + NewMask = NewMask << 4; + int idx = ShuffleMask[i]; + int bits = idx > 7 ? 1 : 0; + idx = idx > 7 ? (idx - 8) : idx; + NewMask |= (bits << 3) | idx; + } + } else if (VT == MVT::v4i64 || VT == MVT::v4f32 || VT == MVT::v4f64) { + for (int i = ShuffleMask.size() * 2 - 1; i >= 0; i--) { + NewMask = NewMask << 4; + int idx = ShuffleMask[i / 2]; + int bits = idx > 3 ? 1 : 0; + int mod = i % 2; + idx = idx > 3 ? (idx * 2 + mod - 8) : idx * 2 + mod; + NewMask |= (bits << 3) | idx; + } + } + + SDValue ConstMask = DAG.getConstant(NewMask, dl, MVT::i64); + return DAG.getNode(Sw64ISD::VSHF, dl, VT, Op.getOperand(0), Op.getOperand(1), + ConstMask); +} + +SDValue Sw64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + // Convert shuffles that are directly supported on NEON to target-specific + // DAG nodes, instead of keeping them as shuffles and matching them again + // during code selection. This is more efficient and avoids the possibility + // of inconsistencies between legalization and selection. + ArrayRef ShuffleMask = SVN->getMask(); + + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!"); + assert(ShuffleMask.size() == VT.getVectorNumElements() && + "Unexpected VECTOR_SHUFFLE mask size!"); + + if (SVN->isSplat()) { + int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane == -1) + Lane = 0; + + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) + return DAG.getNode(Sw64ISD::VBROADCAST, dl, V1.getValueType(), + V1.getOperand(0)); + + // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non- + // constant. If so, we can just reference the lane's definition directly. + if (V1.getOpcode() == ISD::BUILD_VECTOR && + !isa(V1.getOperand(Lane))) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, V1.getValueType(), + V1.getOperand(Lane)); + return DAG.getNode(Sw64ISD::VBROADCAST, dl, VT, Ext); + } + } + if (isCrossINSMask(ShuffleMask, VT)) + return DAG.getNode(Sw64ISD::VINSECTL, dl, VT, V1, V2); + + // SmallVector NewMask; + SDValue Tmp1 = GenerateVectorShuffle(Op, VT, DAG, dl); + + return Tmp1; +} + +SDValue Sw64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + unsigned NewIntrinsic; + EVT VT = Op.getValueType(); + switch (IntNo) { + default: + break; // Don't custom lower most intrinsics. 
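+  // Note on the cases below: the crc32*/sbt/cbt handling relies on
+  // LLVM_FALLTHROUGH, so when the Core4 subtarget checks fail, control falls
+  // through to the final "return Op" after the sw64_cbt case and the
+  // intrinsic is left for default handling.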
+ case Intrinsic::sw64_umulh: + return DAG.getNode(ISD::MULHU, dl, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32b: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32B, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32h: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32H, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32w: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32W, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32l: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32L, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32cb: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32CB, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32ch: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32CH, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32cw: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32CW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_crc32cl: + if (Subtarget.hasCore4() && Subtarget.enableCrcInst()) + return DAG.getNode(Sw64ISD::CRC32CL, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_sbt: + if (Subtarget.hasCore4() && Subtarget.enableSCbtInst()) + return DAG.getNode(Sw64ISD::SBT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + LLVM_FALLTHROUGH; + case Intrinsic::sw64_cbt: + if (Subtarget.hasCore4() && Subtarget.enableSCbtInst()) + return DAG.getNode(Sw64ISD::CBT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + return Op; + case Intrinsic::sw64_vsllb: + case Intrinsic::sw64_vsllh: + case Intrinsic::sw64_vsllw: + case Intrinsic::sw64_vslll: + NewIntrinsic = Intrinsic::sw64_vsll; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::sw64_vsrlb: + case Intrinsic::sw64_vsrlh: + case Intrinsic::sw64_vsrlw: + case Intrinsic::sw64_vsrll: + NewIntrinsic = Intrinsic::sw64_vsrl; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(1), Op.getOperand(2)); + // Fallthough + case Intrinsic::sw64_vsrab: + case Intrinsic::sw64_vsrah: + case Intrinsic::sw64_vsraw: + case Intrinsic::sw64_vsral: + NewIntrinsic = Intrinsic::sw64_vsra; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::sw64_vrolb: + case Intrinsic::sw64_vrolh: + case Intrinsic::sw64_vrolw: + case Intrinsic::sw64_vroll: + NewIntrinsic = Intrinsic::sw64_vrol; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::sw64_vlogzz: + return DAG.getNode(Sw64ISD::VLOG, dl, VT, Op.getOperand(1), + 
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); + case Intrinsic::sw64_vmaxb: + case Intrinsic::sw64_vmaxh: + case Intrinsic::sw64_vmaxw: + case Intrinsic::sw64_vmaxl: + return DAG.getNode(Sw64ISD::VMAX, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vumaxb: + case Intrinsic::sw64_vumaxh: + case Intrinsic::sw64_vumaxw: + case Intrinsic::sw64_vumaxl: + return DAG.getNode(Sw64ISD::VUMAX, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vminb: + case Intrinsic::sw64_vminh: + case Intrinsic::sw64_vminw: + case Intrinsic::sw64_vminl: + return DAG.getNode(Sw64ISD::VMIN, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vuminb: + case Intrinsic::sw64_vuminh: + case Intrinsic::sw64_vuminw: + case Intrinsic::sw64_vuminl: + return DAG.getNode(Sw64ISD::VUMIN, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vmaxs: + case Intrinsic::sw64_vmaxd: + return DAG.getNode(Sw64ISD::VMAXF, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vmins: + case Intrinsic::sw64_vmind: + return DAG.getNode(Sw64ISD::VMINF, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + + case Intrinsic::sw64_vseleqw: + case Intrinsic::sw64_vseleqwi: + return DAG.getNode(Sw64ISD::VSELEQW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vselltw: + case Intrinsic::sw64_vselltwi: + return DAG.getNode(Sw64ISD::VSELLTW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vsellew: + case Intrinsic::sw64_vsellewi: + return DAG.getNode(Sw64ISD::VSELLEW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vsellbcw: + case Intrinsic::sw64_vsellbcwi: + return DAG.getNode(Sw64ISD::VSELLBCW, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vsqrts: + case Intrinsic::sw64_vsqrtd: + return DAG.getNode(Sw64ISD::VSQRT, dl, Op->getValueType(0), + Op->getOperand(1)); + + case Intrinsic::sw64_vsums: + case Intrinsic::sw64_vsumd: + return DAG.getNode(Sw64ISD::VSUMF, dl, Op->getValueType(0), + Op->getOperand(1)); + + case Intrinsic::sw64_vfrecs: + case Intrinsic::sw64_vfrecd: + return DAG.getNode(Sw64ISD::VFREC, dl, Op->getValueType(0), + Op->getOperand(1)); + + case Intrinsic::sw64_vfcmpeqs: + case Intrinsic::sw64_vfcmpeqd: + return DAG.getNode(Sw64ISD::VFCMPEQ, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vfcmples: + case Intrinsic::sw64_vfcmpled: + return DAG.getNode(Sw64ISD::VFCMPLE, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vfcmplts: + case Intrinsic::sw64_vfcmpltd: + return DAG.getNode(Sw64ISD::VFCMPLT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vfcmpuns: + case Intrinsic::sw64_vfcmpund: + return DAG.getNode(Sw64ISD::VFCMPUN, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + + case Intrinsic::sw64_vfcvtsd: + return DAG.getNode(Sw64ISD::VFCVTSD, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtds: + return DAG.getNode(Sw64ISD::VFCVTDS, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtls: + return DAG.getNode(Sw64ISD::VFCVTLS, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtld: 
+ return DAG.getNode(Sw64ISD::VFCVTLD, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtsh: + return DAG.getNode(Sw64ISD::VFCVTSH, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vfcvths: + return DAG.getNode(Sw64ISD::VFCVTHS, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + + case Intrinsic::sw64_vfcvtdl: + return DAG.getNode(Sw64ISD::VFCVTDL, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtdl_g: + return DAG.getNode(Sw64ISD::VFCVTDLG, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtdl_p: + return DAG.getNode(Sw64ISD::VFCVTDLP, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtdl_z: + return DAG.getNode(Sw64ISD::VFCVTDLZ, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfcvtdl_n: + return DAG.getNode(Sw64ISD::VFCVTDLN, dl, Op->getValueType(0), + Op->getOperand(1)); + + case Intrinsic::sw64_vfris: + return DAG.getNode(Sw64ISD::VFRIS, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfris_g: + return DAG.getNode(Sw64ISD::VFRISG, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfris_p: + return DAG.getNode(Sw64ISD::VFRISP, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfris_z: + return DAG.getNode(Sw64ISD::VFRISZ, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfris_n: + return DAG.getNode(Sw64ISD::VFRISN, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid: + return DAG.getNode(Sw64ISD::VFRID, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid_g: + return DAG.getNode(Sw64ISD::VFRIDG, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid_p: + return DAG.getNode(Sw64ISD::VFRIDP, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid_z: + return DAG.getNode(Sw64ISD::VFRIDZ, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vfrid_n: + return DAG.getNode(Sw64ISD::VFRIDN, dl, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::sw64_vextw: + case Intrinsic::sw64_vextl: + case Intrinsic::sw64_vextfs: + case Intrinsic::sw64_vextfd: + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::sw64_vfseleqs: + case Intrinsic::sw64_vfseleqd: + return DAG.getNode(Sw64ISD::VFCMOVEQ, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vfselles: + case Intrinsic::sw64_vfselled: + return DAG.getNode(Sw64ISD::VFCMOVLE, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vfsellts: + case Intrinsic::sw64_vfselltd: + return DAG.getNode(Sw64ISD::VFCMOVLT, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::sw64_vshfw: + return DAG.getNode(Sw64ISD::VSHF, dl, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + } + return Op; +} + +SDValue Sw64TargetLowering::LowerVectorMemIntr(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + EVT VT = Op.getValueType(); + LLVM_DEBUG(dbgs() << "Custom Lower Vector Memory Intrinsics\n"; Op.dump();); + SDValue Args = Op.getOperand(2); + switch (IntNo) { + default: + break; + case Intrinsic::sw64_vload: + return DAG.getNode(ISD::LOAD, dl, VT, Args); + 
} + return Op; +} + +SDValue Sw64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = Op.getConstantOperandVal(1); + unsigned NewIntrinsic; + EVT VT = Op.getValueType(); + switch (IntNo) { + default: + break; // Don't custom lower most intrinsics. + case Intrinsic::sw64_vloadu: { + SDValue Chain = Op->getOperand(0); + SDVTList VTs = DAG.getVTList(VT.getSimpleVT().SimpleTy, MVT::Other); + NewIntrinsic = Intrinsic::sw64_vload_u; + SDValue VLOAD_U1 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Chain, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(2)); + SDValue Hiaddr = + DAG.getNode(ISD::ADD, dl, MVT::i64, + DAG.getConstant((VT == MVT::v4f32 ? 16 : 32), dl, MVT::i64), + Op->getOperand(2)); + SDValue VLOAD_U2 = + DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Chain, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), Hiaddr); + + switch (VT.getSimpleVT().SimpleTy) { + default: + break; + case MVT::v8i32: + NewIntrinsic = Intrinsic::sw64_vconw; + break; + case MVT::v4f32: + NewIntrinsic = Intrinsic::sw64_vcons; + break; + case MVT::v4f64: + case MVT::v4i64: + NewIntrinsic = Intrinsic::sw64_vcond; + break; + } + return DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, VTs, Chain, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), VLOAD_U1, + VLOAD_U2, Op->getOperand(2)); + } + } + return SDValue(); +} + +SDValue Sw64TargetLowering::LowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + unsigned NewIntrinsic; + EVT VT = Op.getValueType(); + EVT VTOperand2 = Op.getOperand(2).getValueType(); + switch (IntNo) { + case Intrinsic::sw64_vstoreu: { + NewIntrinsic = Intrinsic::sw64_vstoreul; + SDValue VSTOREUL = + DAG.getNode(ISD::INTRINSIC_VOID, dl, VT, Op.getOperand(0), + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(2), Op.getOperand(3)); + + SDValue Hiaddr = DAG.getNode( + ISD::ADD, dl, MVT::i64, + DAG.getConstant((VTOperand2 == MVT::v4f32 ? 
16 : 32), dl, MVT::i64), + Op->getOperand(3)); + NewIntrinsic = Intrinsic::sw64_vstoreuh; + return DAG.getNode(ISD::INTRINSIC_VOID, dl, VT, VSTOREUL, + DAG.getConstant(NewIntrinsic, dl, MVT::i64), + Op.getOperand(2), Hiaddr); + } + default: + break; + } + return Op; +} + +SDValue Sw64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + MVT VecVT = Vec.getSimpleValueType(); + SDValue Idx = Op.getOperand(1); + MVT EltVT = VecVT.getVectorElementType(); + if (EltVT != MVT::i32 && EltVT != MVT::f32 && EltVT != MVT::f64) + return SDValue(); + + if (!dyn_cast(Idx)) + return SDValue(); + + SDValue tmp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Vec, Idx); + return tmp; + // return DAG.getAnyExtOrTrunc(tmp, dl, MVT::i32); +} + +SDValue Sw64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue Idx = Op.getOperand(2); + + if (!dyn_cast(Idx)) + return SDValue(); + + return Op; +} + +static bool isConstantOrUndef(const SDValue Op) { + if (Op->isUndef()) + return true; + if (isa(Op)) + return true; + if (isa(Op)) + return true; + return false; +} + +static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { + for (unsigned i = 0; i < Op->getNumOperands(); ++i) + if (isConstantOrUndef(Op->getOperand(i))) + return true; + return false; +} + +SDValue Sw64TargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + BuildVectorSDNode *Node = cast(Op); + SDLoc dl(Op); + MVT VecVT = Op.getSimpleValueType(); + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Subtarget.hasSIMD() || !ResTy.is256BitVector()) + return SDValue(); + + if (VecVT.isInteger()) { + // Certain vector constants, used to express things like logical NOT and + // arithmetic NEG, are passed through unmodified. This allows special + // patterns for these operations to match, which will lower these constants + // to whatever is proven necessary. + BuildVectorSDNode *BVN = cast(Op.getNode()); + if (BVN->isConstant()) + if (ConstantSDNode *Const = BVN->getConstantSplatNode()) { + unsigned BitSize = VecVT.getVectorElementType().getSizeInBits(); + APInt Val(BitSize, + Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue()); + if (Val.isZero() || Val.isAllOnes()) + return Op; + } + } + MVT ElemTy = Op->getSimpleValueType(0).getScalarType(); + unsigned ElemBits = ElemTy.getSizeInBits(); + + if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, false) && + SplatBitSize <= 64 && ElemBits == SplatBitSize) { + // We can only cope with 8, 16, 32, or 64-bit elements + if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && + SplatBitSize != 64) + return SDValue(); + + // If the value isn't an integer type we will have to bitcast + // from an integer type first. Also, if there are any undefs, we must + // lower them to defined values first. + if (ResTy.isInteger() && !HasAnyUndefs) { + return DAG.getNode(Sw64ISD::VBROADCAST, dl, ResTy, Op.getOperand(1)); + } + + EVT ViaVecTy; + + switch (SplatBitSize) { + default: + return SDValue(); + case 8: + ViaVecTy = MVT::v32i8; + break; + case 16: + ViaVecTy = MVT::v16i16; + break; + case 32: + ViaVecTy = MVT::v8i32; + break; + case 64: + ViaVecTy = MVT::v4i64; + break; + } + + // SelectionDAG::getConstant will promote SplatValue appropriately. 
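+    // Example: a v4f64 constant splat reaches this point with
+    // SplatBitSize == 64, so ViaVecTy is v4i64; the splat is materialized as
+    // a v4i64 integer constant and bitcast back to v4f64 below.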
+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); + + // Bitcast to the type we originally wanted + if (ViaVecTy != ResTy) + Result = DAG.getNode(ISD::BITCAST, dl, ResTy, Result); + + return Result; + } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) { + return DAG.getNode(Sw64ISD::VBROADCAST, dl, ResTy, Op.getOperand(1)); + } else if (!isConstantOrUndefBUILD_VECTOR(Node)) { + // Use INSERT_VECTOR_ELT operations rather than expand to stores. + // The resulting code is the same length as the expansion, but it doesn't + // use memory operations. + EVT ResTy = Node->getValueType(0); + + assert(ResTy.isVector()); + + unsigned NumElts = ResTy.getVectorNumElements(); + SDValue Vector = DAG.getUNDEF(ResTy); + for (unsigned i = 0; i < NumElts; ++i) { + Vector = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, + Node->getOperand(i), DAG.getConstant(i, DL, MVT::i64)); + } + return Vector; + } + + return SDValue(); +} + +SDValue Sw64TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode &Nd = *cast<StoreSDNode>(Op); + + if (Nd.getMemoryVT() != MVT::v4i32) + return Op; + + // Replace a v4i64-to-v4i32 truncating store with a VTRUNCST node. + SDLoc DL(Op); + + SDValue Val = Op->getOperand(1); + + return DAG.getMemIntrinsicNode(Sw64ISD::VTRUNCST, DL, + DAG.getVTList(MVT::Other), + {Nd.getChain(), Val, Nd.getBasePtr()}, + Nd.getMemoryVT(), Nd.getMemOperand()); +} + +SDValue Sw64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + // Sw64 does not produce a generic v4i64 setcc result, but a v4f64/f32 + // result of 2.0. An additional compare is needed to reverse the result. + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + SDLoc DL(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + // TODO: Truncate the v4i64 compare to v4f64. + // Sw64 doesn't have a v4i64 compare. Due to how LLVM legalizes vectors, all + // comparisons are processed as integers, so a 64-bit-element vector compare + // result is v4i64. + // So we have to do it for now.
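+  // For SETO ("ordered"), the code below emits a SETUO compare, bitcasts the
+  // v4i64 mask to v4f64, and compares it for ordered-equality against
+  // register V31 (assumed to read as zero), which inverts the mask.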
+ if (LHS.getValueType() == MVT::v4i64 && RHS.getValueType() == MVT::v4i64) { + return SDValue(); + } + + if (CC != ISD::SETO) + return Op; + + SDValue Res = DAG.getSetCC(DL, MVT::v4i64, Op.getOperand(0), Op.getOperand(1), + ISD::SETUO); + SDValue Zero = DAG.getRegister(Sw64::V31, MVT::v4f64); + SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::v4f64, Res); + return DAG.getSetCC(DL, MVT::v4i64, Cast, Zero, ISD::SETOEQ); +} + +SDValue Sw64TargetLowering::LowerSHL_PARTS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = MVT::i64; + + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + // if shamt < (VT.bits): + // lo = (shl lo, shamt) + // hi = (or (shl hi, shamt) (srl (srl lo, 1), (xor shamt, (VT.bits-1))) + // else: + // lo = 0 + // hi = (shl lo, shamt[4:0]) + SDValue Not = + DAG.getNode(ISD::XOR, DL, MVT::i64, Shamt, + DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i64)); + SDValue ShiftRight1Lo = + DAG.getNode(ISD::SRL, DL, VT, Lo, DAG.getConstant(1, DL, VT)); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); + SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i64, Shamt, + DAG.getConstant(VT.getSizeInBits(), DL, MVT::i64)); + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, DAG.getConstant(0, DL, VT), + ShiftLeftLo); + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, DL); +} + +SDValue Sw64TargetLowering::LowerSRL_PARTS(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64, + DAG.getConstant(64, dl, MVT::i64), ShAmt); + SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm, + DAG.getConstant(0, dl, MVT::i64), ISD::SETLE); + // if 64 - shAmt <= 0 + SDValue Hi_Neg = DAG.getConstant(0, dl, MVT::i64); + SDValue ShAmt_Neg = + DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(0, dl, MVT::i64), bm); + SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg); + // else + SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm); + SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt); + SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt); + Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries); + // Merge + SDValue Hit = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos); + SDValue Lot = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos); + SDValue BMCC1 = DAG.getSetCC(dl, MVT::i64, ShAmt, + DAG.getConstant(0, dl, MVT::i64), ISD::SETEQ); + SDValue BMCC2 = DAG.getSetCC(dl, MVT::i64, ShAmt, + DAG.getConstant(64, dl, MVT::i64), ISD::SETEQ); + SDValue Hit1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpHi, Hit); + SDValue Lot1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpLo, Lot); + SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, + DAG.getConstant(0, dl, MVT::i64), Hit1); + SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, ShOpHi, Lot1); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, dl); +} + +SDValue Sw64TargetLowering::LowerSRA_PARTS(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + 
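+  // Same structure as LowerSRL_PARTS, but shifting in sign bits:
+  //   bm = 64 - shamt
+  //   if bm <= 0:  lo = (sra hi, -bm),                  hi = (sra hi, 63)
+  //   else:        lo = (srl lo, shamt) | (shl hi, bm), hi = (sra hi, shamt)
+  //   shamt == 0 and shamt == 64 are then fixed up with explicit selects.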
SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64, + DAG.getConstant(64, dl, MVT::i64), ShAmt); + SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm, + DAG.getConstant(0, dl, MVT::i64), ISD::SETLE); + // if 64 - shAmt <= 0 + SDValue Hi_Neg = DAG.getNode(ISD::SRA, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, dl, MVT::i64)); + SDValue ShAmt_Neg = + DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(0, dl, MVT::i64), bm); + SDValue Lo_Neg = DAG.getNode(ISD::SRA, dl, MVT::i64, ShOpHi, ShAmt_Neg); + // else + SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm); + SDValue Hi_Pos = DAG.getNode(ISD::SRA, dl, MVT::i64, ShOpHi, ShAmt); + SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt); + Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries); + // Merge + SDValue Hit = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos); + SDValue Lot = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos); + SDValue BMCC1 = DAG.getSetCC(dl, MVT::i64, ShAmt, + DAG.getConstant(0, dl, MVT::i64), ISD::SETEQ); + SDValue BMCC2 = DAG.getSetCC(dl, MVT::i64, ShAmt, + DAG.getConstant(64, dl, MVT::i64), ISD::SETEQ); + SDValue Hit1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpHi, Hit); + SDValue Lot1 = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC1, ShOpLo, Lot); + SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, + DAG.getNode(ISD::SRA, dl, MVT::i64, ShOpHi, + DAG.getConstant(63, dl, MVT::i64)), + Hit1); + SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC2, ShOpHi, Lot1); + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, dl); +} + +SDValue Sw64TargetLowering::LowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + assert(Op.getOperand(0).getValueType() == MVT::i64 && + "Unhandled SINT_TO_FP type in custom expander!"); + SDValue LD; + bool isDouble = Op.getValueType() == MVT::f64; + LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); + SDValue FP = DAG.getNode(isDouble ? Sw64ISD::CVTQT_ : Sw64ISD::CVTQS_, dl, + isDouble ? 
MVT::f64 : MVT::f32, LD); + return FP; +} + +SDValue Sw64TargetLowering::LowerFP_TO_SINT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + bool isDouble = Op.getOperand(0).getValueType() == MVT::f64; + SDValue src = Op.getOperand(0); + + if (!isDouble) // Promote + src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src); + + src = DAG.getNode(Sw64ISD::CVTTQ_, dl, MVT::f64, src); + + return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src); +} + +SDValue Sw64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, + SelectionDAG &DAG) const { + SDValue width = Op.getOperand(1); + + if (width.getValueType() != MVT::i64) + width = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), MVT::i64, width); + + return expandFP_TO_INT_SAT(Op.getNode(), DAG); +} + +// ---------------------------------------------------------- +// Construct a new chained call to libgcc to replace the old chain, +// turning udiv/sdiv (i128, i128) into call %sret, i128, i128 +// +// ---------------------------------------------------------- +SDValue Sw64TargetLowering::LowerSUDIVI128(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + + if (!Op.getValueType().isInteger()) + return SDValue(); + RTLIB::Libcall LC; + bool isSigned; + switch (Op->getOpcode()) { + default: + llvm_unreachable("Unexpected request for libcall!"); + case ISD::SDIV: + isSigned = true; + LC = RTLIB::SDIV_I128; + break; + case ISD::UDIV: + isSigned = false; + LC = RTLIB::UDIV_I128; + break; + case ISD::SREM: + isSigned = true; + LC = RTLIB::SREM_I128; + break; + case ISD::UREM: + isSigned = false; + LC = RTLIB::UREM_I128; + break; + } + SDValue InChain = DAG.getEntryNode(); + + // Create an extra stack object to store the libcall result + SDValue DemoteStackSlot; + TargetLowering::ArgListTy Args; + auto &DL = DAG.getDataLayout(); + uint64_t TySize = 16; + MachineFunction &MF = DAG.getMachineFunction(); + int DemoteStackIdx = + MF.getFrameInfo().CreateStackObject(TySize, Align(8), false); + EVT ArgVT = Op->getOperand(0).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Type *StackSlotPtrType = PointerType::get(ArgTy, DL.getAllocaAddrSpace()); + // Save the sret information + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.IsSRet = true; + Entry.Alignment = Align(8); + Args.push_back(Entry); + + // Pass the udiv/sdiv operands as arguments + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + ArgListEntry Entry; + ArgVT = Op->getOperand(i).getValueType(); + assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 && + "Unexpected argument type for lowering"); + Entry.Node = Op->getOperand(i); + Entry.Ty = IntegerType::get(*DAG.getContext(), 128); + Entry.IsInReg = true; + Entry.IsSExt = isSigned; + Entry.IsZExt = false; + Args.push_back(Entry); + } + + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy(DAG.getDataLayout())); + // Create a new libcall to perform the udiv/sdiv + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(InChain) + .setLibCallee( + getLibcallCallingConv(LC), + static_cast<EVT>(MVT::isVoid).getTypeForEVT(*DAG.getContext()), + Callee, std::move(Args)) + .setNoReturn(true) + .setSExtResult(isSigned) + .setZExtResult(!isSigned); + + SDValue CallInfo = LowerCallTo(CLI).second; + return LowerCallExtraResult(CallInfo, DemoteStackSlot, DemoteStackIdx, DAG) + .first; +} + +// -------------------------------------------------------------------- +// when a call using sret 
arugments pass in register, the call result +// must be handled, create a load node and tokenfactor to pass the call +// result +// -------------------------------------------------------------------- +std::pair Sw64TargetLowering::LowerCallExtraResult( + SDValue &Chain, SDValue &DemoteStackSlot, unsigned DemoteStackIdx, + SelectionDAG &DAG) const { + SmallVector Chains(1), ReturnValues(1); + SDLoc DL(Chain); + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, DemoteStackSlot, + DAG.getConstant(0, DL, MVT::i64), Flags); + SDValue L = DAG.getLoad(MVT::i128, DL, Chain, Add, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), DemoteStackIdx, 0), + /* Alignment = */ 8); + Chains[0] = L.getValue(1); + ReturnValues[0] = L; + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(MVT::i128), + ReturnValues); + return std::make_pair(Res, Chain); +} + +SDValue Sw64TargetLowering::LowerExternalSymbol(SDValue Op, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Sw64:: begin lowExternalSymbol----\n"); + SDLoc dl(Op); + return DAG.getNode(Sw64ISD::RelLit, dl, MVT::i64, + DAG.getTargetExternalSymbol( + cast(Op)->getSymbol(), MVT::i64), + DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); +} + +SDValue Sw64TargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + // FIXME: Need pseudo-fence for 'singlethread' fences + // FIXME: Set SType for weaker fences where supported/appropriate. + SDLoc DL(Op); + return DAG.getNode(Sw64ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + +SDValue Sw64TargetLowering::LowerATOMIC_LOAD(SDValue Op, + SelectionDAG &DAG) const { + AtomicSDNode *N = cast(Op); + assert(N->getOpcode() == ISD::ATOMIC_LOAD && "Bad Atomic OP"); + assert((N->getSuccessOrdering() == AtomicOrdering::Unordered || + N->getSuccessOrdering() == AtomicOrdering::Monotonic) && + "setInsertFencesForAtomic(true) expects unordered / monotonic"); + EVT VT = N->getMemoryVT(); + SDValue Result; + if (VT != MVT::i64) + Result = + DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(Op), MVT::i64, N->getChain(), + N->getBasePtr(), N->getPointerInfo(), VT, N->getAlign(), + N->getMemOperand()->getFlags(), N->getAAInfo()); + else + Result = DAG.getLoad(MVT::i64, SDLoc(Op), N->getChain(), N->getBasePtr(), + N->getPointerInfo(), N->getAlign(), + N->getMemOperand()->getFlags(), N->getAAInfo(), + N->getRanges()); + return Result; +} + +SDValue Sw64TargetLowering::LowerATOMIC_STORE(SDValue Op, + SelectionDAG &DAG) const { + AtomicSDNode *N = cast(Op); + assert(N->getOpcode() == ISD::ATOMIC_STORE && "Bad Atomic OP"); + assert((N->getSuccessOrdering() == AtomicOrdering::Unordered || + N->getSuccessOrdering() == AtomicOrdering::Monotonic) && + "setInsertFencesForAtomic(true) expects unordered / monotonic"); + + return DAG.getStore(N->getChain(), SDLoc(Op), N->getVal(), N->getBasePtr(), + N->getPointerInfo(), N->getAlign(), + N->getMemOperand()->getFlags(), N->getAAInfo()); +} +MachineMemOperand::Flags +Sw64TargetLowering::getTargetMMOFlags(const Instruction &I) const { + // Because of how we convert atomic_load and atomic_store to normal loads and + // stores in the DAG, we need to ensure that the MMOs are marked volatile + // since DAGCombine hasn't been updated to account for atomic, but non + // volatile loads. 
(See D57601) + if (auto *SI = dyn_cast<StoreInst>(&I)) + if (SI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *LI = dyn_cast<LoadInst>(&I)) + if (LI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *AI = dyn_cast<AtomicRMWInst>(&I)) + if (AI->isAtomic()) + return MachineMemOperand::MOVolatile; + if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I)) + if (AI->isAtomic()) + return MachineMemOperand::MOVolatile; + return MachineMemOperand::MONone; +} + +SDValue Sw64TargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const { + SDValue N0 = Op->getOperand(0); + SDValue N1 = Op->getOperand(1); + EVT VT = N1.getValueType(); + SDLoc dl(Op); + if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { + const APInt &C1Val = C1->getAPIntValue(); + if (C1Val.isPowerOf2()) { + SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), dl, VT); + return DAG.getNode(Sw64ISD::SBT, dl, VT, N0, ShAmtC); + } + } + // if ((or (srl, shl)) || (or (shl, srl))) then rolw + if ((N0->getOpcode() == ISD::SRL && N1->getOpcode() == ISD::SRL) || + (N0->getOpcode() == ISD::SRL && N1->getOpcode() == ISD::SHL)) + if (N0->getOperand(1)->getOperand(0)->getOpcode() == ISD::SUB && + N0->getOperand(1)->getOperand(0)->getConstantOperandVal(0) == 32) + return DAG.getNode(Sw64ISD::ROLW, dl, VT, N1->getOperand(0), + N1->getOperand(1)->getOperand(0)); + return SDValue(); +} + +SDValue Sw64TargetLowering::LowerSUREM(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + // Expand only in the constant case, + // and only when the constant divisor is non-zero + if (Op.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op.getNode()->getOperand(1))->getAPIntValue() != 0) { + + EVT VT = Op.getNode()->getValueType(0); + + SmallVector<SDNode *, 8> Built; + SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM + ? BuildUDIV(Op.getNode(), DAG, false, Built) + : BuildSDIV(Op.getNode(), DAG, false, Built); + + Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1)); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1); + + return Tmp1; + } + + return LowerSUDIV(Op, DAG); +} + +SDValue Sw64TargetLowering::LowerSUDIV(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + + if (!Op.getValueType().isInteger()) + return SDValue(); + + // Expand only for a constant, non-zero divisor + if (Op.getOperand(1).getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op.getNode()->getOperand(1))->getAPIntValue() != 0) { + SmallVector<SDNode *, 8> Built; + return Op.getOpcode() == ISD::SDIV + ? 
BuildSDIV(Op.getNode(), DAG, true, Built) + : BuildUDIV(Op.getNode(), DAG, true, Built); + } + + const char *opstr = 0; + switch (Op.getOpcode()) { + case ISD::UREM: + opstr = "__remlu"; + break; + case ISD::SREM: + opstr = "__reml"; + break; + case ISD::UDIV: + opstr = "__divlu"; + break; + case ISD::SDIV: + opstr = "__divl"; + break; + } + + SDValue Tmp1 = Op.getOperand(0); + SDValue Tmp2 = Op.getOperand(1); + SDValue Addr = DAG.getExternalSymbol(opstr, MVT::i64); + return DAG.getNode(Sw64ISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2); +} + +SDValue Sw64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue Chain, DataPtr; + LowerVAARG(Op.getNode(), Chain, DataPtr, DAG); + SDValue Result; + if (Op.getValueType() == MVT::i32) + Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, + MachinePointerInfo(), MVT::i32); + else if (Op.getValueType() == MVT::f32) { + Result = DAG.getLoad(MVT::f64, dl, Chain, DataPtr, MachinePointerInfo()); + SDValue InFlags = Result.getValue(1); + SmallVector Ops; + Ops.push_back(InFlags); + Ops.push_back(Result); + SDVTList NodeTys = DAG.getVTList(MVT::f32, MVT::Other); + Result = DAG.getNode(Sw64ISD::CVTTS_, dl, NodeTys, Ops); + } else { + Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, + MachinePointerInfo()); + } + return Result; +} + +SDValue Sw64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue Chain = Op.getOperand(0); + SDValue DestP = Op.getOperand(1); + SDValue SrcP = Op.getOperand(2); + const Value *DestS = cast(Op.getOperand(3))->getValue(); + const Value *SrcS = cast(Op.getOperand(4))->getValue(); + SDValue Val = DAG.getLoad(getPointerTy(DAG.getDataLayout()), dl, Chain, SrcP, + MachinePointerInfo(SrcS)); + SDValue Result = + DAG.getStore(Val.getValue(1), dl, Val, DestP, MachinePointerInfo(DestS)); + SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, + DAG.getConstant(8, dl, MVT::i64)); + Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, NP, + MachinePointerInfo(), MVT::i32); + SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, + DAG.getConstant(8, dl, MVT::i64)); + return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, MachinePointerInfo(), + MVT::i32); +} + +SDValue Sw64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + MachineFunction &MF = DAG.getMachineFunction(); + Sw64MachineFunctionInfo *FuncInfo = MF.getInfo(); + + SDValue Chain = Op.getOperand(0); + SDValue VAListP = Op.getOperand(1); + const Value *VAListS = cast(Op.getOperand(2))->getValue(); + + // vastart stores the address of the VarArgsBase and VarArgsOffset + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64); + SDValue S1 = + DAG.getStore(Chain, dl, FR, VAListP, MachinePointerInfo(VAListS)); + SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, + DAG.getConstant(8, dl, MVT::i64)); + + return DAG.getTruncStore( + S1, dl, DAG.getConstant(FuncInfo->getVarArgsOffset(), dl, MVT::i64), SA2, + MachinePointerInfo(), MVT::i32); +} + +// Prefetch operands are: +// 1: Address to prefetch +// 2: bool isWrite +// 3: int locality (0 = no locality ... 3 = extreme locality) +// 4: bool isDataCache +SDValue Sw64TargetLowering::LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + unsigned IsWrite = cast(Op.getOperand(2))->getZExtValue(); + // unsigned Locality = cast(Op.getOperand(3))->getZExtValue(); + unsigned IsData = cast(Op.getOperand(4))->getZExtValue(); + + unsigned Code = IsData ? 
Sw64ISD::Z_S_FILLCS : Sw64ISD::Z_FILLCS; + if (IsWrite == 1 && IsData == 1) + Code = Sw64ISD::Z_FILLDE; + if (IsWrite == 0 && IsData == 1) + Code = Sw64ISD::Z_FILLCS; + if (IsWrite == 1 && IsData == 0) + Code = Sw64ISD::Z_S_FILLDE; + if (IsWrite == 0 && IsData == 0) + Code = Sw64ISD::Z_FILLCS; + + unsigned PrfOp = 0; + + return DAG.getNode(Code, DL, MVT::Other, Op.getOperand(0), + DAG.getConstant(PrfOp, DL, MVT::i64), Op.getOperand(1)); +} + +SDValue Sw64TargetLowering::LowerROLW(SDNode *N, SelectionDAG &DAG) const { + SDLoc DL(N); + + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewRes = DAG.getNode(Sw64ISD::ROLW, DL, MVT::i64, NewOp0, NewOp1); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); +} + +SDValue Sw64TargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + // check the depth + if (cast(Op.getOperand(0))->getZExtValue() != 0) { + DAG.getContext()->emitError( + "return address can be determined only for current frame"); + return SDValue(); + } + + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Sw64::R15, VT); + return FrameAddr; +} + +void Sw64TargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const { + SDLoc dl(N); + switch (N->getOpcode()) { + default: + break; + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: { + SDValue Res = LowerSUDIVI128(SDValue(N, 0), DAG); + Results.push_back(Res); + return; + } + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_STORE: + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + return; + case ISD::FP_TO_SINT: { + SDValue NewRes = + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, N->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), NewRes)); + return; + } + case ISD::ROTL: + SDValue Res = LowerROLW(N, DAG); + Results.push_back(Res); + return; + } + assert(N->getValueType(0) == MVT::i32 && N->getOpcode() == ISD::VAARG && + "Unknown node to custom promote!"); + + SDValue Chain, DataPtr; + LowerVAARG(N, Chain, DataPtr, DAG); + + SDValue Res = + DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, MachinePointerInfo()); + + Results.push_back(Res); + Results.push_back(SDValue(Res.getNode(), 1)); +} + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
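+/// For this target only 'f' and 'r' are handled here (both map to
+/// C_RegisterClass); everything else defers to the generic implementation.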
+Sw64TargetLowering::ConstraintType +Sw64TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'f': + case 'r': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +unsigned Sw64TargetLowering::MatchRegName(StringRef Name) const { + unsigned Reg = StringSwitch(Name.lower()) + .Case("$0", Sw64::R0) + .Case("$1", Sw64::R1) + .Case("$2", Sw64::R2) + .Case("$3", Sw64::R3) + .Case("$4", Sw64::R4) + .Case("$5", Sw64::R5) + .Case("$6", Sw64::R6) + .Case("$7", Sw64::R7) + .Case("$8", Sw64::R8) + .Case("$9", Sw64::R9) + .Case("$10", Sw64::R10) + .Case("$11", Sw64::R11) + .Case("$12", Sw64::R12) + .Case("$13", Sw64::R13) + .Case("$14", Sw64::R14) + .Case("$15", Sw64::R15) + .Case("$16", Sw64::R16) + .Case("$17", Sw64::R17) + .Case("$18", Sw64::R18) + .Case("$19", Sw64::R19) + .Case("$20", Sw64::R20) + .Case("$21", Sw64::R21) + .Case("$22", Sw64::R22) + .Case("$23", Sw64::R23) + .Case("$24", Sw64::R24) + .Case("$25", Sw64::R25) + .Case("$26", Sw64::R26) + .Case("$27", Sw64::R27) + .Case("$28", Sw64::R28) + .Case("$29", Sw64::R29) + .Case("$30", Sw64::R30) + .Case("$31", Sw64::R31) + .Default(0); + return Reg; +} +Register +Sw64TargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { + Register Reg = MatchRegName(StringRef(RegName)); + if (Reg) + return Reg; + + report_fatal_error("Sw Invalid register name global variable"); +} +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +Sw64TargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'f': + weight = CW_Register; + break; + } + return weight; +} + +Instruction *Sw64TargetLowering::emitLeadingFence(IRBuilderBase &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + if (isa(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) + return Builder.CreateFence(AtomicOrdering::AcquireRelease); + if (isa(Inst) && isReleaseOrStronger(Ord)) + return Builder.CreateFence(AtomicOrdering::Release); + return nullptr; +} + +Instruction *Sw64TargetLowering::emitTrailingFence(IRBuilderBase &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + if (isa(Inst) && isAcquireOrStronger(Ord)) + return Builder.CreateFence(AtomicOrdering::AcquireRelease); + if (isa(Inst) && Ord == AtomicOrdering::SequentiallyConsistent) + return Builder.CreateFence(AtomicOrdering::Release); + return nullptr; +} + +/// This is a helper function to parse a physical register string and split it +/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag +/// that is returned indicates whether parsing was successful. The second flag +/// is true if the numeric part exists. 
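+/// For example, "{$17}" yields Prefix "$" and Reg 17, and "{$f3}" yields
+/// Prefix "$f" and Reg 3.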
+static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, + unsigned long long &Reg) { + if (C.front() != '{' || C.back() != '}') + return std::make_pair(false, false); + + // Search for the first numeric character. + StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; + I = std::find_if(B, E, isdigit); + + Prefix = StringRef(B, I - B); + + // The second flag is set to false if no numeric characters were found. + if (I == E) + return std::make_pair(true, false); + + // Parse the numeric characters. + return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg), + true); +} + +std::pair +Sw64TargetLowering::parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { + const TargetRegisterClass *RC; + StringRef Prefix; + unsigned long long Reg; + + std::pair R = parsePhysicalReg(C, Prefix, Reg); + + if (!R.first) + return std::make_pair(0U, nullptr); + + if (!R.second) + return std::make_pair(0U, nullptr); + + if (Prefix == "$f") { // Parse $f0-$f31. + // The size of FP registers is 64-bit or Reg is an even number, select + // the 64-bit register class. + if (VT == MVT::Other) + VT = MVT::f64; + + RC = getRegClassFor(VT); + + } else { // Parse $0-$31. + assert(Prefix == "$"); + // Sw64 has only i64 register. + RC = getRegClassFor(MVT::i64); + StringRef name((C.data() + 1), (C.size() - 2)); + + return std::make_pair(MatchRegName(name), RC); + } + + assert(Reg < RC->getNumRegs()); + return std::make_pair(*(RC->begin() + Reg), RC); +} +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. +std::pair +Sw64TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return std::make_pair(0U, &Sw64::GPRCRegClass); + case 'f': + return VT == MVT::f64 ? 
std::make_pair(0U, &Sw64::F8RCRegClass) + : std::make_pair(0U, &Sw64::F4RCRegClass); + } + } + + std::pair R; + R = parseRegForInlineAsmConstraint(Constraint, VT); + + if (R.second) + return R; + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +//===----------------------------------------------------------------------===// +// Other Lowering Code +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +Sw64TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected instr type to insert"); + + case Sw64::FILLCS: + case Sw64::FILLDE: + case Sw64::S_FILLCS: + case Sw64::S_FILLDE: + return emitPrefetch(MI, BB); + + // I64 && I32 + case Sw64::ATOMIC_LOAD_ADD_I32: + case Sw64::LAS32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_ADD_I64: + case Sw64::LAS64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_SWAP_I32: + case Sw64::SWAP32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_SWAP_I64: + case Sw64::SWAP64: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_CMP_SWAP_I32: + case Sw64::CAS32: + return emitAtomicCmpSwap(MI, BB, 4); + case Sw64::ATOMIC_CMP_SWAP_I64: + case Sw64::CAS64: + return emitAtomicCmpSwap(MI, BB, 8); + + case Sw64::ATOMIC_LOAD_AND_I32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_AND_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_OR_I32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_OR_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_SUB_I32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_SUB_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_XOR_I32: + return emitAtomicBinary(MI, BB); + case Sw64::ATOMIC_LOAD_XOR_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_UMAX_I64: + case Sw64::ATOMIC_LOAD_MAX_I64: + case Sw64::ATOMIC_LOAD_UMIN_I64: + case Sw64::ATOMIC_LOAD_MIN_I64: + case Sw64::ATOMIC_LOAD_NAND_I64: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_UMAX_I32: + case Sw64::ATOMIC_LOAD_MAX_I32: + case Sw64::ATOMIC_LOAD_UMIN_I32: + case Sw64::ATOMIC_LOAD_MIN_I32: + case Sw64::ATOMIC_LOAD_NAND_I32: + return emitAtomicBinary(MI, BB); + + case Sw64::ATOMIC_LOAD_UMAX_I16: + case Sw64::ATOMIC_LOAD_MAX_I16: + case Sw64::ATOMIC_LOAD_UMIN_I16: + case Sw64::ATOMIC_LOAD_MIN_I16: + case Sw64::ATOMIC_LOAD_NAND_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + + case Sw64::ATOMIC_LOAD_UMAX_I8: + case Sw64::ATOMIC_LOAD_MAX_I8: + case Sw64::ATOMIC_LOAD_UMIN_I8: + case Sw64::ATOMIC_LOAD_MIN_I8: + case Sw64::ATOMIC_LOAD_NAND_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + + // I8 + case Sw64::ATOMIC_LOAD_ADD_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_SWAP_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_LOAD_AND_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_LOAD_OR_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_LOAD_SUB_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_LOAD_XOR_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case Sw64::ATOMIC_CMP_SWAP_I8: + return emitAtomicCmpSwapPartword(MI, BB, 1); + + // I16 + case Sw64::ATOMIC_LOAD_ADD_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_SWAP_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_LOAD_AND_I16: + return 
emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_LOAD_OR_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_LOAD_SUB_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_LOAD_XOR_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case Sw64::ATOMIC_CMP_SWAP_I16: + return emitAtomicCmpSwapPartword(MI, BB, 2); + } +} + +MachineBasicBlock * +Sw64TargetLowering::emitPrefetch(MachineInstr &MI, + MachineBasicBlock *BB) const { + + Register RA, RB, RC; + MachineFunction *MF = BB->getParent(); + // MachineRegisterInfo &RegInfo = MF->getRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + + // %11:gprc = PHI %10:gprc, %bb.1, %15:gprc, %bb.4 + // FILLCS 128, %11:gprc + // it should be directed return. + if (!(DefMI->getOpcode() == Sw64::LDA && DefMI->getOperand(1).isImm())) + return BB; + + int Imm = DefMI->getOperand(1).getImm(); + int Distance = Imm + MI.getOperand(0).getImm(); + Register Address = DefMI->getOperand(2).getReg(); + + MachineInstr *MII = MI.getNextNode(); + if (MII) + MII = MII->getNextNode(); + else + return BB; + + if (MII) { + if (MII->getOpcode() == Sw64::LDL || MII->getOpcode() == Sw64::LDW || + MII->getOpcode() == Sw64::LDHU || MII->getOpcode() == Sw64::LDBU) { + int MIImm = MII->getOperand(1).getImm(); + if (MIImm > 1000 || MIImm < -1000) { + MI.eraseFromParent(); + return BB; + } + } + } + + if (Distance > 1500 || Distance < -1500) { + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; + } + + BuildMI(*BB, MI, DL, TII->get(MI.getOpcode())) + .addImm(Distance) + .addReg(Address); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock * +Sw64TargetLowering::emitReduceSum(MachineInstr &MI, + MachineBasicBlock *BB) const { + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + Register RB = MI.getOperand(0).getReg(); + Register RA = MI.getOperand(1).getReg(); + + Register RC = RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); + Register RD = RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); + Register RE = RegInfo.createVirtualRegister(&Sw64::GPRCRegClass); + + MachineBasicBlock::iterator II(MI); + + BuildMI(*BB, II, DL, TII->get(MI.getOpcode())) + .addReg(RB, RegState::Define | RegState::EarlyClobber) + .addReg(RA, RegState::Kill) + .addReg(RC, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RD, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RE, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. 
+ + return BB; +} + +MachineBasicBlock * +Sw64TargetLowering::emitITOFSInstruct(MachineInstr &MI, + MachineBasicBlock *BB) const { + return BB; +} + +MachineBasicBlock * +Sw64TargetLowering::emitFSTOIInstruct(MachineInstr &MI, + MachineBasicBlock *BB) const { + + Register RA, RC; + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Opc = Sw64::CTPOPOW; + Register Scratch = RegInfo.createVirtualRegister(&Sw64::F4RCRegClass); + + RC = MI.getOperand(0).getReg(); + RA = MI.getOperand(1).getReg(); + + if (MI.getOpcode() != Opc) + Opc = Sw64::CTLZOW; + + BuildMI(*BB, MI, DL, TII->get(Opc)) + .addReg(Scratch, RegState::Define) + .addReg(RA); + BuildMI(*BB, MI, DL, TII->get(Sw64::FTOIS)) + .addReg(RC, RegState::Define) + .addReg(Scratch); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Sw64TargetLowering::emitAtomicBinaryPartword( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { + assert((Size == 1 || Size == 2) && + "Unsupported size for EmitAtomicBinaryPartial."); + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i64); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned OldVal = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned Incr = MI.getOperand(2).getReg(); + + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned LockVal = RegInfo.createVirtualRegister(RC); + unsigned Reg_bic = RegInfo.createVirtualRegister(RC); + unsigned Scratch = RegInfo.createVirtualRegister(RC); + + unsigned AtomicOp = 0; + switch (MI.getOpcode()) { + case Sw64::ATOMIC_LOAD_ADD_I8: + AtomicOp = Sw64::ATOMIC_LOAD_ADD_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_SUB_I8: + AtomicOp = Sw64::ATOMIC_LOAD_SUB_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_AND_I8: + AtomicOp = Sw64::ATOMIC_LOAD_AND_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_OR_I8: + AtomicOp = Sw64::ATOMIC_LOAD_OR_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_XOR_I8: + AtomicOp = Sw64::ATOMIC_LOAD_XOR_I8_POSTRA; + break; + case Sw64::ATOMIC_SWAP_I8: + AtomicOp = Sw64::ATOMIC_SWAP_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_ADD_I16: + AtomicOp = Sw64::ATOMIC_LOAD_ADD_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_SUB_I16: + AtomicOp = Sw64::ATOMIC_LOAD_SUB_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_AND_I16: + AtomicOp = Sw64::ATOMIC_LOAD_AND_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_OR_I16: + AtomicOp = Sw64::ATOMIC_LOAD_OR_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_XOR_I16: + AtomicOp = Sw64::ATOMIC_LOAD_XOR_I16_POSTRA; + break; + case Sw64::ATOMIC_SWAP_I16: + AtomicOp = Sw64::ATOMIC_SWAP_I16_POSTRA; + break; + + case Sw64::ATOMIC_LOAD_UMAX_I16: + AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MAX_I16: + AtomicOp = Sw64::ATOMIC_LOAD_MAX_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMIN_I16: + AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MIN_I16: + AtomicOp = Sw64::ATOMIC_LOAD_MIN_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_NAND_I16: + AtomicOp = Sw64::ATOMIC_LOAD_NAND_I16_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMAX_I8: + AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MAX_I8: + AtomicOp = Sw64::ATOMIC_LOAD_MAX_I8_POSTRA; + break; + case 
Sw64::ATOMIC_LOAD_UMIN_I8: + AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MIN_I8: + AtomicOp = Sw64::ATOMIC_LOAD_MIN_I8_POSTRA; + break; + case Sw64::ATOMIC_LOAD_NAND_I8: + AtomicOp = Sw64::ATOMIC_LOAD_NAND_I8_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); + } + + MachineBasicBlock::iterator II(MI); + + unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); + + unsigned t_Incr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Incr).addReg(Incr).addReg(Incr); + + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(OldVal, RegState::Define | RegState::EarlyClobber) + .addReg(t_Ptr, RegState::EarlyClobber) + .addReg(t_Incr, RegState::EarlyClobber) + .addReg(StoreVal, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(LockVal, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_bic, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Scratch, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock *Sw64TargetLowering::emitAtomicCmpSwapPartword( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { + assert((Size == 1 || Size == 2) && + "Unsupported size for EmitAtomicCmpSwapPartial."); + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i64); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned OldVal = MI.getOperand(2).getReg(); + unsigned NewVal = MI.getOperand(3).getReg(); + + unsigned Reg_bic = RegInfo.createVirtualRegister(RC); + unsigned Reg_ins = RegInfo.createVirtualRegister(RC); + unsigned LockVal = RegInfo.createVirtualRegister(RC); + unsigned Reg_cmp = RegInfo.createVirtualRegister(RC); + unsigned Reg_mas = RegInfo.createVirtualRegister(RC); + + unsigned AtomicOp = 0; + switch (MI.getOpcode()) { + case Sw64::ATOMIC_CMP_SWAP_I8: + AtomicOp = Sw64::ATOMIC_CMP_SWAP_I8_POSTRA; + break; + case Sw64::ATOMIC_CMP_SWAP_I16: + AtomicOp = Sw64::ATOMIC_CMP_SWAP_I16_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); + } + + MachineBasicBlock::iterator II(MI); + + unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); + unsigned t_OldVal = + MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_OldVal) + .addReg(OldVal) + .addReg(OldVal); + unsigned t_NewVal = + MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_NewVal) + .addReg(NewVal) + .addReg(NewVal); + + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(t_Ptr, RegState::EarlyClobber) + .addReg(t_OldVal, RegState::EarlyClobber) + .addReg(t_NewVal, RegState::EarlyClobber) + .addReg(Reg_bic, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_ins, RegState::Define | 
RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(LockVal, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_cmp, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_mas, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +} + +// This function also handles Sw64::ATOMIC_SWAP_I32 (when BinOpcode == 0), and +// Sw64::SWAP32 +MachineBasicBlock * +Sw64TargetLowering::emitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *BB) const { + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i64); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned AtomicOp; + switch (MI.getOpcode()) { + case Sw64::ATOMIC_LOAD_ADD_I32: + case Sw64::LAS32: + AtomicOp = Sw64::ATOMIC_LOAD_ADD_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_SUB_I32: + AtomicOp = Sw64::ATOMIC_LOAD_SUB_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_AND_I32: + AtomicOp = Sw64::ATOMIC_LOAD_AND_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_OR_I32: + AtomicOp = Sw64::ATOMIC_LOAD_OR_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_XOR_I32: + AtomicOp = Sw64::ATOMIC_LOAD_XOR_I32_POSTRA; + break; + case Sw64::ATOMIC_SWAP_I32: + case Sw64::SWAP32: + AtomicOp = Sw64::ATOMIC_SWAP_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_ADD_I64: + case Sw64::LAS64: + AtomicOp = Sw64::ATOMIC_LOAD_ADD_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_SUB_I64: + AtomicOp = Sw64::ATOMIC_LOAD_SUB_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_AND_I64: + AtomicOp = Sw64::ATOMIC_LOAD_AND_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_OR_I64: + AtomicOp = Sw64::ATOMIC_LOAD_OR_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_XOR_I64: + AtomicOp = Sw64::ATOMIC_LOAD_XOR_I64_POSTRA; + break; + case Sw64::ATOMIC_SWAP_I64: + case Sw64::SWAP64: + AtomicOp = Sw64::ATOMIC_SWAP_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMAX_I64: + AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I64_POSTRA; + break; + + case Sw64::ATOMIC_LOAD_MAX_I64: + AtomicOp = Sw64::ATOMIC_LOAD_MAX_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMIN_I64: + AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MIN_I64: + AtomicOp = Sw64::ATOMIC_LOAD_MIN_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_NAND_I64: + AtomicOp = Sw64::ATOMIC_LOAD_NAND_I64_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMAX_I32: + AtomicOp = Sw64::ATOMIC_LOAD_UMAX_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MAX_I32: + AtomicOp = Sw64::ATOMIC_LOAD_MAX_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_UMIN_I32: + AtomicOp = Sw64::ATOMIC_LOAD_UMIN_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_MIN_I32: + AtomicOp = Sw64::ATOMIC_LOAD_MIN_I32_POSTRA; + break; + case Sw64::ATOMIC_LOAD_NAND_I32: + AtomicOp = Sw64::ATOMIC_LOAD_NAND_I32_POSTRA; + break; + + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); + } + + unsigned OldVal = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned Incr = MI.getOperand(2).getReg(); + + unsigned StoreVal = RegInfo.createVirtualRegister(RC); + unsigned Scratch = RegInfo.createVirtualRegister(RC); + unsigned Scratch1 = RegInfo.createVirtualRegister(RC); + + MachineBasicBlock::iterator II(MI); + + unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, 
TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); + + unsigned t_Incr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Incr).addReg(Incr).addReg(Incr); + + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(OldVal, RegState::Define | RegState::EarlyClobber) + .addReg(t_Ptr, RegState::EarlyClobber) + .addReg(t_Incr, RegState::EarlyClobber) + .addReg(StoreVal, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Scratch, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Scratch1, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock *Sw64TargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const { + assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i64); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned AtomicOp; + + switch (MI.getOpcode()) { + case Sw64::CAS32: + case Sw64::ATOMIC_CMP_SWAP_I32: + AtomicOp = Sw64::ATOMIC_CMP_SWAP_I32_POSTRA; + break; + case Sw64::CAS64: + case Sw64::ATOMIC_CMP_SWAP_I64: + AtomicOp = Sw64::ATOMIC_CMP_SWAP_I64_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); + } + + /* + $0=Dest $16=Ptr $17=OldVal $18=NewVal + + memb + $BB0_1: + ldi $0,0($16) + lldw $0,0($0) + cmpeq $17,$0,$1 + wr_f $1 + bis $18,$18,$2 + lstw $2,0($16) + rd_f $2 + beq $1,$BB0_2 + beq $2,$BB0_1 + $BB0_2: + */ + + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned OldVal = MI.getOperand(2).getReg(); + unsigned NewVal = MI.getOperand(3).getReg(); + unsigned Scratch = RegInfo.createVirtualRegister(RC); + unsigned Reg_cmp = RegInfo.createVirtualRegister(RC); + + MachineBasicBlock::iterator II(MI); + + unsigned t_Ptr = MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_Ptr).addReg(Ptr).addReg(Ptr); + unsigned t_OldVal = + MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_OldVal) + .addReg(OldVal) + .addReg(OldVal); + unsigned t_NewVal = + MF->getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + BuildMI(*BB, II, DL, TII->get(Sw64::BISr), t_NewVal) + .addReg(NewVal) + .addReg(NewVal); + + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(t_Ptr, RegState::EarlyClobber) + .addReg(t_OldVal, RegState::EarlyClobber) + .addReg(t_NewVal, RegState::EarlyClobber) + .addReg(Scratch, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Reg_cmp, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MVT Sw64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, + EVT LHSTy) const { + return MVT::i64; +} + +bool Sw64TargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // The Sw64 target isn't yet aware of offsets. 
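+  // Returning false keeps a "global + constant" address as an explicit
+  // ISD::ADD rather than folding the addend into the GlobalAddress node,
+  // which is the conservative and always-safe choice.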
+ return false; +} + +EVT Sw64TargetLowering::getOptimalMemOpType( + const MemOp &Op, const AttributeList & /*FuncAttributes*/) const { + if (Subtarget.enOptMemset()) + return MVT::i64; + return MVT::Other; +} + +bool Sw64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + if (VT != MVT::f32 && VT != MVT::f64) + return false; + // +0.0 F31 + // +0.0f F31 + // -0.0 -F31 + // -0.0f -F31 + return Imm.isZero() || Imm.isNegZero(); +} + +SDValue Sw64TargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, + int &RefinementSteps) const { + EVT VT = Operand.getValueType(); + if ((VT == MVT::f32 || VT == MVT::f64) && Subtarget.hasCore4() && + Subtarget.enableFloatAri()) { + if (RefinementSteps == ReciprocalEstimate::Unspecified) { + if (VT.getScalarType() == MVT::f32) + RefinementSteps = 2; + if (VT.getScalarType() == MVT::f64) + RefinementSteps = 3; + } + if (VT.getScalarType() == MVT::f32) + return DAG.getNode(Sw64ISD::FRECS, SDLoc(Operand), VT, Operand); + if (VT.getScalarType() == MVT::f64) + return DAG.getNode(Sw64ISD::FRECD, SDLoc(Operand), VT, Operand); + } + return SDValue(); +} + +bool Sw64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + EVT VT; + SDValue Ptr; + LSBaseSDNode *LSN = dyn_cast(N); + if (!LSN) + return false; + VT = LSN->getMemoryVT(); + bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || + VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64; + if (!IsLegalType) + return false; + if (Op->getOpcode() != ISD::ADD) + return false; + if (LoadSDNode *LD = dyn_cast(N)) { + VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast(N)) { + VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); + } else + return false; + + if (ConstantSDNode *RHS = dyn_cast(Op->getOperand(1))) { + uint64_t RHSC = RHS->getZExtValue(); + Base = Ptr; + Offset = DAG.getConstant(RHSC, SDLoc(N), MVT::i64); + AM = ISD::POST_INC; + return true; + } + + return false; +} + +const TargetRegisterClass *Sw64TargetLowering::getRepRegClassFor(MVT VT) const { + if (VT == MVT::Other) + return &Sw64::GPRCRegClass; + if (VT == MVT::i32) + return &Sw64::FPRC_loRegClass; + return TargetLowering::getRepRegClassFor(VT); +} + +bool Sw64TargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS, + Instruction *I) const { + if (!Subtarget.hasCore4() || !Subtarget.enablePostInc()) + return llvm::TargetLoweringBase::isLegalAddressingMode(DL, AM, Ty, AS, I); + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // Require a 12-bit signed offset. + if (!isInt<12>(AM.BaseOffs)) + return false; + + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (!AM.HasBaseReg) // allow "r+i". + break; + return false; // disallow "r+r" or "r+r+i". 
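+  // Any other scale implies a scaled-index form such as "base + 4*index";
+  // those are rejected below, so this path only accepts "i", "r", and
+  // "r+i" with a 12-bit signed displacement.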
+ default: + return false; + } + + return true; +} + +bool Sw64TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + default: + break; + } + + return false; +} + +bool Sw64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, + Type *Ty) const { + switch (Ty->getScalarType()->getTypeID()) { + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + return false; + } +} + +bool Sw64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { + // Zexts are free if they can be combined with a load. + if (Subtarget.enOptExt()) { + if (auto *LD = dyn_cast(Val)) { + EVT MemVT = LD->getMemoryVT(); + if ((MemVT == MVT::i8 || MemVT == MVT::i16 || + (Subtarget.is64Bit() && MemVT == MVT::i32)) && + (LD->getExtensionType() == ISD::NON_EXTLOAD || + LD->getExtensionType() == ISD::ZEXTLOAD)) + return true; + } + } + + return TargetLowering::isZExtFree(Val, VT2); +} + +bool Sw64TargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { + if (Subtarget.enOptExt()) + return SrcVT == MVT::i32 && DstVT == MVT::i64; + return false; +} + +bool Sw64TargetLowering::isLegalICmpImmediate(int64_t Imm) const { + if (Subtarget.enOptExt()) + return Imm >= 0 && Imm <= 255; + return false; +} + +bool Sw64TargetLowering::isLegalAddImmediate(int64_t Imm) const { + if (Subtarget.enOptExt()) + return Imm >= 0 && Imm <= 255; + return false; +} diff --git a/llvm/lib/Target/Sw64/Sw64ISelLowering.h b/llvm/lib/Target/Sw64/Sw64ISelLowering.h new file mode 100644 index 000000000000..836abe2774af --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64ISelLowering.h @@ -0,0 +1,476 @@ +//===-- Sw64ISelLowering.h - Sw64 DAG Lowering Interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Sw64 uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64ISELLOWERING_H +#define LLVM_LIB_TARGET_SW64_SW64ISELLOWERING_H + +#include "Sw64.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLowering.h" + +namespace llvm { + +// Forward delcarations +class Sw64Subtarget; +class Sw64TargetMachine; + +namespace Sw64ISD { +enum NodeType : unsigned { + // Start the numbering where the builtin ops and target ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // These corrospond to the identical Instruction + CVTQT_, + CVTQS_, + CVTTQ_, + CVTTS_, + CVTST_, + /// GPRelHi/GPRelLo - These represent the high and low 16-bit + /// parts of a global address respectively. + GPRelHi, + GPRelLo, + /// TPRelHi/TPRelLo - These represent the high and low 16-bit + /// parts of a TLS global address respectively. + TPRelHi, + TPRelLo, + TLSGD, // SW + TLSLDM, // SW + DTPRelHi, + DTPRelLo, + RelGottp, // SW + SysCall, + /// RetLit - Literal Relocation of a Global + RelLit, + + /// GlobalRetAddr - used to restore the return address + GlobalRetAddr, + + /// CALL - Normal call. 
+ CALL, + + /// Jump and link (call) + JmpLink, + /// DIVCALL - used for special library calls for div and rem + DivCall, + /// return flag operand + RET_FLAG, + Ret, + LDAWC, + MEMBARRIER, + /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This + /// corresponds to the COND_BRANCH pseudo instruction. + /// *PRC is the input register to compare to zero, + /// OPC is the branch opcode to use (e.g. Sw64::BEQ), + /// DESTBB is the destination block to branch to, and INFLAG is + /// an optional input flag argument. + COND_BRANCH_I, + COND_BRANCH_F, + + Z_S_FILLCS, + Z_S_FILLDE, + Z_FILLDE, + Z_FILLDE_E, + Z_FILLCS, + Z_FILLCS_E, + Z_E_FILLCS, + Z_E_FILLDE, + Z_FLUSHD, + + GPRel, + TPRel, + DTPRel, + LDIH, + LDI, + + FRECS, + FRECD, + ADDPI, + ADDPIS, + SBT, + CBT, + REVBH, + REVBW, + + ROLW, + CRC32B, + CRC32H, + CRC32W, + CRC32L, + CRC32CB, + CRC32CH, + CRC32CW, + CRC32CL, + + VBROADCAST_LD, + VBROADCAST, + + // Vector load. + VLDWE, + VLDSE, + VLDDE, + + // Vector comparisons. + // These take a vector and return a boolean. + VALL_ZERO, + VANY_ZERO, + VALL_NONZERO, + VANY_NONZERO, + + // This is vcmpgew. + VSETGE, + + // These take a vector and return a vector bitmask. + VCEQ, + VCLE_S, + VCLE_U, + VCLT_S, + VCLT_U, + // These is vector select. + VFCMOVEQ, + VFCMOVLE, + VFCMOVLT, + VSELEQW, + VSELLTW, + VSELLEW, + VSELLBCW, + + VMAX, + VMIN, + VUMAX, + VUMIN, + VSQRT, + VSUMF, + VFREC, + VFCMPEQ, + VFCMPLE, + VFCMPLT, + VFCMPUN, + VFCVTSD, + VFCVTDS, + VFCVTLS, + VFCVTLD, + VFCVTSH, + VFCVTHS, + VFCVTDL, + VFCVTDLG, + VFCVTDLP, + VFCVTDLZ, + VFCVTDLN, + VFRIS, + VFRISG, + VFRISP, + VFRISZ, + VFRISN, + VFRID, + VFRIDG, + VFRIDP, + VFRIDZ, + VFRIDN, + VMAXF, + VMINF, + VINSECTL, + VCPYB, + VCPYH, + // Vector Shuffle with mask as an operand + VSHF, // Generic shuffle + SHF, // 4-element set shuffle. 
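+  // The ILV*/PCK* nodes below are the even/odd interleave and pack forms of
+  // the element-shuffle family introduced by VSHF/SHF above.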
+ ILVEV, // Interleave even elements + ILVOD, // Interleave odd elements + ILVL, // Interleave left elements + ILVR, // Interleave right elements + PCKEV, // Pack even elements + PCKOD, // Pack odd elements + VCON_W, + VCON_S, + VCON_D, + + VSHL_BY_SCALAR, + VSRL_BY_SCALAR, + VSRA_BY_SCALAR, + // Vector Lane Copy + INSVE, // Copy element from one vector to another + + // Combined (XOR (OR $a, $b), -1) + VNOR, + VEQV, + VORNOT, + + VCTPOP, + VCTLZ, + + VLOG, + VCOPYF, + V8SLL, + V8SLLi, + V8SRL, + V8SRLi, + VROTR, + VROTRi, + V8SRA, + V8SRAi, + VROLB, + VROLBi, + VROLH, + VROLHi, + VROLL, + VROLLi, + VECREDUCE_FADD, + VECT_VUCADDW, + VECT_VUCADDH, + VECT_VUCADDB, + VECT_VUCSUBW, + VECT_VUCSUBH, + VECT_VUCSUBB, + // Extended vector element extraction + VEXTRACT_SEXT_ELT, + VEXTRACT_ZEXT_ELT, + + VTRUNCST = ISD::FIRST_TARGET_MEMORY_OPCODE +}; +} // namespace Sw64ISD + +//===--------------------------------------------------------------------===// +// TargetLowering Implementation +//===--------------------------------------------------------------------===// +class Sw64TargetLowering : public TargetLowering { + const TargetMachine &TM; + const Sw64Subtarget &Subtarget; + +public: + explicit Sw64TargetLowering(const TargetMachine &TM, + const Sw64Subtarget &Subtarget); + + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT LHSTy) const override; + + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }; + + bool generateFMAsInMachineCombiner(EVT VT, + CodeGenOpt::Level OptLevel) const override; + + /// getSetCCResultType - Get the SETCC result ValueType + virtual EVT getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const override; + bool isLegalICmpImmediate(int64_t Imm) const override; + bool isLegalAddImmediate(int64_t Imm) const override; + bool isZExtFree(SDValue Val, EVT VT2) const override; + bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; + + /// LowerOperation - Provide custom lowering hooks for some operations. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + /// getTargetNodeName - This method returns the name of a target specific + /// DAG node. + const char *getTargetNodeName(unsigned Opcode) const override; + template SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals, + const SDNode *CallNode, const Type *RetTy) const; + ConstraintType getConstraintType(const std::string &Constraint) const; + + unsigned MatchRegName(StringRef Name) const; + Register getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const override; + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. 
+ ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const override; + + // Inline asm support + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; + + virtual bool + isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + EVT getOptimalMemOpType( + const MemOp &Op, const AttributeList & /*FuncAttributes*/) const override; + + /// isFPImmLegal - Returns true if the target can instruction select the + /// specified FP immediate natively. If false, the legalizer will + /// materialize the FP immediate as a load from a constant pool. + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + struct LTStr { + bool operator()(const char *S1, const char *S2) const { + return strcmp(S1, S2) < 0; + } + }; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + Register + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return Sw64::R16; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + Register + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return Sw64::R17; + } + SDValue PerformDAGCombineV(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + /// Enable SIMD support for the given integer type and Register + /// class. + void addSIMDIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + + /// Enable SIMD support for the given floating-point type and + /// Register class. + void addSIMDFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + +private: + // Helpers for custom lowering. 
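+  // These are the targets of the LowerOperation()/ReplaceNodeResults()
+  // hooks declared above; the dispatch looks roughly like
+  //   switch (Op.getOpcode()) {
+  //   case ISD::VASTART:       return LowerVASTART(Op, DAG);
+  //   case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+  //   ...
+  //   }
+  // (an illustrative sketch; the actual switch is in Sw64ISelLowering.cpp).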
+ void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, + SelectionDAG &DAG) const; + + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + + virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SDLoc &dl, + SelectionDAG &DAG) const override; + + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl &ArgsFlags, + LLVMContext &Context) const override; + + // Lower Operand specifics + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUDIVI128(SDValue Op, SelectionDAG &DAG) const; + std::pair LowerCallExtraResult(SDValue &Chain, + SDValue &DemoteStackSlot, + unsigned DemoteStackIdx, + SelectionDAG &DAG) const; + SDValue LowerROLW(SDNode *N, SelectionDAG &DAG) const; + + SDValue LowerVectorShift(SDValue Op, SelectionDAG &DAG) const; + + ISD::NodeType getExtendForAtomicOps() const override { + return ISD::ANY_EXTEND; + } + + SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + + MachineMemOperand::Flags + getTargetMMOFlags(const Instruction &I) const override; + + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return true; + } + Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + /// This function parses registers that appear in inline-asm constraints. + /// It returns pair (0, 0) on failure. 
+ + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; + + MachineBasicBlock *emitReduceSum(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitITOFSInstruct(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitFSTOIInstruct(MachineInstr &MI, + MachineBasicBlock *BB) const; + SDValue LowerVectorMemIntr(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + /// Return true if an FMA operation is faster than a pair of fmul and fadd + /// instructions. fmuladd intrinsics will be expanded to FMAs when this method + /// returns true, otherwise fmuladd is expanded to fmul + fadd. + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; + + std::pair + parseRegForInlineAsmConstraint(StringRef C, MVT VT) const; + + MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *emitPrefetch(MachineInstr &MI, + MachineBasicBlock *BB) const; + + SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const override; + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, + SDValue &Offset, ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; + const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; + + SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS, + Instruction *I = nullptr) const override; +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64InstrFormats.td b/llvm/lib/Target/Sw64/Sw64InstrFormats.td new file mode 100644 index 000000000000..c7ec61ea5b5b --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrFormats.td @@ -0,0 +1,452 @@ +//===- Sw64InstrFormats.td - Sw64 Instruction Formats ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +def u5imm : Operand{ + let DecoderMethod = "decodeUImmOperand<5>"; +} +def u6imm : Operand{ + let DecoderMethod = "decodeUImmOperand<6>"; +} +def u8imm : Operand{ + let DecoderMethod = "decodeUImmOperand<8>"; +} +def u8immHex : Operand{ + let DecoderMethod = "decodeUImmOperand<8>"; + let PrintMethod = "printHexImm"; +} +def s8imm : Operand{ + let DecoderMethod = "decodeSImmOperand<8>"; +} +def s13imm : Operand{ + let DecoderMethod = "decodeSImmOperand<13>"; +} +def s12imm : Operand{ + let DecoderMethod = "decodeSImmOperand<12>"; +} +def s14imm : Operand{ + let DecoderMethod = "decodeSImmOperand<14>"; +} +def s16imm : Operand{ + let DecoderMethod = "decodeSImmOperand<16>"; + let OperandType = "OPERAND_PCREL"; +} +def s21imm : Operand{ + let DecoderMethod = "decodeSImmOperand<21>"; + let OperandType = "OPERAND_PCREL"; +} +def u26imm : Operand{ + let DecoderMethod = "decodeSImmOperand<26>"; +} +def s64imm : Operand{ + let DecoderMethod = "decodeSImmOperand<64>"; + let PrintMethod = "printMemoryArg"; +} +def u64imm : Operand{ + let DecoderMethod = "decodeSImmOperand<64>"; +} + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// +// Sw64 instruction baseline +class InstSw64 op, string opstr, string operands> : Instruction { + field bits<32> Inst; + let Namespace = "Sw64"; + let Inst{31-26} = op; + + let AsmString = opstr # " " # operands; + // Add Size: Number of bytes in encoding + let Size = 4; + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. 
+ field bits<32> SoftFail = 0; +} + +//Chapter2.6.1 +// sys_call |31 26|25 0| +// | Opcode | Func | +class PALForm opcode, dag iops, dag oops, + string opstr, string operands> + : InstSw64 { + let OutOperandList = oops; + let InOperandList = iops; + bits<26> disp; + + let Inst{25-0} = disp; +} + +// Branching beq/bge/bgt Chapter2.6.2 +// COND_BRANCH |31 26|25 21|20 0| +// | Opcode | RA/Fa | disp | + +def JmpTargetAsmOperand : AsmOperandClass { + let Name = "JmpImm"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseJmpImm"; +} + +def target : Operand { + let ParserMatchClass = JmpTargetAsmOperand; + let EncoderMethod = "getBranchTargetOpValue"; + let DecoderMethod = "decodeSImmOperand<21>"; + let OperandType = "OPERAND_PCREL"; + let MCOperandPredicate = [{ + int64_t Imm; + if (MCOp.evaluateAsConstantImm(Imm)) + return isShiftedInt<22, 2>(Imm); + return MCOp.isBareSymbolRef(); + }]; +} + +class BForm opcode, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + bits<64> Opc; //dummy + bits<5> RA; + bits<21> disp; + + let Inst{25-21} = RA; + let Inst{20-0} = disp; +} + +// LDL/LDW Chapter2.6.3 +// Memory |31 26|25 21|20 16|15 0| +// | Opcode | RA/Fa | RB | disp | +class MForm opcode, dag iops, dag oops, + string opstr, string operands="", list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RA; + bits<16> DISP; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-0} = DISP; +} + +class MfcForm opcode, bits<16> Func, dag iops, dag oops, + string opstr, string operands="", list pattern=[]> + : InstSw64 { + bits<16> Function=Func; + bits<5> RA; + bits<5> RB; + + let OutOperandList = oops; + let InOperandList = iops; + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-0} = Function; +} + + +// New Add, for atomic-op +// Memory + Func |31 26|25 21|20 16|15 12|11 0| +// | Opcode | RA/Fa | RB | Func | disp | +class MFuncForm opcode, bits<4> func, dag iops, dag oops, + string opstr, string operands="", list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let InOperandList = iops; + let OutOperandList = oops; + + bits<5> RA; + bits<12> disp; + bits<5> RB; + bits<4> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-12} = Function; + let Inst{11-0} = disp; +} + +// New Add, for privilege inst +// Memory + Func |31 26|25 21|20 16|15 12|11 0| +// | Opcode | TH | RB | Func | disp | +class MPrvlForm opcode, bits<4> func, dag iops, dag oops, + string opstr, string operands="", list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let InOperandList = iops; + let OutOperandList = oops; + + bits<5> TH; + bits<12> disp; + bits<5> RB; + bits<4> Function = func; + + let Inst{25-21} = TH; + let Inst{20-16} = RB; + let Inst{15-12} = Function; + let Inst{11-0} = disp; +} + +// Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 16|15 13|12 5|4 0| +// r + r : | Opcode | RA | RB | SBZ | Func | RC | +class OForm opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + + +// 
Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 13|12 5|4 0| +// r + i : | Opcode | RA | imm | Func | RC | +class OFormL opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RC; + bits<5> RA; + bits<8> L; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-13} = L; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +// Chapter2.6.4 +// simple_operation_form |31 26|25 13|12 5|4 0| +// r + i : | Opcode | imm | Func | RC | +class OFormI opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RC; + bits<13> L; + bits<8> Function = fun; + + let Inst{25-13} = L; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + + + +// seleq/selne... Chapter2.6.5(1) +// int_complex_operation_form |31 26|25 21|20 16|15 13|12 10|9 5|4 0| +// r + r : | Opcode | RA | RB | SBZ | Func | RC | RD | +class OForm4 opcode, bits<3> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<3> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +// seleq/selne... Chapter2.6.5(2) +// int_complex_operation_form |31 26|25 21|20 13|12 10|9 5|4 0| +// r + i : | Opcode | RA | imm | Func | RC | RD | +class OForm4L opcode, bits<3> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RD; + bits<5> RA; + bits<8> L; + bits<3> Function = fun; + bits<5> RC; + + let Inst{25-21} = RA; + let Inst{20-13} = L; + let Inst{12-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +// fadds/faddd... 
Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 16|15 13|12 5|4 0| +// r + r : | Opcode | Fa | Fb | SBZ | Func | Fc | +class FPForm opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let InOperandList = iops; + let OutOperandList = oops; + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +class FPForm1 opcode, bits<8> fun, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let InOperandList = iops; + let OutOperandList = oops; + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +// New add fselXX Chapter2.6.5(3) +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + r : | Opcode | Fa | Fb | Func | Fc | Fd | +class FForm4 opcode, bits<6> func, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RD; + bits<5> RC; + bits<5> RB; + bits<5> RA; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +// New add fselXX Chapter2.6.5(4) +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Fa | Fb | Func | imm | Fd | +class FForm4L opcode, bits<6> func, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + let Constraints = "$RFALSE = $RDEST"; + let DisableEncoding = "$RFALSE"; + + bits<5> RA; + bits<5> RB; + bits<5> LIT; + bits<5> RD; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = LIT; + let Inst{4-0} = RD; +} + +// New add CSRXX Chapter4.9.2 +// fp_complex_operation_form |31 26|25 21|20 16|15 8|7 0| +// : | Opcode | Ra | Rb | Func | Index | +class CSRForm opcode, bits<8> func, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RB; + bits<5> RA; + bits<8> Function = func; + bits<8> L; + + let Inst{25-21} = RA; + let Inst{20-16} = 0x1f; + let Inst{15-8} = Function; + let Inst{7-0} = L; +} + +// New add FCVTSH Chapter 4.6.3.3 +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Fa | Fb | Func | imm | Fd | +class FCForm4L opcode, bits<6> func, dag iops, dag oops, + string opstr, string operands, list pattern=[]> + : InstSw64 { + let Pattern = pattern; + let OutOperandList = oops; + let InOperandList = iops; + + bits<5> RA; + bits<5> RB; + bits<5> LIT; + bits<5> RD; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = LIT; + let Inst{4-0} = RD; +} + + +// Pseudo instructions. 
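+// Pseudos use opcode 0 and have no fixed encoding of their own; setting
+// isCodeGenOnly = 1 keeps them out of the assembler and disassembler
+// tables, and they are expanded to real instructions before emission.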
+class PseudoInstSw64 pattern> + : InstSw64<0, opstr, ""> { + let OutOperandList = oops; + let InOperandList = iops; + let Pattern = pattern; + let isCodeGenOnly = 1; +} diff --git a/llvm/lib/Target/Sw64/Sw64InstrFormatsV.td b/llvm/lib/Target/Sw64/Sw64InstrFormatsV.td new file mode 100644 index 000000000000..5339b7864a28 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrFormatsV.td @@ -0,0 +1,389 @@ +//===- Sw64InstrFormats.td - Sw64 Instruction Formats ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// SIMD Instruction format superclass +//===----------------------------------------------------------------------===// + + +class SIMDPseudo pattern>: + PseudoInstSw64 { +} + + +class InstSw64V op> : Instruction { + field bits<32> Inst; + let Namespace = "Sw64"; + let Inst{31-26} = op; + let Size = 4; + field bits<32> SoftFail = 0; +} + +class InstSw64VLog op> : Instruction { + field bits<32> Inst; + let Namespace = "Sw64"; + let Inst{31-28} = op; + let Size = 4; + field bits<32> SoftFail = 0; +} + +// VLDD/VLDW Chapter2.6.3 +// Memory |31 26|25 21|20 16|15 0| +// | Opcode | RA/RA | RB | disp | +class MFormV opcode> : InstSw64V { + + bits<5> RA; + bits<21> addr; + + let Inst{25-21} = RA; + let Inst{20-16} = addr{20-16}; + let Inst{15-0} = addr{15-0}; +} + +// Memory + Func |31 26|25 21|20 16|15 12|11 0| +// | Opcode | RA/RA | RB | Func | disp | +class MFuncFormV opcode, bits<4> func> : InstSw64V { + + bits<5> RA; + bits<21> addr; + bits<4> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = addr{20-16}; + let Inst{15-12} = Function; + let Inst{11-0} = addr{11-0}; +} + +// fadds/faddd... 
Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 16|15 13|12 5|4 0| +// r + r : | Opcode | RA | RB | SBZ | Func | RC | +class FPFormV opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +class FPFormV_2RV opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +class FPFormV_2RV1 opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{20-16} = RB; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + +class FPFormV_CT opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{15-13} = 0; + let Inst{12-5} = Function; + let Inst{4-0} = RC; +} + + +// Chapter2.6.4 +// simple_operation_form |31 26|25 21|20 13|12 5|4 0| +// r + i : | Opcode | RA | imm | Func | RC | +class FPFormIV opcode, bits<8> fun> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<8> Imm; + bits<8> Function = fun; + + let Inst{25-21} = RA; + let Inst{20-13} = Imm; + let Inst{12-11} = Function{7-6}; + let Inst{10} = 1; + let Inst{9-5} = Function{4-0}; + let Inst{4-0} = RC; +} + + +// New add fselXX Chapter2.6.5(3) +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + r : | Opcode | Va | Vb | Func | Vc | Vd | +class FForm4V opcode, bits<6> func> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +class FForm4VINSECTL opcode, bits<6> func> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = 31; +} + +class FForm4VCPY opcode, bits<6> func> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = 31; + let Inst{15-10} = Function; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +// vcpyw/vcpys +class FForm2V opcode, bits<6> func> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = 0; + let Inst{15-10} = Function; + let Inst{9-5} = 0; + let Inst{4-0} = RC; +} + + +// New add fselXX Chapter2.6.5(4) +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Va | Vb | Func | imm | Vd | +class FForm4LV opcode, bits<6> func> : InstSw64V { + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> Imm; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Function; + let Inst{9-5} = Imm; + let Inst{4-0} = RD; +} + +class FForm4LV1 opcode, bits<6> func> : InstSw64V { + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> Imm; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = 31; + let Inst{15-10} = Function; + let Inst{9-5} = Imm; + let Inst{4-0} = RD; +} + +class FForm4LV2 opcode, bits<6> func> : InstSw64V { + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + 
let Inst{15-10} = Function; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} +// New add vext Fix the RD to RC +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Va | Vb | Func | imm | Vd | +class FForm4LVV opcode, bits<6> func> : InstSw64V { + + bits<5> RC; + bits<5> RA; + bits<5> RB; + bits<5> Imm; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = 0; + let Inst{15-10} = Function; + let Inst{9-5} = Imm; + let Inst{4-0} = RC; +} + +// New add vlogxx +// vlogxx: |31 28|27 26|25 21|20 16|15 10|9 5|4 0| +// | Opcode | zz[7:6] | Va | Vb | zz[5:0] | Vc | Vd | +class FForm_VANDW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VBICW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x30; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VBISW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x3c; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VXORW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x3c; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VEQVW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x03; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + +class FForm_VORNOTW opcode> : InstSw64V { + bits<5> RD; + bits<5> RA; + bits<5> RB; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = 0x33; + let Inst{9-5} = 31; + let Inst{4-0} = RD; +} + + +class FForm4LVLogZZ opcode> : InstSw64VLog { + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> RC; + bits<8> Imm; + + let Inst{27-26} = Imm{7-6}; + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = Imm{5-0}; + let Inst{9-5} = RC; + let Inst{4-0} = RD; +} + +class FForm4LVLog opcode, bits<8> zz> : InstSw64VLog { + bits<5> RC; + bits<5> RA; + bits<5> RB; + + let Inst{27-26} = zz{7-6}; + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15-10} = zz{5-0}; + let Inst{9-5} = 31; + let Inst{4-0} = RC; +} + +// fp_complex_operation_form |31 26|25 21|20 16|15 10|9 5|4 0| +// r + i : | Opcode | Va | Vb | Func | imm | Vd | +class FForm4_VSELi opcode, bits<6> func> : InstSw64V { + + bits<5> RD; + bits<5> RA; + bits<5> RB; + bits<5> Imm; + bits<6> Function = func; + + let Inst{25-21} = RA; + let Inst{20-16} = RB; + let Inst{15} = 1; + let Inst{14-10} = Function{4-0}; + let Inst{9-5} = Imm; + let Inst{4-0} = RD; +} + +class VectorIndex : Operand, ImmLeaf; + +def VectorIndexB : VectorIndex; +def VectorIndexH : VectorIndex; +def VectorIndexS : VectorIndex; +def VectorIndexD : VectorIndex; diff --git a/llvm/lib/Target/Sw64/Sw64InstrInfo.cpp b/llvm/lib/Target/Sw64/Sw64InstrInfo.cpp new file mode 100644 index 000000000000..8107c009230e --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrInfo.cpp @@ -0,0 +1,1012 @@ +//===-- Sw64InstrInfo.cpp - Sw64 Instruction Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Sw64InstrInfo.h" +#include "Sw64.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64OptionRecord.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineCombinerPattern.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +#define DEBUG_TYPE "Sw64combinefma" + +#define GET_INSTRINFO_CTOR_DTOR +#include "Sw64GenInstrInfo.inc" + +// Pin the vtable to this file. +void Sw64InstrInfo::anchor() {} + +Sw64InstrInfo::Sw64InstrInfo() + : Sw64GenInstrInfo(Sw64::ADJUSTSTACKDOWN, Sw64::ADJUSTSTACKUP), RI() {} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned Sw64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + switch (MI.getOpcode()) { + case Sw64::LDL: + case Sw64::LDW: + case Sw64::LDHU: + case Sw64::LDBU: + case Sw64::LDS: + case Sw64::LDD: + if (MI.getOperand(1).isFI()) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); + } + break; + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned Sw64InstrInfo::isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + switch (MI.getOpcode()) { + case Sw64::STL: + case Sw64::STH: + case Sw64::STB: + case Sw64::STW: + case Sw64::STS: + case Sw64::STD: + if (MI.getOperand(1).isFI()) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned Sw64InstrInfo::insertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "Sw64 branch conditions have two components!"); + + // Unconditional branch. 
+ if (Cond.empty()) { + MachineInstr &MI = *BuildMI(&MBB, DL, get(Sw64::PseudoBR)).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); + return 1; + } + + // Either a one or two-way conditional branch. + unsigned Opc = Cond[0].getImm(); + MachineInstr &CondMI = *BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(CondMI); + + // One-way conditional branch. + if (!FBB) + return 1; + + // Two-way conditional branch. + MachineInstr &MI = *BuildMI(&MBB, DL, get(Sw64::PseudoBR)).addMBB(FBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); + return 2; +} + +void Sw64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, MCRegister DestReg, + MCRegister SrcReg, bool KillSrc) const { + if ((Sw64::F4RCRegClass.contains(DestReg) || + Sw64::FPRC_loRegClass.contains(DestReg)) && // for rust and SIMD + Sw64::GPRCRegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::ITOFS), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::F4RCRegClass.contains(SrcReg) && // for rust and SIMD + Sw64::GPRCRegClass.contains(DestReg)) { + BuildMI(MBB, MI, DL, get(Sw64::FTOIS), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(SrcReg) && // for rust and SIMD + Sw64::GPRCRegClass.contains(DestReg)) { + BuildMI(MBB, MI, DL, get(Sw64::FTOIT), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(DestReg) && // for rust and SIMD + Sw64::GPRCRegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::ITOFT), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(DestReg) && // for rust and SIMD + Sw64::FPRC_loRegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(SrcReg) && // for rust and SIMD + Sw64::FPRC_loRegClass.contains(DestReg)) { + BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::GPRCRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::BISr), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::F4RCRegClass.contains(DestReg, SrcReg)) { + unsigned int RC = MI->getOperand(1).getReg(); + unsigned int Opc = Sw64::CPYSS; + for (MachineBasicBlock::iterator MBBI = MI; MBBI != MBB.begin(); --MBBI) { + if (MBBI->getOpcode() == Sw64::VLDS || MBBI->getOpcode() == Sw64::VLDD) { + unsigned int RD = MBBI->getOperand(0).getReg(); + if (RC == RD) + Opc = Sw64::VCPYS; + break; + } + } + BuildMI(MBB, MI, DL, get(Opc), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::F8RCRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::FPRCRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::CPYSD), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Sw64::V256LRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(Sw64::VOR), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + llvm_unreachable("Attempt to copy register that is not GPR or FPR"); + } +} + +void Sw64InstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 
Register SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, Register VReg) const { + + DebugLoc DL; + if (MI != MBB.end()) + DL = MI->getDebugLoc(); + + unsigned Opc = 0; + + if (RC == &Sw64::F4RCRegClass) + Opc = Sw64::STS; + else if (RC == &Sw64::F8RCRegClass) + Opc = Sw64::STD; + else if (RC == &Sw64::GPRCRegClass) + Opc = Sw64::STL; + else if (TRI->isTypeLegalForClass(*RC, MVT::i64) || + TRI->isTypeLegalForClass(*RC, MVT::f64)) + Opc = Sw64::STD; + else if (TRI->isTypeLegalForClass(*RC, MVT::i32) || + TRI->isTypeLegalForClass(*RC, MVT::f32)) + Opc = Sw64::STS; + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32)) + Opc = Sw64::VSTD; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4f32)) + Opc = Sw64::VSTS; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64)) + Opc = Sw64::VSTD; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4f64)) + Opc = Sw64::VSTD; + else + llvm_unreachable("Unhandled register class"); + + BuildMI(MBB, MI, DL, get(Opc)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIdx) + .addReg(Sw64::R31); +} + +void Sw64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + Register DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + Register VReg) const { + DebugLoc DL; + if (MI != MBB.end()) + DL = MI->getDebugLoc(); + + unsigned Opc = 0; + + if (RC == &Sw64::F4RCRegClass) + Opc = Sw64::LDS; + else if (RC == &Sw64::F8RCRegClass) + Opc = Sw64::LDD; + else if (RC == &Sw64::GPRCRegClass) + Opc = Sw64::LDL; + else if (TRI->isTypeLegalForClass(*RC, MVT::i64) || + TRI->isTypeLegalForClass(*RC, MVT::f64)) + Opc = Sw64::LDD; + else if (TRI->isTypeLegalForClass(*RC, MVT::i32) || + TRI->isTypeLegalForClass(*RC, MVT::f32)) + Opc = Sw64::LDS; + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32)) + Opc = Sw64::VLDD; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4f32)) + Opc = Sw64::VLDS; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64)) + Opc = Sw64::VLDD; + else if (TRI->isTypeLegalForClass(*RC, MVT::v4f64)) + Opc = Sw64::VLDD; + else + llvm_unreachable("Unhandled register class"); + + BuildMI(MBB, MI, DL, get(Opc), DestReg) + .addFrameIndex(FrameIdx) + .addReg(Sw64::R31); +} + +static unsigned Sw64RevCondCode(unsigned Opcode) { + switch (Opcode) { + case Sw64::BEQ: + return Sw64::BNE; + case Sw64::BNE: + return Sw64::BEQ; + case Sw64::BGE: + return Sw64::BLT; + case Sw64::BGT: + return Sw64::BLE; + case Sw64::BLE: + return Sw64::BGT; + case Sw64::BLT: + return Sw64::BGE; + case Sw64::BLBC: + return Sw64::BLBS; + case Sw64::BLBS: + return Sw64::BLBC; + case Sw64::FBEQ: + return Sw64::FBNE; + case Sw64::FBNE: + return Sw64::FBEQ; + case Sw64::FBGE: + return Sw64::FBLT; + case Sw64::FBGT: + return Sw64::FBLE; + case Sw64::FBLE: + return Sw64::FBGT; + case Sw64::FBLT: + return Sw64::FBGE; + default: + llvm_unreachable("Unknown opcode"); + } + return 0; // Not reached +} + +//===----------------------------------------------------------------------===// +// Branch Analysis +//===----------------------------------------------------------------------===// +// + +static bool isCondOpCode(unsigned Opcode) { + switch (Opcode) { + default: + return false; + case Sw64::BEQ: + case Sw64::BNE: + case Sw64::BGE: + case Sw64::BGT: + case Sw64::BLE: + case Sw64::BLT: + case Sw64::BLBC: + case Sw64::BLBS: + case Sw64::FBEQ: + case Sw64::FBNE: + case Sw64::FBGE: + case Sw64::FBGT: + case Sw64::FBLE: + case Sw64::FBLT: + return true; + } + return false; // Not 
reached +} + +static bool isUnCondOpCode(unsigned Opcode) { return Opcode == Sw64::PseudoBR; } + +static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, + SmallVectorImpl &Cond) { + + Target = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); +} + +bool Sw64InstrInfo::analyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return false; + + if (!isUnpredicatedTerminator(*I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + unsigned LastOpc = LastInst->getOpcode(); + // If there is only one terminator instruction, process it. + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + if (isUnCondOpCode(LastOpc)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (isCondOpCode(LastOpc)) { + parseCondBranch(LastInst, TBB, Cond); + return false; + } // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = &*I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. + if (AllowModify && isUnCondOpCode(LastOpc)) { + while (isUnCondOpCode(SecondLastOpc)) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + SecondLastInst = &*I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + } + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) + return true; + + if (isCondOpCode(SecondLastOpc) && isUnCondOpCode(LastOpc)) { + parseCondBranch(SecondLastInst, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two Sw64::BRs, handle it. The second one is not + // executed, so remove it. + if (isUnCondOpCode(SecondLastOpc) && isUnCondOpCode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +unsigned Sw64InstrInfo::removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved) const { + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return false; + + if (I->getOpcode() != Sw64::PseudoBR && !isCondOpCode(I->getOpcode())) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) { + if (BytesRemoved) + *BytesRemoved = 4; + return 1; + } + --I; + if (!isCondOpCode(I->getOpcode())) { + if (BytesRemoved) + *BytesRemoved = 4; + return 1; + } + + // Remove the branch. 
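+  // At this point the remaining terminator is a conditional branch: erase it
+  // as well and report both removed instructions (8 bytes).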
+ I->eraseFromParent(); + if (BytesRemoved) + *BytesRemoved = 8; + return 2; +} + +void Sw64InstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + DebugLoc DL; + BuildMI(MBB, MI, DL, get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); +} + +bool Sw64InstrInfo::ReverseBranchCondition( + SmallVectorImpl &Cond) const { + assert(Cond.size() == 2 && "Invalid Sw64 branch opcode!"); + Cond[0].setImm(Sw64RevCondCode(Cond[0].getImm())); + return false; +} + +/// getGlobalBaseReg - Return a virtual register initialized with the +/// the global base register value. Output instructions required to +/// initialize the register in the function entry block, if necessary. +/// +unsigned Sw64InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { + Sw64MachineFunctionInfo *Sw64FI = MF->getInfo(); + unsigned GlobalBaseReg = Sw64FI->getGlobalBaseReg(*MF); + if (GlobalBaseReg != 0) + return GlobalBaseReg; + + // Insert the set of GlobalBaseReg into the first MBB of the function + GlobalBaseReg = Sw64::R29; + Sw64FI->setGlobalBaseReg(GlobalBaseReg); + return GlobalBaseReg; +} + +/// getGlobalRetAddr - Return a virtual register initialized with the +/// the global base register value. Output instructions required to +/// initialize the register in the function entry block, if necessary. +/// +unsigned Sw64InstrInfo::getGlobalRetAddr(MachineFunction *MF) const { + Sw64MachineFunctionInfo *Sw64FI = MF->getInfo(); + unsigned GlobalRetAddr = Sw64FI->getGlobalRetAddr(*MF); + if (GlobalRetAddr != 0) + return GlobalRetAddr; + + // Insert the set of GlobalRetAddr into the first MBB of the function + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + GlobalRetAddr = Sw64::R26; + RegInfo.addLiveIn(Sw64::R26); + Sw64FI->setGlobalRetAddr(GlobalRetAddr); + return GlobalRetAddr; +} + +MachineInstr *Sw64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Sw64::DBG_VALUE)) + .addFrameIndex(FrameIx) + .addImm(0) + .addImm(Offset) + .addMetadata(MDPtr); + return &*MIB; +} + +// for vector optimize. +// Utility routine that checks if \param MO is defined by an +// \param CombineOpc instruction in the basic block \param MBB +static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, + unsigned CombineOpc) { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + MachineInstr *MI = nullptr; + + if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) + MI = MRI.getUniqueVRegDef(MO.getReg()); + + LLVM_DEBUG(dbgs() << "is MO reg?" << MO.isReg(); + dbgs() << "is Register Virtual?" + << Register::isVirtualRegister(MO.getReg())); + + // And it needs to be in the trace (otherwise, it won't have a depth). + if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc) + return false; + + // Must only used by the user we combine with. + if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) + return false; + + return true; +} + +// +// Is \param MO defined by a floating-point multiply and can be combined? +static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, + unsigned MulOpc) { + return canCombine(MBB, MO, MulOpc); +} + +// TODO: There are many more machine instruction opcodes to match: +// 1. Other data types (integer, vectors) +// 2. Other math / logic operations (xor, or) +// 3. 
Other forms of the same operation (intrinsics and other variants) +bool Sw64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, + bool Invert) const { + if (Invert) + return false; + switch (Inst.getOpcode()) { + case Sw64::ADDD: + case Sw64::ADDS: + case Sw64::MULD: + case Sw64::MULS: + case Sw64::VADDS: + case Sw64::VADDD: + case Sw64::VMULS: + case Sw64::VMULD: + return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; + default: + return false; + } +} + +// FP Opcodes that can be combined with a FMUL +static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { + switch (Inst.getOpcode()) { + default: + break; + case Sw64::ADDS: + case Sw64::ADDD: + case Sw64::SUBS: + case Sw64::SUBD: { + TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; + return (Options.UnsafeFPMath || + Options.AllowFPOpFusion == FPOpFusion::Fast); + } + case Sw64::VADDS: + case Sw64::VADDD: + case Sw64::VSUBS: + case Sw64::VSUBD: + return true; + } + return false; +} + +/// Find instructions that can be turned into madd. +static bool getFMAPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns) { + + if (!isCombineInstrCandidateFP(Root)) + return false; + + MachineBasicBlock &MBB = *Root.getParent(); + bool Found = false; + + switch (Root.getOpcode()) { + default: + assert(false && "Unsupported FP instruction in combiner\n"); + break; + case Sw64::ADDS: + assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && + "FADDS does not have register operands"); + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULS)) { + Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULS)) { + Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2); + Found = true; + } + break; + + case Sw64::ADDD: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULD)) { + Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULD)) { + Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2); + Found = true; + } + break; + + case Sw64::SUBS: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULS)) { + Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULS)) { + Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2); + Found = true; + } + break; + + case Sw64::SUBD: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::MULD)) { + Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::MULD)) { + Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2); + Found = true; + } + break; + case Sw64::VADDS: + assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() && + "FADDS does not have register operands"); + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULS)) { + Patterns.push_back(MachineCombinerPattern::VMULADDS_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULS)) { + Patterns.push_back(MachineCombinerPattern::VMULADDS_OP2); + Found = true; + } + break; + + case Sw64::VADDD: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULD)) { + Patterns.push_back(MachineCombinerPattern::VMULADDD_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULD)) { + Patterns.push_back(MachineCombinerPattern::VMULADDD_OP2); + Found = true; + } + break; + + case 
Sw64::VSUBS: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULS)) { + Patterns.push_back(MachineCombinerPattern::VMULSUBS_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULS)) { + Patterns.push_back(MachineCombinerPattern::VMULSUBS_OP2); + Found = true; + } + break; + case Sw64::VSUBD: + if (canCombineWithFMUL(MBB, Root.getOperand(1), Sw64::VMULD)) { + Patterns.push_back(MachineCombinerPattern::VMULSUBD_OP1); + Found = true; + } + if (canCombineWithFMUL(MBB, Root.getOperand(2), Sw64::VMULD)) { + Patterns.push_back(MachineCombinerPattern::VMULSUBD_OP2); + Found = true; + } + break; + } + return Found; +} + +/// Return true when a code sequence can improve throughput. It +/// should be called only for instructions in loops. +/// \param Pattern - combiner pattern +bool Sw64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { + switch (Pattern) { + default: + break; + case MachineCombinerPattern::FMULADDS_OP1: + case MachineCombinerPattern::FMULADDS_OP2: + case MachineCombinerPattern::FMULSUBS_OP1: + case MachineCombinerPattern::FMULSUBS_OP2: + case MachineCombinerPattern::FMULADDD_OP1: + case MachineCombinerPattern::FMULADDD_OP2: + case MachineCombinerPattern::FMULSUBD_OP1: + case MachineCombinerPattern::FMULSUBD_OP2: + case MachineCombinerPattern::FNMULSUBS_OP1: + case MachineCombinerPattern::FNMULSUBD_OP1: + case MachineCombinerPattern::VMULADDS_OP1: + case MachineCombinerPattern::VMULADDS_OP2: + case MachineCombinerPattern::VMULADDD_OP1: + case MachineCombinerPattern::VMULADDD_OP2: + case MachineCombinerPattern::VMULSUBS_OP1: + case MachineCombinerPattern::VMULSUBS_OP2: + case MachineCombinerPattern::VMULSUBD_OP1: + case MachineCombinerPattern::VMULSUBD_OP2: + return true; + } // end switch (Pattern) + return false; +} + +/// Return true when there is potentially a faster code sequence for an +/// instruction chain ending in \p Root. All potential patterns are listed in +/// the \p Pattern vector. Pattern should be sorted in priority order since the +/// pattern evaluator stops checking as soon as it finds a faster sequence. +bool Sw64InstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const { + // Floating point patterns + if (getFMAPatterns(Root, Patterns)) + return true; + + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, + DoRegPressureReduce); +} + +enum class FMAInstKind { Default, Indexed, Accumulator }; +/// genFusedMultiply - Generate fused multiply instructions. +/// This function supports both integer and floating point instructions. +/// A typical example: +/// F|MUL I=A,B,0 +/// F|ADD R,I,C +/// ==> F|MADD R,A,B,C +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information +/// \param Root is the F|ADD instruction +/// \param [out] InsInstrs is a vector of machine instructions and will +/// contain the generated madd instruction +/// \param IdxMulOpd is index of operand in Root that is the result of +/// the F|MUL. In the example above IdxMulOpd is 1. +/// \param MaddOpc the opcode fo the f|madd instruction +/// \param RC Register class of operands +/// \param kind of fma instruction (addressing mode) to be generated +/// \param ReplacedAddend is the result register from the instruction +/// replacing the non-combined operand, if any. 
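+/// For the *_OP2 patterns IdxMulOpd is 2, i.e. the multiply feeds the second
+/// operand of Root:
+///   F|MUL I=A,B
+///   F|ADD R,C,I
+///   ==> F|MADD R,A,B,C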
+static MachineInstr * +genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, MachineInstr &Root, + SmallVectorImpl &InsInstrs, unsigned IdxMulOpd, + unsigned MaddOpc, const TargetRegisterClass *RC, + FMAInstKind kind = FMAInstKind::Default, + const unsigned *ReplacedAddend = nullptr) { + assert(IdxMulOpd == 1 || IdxMulOpd == 2); + + LLVM_DEBUG(dbgs() << "creating fma insn \n"); + LLVM_DEBUG(dbgs() << MaddOpc); + LLVM_DEBUG(dbgs() << "\n"); + + unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1; + MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg()); + unsigned ResultReg = Root.getOperand(0).getReg(); + unsigned SrcReg0 = MUL->getOperand(1).getReg(); + bool Src0IsKill = MUL->getOperand(1).isKill(); + unsigned SrcReg1 = MUL->getOperand(2).getReg(); + bool Src1IsKill = MUL->getOperand(2).isKill(); + + unsigned SrcReg2; + bool Src2IsKill; + if (ReplacedAddend) { + // If we just generated a new addend, we must be it's only use. + SrcReg2 = *ReplacedAddend; + Src2IsKill = true; + } else { + SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); + Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); + } + if (Register::isVirtualRegister(ResultReg)) + MRI.constrainRegClass(ResultReg, RC); + if (Register::isVirtualRegister(SrcReg0)) + MRI.constrainRegClass(SrcReg0, RC); + if (Register::isVirtualRegister(SrcReg1)) + MRI.constrainRegClass(SrcReg1, RC); + if (Register::isVirtualRegister(SrcReg2)) + MRI.constrainRegClass(SrcReg2, RC); + + MachineInstrBuilder MIB; + if (kind == FMAInstKind::Default) + MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)) + .addReg(SrcReg2, getKillRegState(Src2IsKill)); + else if (kind == FMAInstKind::Indexed) + MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) + .addReg(SrcReg2, getKillRegState(Src2IsKill)) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)) + .addImm(MUL->getOperand(3).getImm()); + else if (kind == FMAInstKind::Accumulator) + MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg) + .addReg(SrcReg2, getKillRegState(Src2IsKill)) + .addReg(SrcReg0, getKillRegState(Src0IsKill)) + .addReg(SrcReg1, getKillRegState(Src1IsKill)); + else + assert(false && "Invalid FMA instruction kind \n"); + // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL) + InsInstrs.push_back(MIB); + return MUL; +} + +/// When getMachineCombinerPatterns() finds potential patterns, +/// this function generates the instructions that could replace the +/// original code sequence +void Sw64InstrInfo::genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { + + LLVM_DEBUG(dbgs() << "combining float instring\n"); + MachineBasicBlock &MBB = *Root.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + MachineInstr *MUL; + const TargetRegisterClass *RC; + unsigned Opc; + switch (Pattern) { + default: + // Reassociate instructions. 
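+    // Patterns not handled below (e.g. the generic REASSOC_* patterns) are
+    // delegated to the target-independent implementation.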
+ TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, + DelInstrs, InstrIdxForVirtReg); + return; + // Floating Point Support + case MachineCombinerPattern::FMULADDS_OP1: + case MachineCombinerPattern::FMULADDD_OP1: + // FMUL I=A,B + // FADD R,I,C + // ==> FMAx R,A,B,C + // --- Create(FMAx); + if (Pattern == MachineCombinerPattern::FMULADDS_OP1) { + Opc = Sw64::FMAS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FMAD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + case MachineCombinerPattern::FMULADDS_OP2: + case MachineCombinerPattern::FMULADDD_OP2: + // FMUL I=A,B + // FADD R,C,I + // ==> FMAx R,A,B,C + // --- Create(FMAx); + if (Pattern == MachineCombinerPattern::FMULADDS_OP2) { + Opc = Sw64::FMAS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FMAD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + + case MachineCombinerPattern::FMULSUBS_OP1: + case MachineCombinerPattern::FMULSUBD_OP1: { + // FMUL I=A,B,0 + // FSUB R,I,C + // ==> FMSx R,A,B,C // = A*B - C + // --- Create(FMSx); + if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) { + Opc = Sw64::FMSS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FMSD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + case MachineCombinerPattern::FMULSUBS_OP2: + case MachineCombinerPattern::FMULSUBD_OP2: { + // FMUL I=A,B,0 + // FSUB R,I,C + // ==> FNMAx R,A,B,C // = -A*B + C + // --- Create(FNMAx); + if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) { + Opc = Sw64::FNMAS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FNMAD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + } + + case MachineCombinerPattern::FNMULSUBS_OP1: + case MachineCombinerPattern::FNMULSUBD_OP1: { + // FNMUL I=A,B,0 + // FSUB R,I,C + // ==> FNMSx R,A,B,C // = -A*B - C + // --- Create(FNMSx); + if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { + Opc = Sw64::FNMSS; + RC = &Sw64::F4RCRegClass; + } else { + Opc = Sw64::FNMSD; + RC = &Sw64::F8RCRegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + + case MachineCombinerPattern::VMULADDS_OP1: + case MachineCombinerPattern::VMULADDD_OP1: { + // VMULx I=A,B + // VADDx I,C,R + // ==> VMAx A,B,C,R // = A*B+C + // --- Create(VMAx); + Opc = Pattern == MachineCombinerPattern::VMULADDS_OP1 ? Sw64::VMAS + : Sw64::VMAD; + RC = &Sw64::V256LRegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + case MachineCombinerPattern::VMULADDS_OP2: + case MachineCombinerPattern::VMULADDD_OP2: { + // VMUL I=A,B + // VADD C,R,I + // ==> VMA A,B,C,R (computes C + A*B) + // --- Create(FMSUB); + Opc = Pattern == MachineCombinerPattern::VMULADDS_OP2 ? Sw64::VMAS + : Sw64::VMAD; + RC = &Sw64::V256LRegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + } + + case MachineCombinerPattern::VMULSUBS_OP1: + case MachineCombinerPattern::VMULSUBD_OP1: { + // VMULx I=A,B + // VSUBx I,C,R + // ==> VMSx A,B,C,R // = A*B-C + // --- Create(VMSx); + Opc = Pattern == MachineCombinerPattern::VMULSUBS_OP1 ? 
Sw64::VMSS + : Sw64::VMSD; + RC = &Sw64::V256LRegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + case MachineCombinerPattern::VMULSUBS_OP2: + case MachineCombinerPattern::VMULSUBD_OP2: { + // FMUL I=A,B,0 + // FSUB R,C,I + // ==> FMSUB R,A,B,C (computes C - A*B) + // --- Create(FMSUB); + Opc = Pattern == MachineCombinerPattern::VMULSUBS_OP2 ? Sw64::VNMAS + : Sw64::VNMAD; + RC = &Sw64::V256LRegClass; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); + break; + } + } // end switch (Pattern) + // Record MUL and ADD/SUB for deletion + DelInstrs.push_back(MUL); + DelInstrs.push_back(&Root); +} + +bool Sw64InstrInfo::isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF)) + return true; + + switch (MI.getOpcode()) { + case Sw64::MOVProgPCGp: + case Sw64::MOVaddrPCGp: + case Sw64::WMEMB: + case Sw64::IMEMB: + case Sw64::MB: + return true; + } + return false; +} diff --git a/llvm/lib/Target/Sw64/Sw64InstrInfo.h b/llvm/lib/Target/Sw64/Sw64InstrInfo.h new file mode 100644 index 000000000000..c20d2b117bdd --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrInfo.h @@ -0,0 +1,143 @@ +//===-- Sw64InstrInfo.h - Sw64 Instruction Information --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64INSTRINFO_H +#define LLVM_LIB_TARGET_SW64_SW64INSTRINFO_H + +#include "Sw64RegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "Sw64GenInstrInfo.inc" + +namespace llvm { + +class Sw64InstrInfo : public Sw64GenInstrInfo { + const Sw64RegisterInfo RI; + virtual void anchor(); + +public: + Sw64InstrInfo(); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const Sw64RegisterInfo &getRegisterInfo() const { return RI; } + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + unsigned isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
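+  /// Mirrors isLoadFromStackSlot above for the STx family of stores.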
+  unsigned isStoreToStackSlot(const MachineInstr &MI,
+                              int &FrameIndex) const override;
+
+  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify) const override;
+
+  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+                        const DebugLoc &DL,
+                        int *BytesAdded = nullptr) const override;
+
+  unsigned removeBranch(MachineBasicBlock &MBB,
+                        int *BytesRemoved = nullptr) const override;
+
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
+                   bool KillSrc) const override;
+
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI, Register SrcReg,
+                           bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI,
+                           Register VReg) const override;
+
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI, Register DestReg,
+                            int FrameIndex, const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI,
+                            Register VReg) const override;
+
+  // Emit code before MBBI to load an immediate value into physical register
+  // Reg. Returns an iterator to the new instruction.
+  MachineBasicBlock::iterator loadImmediate(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator MI,
+                                            unsigned Reg, uint64_t Value) const;
+  void insertNoop(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator MI) const override;
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  /// getGlobalBaseReg - Return a virtual register initialized with the
+  /// global base register value. Output instructions required to
+  /// initialize the register in the function entry block, if necessary.
+  ///
+  unsigned getGlobalBaseReg(MachineFunction *MF) const;
+
+  /// getGlobalRetAddr - Return a virtual register initialized with the
+  /// global return address register value. Output instructions required to
+  /// initialize the register in the function entry block, if necessary.
+  ///
+  unsigned getGlobalRetAddr(MachineFunction *MF) const;
+
+  bool isSchedulingBoundary(const MachineInstr &MI,
+                            const MachineBasicBlock *MBB,
+                            const MachineFunction &MF) const override;
+
+  /// Return true when a code sequence can improve throughput. It
+  /// should be called only for instructions in loops.
+  /// \param Pattern - combiner pattern
+  bool isThroughputPattern(MachineCombinerPattern Pattern) const override;
+
+  /// Return true when there is potentially a faster code sequence
+  /// for an instruction chain ending in ``Root``. All potential patterns are
+  /// listed in the ``Patterns`` array.
+  bool
+  getMachineCombinerPatterns(MachineInstr &Root,
+                             SmallVectorImpl<MachineCombinerPattern> &Patterns,
+                             bool DoRegPressureReduce) const override;
+
+  /// Return true when Inst is associative and commutative so that it can be
+  /// reassociated.
+  bool isAssociativeAndCommutative(const MachineInstr &Inst,
+                                   bool Invert) const override;
+
+  /// When getMachineCombinerPatterns() finds patterns, this function generates
+  /// the instructions that could replace the original code sequence.
+  void genAlternativeCodeSequence(
+      MachineInstr &Root, MachineCombinerPattern Pattern,
+      SmallVectorImpl<MachineInstr *> &InsInstrs,
+      SmallVectorImpl<MachineInstr *> &DelInstrs,
+      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+  // Sw64 supports MachineCombiner.
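+  // Returning true lets the MachineCombiner pass exercise the pattern hooks
+  // declared above.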
+ bool useMachineCombiner() const override { return true; } + + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; +}; +} // namespace llvm +#endif // END LLVM_LIB_TARGET_SW64_SW64INSTRINFO_H diff --git a/llvm/lib/Target/Sw64/Sw64InstrInfo.td b/llvm/lib/Target/Sw64/Sw64InstrInfo.td new file mode 100644 index 000000000000..c69b805ce622 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrInfo.td @@ -0,0 +1,2084 @@ +//===- Sw64InstrInfo.td - The Sw64 Instruction Set -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +include "Sw64InstrFormats.td" + +//===----------------------------------------------------------------------===// +// Sw64 Instruction Predicate Definitions. +// +def EnableIntShift : Predicate<"Subtarget->enableIntShift()">, + AssemblerPredicate<(all_of Featureintshift), "swIntShift">; + +def EnableFloatCmov : Predicate<"Subtarget->enableFloatCmov()">; + +def EnableCrcInst : Predicate<"Subtarget->enableCrcInst()">; + +def EnableWmembInst : Predicate<"Subtarget->enableWmembInst()">; + +def EnableCasInst : Predicate<"Subtarget->enableCasInst()">; + +def HasSIMD : Predicate<"Subtarget->hasSIMD()">, + AssemblerPredicate<(all_of FeatureSIMD)>; +//******************** +//Custom DAG Nodes +//******************** + +def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [ + SDTCisFP<1>, SDTCisFP<0> +]>; + +def SDTIntTriOp : SDTypeProfile<1, 3, [ + SDTCisInt<1>, SDTCisInt<0>, SDTCisInt<2>, SDTCisInt<3> +]>; + +def Sw64_cvtqt : SDNode<"Sw64ISD::CVTQT_", SDTFPUnaryOpUnC, []>; +def Sw64_cvtqs : SDNode<"Sw64ISD::CVTQS_", SDTFPUnaryOpUnC, []>; +def Sw64_cvttq : SDNode<"Sw64ISD::CVTTQ_" , SDTFPUnaryOp, []>; +def Sw64_cvtts : SDNode<"Sw64ISD::CVTTS_", SDTFPUnaryOpUnC, + [SDNPHasChain]>; +def Sw64_cvtst : SDNode<"Sw64ISD::CVTST_", SDTFPUnaryOpUnC, + [SDNPHasChain]>; +def Sw64_tprello : SDNode<"Sw64ISD::TPRelLo", SDTIntBinOp, []>; +def Sw64_tprelhi : SDNode<"Sw64ISD::TPRelHi", SDTIntBinOp, []>; + +def Sw64_tlsgd : SDNode<"Sw64ISD::TLSGD", SDTIntTriOp, []>; +def Sw64_tlsldm : SDNode<"Sw64ISD::TLSLDM", SDTIntBinOp, []>; +def Sw64_dtprello : SDNode<"Sw64ISD::DTPRelLo", SDTIntBinOp, []>; +def Sw64_dtprelhi : SDNode<"Sw64ISD::DTPRelHi", SDTIntBinOp, []>; + +def Sw64_syscall : SDNode<"Sw64ISD::SysCall", SDTIntUnaryOp, []>; +def Sw64_LDAWithChain : SDNode<"Sw64ISD::LDAWC", SDTIntBinOp, [SDNPHasChain]>; +def Sw64_gprello : SDNode<"Sw64ISD::GPRelLo", SDTIntUnaryOp>; +def Sw64_gprelhi : SDNode<"Sw64ISD::GPRelHi", SDTIntUnaryOp>; +def Sw64_rellit : SDNode<"Sw64ISD::RelLit", SDTIntUnaryOp>; + +def Sw64_gprel : SDNode<"Sw64ISD::GPRel", SDTIntUnaryOp>; +def Sw64_tprel : SDNode<"Sw64ISD::TPRel", SDTIntUnaryOp>; +def Sw64_dtprel : SDNode<"Sw64ISD::DTPRel", SDTIntUnaryOp>; + +def Sw64ldih : SDNode<"Sw64ISD::LDIH", SDTIntUnaryOp, []>; +def Sw64ldi : SDNode<"Sw64ISD::LDI", SDTIntBinOp, [SDNPOutGlue]>; + +def Sw64_relgottp : SDNode<"Sw64ISD::RelGottp", SDTIntBinOp, [SDNPMayLoad]>; +def retflag : SDNode<"Sw64ISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +// These are target-independent nodes, but have target-specific formats. 
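+// callseq_start/callseq_end are selected into the ADJUSTSTACKDOWN and
+// ADJUSTSTACKUP pseudos defined further down.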
+def SDT_Sw64CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>; +def SDT_Sw64CallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>, + SDTCisVT<1, i64> ]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_Sw64CallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_Sw64CallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def Sw64_frecs : SDNode<"Sw64ISD::FRECS", SDTFPUnaryOp, []>; +def Sw64_frecd : SDNode<"Sw64ISD::FRECD", SDTFPUnaryOp, []>; +def Sw64_sbt : SDNode<"Sw64ISD::SBT", SDTIntBinOp, []>; +def Sw64_cbt : SDNode<"Sw64ISD::CBT", SDTIntBinOp, []>; +def Sw64_addpi : SDNode<"Sw64ISD::ADDPI", SDTIntUnaryOp, []>; +def Sw64_addpis : SDNode<"Sw64ISD::ADDPIS", SDTIntUnaryOp, []>; + +def Sw64_revbh : SDNode<"Sw64ISD::REVBH", SDTIntUnaryOp, []>; +def Sw64_revbw : SDNode<"Sw64ISD::REVBW", SDTIntUnaryOp, []>; + +def Sw64_rolw : SDNode<"Sw64ISD::ROLW", SDTIntBinOp, []>; + +def Sw64_crc32b : SDNode<"Sw64ISD::CRC32B", SDTIntBinOp, []>; +def Sw64_crc32h : SDNode<"Sw64ISD::CRC32H", SDTIntBinOp, []>; +def Sw64_crc32w : SDNode<"Sw64ISD::CRC32W", SDTIntBinOp, []>; +def Sw64_crc32l : SDNode<"Sw64ISD::CRC32L", SDTIntBinOp, []>; +def Sw64_crc32cb : SDNode<"Sw64ISD::CRC32CB", SDTIntBinOp, []>; +def Sw64_crc32ch : SDNode<"Sw64ISD::CRC32CH", SDTIntBinOp, []>; +def Sw64_crc32cw : SDNode<"Sw64ISD::CRC32CW", SDTIntBinOp, []>; +def Sw64_crc32cl : SDNode<"Sw64ISD::CRC32CL", SDTIntBinOp, []>; + +def Sw64_casl : SDNode<"Sw64ISD::CASL", SDTIntBinOp, []>; +def Sw64_casw : SDNode<"Sw64ISD::CASW", SDTIntBinOp, []>; + +let Constraints = "$RD = $RC" in +class inst_cas opc, bits<8> fun, string opstr> + : OForm ; + +def CASW : inst_cas<0x10, 0x5e, "casw">; +def CASL : inst_cas<0x10, 0x5f, "casl">; + +def : Pat<(atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp), + (CASL GPRC:$cmp, GPRC:$ptr, GPRC:$swp)>, + Requires<[EnableCasInst, HasCore4]>; + +def : Pat<(atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp), + (CASW GPRC:$cmp, GPRC:$ptr, GPRC:$swp)>, + Requires<[EnableCasInst, HasCore4]>; + +def call_symbol : Operand; +//******************** +//Paterns for matching +//******************** +def invX : SDNodeXFormgetZExtValue(), SDLoc(N)); +}]>; +def negX : SDNodeXFormgetZExtValue() + 1, SDLoc(N)); +}]>; +def SExt32 : SDNodeXFormgetZExtValue() << 32) >> 32, SDLoc(N)); +}]>; +def SExt16 : SDNodeXFormgetZExtValue() << 48) >> 48, SDLoc(N)); +}]>; +def LL16 : SDNodeXFormgetZExtValue()), SDLoc(N)); +}]>; +def LH16 : SDNodeXFormgetZExtValue()), SDLoc(N)); +}]>; +def iZAPX : SDNodeXForm(N->getOperand(1)); + return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()), SDLoc(N)); +}]>; +def nearP2X : SDNodeXFormgetZExtValue())), SDLoc(N)); +}]>; +def nearP2RemX : SDNodeXFormgetZExtValue() - getNearPower2((uint64_t)N->getZExtValue())); + return getI64Imm(Log2_64(x), SDLoc(N)); +}]>; + +def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field + return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue(); +}]>; +def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field + return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue(); +}], invX>; +def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field + return ((uint64_t)~N->getZExtValue() + 1) == + (uint8_t)((uint64_t)~N->getZExtValue() + 1); +}], negX>; + +def immUExt13 : PatLeaf<(imm), [{ + return (uint32_t)N->getZExtValue() < (1 << 13); +}]>; + +def immSExt12 : PatLeaf<(imm), [{ //imm fits in 12 bit sign extended field + return ((int64_t)N->getZExtValue() << 
52) >> 52 == + (int64_t)N->getZExtValue(); +}]>; + +def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field + return ((int64_t)N->getZExtValue() << 48) >> 48 == + (int64_t)N->getZExtValue(); +}]>; + +def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{ + ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); + if (!RHS) return 0; + uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue()); + return build != 0; +}]>; + +def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0 + (void)N; // silence warning. + return true; +}]>; + +def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>; +def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>; +def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>; +def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>; +def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>; +def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>; +def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>; +def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>; +def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>; +def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>; + +def immRemP2n : PatLeaf<(imm), [{ + return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) - + N->getZExtValue()); +}]>; +def immRemP2 : PatLeaf<(imm), [{ + return isPowerOf2_64(N->getZExtValue() - + getNearPower2((uint64_t)N->getZExtValue())); +}]>; +def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi + int64_t d = abs64((int64_t)N->getZExtValue() - + (int64_t)getNearPower2((uint64_t)N->getZExtValue())); + if (isPowerOf2_64(d)) return false; + switch (d) { + case 1: case 3: case 5: return false; + default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue(); + }; +}]>; + +def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>; +def add4 : PatFrag<(ops node:$op1, node:$op2), + (add (shl node:$op1, (i64 2)), node:$op2)>; +def sub4 : PatFrag<(ops node:$op1, node:$op2), + (sub (shl node:$op1, (i64 2)), node:$op2)>; +def add8 : PatFrag<(ops node:$op1, node:$op2), + (add (shl node:$op1, (i64 3)), node:$op2)>; +def sub8 : PatFrag<(ops node:$op1, node:$op2), + (sub (shl node:$op1, (i64 3)), node:$op2)>; + +class ThridOpFrag : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; +class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; +class CmpOpFrag : PatFrag<(ops node:$R), res>; + +def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ + return isOrEquivalentToAdd(N); +}]>; +def AddrFI : ComplexPattern; + +//Pseudo ops for selection + +def WTF : PseudoInstSw64<(outs), (ins variable_ops), "#wtf", []>, Sched<[]>; + +let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in { +def ADJUSTSTACKUP : PseudoInstSw64<(outs), (ins s64imm:$amt1, s64imm:$amt2), + "; ADJUP $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>, Sched<[]>; +def ADJUSTSTACKDOWN : PseudoInstSw64<(outs), (ins s64imm:$amt1, s64imm:$amt2), + "; ADJDOWN $amt1", + [(callseq_start (i64 timm:$amt1), (i64 timm:$amt2))]>, Sched<[]>; +} + +let isCodeGenOnly = 1 in { +def ALTENT : PseudoInstSw64<(outs), (ins s64imm:$TARGET), "$$${TARGET}..ng:\n", + []>, Sched<[]>; +def PCLABEL : PseudoInstSw64<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[]>, + Sched<[]>; +def MEMLABEL : PseudoInstSw64<(outs), (ins 
s64imm:$i, s64imm:$j, s64imm:$k, + s64imm:$m), + "LSMARKER$$$i$$$j$$$k$$$m:", []>, Sched<[]>; +} +let hasNoSchedulingInfo = 1 in { +let usesCustomInserter = 1 in { // Expanded after instruction selection. +def ATOMIC_CMP_SWAP_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic cmpare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + +def ATOMIC_CMP_SWAP_I64 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + + +def ATOMIC_LOAD_ADD_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load add", + [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_ADD_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load add", + [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_UMAX_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load umax", + [(set GPRC:$dst, (atomic_load_umax_64 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MAX_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load max", + [(set GPRC:$dst, (atomic_load_max_64 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_UMIN_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load umin", + [(set GPRC:$dst, (atomic_load_umin_64 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MIN_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load min", + [(set GPRC:$dst, (atomic_load_min_64 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_NAND_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 64-bit atomic load nand", + [(set GPRC:$dst, (atomic_load_nand_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_UMAX_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load umax", + [(set GPRC:$dst, (atomic_load_umax_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MAX_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load max", + [(set GPRC:$dst, (atomic_load_max_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_UMIN_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load umin", + [(set GPRC:$dst, (atomic_load_umin_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MIN_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load min", + [(set GPRC:$dst, (atomic_load_min_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_NAND_I32 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 32-bit atomic load nand", + [(set GPRC:$dst, (atomic_load_nand_32 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_UMAX_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load umax", + [(set GPRC:$dst, (atomic_load_umax_16 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MAX_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load max", + [(set GPRC:$dst, (atomic_load_max_16 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_UMIN_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load umin", + [(set GPRC:$dst, (atomic_load_umin_16 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MIN_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load min", + [(set GPRC:$dst, (atomic_load_min_16 GPRC:$ptr, 
GPRC:$swp))]>; +def ATOMIC_LOAD_NAND_I16 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 16-bit atomic load nand", + [(set GPRC:$dst, (atomic_load_nand_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_UMAX_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load umax", + [(set GPRC:$dst, (atomic_load_umax_8 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MAX_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load max", + [(set GPRC:$dst, (atomic_load_max_8 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_UMIN_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load umin", + [(set GPRC:$dst, (atomic_load_umin_8 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_MIN_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load min", + [(set GPRC:$dst, (atomic_load_min_8 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_NAND_I8 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp),"# 8-bit atomic load nand", + [(set GPRC:$dst, (atomic_load_nand_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_SWAP_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_SWAP_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))]>; + + +def ATOMIC_LOAD_AND_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load and", + [(set GPRC:$dst, (atomic_load_and_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_AND_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load and", + [(set GPRC:$dst, (atomic_load_and_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_OR_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load or", + [(set GPRC:$dst, (atomic_load_or_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_OR_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load and", + [(set GPRC:$dst, (atomic_load_or_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_SUB_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load sub", + [(set GPRC:$dst, (atomic_load_sub_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_SUB_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load sub", + [(set GPRC:$dst, (atomic_load_sub_64 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_XOR_I32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic load xor", + [(set GPRC:$dst, (atomic_load_xor_32 GPRC:$ptr, GPRC:$swp))]>; +def ATOMIC_LOAD_XOR_I64 :PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic load xor", + [(set GPRC:$dst, (atomic_load_xor_64 GPRC:$ptr, GPRC:$swp))]>; + + +//I8 +def ATOMIC_LOAD_ADD_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load add", + [(set GPRC:$dst, (atomic_load_add_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_SWAP_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_AND_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load and", + [(set GPRC:$dst, (atomic_load_and_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_OR_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic 
load or", + [(set GPRC:$dst, (atomic_load_or_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_SUB_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load sub", + [(set GPRC:$dst, (atomic_load_sub_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_XOR_I8: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic load xor", + [(set GPRC:$dst, (atomic_load_xor_8 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_CMP_SWAP_I8 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_8 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + +//I16 +def ATOMIC_LOAD_ADD_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic load add", + [(set GPRC:$dst, (atomic_load_add_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_SWAP_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_AND_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic and", + [(set GPRC:$dst, (atomic_load_and_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_OR_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic load or", + [(set GPRC:$dst, (atomic_load_or_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_SUB_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic load and sub", + [(set GPRC:$dst, (atomic_load_sub_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_LOAD_XOR_I16: PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic xor", + [(set GPRC:$dst, (atomic_load_xor_16 GPRC:$ptr, GPRC:$swp))]>; + +def ATOMIC_CMP_SWAP_I16 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_16 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + + +def CAS32 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; +def CAS64 : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic compare and swap", + [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))]>; + +def LAS32 : PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), + "# 32-bit atomic load and sub", + [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))]>; +def LAS64 :PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), + "# 64-bit atomic load and sub", + [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))]>; + +def SWAP32 : PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), + "# 32-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))]>; +def SWAP64 :PseudoInstSw64<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), + "# 64-bit atomic swap", + [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))]>; +} + +let mayLoad = 1, mayStore = 1 in { + def ATOMIC_LOAD_ADD_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_ADD_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_SWAP_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_SWAP_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + 
def ATOMIC_LOAD_AND_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_AND_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_LOAD_OR_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_OR_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_LOAD_SUB_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_SUB_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_LOAD_XOR_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_LOAD_XOR_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 64-bit atomic", []>; + + def ATOMIC_CMP_SWAP_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; + def ATOMIC_CMP_SWAP_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +} +def ATOMIC_LOAD_ADD_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_ADD_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_SWAP_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_SWAP_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_AND_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_AND_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_OR_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_OR_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_SUB_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_SUB_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_XOR_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_XOR_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_CMP_SWAP_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_CMP_SWAP_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_UMAX_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_MAX_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_UMIN_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_MIN_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; +def ATOMIC_LOAD_NAND_I8_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins 
GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 8-bit atomic", []>; + +def ATOMIC_LOAD_UMAX_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; +def ATOMIC_LOAD_MAX_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; +def ATOMIC_LOAD_UMIN_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; +def ATOMIC_LOAD_MIN_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; +def ATOMIC_LOAD_NAND_I16_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 16-bit atomic", []>; + +def ATOMIC_LOAD_UMAX_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; +def ATOMIC_LOAD_MAX_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; +def ATOMIC_LOAD_UMIN_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; +def ATOMIC_LOAD_MIN_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; +def ATOMIC_LOAD_NAND_I32_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 32-bit atomic", []>; + +def ATOMIC_LOAD_UMAX_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +def ATOMIC_LOAD_MAX_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +def ATOMIC_LOAD_UMIN_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +def ATOMIC_LOAD_MIN_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; +def ATOMIC_LOAD_NAND_I64_POSTRA : PseudoInstSw64<(outs GPRC:$dst), + (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "# 64-bit atomic", []>; + + + +}// for atomic load/store. 
set hasNoSchedulingInfo + +//*********************** +//Real instructions +//*********************** + +//4 The base instruction system +//4.1 system call instruction +let hasSideEffects = 1 in +class inst_syscall opcode, string opstr, string operands=""> + : PALForm; + +let Defs=[R0] in +def SYS_CALL : inst_syscall<0x00, "sys_call", "$disp">; + +def LBR : inst_syscall<0x1d, "lbr", "$disp">; + +def : Pat<(Sw64_syscall (i64 immUExt8:$N)),(SYS_CALL immUExt8:$N )>; + +//4.2 control instruction + +//4.2.1 jump +// Call + +def SDT_Sw64JmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; +def Sw64JmpLink : SDNode<"Sw64ISD::JmpLink", SDT_Sw64JmpLink, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; + +class arg_jmp opcode, list pattern> + : MForm; + +class branch_i opcode, list pattern> + : BForm; + +let isCall = 1, Defs = [R26], Uses = [R27] in +def JSR : arg_jmp<"call", 0x01, []>; + +let isReturn = 1, isTerminator = 1, isBarrier = 1 in +def RET : arg_jmp<"ret", 0x02, []>; + +def Sw64Ret : SDNode<"Sw64ISD::Ret", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +let RA = 31, DISP=0 in +def JMP : arg_jmp<"jmp", 0x03, []>; + +let isBranch = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in +def BR : branch_i<"br", 0x04, []>; +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in +def BSR : branch_i<"bsr", 0x05, []>; + +// for expand Call target, we create two Insns like: +// load R27,symbol(GP) defs R27 +// call R26,R27,symbol use R27 +// so we dont need to use R27, we actually def R27 +let isBarrier = 1, isCall = 1, Defs = [R26, R27], Uses = [R29] in { +def PseudoCall : PseudoInstSw64<(outs), (ins call_symbol:$func), "", + []>,Sched<[WriteJmp]>; +} + +let isBarrier = 1, isCall = 1, Defs = [R26], Uses = [R27, R29] in { + def PseudoCallIndirect : PseudoInstSw64<(outs), (ins GPRC:$RB), "", + [(Sw64JmpLink GPRC:$RB)]>, + PseudoInstExpansion<(JSR R26, GPRC:$RB, 0)>, + Sched<[WriteJmp]>; +} + +let isBarrier = 1, isBranch = 1, isTerminator = 1 in +def PseudoBrind : PseudoInstSw64<(outs), (ins GPRC:$RB), "", + [(brind GPRC:$RB)]>, + PseudoInstExpansion<(JMP R31, GPRC:$RB, 0)>, + Sched<[WriteJmp]>; + +//to match libgcc _div _rem +let isBarrier = 1, isCall = 1, Defs = [R23, R24, R25, R27, R28] + , Uses = [R24, R25, R27] in +def PseudoCallDiv : PseudoInstSw64<(outs), (ins), "", []>, + PseudoInstExpansion<(JSR R23, R27, 0)>, + Sched<[WriteJmp]>; + +let isBranch = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in +def PseudoBR : PseudoInstSw64<(outs), (ins target:$disp), "", [(br bb:$disp)]>, + PseudoInstExpansion<(BR R31, target:$disp)>, Sched<[WriteJmp]>; + +let isBarrier = 1, isReturn = 1, isTerminator = 1 in +def PseudoRet : PseudoInstSw64<(outs), (ins), "", [(Sw64Ret)]>, + PseudoInstExpansion<(RET R31, R26, 1)>, Sched<[WriteJmp]>; + +////4.2.2 uncondition shift +///////////////////////////////////////////////////////// +//Branching +///////////////////////////////////////////////////////// +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in{ +class br_icc opc, string asmstr> + : BForm; +class br_fcc opc, string asmstr> + : BForm; +} +def BEQ : br_icc<0x30, "beq">; +def BGE : br_icc<0x35, "bge">; +def BGT : br_icc<0x34, "bgt">; +def BLBC : br_icc<0x36, "blbc">; +def BLBS : br_icc<0x37, "blbs">; +def BLE : br_icc<0x33, "ble">; +def BLT : br_icc<0x32, "blt">; +def BNE : br_icc<0x31, "bne">; + +//Branches, float +def FBEQ : br_fcc<0x38, "fbeq">; +def FBGE : br_fcc<0x3D, "fbge">; +def FBGT : br_fcc<0x3C, "fbgt">; +def FBLE : br_fcc<0x3B, "fble">; +def FBLT : br_fcc<0x3A, 
"fblt">; +def FBNE : br_fcc<0x39, "fbne">; +//4.3 load and store instruction +//4.3.1 load integer + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +class load_ri opcode, RegisterClass regtype, + SDPatternOperator loadop> + : MForm; + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 ,Constraints = "$RB = $wback,@earlyclobber $wback" in +class load_ri1 opcode, bits<4> func, + RegisterClass regtype, SDPatternOperator loadop> + : MFuncForm; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +class store_ri opcode, RegisterClass regtype, + SDPatternOperator storeop> + : MForm; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 ,Constraints = "$RB = $wback,@earlyclobber $wback" in +class store_ri1 opcode, bits<4> func, + RegisterClass regtype, SDPatternOperator storeop> + : MFuncForm; + +// integer load +def LDL : load_ri<"ldl", 0x23, GPRC, load>; +def LDW : load_ri<"ldw", 0x22, GPRC, sextloadi32>; +def LDHU : load_ri<"ldhu", 0x21, GPRC, zextloadi16>; +def LDBU : load_ri<"ldbu", 0x20, GPRC, zextloadi8>; +def LDL_A : load_ri1<"ldl_a", 0x1E, 0x3, GPRC, load>; +def LDW_A : load_ri1<"ldw_a", 0x1E, 0x2, GPRC, sextloadi32>; +def LDHU_A : load_ri1<"ldhu_a", 0x1E, 0x1, GPRC, zextloadi16>; +def LDBU_A : load_ri1<"ldbu_a", 0x1E, 0x0, GPRC, zextloadi8>; + +// float load +def LDS : load_ri<"flds", 0x26, F4RC, load>; +def LDD : load_ri<"fldd", 0x27, F8RC, load>; +def LDS_A : load_ri1<"flds_a", 0x1E, 0x4, F4RC, load>; +def LDD_A : load_ri1<"fldd_a", 0x1E, 0x5, F8RC, load>; + +// integer store +def STL : store_ri<"stl", 0x2B, GPRC, store>; +def STW : store_ri<"stw", 0x2A, GPRC, truncstorei32>; +def STH : store_ri<"sth", 0x29, GPRC, truncstorei16>; +def STB : store_ri<"stb", 0x28, GPRC, truncstorei8>; +def STL_A : store_ri1<"stl_a", 0x1E, 0x9, GPRC, store>; +def STW_A : store_ri1<"stw_a", 0x1E, 0x8, GPRC, truncstorei32>; +def STH_A : store_ri1<"sth_a", 0x1E, 0x7, GPRC, truncstorei16>; +def STB_A : store_ri1<"stb_a", 0x1E, 0x6, GPRC, truncstorei8>; + +// float store +def STS : store_ri<"fsts", 0x2E, F4RC, store>; +def STD : store_ri<"fstd", 0x2F, F8RC, store>; +def STS_A : store_ri1<"fsts_a", 0x1E, 0xA, F4RC, store>; +def STD_A : store_ri1<"fstd_a", 0x1E, 0xB, F8RC, store>; + +// imm inst +def LDA : MForm<0x3E,(ins s64imm:$DISP, GPRC:$RB) , (outs GPRC:$RA), + "ldi", "$RA,${DISP}(${RB})", + [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))]>; +def LDAH : MForm<0x3F,(ins s64imm:$DISP, GPRC:$RB) , (outs GPRC:$RA), + "ldih", "$RA,${DISP}(${RB})", []>; + +let Uses = [R29] in { +def LOADgprel : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", + [(set GPRC:$dst, (Sw64_gprel tglobaladdr:$addr))]>, Sched<[WriteLD]>; + +def LOADconstant : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", []>, + Sched<[WriteAdrLD]>; + +def LOADlit : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", + [(set GPRC:$dst, (Sw64_rellit tglobaladdr:$addr))]>, Sched<[WriteLD]>; + +def LOADlitSym : PseudoInstSw64<(outs GPRC:$dst), (ins s64imm:$addr), "", + [(set GPRC:$dst, (Sw64_rellit texternalsym:$addr))]>, Sched<[WriteLD]>; + + +// The MOVaddr instruction should match only when the add is not folded +// into a load or store address. 
+def MOVaddrGP +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih tglobaladdr:$hi), + tglobaladdr:$low))]>, Sched<[WriteAdrAdr]>; + +def MOVaddrCP +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih tconstpool:$hi), + tconstpool:$low))]>, Sched<[WriteAdrAdr]>; + +def MOVaddrBA +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih tblockaddress:$hi), + tblockaddress:$low))]>, Sched<[WriteAdrAdr]>; + +def MOVaddrEXT +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih texternalsym:$hi), + texternalsym:$low))]>, Sched<[WriteAdrAdr]>; + +def MOVaddrJT +: PseudoInstSw64<(outs GPRC:$dst), (ins i64imm:$hi, i64imm:$low), "", + [(set GPRC:$dst, (Sw64ldi (Sw64ldih tjumptable:$hi), + tjumptable:$low))]>, Sched<[WriteAdrAdr]>; +} + +//TODO: for core3 target, sw64 need gpdisp to get global address table +// we cannot change Prologue disp, or it will cause error +let isBarrier = 1, hasNoSchedulingInfo = 1, Defs = [R29] in +def MOVProgPCGp : PseudoInstSw64<(outs), + (ins s16imm:$DISP, s16imm:$NUM , GPRC:$dst_reg), "", []>, + Sched<[WriteAdrAdr]>; + +let Defs = [R29] in +def MOVaddrPCGp : PseudoInstSw64<(outs), + (ins s16imm:$DISP, s16imm:$NUM , GPRC:$dst_reg), "", []>, + Sched<[WriteAdrAdr]>; + +// def patterns + +def : Pat<(Sw64JmpLink tglobaladdr:$func), + (PseudoCall tglobaladdr:$func)>; +def : Pat<(Sw64JmpLink texternalsym:$func), + (PseudoCall texternalsym:$func)>; + +def : Pat<(Sw64_LDAWithChain GPRC:$RB, immSExt16:$DISP), + (LDA immSExt16:$DISP, GPRC:$RB)>; + + +def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))), + (LDBU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))), + (LDHU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))), + (LDW immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (zextloadi8 (add GPRC:$RB, immSExt16:$DISP))), + (LDBU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (zextloadi16 (add GPRC:$RB, immSExt16:$DISP))), + (LDHU immSExt16:$DISP, GPRC:$RB)>; +def : Pat<(i64 (sextloadi32 (add GPRC:$RB, immSExt16:$DISP))), + (LDW immSExt16:$DISP, GPRC:$RB)>; + +def : Pat<(i64 (load GPRC:$addr)), + (LDL 0, GPRC:$addr)>; +def : Pat<(i64 (sextloadi32 GPRC:$addr)), + (LDW 0, GPRC:$addr)>; +def : Pat<(i64 (extloadi32 GPRC:$addr)), + (LDW 0, GPRC:$addr)>; +def : Pat<(i64 (zextloadi16 GPRC:$addr)), + (LDHU 0, GPRC:$addr)>; +def : Pat<(i64 (extloadi16 GPRC:$addr)), + (LDHU 0, GPRC:$addr)>; +def : Pat<(i64 (zextloadi8 GPRC:$addr)), + (LDBU 0, GPRC:$addr)>; +def : Pat<(i64 (extloadi8 GPRC:$addr)), + (LDBU 0, GPRC:$addr)>; + +//4.3.5 s float load +def : Pat<(f32 (load GPRC:$addr)), + (LDS 0, GPRC:$addr)>; +def : Pat<(f64 (load GPRC:$addr)), + (LDD 0, GPRC:$addr)>; +//4.3.3 store integer +def : Pat<(store GPRC:$DATA, GPRC:$addr), + (STL GPRC:$DATA, 0, GPRC:$addr)>; +def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr), + (STW GPRC:$DATA, 0, GPRC:$addr)>; +def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr), + (STH GPRC:$DATA, 0, GPRC:$addr)>; +def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr), + (STB GPRC:$DATA, 0, GPRC:$addr)>; +def : Pat<(store F4RC:$DATA, GPRC:$addr), + (STS F4RC:$DATA, 0, GPRC:$addr)>; +def : Pat<(store F8RC:$DATA, GPRC:$addr), + (STD F8RC:$DATA, 0, GPRC:$addr)>; +multiclass LdPat { + def : Pat<(VT (LoadOp GPRC:$addr)), (Inst 0, GPRC:$addr)>; + def : Pat<(VT (LoadOp AddrFI:$addr)), 
(Inst 0, AddrFI:$addr)>; + def : Pat<(VT (LoadOp (add GPRC:$addr, immSExt16:$DISP))), + (Inst immSExt16:$DISP, GPRC:$addr)>; + def : Pat<(VT (LoadOp (add AddrFI:$addr, immSExt16:$DISP))), + (Inst immSExt16:$DISP, AddrFI:$addr)>; + def : Pat<(VT (LoadOp (IsOrAdd AddrFI:$addr, immSExt16:$DISP))), + (Inst immSExt16:$DISP, AddrFI:$addr)>; +} + +multiclass StPat { + def : Pat<(StoreOp StTy:$rs2, GPRC:$rs1), (Inst StTy:$rs2, 0, GPRC:$rs1)>; + def : Pat<(StoreOp StTy:$rs2, AddrFI:$rs1), (Inst StTy:$rs2, 0, AddrFI:$rs1)>; + def : Pat<(StoreOp StTy:$rs2, (add GPRC:$rs1, immSExt16:$DISP)), + (Inst StTy:$rs2, immSExt16:$DISP, GPRC:$rs1)>; + def : Pat<(StoreOp StTy:$rs2, (add AddrFI:$rs1, immSExt16:$DISP)), + (Inst StTy:$rs2, immSExt16:$DISP, AddrFI:$rs1)>; + def : Pat<(StoreOp StTy:$rs2, (IsOrAdd AddrFI:$rs1, immSExt16:$DISP)), + (Inst StTy:$rs2, immSExt16:$DISP, AddrFI:$rs1)>; +} + +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; +defm : LdPat; + +defm : StPat; +defm : StPat; +defm : StPat; +defm : StPat; +defm : StPat; +defm : StPat; +//4.5 int caculate instruction +multiclass inst_rr_ri fun, string opstr, + SDPatternOperator OpNode = null_frag> { + def r : OForm <0x10, fun, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), + opstr, "$RA,$RB,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; + def i : OFormL<0x12, fun,(ins GPRC:$RA, u8imm:$L), (outs GPRC:$RC), + opstr, "$RA,$L,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, (i64 immUExt8:$L)))]>; +} + +let Predicates = [EnableCrcInst] in +class inst_rr_r fun, string opstr, + SDPatternOperator OpNode> + : OForm <0x10, fun, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), + opstr, "$RA,$RB,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; + +multiclass inst_rr_rin funL, bits<8> funQ, string opstr, + SDPatternOperator OpNode > { + def Q : OForm <0x10, funQ, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), + opstr # "l", "$RA,$RB,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; + def L : OForm <0x10, funL, (ins GPRC:$RA, GPRC:$RB), (outs GPRC:$RC), + opstr # "w", "$RA,$RB,$RC", + [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))]>; +} + +class inst_rr_13 fun, string opstr, + SDPatternOperator OpNode=null_frag> + : OFormI <0x10, fun, (ins s13imm:$L), (outs GPRC:$RC), + opstr, "$L, $RC", + [(set GPRC:$RC, (OpNode immUExt13:$L))]>; + +let RA = 31 in +class inst_rr_2 fun, string opstr, ValueType vt> + : OForm <0x10, fun, (ins GPRC:$RB), (outs GPRC:$RC), + opstr, "$RB, $RC", + [(set GPRC:$RC, (sext_inreg GPRC:$RB, vt))]>; + +let RA = 31 in +class inst_rr_3 fun, string opstr, + SDPatternOperator OpNode=null_frag> + : OForm <0x10, fun, (ins GPRC:$RB), (outs GPRC:$RC), + opstr, "$RB, $RC", + [(set GPRC:$RC, (OpNode GPRC:$RB))]>; + +multiclass inst_lw funL, bits<8> funQ, string opstr, + SDPatternOperator OpNode> { + defm L : inst_rr_ri; + defm Q : inst_rr_ri; +} + +defm ADD : inst_lw<0x00, 0x08,"add", add>; +defm SUB : inst_lw<0x01, 0x09,"sub", sub>; +defm S4ADD: inst_lw<0x02, 0x0a,"s4add", add4>; +defm S4SUB: inst_lw<0x03, 0x0b,"s4sub", sub4>; +defm S8ADD: inst_lw<0x04, 0x0c,"s8add", add8>; +defm S8SUB: inst_lw<0x05, 0x0d,"s8sub", sub8>; +defm MUL : inst_lw<0x10, 0x18,"mul", mul>; + +let mayRaiseFPException = 1, hasSideEffects = 1 in { + defm DIV : inst_rr_rin<0x11, 0x1a, "div", sdiv>; + defm UDIV : inst_rr_rin<0x12, 0x1b, "udiv", udiv>; + defm REM : inst_rr_rin<0x13, 0x1c, "rem", srem>; + defm UREM : inst_rr_rin<0x14, 0x1d, "urem", urem>; +} + +def ADDPI : inst_rr_13<0x1e, "addpi", Sw64_addpi>; +def 
ADDPIS : inst_rr_13<0x1f, "addpis", Sw64_addpis>; + +defm SBT : inst_rr_ri<0x2d, "sbt", Sw64_sbt>; +defm CBT : inst_rr_ri<0x2e, "cbt", Sw64_cbt>; + +defm UMULH : inst_rr_ri<0x19, "umulh", mulhu>; + +let RA=31 in +class inst_ct func, string opstr, SDNode OpNode> + : OForm<0x10, func, (ins GPRC:$RB), (outs GPRC:$RC), + opstr, "$RB,$RC", [(set GPRC:$RC, (OpNode GPRC:$RB))]>; + +def CTTZ : inst_ct<0x5A, "cttz", cttz>; +def CTLZ : inst_ct<0x59, "ctlz", ctlz>; +def CTPOP : inst_ct<0x58, "ctpop", ctpop>; + +defm ZAP : inst_rr_ri<0x68, "zap">; +defm ZAPNOT : inst_rr_ri<0x69, "zapnot">; + +def SEXTB : inst_rr_2<0x6A, "sextb", i8>; +def SEXTH : inst_rr_2<0x6B, "sexth", i16>; + +//4.5.2 integer cmp +defm CMPEQ : inst_rr_ri<0x28, "cmpeq", seteq>; +defm CMPLT : inst_rr_ri<0x29, "cmplt", setlt>; +defm CMPLE : inst_rr_ri<0x2A, "cmple", setle>; +defm CMPULT : inst_rr_ri<0x2B, "cmpult", setult>; +defm CMPULE : inst_rr_ri<0x2C, "cmpule", setule>; + +//4.5.3 integer order +defm AND : inst_rr_ri<0x38, "and", and>; +defm BIC : inst_rr_ri<0x39, "bic", + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm BIS : inst_rr_ri<0x3A, "bis", or>; +defm ORNOT : inst_rr_ri<0x3B, "ornot", + BinOpFrag<(or node:$LHS, (not node:$RHS))>>; +defm XOR : inst_rr_ri<0x3C, "xor", xor>; +defm EQV : inst_rr_ri<0x3D, "eqv", + BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; + +//4.5.4 integer move position +defm SL : inst_rr_ri<0x48, "sll", shl>; +defm SRA : inst_rr_ri<0x4A, "sra", sra>; +defm SRL : inst_rr_ri<0x49, "srl", srl>; +defm ROLL : inst_rr_ri<0x4B, "roll", rotl>; +defm SLLW : inst_rr_ri<0x4C, "sllw">; +defm SRLW : inst_rr_ri<0x4D, "srlw">; +defm SRAW : inst_rr_ri<0x4E, "sraw">; +defm ROLW : inst_rr_ri<0x4F, "rolw", Sw64_rolw>; + +def sexti32 : ComplexPattern; + +def zexti32 : ComplexPattern; + +def : Pat<(srem (sexti32 (i64 GPRC:$RA)), (sexti32 (i64 GPRC:$RB))), + (REML GPRC:$RA, GPRC:$RB)>; + +def : Pat<(sext_inreg (sdiv GPRC:$RA, GPRC:$RB), i32), + (DIVL GPRC:$RA, GPRC:$RB)>; + +def : Pat<(sext_inreg (udiv (and GPRC:$RA,0xffffffff), (and GPRC:$RB,0xffffffff)), i32), + (UDIVL GPRC:$RA, GPRC:$RB)>; + +def : Pat<(sext_inreg (shl GPRC:$RA, GPRC:$RB), i32), + (SLLWr GPRC:$RA, GPRC:$RB)>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sext_inreg (shl GPRC:$RA, (i64 immUExt8:$L)), i32), + (SLLWi GPRC:$RA, (i64 immUExt8:$L))>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sext_inreg (srl GPRC:$RA, GPRC:$RB), i32), + (SRLWr GPRC:$RA, GPRC:$RB)>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(srl (i64 (zexti32 GPRC:$RA)), (i64 immUExt8:$L)), + (SRLWi GPRC:$RA, (i64 immUExt8:$L))>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sra (i64 (sexti32 GPRC:$RA)), (i64 GPRC:$RB)), + (SRAWr GPRC:$RA, GPRC:$RB)>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sra (i64 (sexti32 GPRC:$RA)), (i64 immUExt8:$L)), + (SRAWi GPRC:$RA, (i64 immUExt8:$L))>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sext_inreg (rotl GPRC:$RA, GPRC:$RB), i32), + (ROLWr GPRC:$RA, GPRC:$RB)>, + Requires<[EnableIntShift, HasCore4]>; + +def : Pat<(sext_inreg (rotl GPRC:$RA, (i64 immUExt8:$L)), i32), + (ROLWi GPRC:$RA, (i64 immUExt8:$L))>, + Requires<[EnableIntShift, HasCore4]>; + +multiclass PatGprInst{ +def : Pat<(OpNode GPRC:$rs1, GPRC:$rs2), + (!cast(Inst # "Qr") + GPRC:$rs1, GPRC:$rs2)>; + +def : Pat<(intop (OpNode GPRC:$rs1, GPRC:$rs2)), + (!cast(Inst # "Lr") + GPRC:$rs1, GPRC:$rs2)>; + +def : Pat<(OpNode GPRC:$rs1, immUExt8:$imm8), + (!cast(Inst # "Qi") + GPRC:$rs1, immUExt8:$imm8)>; + +def : Pat<(intop (OpNode GPRC:$rs1, 
immUExt8:$imm8)), + (!cast(Inst # "Li") + GPRC:$rs1, immUExt8:$imm8)>; +} + +multiclass PatGprInstn{ +def : Pat<(OpNode GPRC:$rs1, GPRC:$rs2), + (!cast(Inst # "Q") + GPRC:$rs1, GPRC:$rs2)>; + +def : Pat<(intop (OpNode GPRC:$rs1, GPRC:$rs2)), + (!cast(Inst # "L") + GPRC:$rs1, GPRC:$rs2)>; +} + +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; +defm : PatGprInst; + +//Const cases since legalize does sub x, int -> add x, inv(int) + 1 +def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), + (SUBLi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), + (S4SUBLi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), + (S8SUBLi GPRC:$RA, immUExt8neg:$L)>; +def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>; +// Define the pattern that produces ZAPNOTi. + +def : Pat<(cttz_zero_undef i64:$Rn), (CTTZ $Rn)>; + +def : Pat<(zappat:$imm GPRC:$RA), + (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>; + +def : Pat<(sext_inreg GPRC:$RB, i32), + (ADDLi GPRC:$RB, 0)>; + +def : Pat<(sext_inreg (add GPRC:$RA, GPRC:$RB), i32), + (ADDLr GPRC:$RA, GPRC:$RB)>; + +def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQr GPRC:$X, GPRC:$Y)>; +def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>; +def : Pat<(setueq immUExt8:$Y, GPRC:$X), (CMPEQi GPRC:$X, immUExt8:$Y)>; + +def : Pat<(seteq GPRC:$X, GPRC:$Y), (CMPEQr GPRC:$X, GPRC:$Y)>; +def : Pat<(seteq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>; +def : Pat<(seteq immUExt8:$Y, GPRC:$X), (CMPEQi GPRC:$X, immUExt8:$Y)>; + +def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULTr GPRC:$Y, GPRC:$X)>; +def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>; +def : Pat<(brcond (i64 (setugt GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPULEi GPRC:$Y, immUExt8:$X), bb:$DISP)>; + +def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULEr GPRC:$Y, GPRC:$X)>; +def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>; +def : Pat<(brcond (i64 (setuge GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPULTi GPRC:$Y, immUExt8:$X), bb:$DISP)>; + +def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLTr GPRC:$Y, GPRC:$X)>; +def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>; +def : Pat<(brcond (i64 (setgt GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPLEi GPRC:$Y, immUExt8:$X), bb:$DISP)>; + +def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLEr GPRC:$Y, GPRC:$X)>; +def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>; +def : Pat<(brcond (i64 (setgt GPRC:$Y, immUExt8:$X)), bb:$DISP), (BEQ (CMPLTi GPRC:$Y, immUExt8:$X), bb:$DISP)>; + +def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQr GPRC:$X, GPRC:$Y), 0)>; +def : Pat<(setne GPRC:$X, immUExt8:$Y), + (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>; +def : Pat<(brcond (i64 (setne GPRC:$X, immUExt8:$Y)), bb:$DISP), (BEQ (CMPEQi GPRC:$X, immUExt8:$Y), bb:$DISP)>; +def : Pat<(brcond (i64 (setne immUExt8:$Y, GPRC:$X)), bb:$DISP), (BEQ (CMPEQi GPRC:$X, immUExt8:$Y), bb:$DISP)>; + +def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQr GPRC:$X, GPRC:$Y), 0)>; +def : Pat<(setune GPRC:$X, immUExt8:$Y), + (CMPEQi (CMPEQr GPRC:$X, immUExt8:$Y), 0)>; + +//after put here because SLi +//Stupid crazy arithmetic stuff: +let AddedComplexity = 1 in { +def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(sext_inreg (mul 
GPRC:$RA, 5), i32), (S4ADDLr GPRC:$RA, GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(sext_inreg (mul GPRC:$RA, 9), i32), (S8ADDLr GPRC:$RA, GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(sext_inreg (mul GPRC:$RA, 3), i32), (S4SUBLr GPRC:$RA, GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>; +def : Pat<(sext_inreg (mul GPRC:$RA, 7), i32), (S8SUBLr GPRC:$RA, GPRC:$RA)>; +//slight tree expansion if we are multiplying near to a power of 2 + +def : Pat<(mul GPRC:$RA, immRem1:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>; +def : Pat<(mul GPRC:$RA, immRem2:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRem2:$imm)), + (ADDQr GPRC:$RA, GPRC:$RA))>; +def : Pat<(mul GPRC:$RA, immRem3:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRem3:$imm)), + (S4SUBQr GPRC:$RA, GPRC:$RA))>; +def : Pat<(mul GPRC:$RA, immRem4:$imm), + (S4ADDQr GPRC:$RA, (SLi GPRC:$RA, (nearP2X immRem4:$imm)))>; +def : Pat<(mul GPRC:$RA, immRem5:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRem5:$imm)), + (S4ADDQr GPRC:$RA, GPRC:$RA))>; +def : Pat<(mul GPRC:$RA, immRemP2:$imm), + (ADDQr (SLi GPRC:$RA, (nearP2X immRemP2:$imm)), + (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>; + +} //Added complexity +//4.5.5 integer cond select +// RA:COND RB:TRUE RC:FALSE RD:DEST +multiclass select_inst< bits<3> fun, string opstr, SDPatternOperator OpNode> { +def r : OForm4 <0x11, fun, (ins GPRC:$RA, GPRC:$RB, GPRC:$RC), + (outs GPRC:$RD), opstr, "$RA,$RB,$RC,$RD", + [(set GPRC:$RD, + (select (i64 (OpNode GPRC:$RA)), GPRC:$RB, GPRC:$RC))]>; +def i : OForm4L<0x13, fun, (ins GPRC:$RA, u8imm:$L, GPRC:$RC), + (outs GPRC:$RD), opstr, "$RA,$L,$RC,$RD", + [(set GPRC:$RD, + (select (i64 (OpNode GPRC:$RA)), immUExt8:$L, GPRC:$RC))]>; +} + +defm SELEQ : select_inst<0x0, "seleq", CmpOpFrag<(seteq node:$R, 0)>>; +defm SELNE : select_inst<0x5, "selne", CmpOpFrag<(setne node:$R, 0)>>; +defm SELLT : select_inst<0x4, "sellt", CmpOpFrag<(setlt node:$R, 0)>>; +defm SELLE : select_inst<0x3, "selle", CmpOpFrag<(setle node:$R, 0)>>; +defm SELGT : select_inst<0x2, "selgt", CmpOpFrag<(setgt node:$R, 0)>>; +defm SELGE : select_inst<0x1, "selge", CmpOpFrag<(setge node:$R, 0)>>; +defm SELLBC : select_inst<0x6, "sellbc", CmpOpFrag<(xor node:$R, 1)>>; +defm SELLBS : select_inst<0x7, "sellbs", CmpOpFrag<(and node:$R, 1)>>; + +//General pattern for select +def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2), + (SELNEr GPRC:$which, GPRC:$src1, GPRC:$src2)>; +def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2), + (SELEQi GPRC:$which, immUExt8:$src2, GPRC:$src1)>; +def : Pat<(select (i64 (setne GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (i64 (setgt GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (i64 (setge GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (i64 (setlt GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; +def : Pat<(select (i64 (setle GPRC:$RCOND, 0)), GPRC:$RTRUE, immUExt8:$RFALSE), + (SELGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; + +def : Pat<(mulhs GPRC:$RA, GPRC:$RB), + (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), + (ADDQr (SELGEr GPRC:$RB, R31, GPRC:$RA), + (SELGEr GPRC:$RA, R31, GPRC:$RB)))>; + +defm CMPBGE : inst_rr_ri<0x6C, "cmpgeb">; + +defm EXTLB : 
inst_rr_ri<0x50, "extlb">; +defm EXTLH : inst_rr_ri<0x51, "extlh">; +defm EXTLW : inst_rr_ri<0x52, "extlw">; +defm EXTLL : inst_rr_ri<0x53, "extll">; + +defm EXTHB : inst_rr_ri<0x54, "exthb">; +defm EXTHH : inst_rr_ri<0x55, "exthh">; +defm EXTHW : inst_rr_ri<0x56, "exthw">; +defm EXTHL : inst_rr_ri<0x57, "exthl">; + +defm INSLB: inst_rr_ri<0x40, "inslb">; +defm INSLH: inst_rr_ri<0x41, "inslh">; +defm INSLW: inst_rr_ri<0x42, "inslw">; +defm INSLL: inst_rr_ri<0x43, "insll">; +defm INSHB: inst_rr_ri<0x44, "inshb">; +defm INSHH: inst_rr_ri<0x45, "inshh">; +defm INSHW: inst_rr_ri<0x46, "inshw">; +defm INSHL: inst_rr_ri<0x47, "inshl">; + +def REVBH: inst_rr_3<0x5B, "revbh", Sw64_revbh>; +def REVBW: inst_rr_3<0x5C, "revbw", Sw64_revbw>; +def REVBL: inst_rr_3<0x5D, "revbl", bswap>; + +def : Pat<(sra (bswap GPRC:$RB), (i64 32)), + (REVBW GPRC:$RB)>; +def : Pat<(sra (bswap GPRC:$RB), (i64 48)), + (REVBH GPRC:$RB)>; +def : Pat<(srl (bswap GPRC:$RB), (i64 32)), + (REVBW GPRC:$RB)>; +def : Pat<(srl (bswap GPRC:$RB), (i64 48)), + (REVBH GPRC:$RB)>; + +defm MASKLB: inst_rr_ri<0x60, "masklb">; +defm MASKLH: inst_rr_ri<0x61, "masklh">; +defm MASKLW: inst_rr_ri<0x62, "masklw">; +defm MASKLL: inst_rr_ri<0x63, "maskll">; +defm MASKHB: inst_rr_ri<0x64, "maskhb">; +defm MASKHH: inst_rr_ri<0x65, "maskhh">; +defm MASKHW: inst_rr_ri<0x66, "maskhw">; +defm MASKHL: inst_rr_ri<0x67, "maskhl">; + +// 4.5.7 crc32 instruction +def CRC32B: inst_rr_r<0x20, "crc32b", Sw64_crc32b>; +def CRC32H: inst_rr_r<0x21, "crc32h", Sw64_crc32h>; +def CRC32W: inst_rr_r<0x22, "crc32w", Sw64_crc32w>; +def CRC32L: inst_rr_r<0x23, "crc32l", Sw64_crc32l>; +def CRC32CB: inst_rr_r<0x24, "crc32cb", Sw64_crc32cb>; +def CRC32CH: inst_rr_r<0x25, "crc32ch", Sw64_crc32ch>; +def CRC32CW: inst_rr_r<0x26, "crc32cw", Sw64_crc32cw>; +def CRC32CL: inst_rr_r<0x27, "crc32cl", Sw64_crc32cl>; + +def : Pat<(Sw64_crc32b GPRC:$rs1, GPRC:$rs2), + (CRC32B GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32h GPRC:$rs1, GPRC:$rs2), + (CRC32H GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32w GPRC:$rs1, GPRC:$rs2), + (CRC32W GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32l GPRC:$rs1, GPRC:$rs2), + (CRC32L GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32cb GPRC:$rs1, GPRC:$rs2), + (CRC32CB GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32ch GPRC:$rs1, GPRC:$rs2), + (CRC32CH GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32cw GPRC:$rs1, GPRC:$rs2), + (CRC32CW GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; +def : Pat<(Sw64_crc32cl GPRC:$rs1, GPRC:$rs2), + (CRC32CL GPRC:$rs1, GPRC:$rs2)>, + Requires<[EnableCrcInst, HasCore4]>; + +//4.6 float caculate instruction +//4.6.1 float caculate +class inst_fpufunc , string opstr, RegisterClass regtype> + : FPForm<0x18, func, (ins regtype:$RA, regtype:$RB), + (outs regtype:$RC), opstr, "$RA,$RB,$RC">; + +class inst_fpu_rrfunc , string opstr, RegisterClass regtype , + SDPatternOperator OpNode> + : FPForm<0x18, func, (ins regtype:$RA, regtype:$RB), + (outs regtype:$RC), opstr, "$RA,$RB,$RC", + [(set regtype:$RC, (OpNode regtype:$RA, regtype:$RB))]>; + +class inst_fpu_rr_rev64func , string opstr, + SDPatternOperator OpNode> + : FPForm<0x18, func, (ins F8RC:$RA, F8RC:$RB), + (outs F8RC:$RC), opstr, "$RA,$RB,$RC", + [(set F8RC:$RC, (OpNode F8RC:$RB, F8RC:$RA))]>; + +let DecoderNamespace = "FP32" in { +class 
inst_fpu_rr_rev32func , string opstr, + SDPatternOperator OpNode> + : FPForm<0x18, func, (ins F4RC:$RA, F4RC:$RB), + (outs F4RC:$RC), opstr, "$RA,$RB,$RC", + [(set F4RC:$RC, (OpNode F4RC:$RB, F4RC:$RA))]>; +} + +multiclass inst_fpu_sdfuncS, bits<8>funcD , + string opstr, SDPatternOperator OpNode>{ + def S : inst_fpu_rr; + def D : inst_fpu_rr; +} + +defm ADD : inst_fpu_sd<0x00, 0x01, "fadd", fadd>; +defm SUB : inst_fpu_sd<0x02, 0x03, "fsub", fsub>; +defm MUL : inst_fpu_sd<0x04, 0x05, "fmul", fmul>; +defm DIV : inst_fpu_sd<0x06, 0x07, "fdiv", fdiv>; + +def CPYSD : inst_fpu_rr_rev64<0x30, "fcpys", fcopysign>; +def CPYSED : inst_fpu_rr_rev64<0x31, "fcpyse", fcopysign>; +def CPYSND : inst_fpu_rr_rev64<0x32, "fcpysn", null_frag>; +def CPYSS : inst_fpu_rr_rev32<0x30, "fcpys", fcopysign>; +def CPYSES : inst_fpu_rr_rev32<0x31, "fcpyse", fcopysign>; +def CPYSNS : inst_fpu_rr_rev32<0x32, "fcpysn", null_frag>; + +def CMPTEQ : inst_fpu<0x10, "fcmpeq", F8RC>; +def CMPTLE : inst_fpu<0x11, "fcmple", F8RC>; +def CMPTLT : inst_fpu<0x12, "fcmplt", F8RC>; +def CMPTUN : inst_fpu<0x13, "fcmpun", F8RC>; + +//4.6.2 float cmp + +def : Pat<(fabs F8RC:$RB), + (CPYSD F31, F8RC:$RB)>; +def : Pat<(fabs F4RC:$RB), + (CPYSS F31, F4RC:$RB)>; +def : Pat<(fneg F8RC:$RB), + (CPYSND F8RC:$RB, F8RC:$RB)>; +def : Pat<(fneg F4RC:$RB), + (CPYSNS F4RC:$RB, F4RC:$RB)>; + +def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)), + (CPYSNS F4RC:$B, F4RC:$A)>; +def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)), + (CPYSND F8RC:$B, F8RC:$A)>; + +def : Pat<(fneg (fcopysign F4RC:$A, F4RC:$B)), + (CPYSNS F4RC:$B, F4RC:$A)>; +def : Pat<(fneg (fcopysign F8RC:$A, F8RC:$B)), + (CPYSND F8RC:$B, F8RC:$A)>; + +def : Pat<(fneg (fcopysign F4RC:$A, F8RC:$B)), + (CPYSNS (COPY_TO_REGCLASS F8RC:$B, F4RC), F4RC:$A)>; +def : Pat<(fneg (fcopysign F8RC:$A, F4RC:$B)), + (CPYSND (COPY_TO_REGCLASS F4RC:$B, F8RC), F8RC:$A)>; + +def : Pat<(fcopysign F4RC:$A, F8RC:$B), + (CPYSS (COPY_TO_REGCLASS F8RC:$B, F4RC), F4RC:$A)>; +def : Pat<(fcopysign F8RC:$A, F4RC:$B), + (CPYSD (COPY_TO_REGCLASS F4RC:$B, F8RC), F8RC:$A)>; + +//4.6.3 float convert + +let RA = 31 in +class inst_fpu_cvt func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode=null_frag> + : FPForm<0x18, func, (ins regorg:$RB), (outs regdst:$RC), + opstr, "$RB,$RC", + [(set regdst:$RC, (OpNode regorg:$RB))]>; + +let RA = 31 in +class inst_fpu_cmov func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode=null_frag> + : FPForm<0x10, func, (ins regorg:$RB), (outs regdst:$RC), + opstr, "$RB,$RC", + [(set (i64 regdst:$RC), (OpNode regorg:$RB))]>; + +let RA = 31 in +class inst_flwfunc , string opstr, RegisterClass regtype> + : FPForm<0x18, func, (ins regtype:$RB), + (outs regtype:$RC), opstr, "$RB,$RC">; + +let RB = 31 in +class inst_fpu_cvt1 func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode=null_frag> + : FPForm<0x18, func, (ins regorg:$RA), (outs regdst:$RC), + opstr, "$RA,$RC", + [(set regdst:$RC, (OpNode regorg:$RA))]>; + +def CVTQS : inst_fpu_cvt<0x2D, "fcvtls", F8RC, F4RC, Sw64_cvtqs>; +def CVTQT : inst_fpu_cvt<0x2F, "fcvtld", F8RC, F8RC, Sw64_cvtqt>; +def CVTTQ : inst_fpu_cvt<0x24, "fcvtdl_z", F8RC, F8RC, Sw64_cvttq>; +def CVTST : inst_fpu_cvt<0x20, "fcvtsd", F4RC, F8RC, fpextend>; +def CVTTS : inst_fpu_cvt<0x21, "fcvtds", F8RC, F4RC, fpround>; +def : Pat<(Sw64_cvtts F8RC:$RB), (CVTTS F8RC:$RB)>; +def : Pat<(Sw64_cvtst F4RC:$RB), (CVTST F4RC:$RB)>; + +def FCVTWL : inst_flw<0x28, "fcvtwl", F8RC>; +def 
FCVTLW : inst_flw<0x29, "fcvtlw", F8RC>; + +def FCTTDL_G : inst_fpu_cvt<0x22, "fcvtdl_g", F8RC, F8RC>; +def FCTTDL_P : inst_fpu_cvt<0x23, "fcvtdl_p", F8RC, F8RC>; +def FCTTDL_N : inst_fpu_cvt<0x25, "fcvtdl_n", F8RC, F8RC>; +def FCTTDL : inst_fpu_cvt<0x27, "fcvtdl", F8RC, F8RC>; + +def FCVTHS : inst_fpu_cvt<0x2E, "fcvths", F8RC, F8RC>; +def FCVTSH : FCForm4L<0x1B, 0x37, (ins F8RC:$RA, F8RC:$RB, u6imm:$LIT), + (outs F8RC:$RD), "fcvtsh", "$RA,$RB,$LIT,$RD">; + +def CMOVDL : inst_fpu_cmov<0x72, "cmovdl", F8RC, GPRC>; +def CMOVDL_G : inst_fpu_cmov<0x74, "cmovdl_g", F8RC, GPRC>; +def CMOVDL_P : inst_fpu_cmov<0x7A, "cmovdl_p", F8RC, GPRC>; +def CMOVDL_Z : inst_fpu_cmov<0x7C, "cmovdl_z", F8RC, GPRC>; +def CMOVDL_N : inst_fpu_cmov<0x80, "cmovdl_n", F8RC, GPRC>; +def CMOVDLU : inst_fpu_cmov<0x81, "cmovdlu", F8RC, GPRC>; +def CMOVDLU_G : inst_fpu_cmov<0x82, "cmovdlu_g", F8RC, GPRC>; +def CMOVDLU_P : inst_fpu_cmov<0x83, "cmovdlu_p", F8RC, GPRC>; +def CMOVDLU_Z : inst_fpu_cmov<0x84, "cmovdlu_z", F8RC, GPRC>; +def CMOVDLU_N : inst_fpu_cmov<0x85, "cmovdlu_n", F8RC, GPRC>; +def CMOVDWU : inst_fpu_cmov<0x86, "cmovdwu", F8RC, GPRC>; +def CMOVDWU_G : inst_fpu_cmov<0x87, "cmovdwu_g", F8RC, GPRC>; +def CMOVDWU_P : inst_fpu_cmov<0x88, "cmovdwu_p", F8RC, GPRC>; +def CMOVDWU_Z : inst_fpu_cmov<0x89, "cmovdwu_z", F8RC, GPRC>; +def CMOVDWU_N : inst_fpu_cmov<0x8A, "cmovdwu_n", F8RC, GPRC>; +def CMOVDW : inst_fpu_cmov<0x8B, "cmovdw", F8RC, GPRC>; +def CMOVDW_G : inst_fpu_cmov<0x8C, "cmovdw_g", F8RC, GPRC>; +def CMOVDW_P : inst_fpu_cmov<0x8D, "cmovdw_p", F8RC, GPRC>; +def CMOVDW_Z : inst_fpu_cmov<0x8E, "cmovdw_z", F8RC, GPRC>; +def CMOVDW_N : inst_fpu_cmov<0x8F, "cmovdw_n", F8RC, GPRC>; + +// f64 to i64 +def : Pat<(i64 (fp_to_sint F8RC:$Rn)), (CMOVDL_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fround F8RC:$Rn))), (CMOVDL_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fceil F8RC:$Rn))), (CMOVDL_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (ffloor F8RC:$Rn))), (CMOVDL_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fnearbyint F8RC:$Rn))), (CMOVDL F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +// f64 to u64 +def : Pat<(i64 (fp_to_uint F8RC:$Rn)), (CMOVDLU_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fround F8RC:$Rn))), (CMOVDLU_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fceil F8RC:$Rn))), (CMOVDLU_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (ffloor F8RC:$Rn))), (CMOVDLU_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fnearbyint F8RC:$Rn))), (CMOVDLU F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +// f32 to i64 +def : Pat<(i64 (fp_to_sint F4RC:$Rn)), (CMOVDL_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fround F4RC:$Rn))), (CMOVDL_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fceil F4RC:$Rn))), (CMOVDL_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (ffloor F4RC:$Rn))), (CMOVDL_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_sint (fnearbyint F4RC:$Rn))), (CMOVDL (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; + +// f32 to u64 +def : Pat<(i64 (fp_to_uint F4RC:$Rn)), (CMOVDLU_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 
(fp_to_uint (fround F4RC:$Rn))), (CMOVDLU_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fceil F4RC:$Rn))), (CMOVDLU_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (ffloor F4RC:$Rn))), (CMOVDLU_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(i64 (fp_to_uint (fnearbyint F4RC:$Rn))), (CMOVDLU (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; + +// f64 to u32 +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint F8RC:$Rn))), i32), + (CMOVDWU_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fround F8RC:$Rn)))), i32), + (CMOVDWU_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fceil F8RC:$Rn)))), i32), + (CMOVDWU_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (ffloor F8RC:$Rn)))), i32), + (CMOVDWU_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fnearbyint F8RC:$Rn)))), i32), + (CMOVDWU F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +// f64 to i32 +def : Pat<(sext_inreg (fp_to_sint F8RC:$Rn), i32), + (CMOVDW_Z F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fround F8RC:$Rn)), i32), + (CMOVDW_G F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fceil F8RC:$Rn)), i32), + (CMOVDW_P F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (ffloor F8RC:$Rn)), i32), + (CMOVDW_N F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fnearbyint F8RC:$Rn)), i32), + (CMOVDW F8RC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +// f32 to u32 +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint F4RC:$Rn))), i32), + (CMOVDWU_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fround F4RC:$Rn)))), i32), + (CMOVDWU_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fceil F4RC:$Rn)))), i32), + (CMOVDWU_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (ffloor F4RC:$Rn)))), i32), + (CMOVDWU_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (assertzext (i64 (fp_to_uint (fnearbyint F4RC:$Rn)))), i32), + (CMOVDWU (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; + +// f32 to i32 +def : Pat<(sext_inreg (fp_to_sint F4RC:$Rn), i32), + (CMOVDW_Z (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fround F4RC:$Rn)), i32), + (CMOVDW_G (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fceil F4RC:$Rn)), i32), + (CMOVDW_P (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (ffloor F4RC:$Rn)), i32), + (CMOVDW_N (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(sext_inreg (fp_to_sint (fnearbyint F4RC:$Rn)), i32), + (CMOVDW (CVTST F4RC:$Rn))>, Requires<[EnableFloatCmov, HasCore4]>; + +def CMOVLS : inst_fpu_cvt<0x48, "cmovls", GPRC, F4RC>; +def CMOVWS : inst_fpu_cvt<0x49, "cmovws", GPRC, F4RC>; +def CMOVLD : inst_fpu_cvt<0x4a, "cmovld", GPRC, F8RC>; +def CMOVWD : inst_fpu_cvt<0x4b, "cmovwd", GPRC, F8RC>; +def CMOVULS : inst_fpu_cvt<0x4c, "cmovuls", GPRC, F4RC>; +def CMOVULD 
: inst_fpu_cvt<0x4e, "cmovuld", GPRC, F8RC>; +def CMOVUWS : inst_fpu_cvt<0x4d, "cmovuws", GPRC, F4RC>; +def CMOVUWD : inst_fpu_cvt<0x4f, "cmovuwd", GPRC, F8RC>; + +def : Pat<(f32 (sint_to_fp GPRC:$Rn)), (CMOVLS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f32 (sint_to_fp (assertsext GPRC:$Rn))), (CMOVWS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f64 (sint_to_fp GPRC:$Rn)), (CMOVLD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f64 (sint_to_fp (assertsext GPRC:$Rn))), (CMOVWD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f32 (uint_to_fp GPRC:$Rn)), (CMOVULS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f64 (uint_to_fp GPRC:$Rn)), (CMOVULD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f32 (uint_to_fp (and (assertsext GPRC:$Rn), 0xffffffff))), (CMOVUWS GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; +def : Pat<(f64 (uint_to_fp (and (assertsext GPRC:$Rn), 0xffffffff))), (CMOVUWD GPRC:$Rn)>, Requires<[EnableFloatCmov, HasCore4]>; + +def FRIS : inst_fpu_cvt<0x5A, "fris", F4RC, F4RC, fnearbyint>; +def FRIS_G : inst_fpu_cvt<0x5B, "fris_g", F4RC, F4RC, fround>; +def FRIS_P : inst_fpu_cvt<0x5C, "fris_p", F4RC, F4RC, fceil>; +def FRIS_Z : inst_fpu_cvt<0x5D, "fris_z", F4RC, F4RC, ftrunc>; +def FRIS_N : inst_fpu_cvt<0x5F, "fris_n", F4RC, F4RC, ffloor>; + +def FRID : inst_fpu_cvt<0x60, "frid", F8RC, F8RC, fnearbyint>; +def FRID_G : inst_fpu_cvt<0x61, "frid_g", F8RC, F8RC, fround>; +def FRID_P : inst_fpu_cvt<0x62, "frid_p", F8RC, F8RC, fceil>; +def FRID_Z : inst_fpu_cvt<0x63, "frid_z", F8RC, F8RC, ftrunc>; +def FRID_N : inst_fpu_cvt<0x64, "frid_n", F8RC, F8RC, ffloor>; + +def SQRTSS : inst_flw<0x08, "fsqrts", F4RC>; +def SQRTSD : inst_flw<0x09, "fsqrtd", F8RC>; + +def FRECS : inst_fpu_cvt1<0x58, "frecs", F4RC, F4RC, Sw64_frecs>; +def FRECD : inst_fpu_cvt1<0x59, "frecd", F8RC, F8RC, Sw64_frecd>; + +def : Pat<(fsqrt F4RC:$RB), (SQRTSS F4RC:$RB)>; +def : Pat<(fsqrt F8RC:$RB), (SQRTSD F8RC:$RB)>; + +//4.6.6 int 2 float or float 2 int + +let RB = 31 in +class inst_fpu_fi opc, bits<8> func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode> + : FPForm; + +def ITOFS : inst_fpu_fi<0x18, 0x40, "ifmovs", GPRC, F4RC, bitconvert>; +def ITOFT : inst_fpu_fi<0x18, 0x41, "ifmovd", GPRC, F8RC, bitconvert>; +def FTOIS : inst_fpu_fi<0x10, 0x70, "fimovs", F4RC, GPRC, bitconvert>; +def FTOIT : inst_fpu_fi<0x10, 0x78, "fimovd", F8RC, GPRC, bitconvert>; + +let DecoderNamespace = "SIMD" in { +class inst_fpu_fi_simd opc, bits<8> func, string opstr, + RegisterClass regorg, RegisterClass regdst, + SDPatternOperator OpNode> + : FPForm1; +} + +def ITOFStmp : inst_fpu_fi_simd<0x18, 0x40, "ifmovs", GPRC, FPRC, null_frag>; +def ITOFTtmp : inst_fpu_fi_simd<0x18, 0x41, "ifmovd", GPRC, FPRC, null_frag>; +def FTOIStmp : inst_fpu_fi_simd<0x10, 0x70, "fimovs", FPRC, GPRC, null_frag>; +def FTOITtmp : inst_fpu_fi_simd<0x10, 0x78, "fimovd", FPRC, GPRC, null_frag>; + +multiclass inst_fpu_fma funcS, bits<6> funcD, string opstr> { + def S : FForm4<0x19, funcS, (ins F4RC:$RA, F4RC:$RB, F4RC:$RC), + (outs F4RC:$RD), opstr # "s", "$RA,$RB,$RC,$RD">; + + def D : FForm4<0x19, funcD, (ins F8RC:$RA, F8RC:$RB, F8RC:$RC), + (outs F8RC:$RD), opstr # "d", "$RA,$RB,$RC,$RD">; +} + +defm FMA : inst_fpu_fma<0x00, 0x01, "fma">; +defm FMS : inst_fpu_fma<0x02, 0x03, "fms">; +defm FNMA : inst_fpu_fma<0x04, 0x05, "fnma">; +defm FNMS : inst_fpu_fma<0x06, 0x07, "fnms">; + +multiclass fma_pat { +def : Pat<(fma regtype:$RA, 
regtype:$RB, regtype:$RC), + (!cast("FMA" # type) + regtype:$RA, regtype:$RB, regtype:$RC)>; + +def : Pat<(fma regtype:$RA, regtype:$RB, + (fneg regtype:$RC)), + (!cast("FMS" # type) + regtype:$RA, regtype:$RB, regtype:$RC)>; + +def : Pat<(fneg + (fma regtype:$RA, regtype:$RB, + (fneg regtype:$RC))), + (!cast("FNMA" # type) + regtype:$RA, regtype:$RB, regtype:$RC)>; + +def : Pat<(fneg + (fma regtype:$RA, regtype:$RB, regtype:$RC)), + (!cast("FNMS" # type) + regtype:$RA, regtype:$RB, regtype:$RC)>; + +} + +defm : fma_pat; +defm : fma_pat; + +class inst_fpu_select64 fun, string opstr> + : FForm4 <0x19, fun, (ins F8RC:$RC, F8RC:$RB, F8RC:$RA), + (outs F8RC:$RD), opstr, "$RA,$RB,$RC,$RD">; + +let DecoderNamespace = "FP32" in { +class inst_fpu_select32 fun, string opstr> + : FForm4 <0x19, fun, (ins F4RC:$RC, F4RC:$RB, F8RC:$RA), + (outs F4RC:$RD), opstr, "$RA,$RB,$RC,$RD">; +} +def FSELEQD : inst_fpu_select64<0x10, "fseleq">; +def FSELNED : inst_fpu_select64<0x11, "fselne">; +def FSELLTD : inst_fpu_select64<0x12, "fsellt">; +def FSELLED : inst_fpu_select64<0x13, "fselle">; +def FSELGTD : inst_fpu_select64<0x14, "fselgt">; +def FSELGED : inst_fpu_select64<0x15, "fselge">; +def FSELEQS : inst_fpu_select32<0x10, "fseleq">; +def FSELNES : inst_fpu_select32<0x11, "fselne">; +def FSELLTS : inst_fpu_select32<0x12, "fsellt">; +def FSELLES : inst_fpu_select32<0x13, "fselle">; +def FSELGTS : inst_fpu_select32<0x14, "fselgt">; +def FSELGES : inst_fpu_select32<0x15, "fselge">; + +multiclass f_select_pat { +def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F4RC:$st, F4RC:$sf), + (!cast(Inst # "S") + F4RC:$sf, F4RC:$st, (InstCmp F8RC:$RA, F8RC:$RB))>; +def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F8RC:$st, F8RC:$sf), + (!cast(Inst # "D") + F8RC:$sf, F8RC:$st, (InstCmp F8RC:$RA, F8RC:$RB))>; +} + +multiclass f_select_pat_c { +def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F4RC:$st, F4RC:$sf), + (!cast(Inst # "S") + F4RC:$sf, F4RC:$st, (InstCmp F8RC:$RB, F8RC:$RA))>; +def : Pat<(select (i64 (OpNode F8RC:$RA, F8RC:$RB)), F8RC:$st, F8RC:$sf), + (!cast(Inst # "D") + F8RC:$sf, F8RC:$st, (InstCmp F8RC:$RB, F8RC:$RA))>; +} +defm : f_select_pat; +defm : f_select_pat; +defm : f_select_pat; +defm : f_select_pat; + +defm : f_select_pat_c; +defm : f_select_pat_c; + +def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf), + (f64 (FSELEQD F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>; +def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf), + (f32 (FSELEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>; + +//4.6.9 read and write float register +let RB=31, RC=31 in { +def RFPCR : FPForm<0x18,0x50,(ins), (outs F8RC:$RA), "rfpcr", "$RA">; +def WFPCR : FPForm<0x18,0x51,(ins F8RC:$RA), (outs), "wfpcr", "$RA">; +} + +let RA=31, RB = 31, RC=31 in { +def SETFPEC0 : FPForm<0x18,0x54,(ins), (outs),"setfpec0","">; +def SETFPEC1 : FPForm<0x18,0x55,(ins), (outs),"setfpec1","">; +def SETFPEC2 : FPForm<0x18,0x56,(ins), (outs),"setfpec2","">; +def SETFPEC3 : FPForm<0x18,0x57,(ins), (outs),"setfpec3","">; + +def NOP : FPForm<0x10,0x3a,(ins), (outs),"nop", "">; +} +//4.7 sundry instruction +let RA = 0, RB= 0, hasNoSchedulingInfo=1 in{ +def WMEMB : MfcForm<0x06, 0x0002,(ins), (outs), "wmemb">; +def IMEMB : MfcForm<0x06, 0x0001,(ins), (outs), "imemb">; +def MB : MfcForm<0x06, 0x0000,(ins), (outs), "memb">; //memory barrier +def HALT : MfcForm<0x06, 0x0080,(ins), (outs), "halt">; +} +def : Pat<(trap), (SYS_CALL 0x80)>; +def : Pat<(atomic_fence (i64 5), (timm)), (WMEMB)>,Requires<[EnableWmembInst, HasCore4]>; +def : Pat<(atomic_fence (timm), (timm)), 
(MB)>; +let RB = 31 in { +//4.7.3 read time counter +def RPCC : MfcForm<0x06, 0x0020,(ins), (outs GPRC:$RA), "rtc", "$RA">; //Read process cycle counter +//4.7.4 read cpu core id +def RCID : MfcForm<0x06, 0x0040,(ins), (outs GPRC:$RA), "rcid", "$RA">; +} +//4.7.6 atom operate instruction + +let mayLoad = 1 in { +def LDQ_L : MFuncForm<0x08,0x1,(ins s64imm:$disp, GPRC:$RB),(outs GPRC:$RA), + "lldl", "$RA,${disp}(${RB})">; +def LDL_L : MFuncForm<0x08,0x0,(ins s64imm:$disp, GPRC:$RB),(outs GPRC:$RA), + "lldw", "$RA,${disp}(${RB})">; +} + + +let mayStore = 1 in { +def STQ_C : MFuncForm<0x08,0x9, (ins GPRC:$RA, s64imm:$disp, GPRC:$RB), (outs), + "lstl","$RA,${disp}(${RB})">; +def STL_C : MFuncForm<0x08,0x8, (ins GPRC:$RA, s64imm:$disp, GPRC:$RB), (outs), + "lstw","$RA,${disp}(${RB})">; +} + +let RB = 31, hasNoSchedulingInfo = 1 in { +def WR_F : MfcForm<0x06, 0x1020, (ins GPRC:$RA) , (outs), "wr_f", "$RA">; + +def RD_F : MfcForm<0x06, 0x1000, (ins GPRC:$RA) , (outs), "rd_f", "$RA">; +} + +//4.8 cache control instruction +// Prefetch +def SDT_ZPrefetch : SDTypeProfile<0, 2, [ + SDTCisPtrTy<0>, SDTCisInt<1>, +]>; + +def z_s_fillcs : SDNode<"Sw64ISD::Z_S_FILLCS", SDT_ZPrefetch, [SDNPHasChain]>; +def z_s_fillde : SDNode<"Sw64ISD::Z_S_FILLDE", SDT_ZPrefetch, [SDNPHasChain]>; +def z_fillde : SDNode<"Sw64ISD::Z_FILLDE", SDT_ZPrefetch, [SDNPHasChain]>; +def z_fillde_e : SDNode<"Sw64ISD::Z_FILLDE_E", SDT_ZPrefetch, [SDNPHasChain]>; +def z_fillcs : SDNode<"Sw64ISD::Z_FILLCS", SDT_ZPrefetch, [SDNPHasChain]>; +def z_fillcs_e : SDNode<"Sw64ISD::Z_FILLCS_E", SDT_ZPrefetch, [SDNPHasChain]>; +def z_e_fillcs : SDNode<"Sw64ISD::Z_E_FILLCS", SDT_ZPrefetch, [SDNPHasChain]>; +def z_e_fillde : SDNode<"Sw64ISD::Z_E_FILLDE", SDT_ZPrefetch, [SDNPHasChain]>; + +def z_flushd : SDNode<"Sw64ISD::Z_FLUSHD", SDT_ZPrefetch, [SDNPHasChain]>; + +let usesCustomInserter = 1, mayLoad = 1, mayStore = 0, RA = 31 in +class Prefetch opcode, SDPatternOperator loadop> + : MForm; + +def FILLCS : Prefetch<"fillcs", 0x09, z_fillcs>; +def S_FILLDE : Prefetch<"s_fillde", 0x23, z_s_fillde>; +def S_FILLCS : Prefetch<"s_fillcs", 0x22, z_s_fillcs>; +def FILLDE : Prefetch<"fillde", 0x26, z_fillde>; +def FILLDE_E : Prefetch<"fillde_e", 0x27, z_fillde_e>; +def FILLCS_E : Prefetch<"fillcs_e", 0x0B, z_fillcs_e>; +def E_FILLCS : Prefetch<"e_fillcs", 0x0A, z_e_fillcs>; +def E_FILLDE : Prefetch<"e_fillde", 0x0C, z_e_fillde>; + +// END Prefetch + + +//4.9 privilege instruction +def DPFHR : MPrvlForm<0x1e,0xe,(ins u5imm:$TH, s64imm:$disp, GPRC:$RB), (outs), + "dpfhr", "$TH,${disp}(${RB})">; +def DPFHW : MPrvlForm<0x1e,0xf,(ins u5imm:$TH, s64imm:$disp, GPRC:$RB), (outs), + "dpfhw", "$TH,${disp}(${RB})">; + +//4.9.1 csrr and csrw +// ---------------------------------------------------------- +def CSRR : CSRForm<0x06, 0xfe, (ins GPRC:$RA, u5imm:$L), (outs), "csrr", "$RA,$L">; +def CSRW : CSRForm<0x06, 0xff, (ins GPRC:$RA, u5imm:$L), (outs), "csrw", "$RA,$L">; +// ---------------------------------------------------------- + +//4.9.2 csrws and csrwc +// ---------------------------------------------------------- +def CSRWS : CSRForm<0x06, 0xfc, (ins GPRC:$RA, u5imm:$L), (outs), "csrws", "$RA,$L">; +def CSRWC : CSRForm<0x06, 0xfd, (ins GPRC:$RA, u5imm:$L), (outs), "csrwc", "$RA,$L">; +// ---------------------------------------------------------- + +class BrPat + : Pat<(brcond GPRC:$RA, bb:$DISP), + (Inst GPRC:$RA, bb:$DISP)>; + +class BrPat_const + : Pat<(brcond (i64 (CondOp GPRC:$RA, 0)), bb:$DISP), + (Inst GPRC:$RA, bb:$DISP)>; + +class BrPat_cond + : 
Pat<(brcond (i64 (CondOp regtype:$RB, regtype:$RA)), bb:$DISP), + (InstBr (InstCmp regtype:$RA, regtype:$RB), bb:$DISP)>; +class BrPat_cond_i + : Pat<(brcond (i64 (CondOp regtype:$RA, regtype:$RB)), bb:$DISP), + (InstBr (InstCmp regtype:$RA, regtype:$RB), bb:$DISP)>; + +class BrSwapPat_cond + : Pat<(brcond (i64 (CondOp regtype:$RB, regtype:$RA)), bb:$DISP), + (InstBr (InstCmp regtype:$RB, regtype:$RA), bb:$DISP)>; + +class BrPat_f + : Pat<(brcond (i64 (CondOp F8RC:$RA, immFPZ)), bb:$DISP), + (InstBr F8RC:$RA, bb:$DISP)>; + + +def : BrPat_const; +def : BrPat_const; +def : BrPat_const; +def : BrPat_const; +def : BrPat_const; +def : BrPat_const; + +def : BrPat; +def : BrPat_cond_i; +def : BrPat_cond_i; +def : BrPat_cond_i; +def : BrPat_cond_i; +def : BrPat_cond_i; + +def : BrPat_cond_i; +def : BrPat_cond_i; + +def : BrPat_f; +def : BrPat_f; +def : BrPat_f; +def : BrPat_f; + +def : BrPat_cond; +def : BrPat_cond; +def : BrPat_cond; +def : BrPat_cond; + +def : BrSwapPat_cond; +def : BrSwapPat_cond; +def : BrSwapPat_cond; +def : BrSwapPat_cond; + +def : BrPat_cond_i; +def : BrPat_cond_i; + +def : BrPat_cond_i; +def : BrPat_cond_i; + + + class IBrPat_cond_i + : Pat<(brcond (i64(CondOp regtype:$RA, immUExt8:$RB)), bb:$DISP), + (InstBr (InstCmp regtype:$RA, immUExt8:$RB), bb:$DISP)>; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + def : IBrPat_cond_i; + + +//Int cond patterns +def : Pat<(brcond (i64 (and GPRC:$RA, 1)), bb:$DISP), + (BLBS GPRC:$RA, bb:$DISP)>; + +def : Pat<(brcond (i64 (seteq (and GPRC:$RA, 1), 0)), bb:$DISP), + (BLBC GPRC:$RA, bb:$DISP)>; + +//Constant handling +def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field + return ((int64_t)N->getZExtValue() << 48) >> 48 == + ((int64_t)N->getZExtValue() << 32) >> 32; +}], SExt16>; + +def immConst2PartInt : PatLeaf<(imm), [{ + //true if imm fits in a LDAH LDA pair with zeroext + uint64_t uval = N->getZExtValue(); + int32_t val32 = (int32_t)uval; + return ((uval >> 32) == 0 && //empty upper bits + val32 <= IMM_FULLHIGH); +}]>; + +def immConst2Part : PatLeaf<(imm), [{ + //true if imm fits in a LDAH LDA pair + int64_t val = (int64_t)N->getZExtValue(); + return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW); +}]>; + + +def : Pat<(i64 immConst2Part:$imm), + (LDA (LL16 immConst2Part:$imm), + (LDAH (LH16 immConst2Part:$imm), R31))>; + +def : Pat<(i64 immSExt16:$imm), + (LDA immSExt16:$imm, R31)>; + +def : Pat<(i64 immSExt16int:$imm), + (ZAPNOTi (LDA immSExt16int:$imm, R31), 15)>; + +def : Pat<(i64 immConst2PartInt:$imm), + (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))), + (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), + R31)), 15)>; + +//===----------------------------------------------------------------------===// +// Instruction aliases +//===----------------------------------------------------------------------===// +// 4.5.1 integer caculate +def : InstAlias<"addw $RA,$L,$RC", + (ADDLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"subw $RA,$L,$RC", + (SUBLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s4addw $RA,$L,$RC", + (S4ADDLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s4subw $RA,$L,$RC", + (S4SUBLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s8addw $RA,$L,$RC", + (S8ADDLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s8subw $RA,$L,$RC", + (S8SUBLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : 
InstAlias<"addl $RA,$L,$RC", + (ADDQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"subl $RA,$L,$RC", + (SUBQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s4addl $RA,$L,$RC", + (S4ADDQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s4subl $RA,$L,$RC", + (S4SUBQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s8addl $RA,$L,$RC", + (S8ADDQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"s8subl $RA,$L,$RC", + (S8SUBQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"mulw $RA,$L,$RC", + (MULLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"mull $RA,$L,$RC", + (MULQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"umulh $RA,$L,$RC", + (UMULHi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"zap $RA,$L,$RC", + (ZAPi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"zapnot $RA,$L,$RC", + (ZAPNOTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"addpi $L,$RC", + (ADDPI GPRC:$RC, s13imm:$L), 0>; +def : InstAlias<"addpis $L,$RC", + (ADDPIS GPRC:$RC, s13imm:$L), 0>; +def : InstAlias<"sbt $RA,$L,$RC", + (SBTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cbt $RA,$L,$RC", + (CBTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; + +// 4.5.2 integer cmp +def : InstAlias<"cmpeq $RA,$L,$RC", + (CMPEQi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cmple $RA,$L,$RC", + (CMPLEi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cmplt $RA,$L,$RC", + (CMPLTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cmpule $RA,$L,$RC", + (CMPULEi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"cmpult $RA,$L,$RC", + (CMPULTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; + +// 4.5.3 integer order +def : InstAlias<"and $RA,$L,$RC", + (ANDi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"bic $RA,$L,$RC", + (BICi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"bis $RA,$L,$RC", + (BISi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"ornot $RA,$L,$RC", + (ORNOTi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"xor $RA,$L,$RC", + (XORi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"eqv $RA,$L,$RC", + (EQVi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; + +// 4.5.4 integer move position +def : InstAlias<"sll $RA,$L,$RC", + (SLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"srl $RA,$L,$RC", + (SRLi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; +def : InstAlias<"sra $RA,$L,$RC", + (SRAi GPRC:$RA, GPRC:$RC, u8imm:$L), 0>; + +// 4.5.5 integer cond select +def : InstAlias<"seleq $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELEQi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"seleq $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELEQr GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE, GPRC:$RDEST), + 0>; +def : InstAlias<"selge $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELGEi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"selgt $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELGTi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"selle $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELLEi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"sellt $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELLTi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"selne $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELNEi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"sellbc $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELLBCi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 0>; +def : InstAlias<"sellbs $RCOND,$RTRUE,$RFALSE,$RDEST", + (SELLBSi GPRC:$RCOND, GPRC:$RFALSE, u8imm:$RTRUE, GPRC:$RDEST), + 
0>; + +// 4.5.6 byte operate +def : InstAlias<"nop", (BISr R31, R31, R31), 0>; + +def : InstAlias<"ldi $RA, $imm", + (LDA GPRC:$RA, s64imm:$imm, R31), 0>; +def : InstAlias<"br $disp", + (BR R31, target:$disp), 0>; +def : InstAlias<"mov $imm,$RA", + (BISi GPRC:$RA, R31, u8imm:$imm), 0>; +def : InstAlias<"mov $RB,$RA", + (BISr GPRC:$RA, R31, GPRC:$RB), 0>; +def : InstAlias<"ret $31,($26), $imm", + (RET R31, R26, s16imm:$imm), 0>; +def : InstAlias<"ret", + (RET R31, R26, 1), 0>; +def : InstAlias<"ldgp $29,0(${RA})", + (MOVaddrPCGp 0, 0, GPRC:$RA), 0>; +def : InstAlias<"clr $RA", + (BISr R31, R31, GPRC:$RA), 0>; + +//===----------------------------------------------------------------------===// +// Sw64 vector Definitions. +//===----------------------------------------------------------------------===// + +include "Sw64VectorVarDefine.td" +include "Sw64InstrFormatsV.td" +include "Sw64InstrVector.td" diff --git a/llvm/lib/Target/Sw64/Sw64InstrVector.td b/llvm/lib/Target/Sw64/Sw64InstrVector.td new file mode 100644 index 000000000000..da17a14ffaae --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64InstrVector.td @@ -0,0 +1,1767 @@ +//===- Sw64InstrVector.td - SIMD instructions -*- tablegen ----------------*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes Sw64 SIMD instructions. +// +//===----------------------------------------------------------------------===// + +class IsCommutable { + bit isCommutable = 1; +} + +def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>]>; +def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisVT<3, OtherVT>]>; + +def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3>, + SDTCisSameAs<0, 1>]>; + +def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; +def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; +def SDT_INSVE : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<0, 3>, + SDTCisVT<4, i32>]>; +def SDT_VINSECTL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; + +def SDT_VecReduce : SDTypeProfile<1, 1, [ // vector reduction + SDTCisFP<0>, SDTCisVec<1> +]>; + +def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; + +def SDT_ZVecBinaryFp : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, f32>]>; + +def SDT_ZVecBinaryInt : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisVT<2, i64>]>; + +def SDT_ZVecCT : SDTypeProfile<1, 1, [ // vector number of head 0/1. + SDTCisInt<0>, SDTCisVec<1> +]>; +def SDT_ZVecFREC : SDTypeProfile<1, 1, [ // vector number of head 0/1. 
+ SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1> +]>; + +def SDT_Vlog : SDTypeProfile<1, 4, [ // vlogzz + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>, SDTCisInt<4> +]>; + +def SDT_ZVecFCMP : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +]>; +def SDT_ZVecFCVT : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1> +]>; +def SDT_ZVecFCVTDL : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisFP<1> +]>; +def SDT_ZVecFCVTSH : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisVec<1> +]>; +def SDT_ZVecFCVTHS : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisVec<1> +]>; + +def SDT_ZVecFRI : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1> +]>; +def SDT_ZVecCPY : SDTypeProfile<1, 1, [ + SDTCisVec<1> +]>; + +def SDT_VSELECT : SDTypeProfile<1, 3, [ + SDTCisInt<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +]>; +def SDT_VSQRT : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0, 1> +]>; +def SDT_VSUMF : SDTypeProfile<1, 1, [ + SDTCisFP<0>, SDTCisVec<1>, SDTCisFP<1> +]>; +def SDT_Sw64VTruncStore : SDTypeProfile<0, 2, [SDTCisPtrTy<1>]>; + +def Sw64VBroadCastLd : SDNode<"Sw64ISD::VBROADCAST_LD", SDTLoad>; +def Sw64VBroadCast : SDNode<"Sw64ISD::VBROADCAST", SDTVBroadcast>; + +def Sw64VBroadCasti32: PatFrag<(ops node:$src), + (Sw64VBroadCastLd node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 4; +}]>; + +def Sw64VBroadCastf32: PatFrag<(ops node:$src), + (Sw64VBroadCastLd node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 4 + && cast(N)->getMemoryVT() == MVT::f32; +}]>; + +def Sw64VBroadCastf64: PatFrag<(ops node:$src), + (Sw64VBroadCastLd node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 8; +}]>; + +def Sw64VTruncStore : SDNode<"Sw64ISD::VTRUNCST", SDT_Sw64VTruncStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def z_ldwe : SDNode<"Sw64ISD::VLDWE", SDTLoad>; +def z_ldse : SDNode<"Sw64ISD::VLDSE", SDTLoad>; +def z_ldde : SDNode<"Sw64ISD::VLDDE", SDTLoad>; + +def z_vlog : SDNode<"Sw64ISD::VLOG", SDT_Vlog>; + +def z_ctpop : SDNode<"Sw64ISD::VCTPOP", SDT_ZVecCT>; +def z_ctlz : SDNode<"Sw64ISD::VCTLZ", SDT_ZVecCT>; + +def Sw64VNOR : SDNode<"Sw64ISD::VNOR", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def Sw64VEQV : SDNode<"Sw64ISD::VEQV", SDTIntBinOp>; +def Sw64VORNOT : SDNode<"Sw64ISD::VORNOT", SDTIntBinOp>; + + +def Sw64VSHF : SDNode<"Sw64ISD::VSHF", SDT_VSHF>; + +def Sw64SHF : SDNode<"Sw64ISD::SHF", SDT_SHF>; +def Sw64ILVEV : SDNode<"Sw64ISD::ILVEV", SDT_ILV>; +def Sw64ILVOD : SDNode<"Sw64ISD::ILVOD", SDT_ILV>; +def Sw64ILVL : SDNode<"Sw64ISD::ILVL", SDT_ILV>; +def Sw64ILVR : SDNode<"Sw64ISD::ILVR", SDT_ILV>; +def Sw64PCKEV : SDNode<"Sw64ISD::PCKEV", SDT_ILV>; +def Sw64PCKOD : SDNode<"Sw64ISD::PCKOD", SDT_ILV>; +def Sw64INSVE : SDNode<"Sw64ISD::INSVE", SDT_INSVE>; + +def Sw64VFCMPEQ : SDNode<"Sw64ISD::VFCMPEQ", SDT_ZVecFCMP>; +def Sw64VFCMPLE : SDNode<"Sw64ISD::VFCMPLE", SDT_ZVecFCMP>; +def Sw64VFCMPLT : SDNode<"Sw64ISD::VFCMPLT", SDT_ZVecFCMP>; +def Sw64VFCMPUN : SDNode<"Sw64ISD::VFCMPUN", SDT_ZVecFCMP>; + +def Sw64VFCVTSD : SDNode<"Sw64ISD::VFCVTSD", SDT_ZVecFCVT>; +def Sw64VFCVTDS : SDNode<"Sw64ISD::VFCVTDS", SDT_ZVecFCVT>; +def Sw64VFCVTLS : SDNode<"Sw64ISD::VFCVTLS", SDT_ZVecFCVT>; +def Sw64VFCVTLD : SDNode<"Sw64ISD::VFCVTLD", SDT_ZVecFCVT>; +def Sw64VFCVTSH : SDNode<"Sw64ISD::VFCVTSH", SDT_ZVecFCVTSH>; +def Sw64VFCVTHS : SDNode<"Sw64ISD::VFCVTHS", SDT_ZVecFCVTHS>; + +def Sw64VFCVTDL : SDNode<"Sw64ISD::VFCVTDL", 
SDT_ZVecFCVTDL>; +def Sw64VFCVTDLG : SDNode<"Sw64ISD::VFCVTDLG", SDT_ZVecFCVTDL>; +def Sw64VFCVTDLP : SDNode<"Sw64ISD::VFCVTDLP", SDT_ZVecFCVTDL>; +def Sw64VFCVTDLZ : SDNode<"Sw64ISD::VFCVTDLZ", SDT_ZVecFCVTDL>; +def Sw64VFCVTDLN : SDNode<"Sw64ISD::VFCVTDLN", SDT_ZVecFCVTDL>; + +def Sw64VFRIS : SDNode<"Sw64ISD::VFRIS", SDT_ZVecFRI>; +def Sw64VFRISG : SDNode<"Sw64ISD::VFRISG", SDT_ZVecFRI>; +def Sw64VFRISP : SDNode<"Sw64ISD::VFRISP", SDT_ZVecFRI>; +def Sw64VFRISZ : SDNode<"Sw64ISD::VFRISZ", SDT_ZVecFRI>; +def Sw64VFRISN : SDNode<"Sw64ISD::VFRISN", SDT_ZVecFRI>; +def Sw64VFRID : SDNode<"Sw64ISD::VFRID", SDT_ZVecFRI>; +def Sw64VFRIDG : SDNode<"Sw64ISD::VFRIDG", SDT_ZVecFRI>; +def Sw64VFRIDP : SDNode<"Sw64ISD::VFRIDP", SDT_ZVecFRI>; +def Sw64VFRIDZ : SDNode<"Sw64ISD::VFRIDZ", SDT_ZVecFRI>; +def Sw64VFRIDN : SDNode<"Sw64ISD::VFRIDN", SDT_ZVecFRI>; + +def vseleqw : SDNode<"Sw64ISD::VSELEQW", SDT_VSELECT>; +def vselltw : SDNode<"Sw64ISD::VSELLTW", SDT_VSELECT>; +def vsellew : SDNode<"Sw64ISD::VSELLEW", SDT_VSELECT>; +def vsellbcw : SDNode<"Sw64ISD::VSELLBCW", SDT_VSELECT>; + +def vfcmovlt : SDNode<"Sw64ISD::VFCMOVLT", SDTFPTernaryOp>; +def vfcmovle : SDNode<"Sw64ISD::VFCMOVLE", SDTFPTernaryOp>; +def vfcmoveq : SDNode<"Sw64ISD::VFCMOVEQ", SDTFPTernaryOp>; + +def vect_vucaddw : SDNode<"Sw64ISD::VECT_VUCADDW", SDTIntBinOp>; +def vect_vucaddh : SDNode<"Sw64ISD::VECT_VUCADDH", SDTIntBinOp>; +def vect_vucaddb : SDNode<"Sw64ISD::VECT_VUCADDB", SDTIntBinOp>; +def vect_vucsubw : SDNode<"Sw64ISD::VECT_VUCSUBW", SDTIntBinOp>; +def vect_vucsubh : SDNode<"Sw64ISD::VECT_VUCSUBH", SDTIntBinOp>; +def vect_vucsubb : SDNode<"Sw64ISD::VECT_VUCSUBB", SDTIntBinOp>; + +def z_vshl_by_scalar : SDNode<"Sw64ISD::VSHL_BY_SCALAR", + SDT_ZVecBinaryFp>; +def z_vsrl_by_scalar : SDNode<"Sw64ISD::VSRL_BY_SCALAR", + SDT_ZVecBinaryFp>; +def z_vsra_by_scalar : SDNode<"Sw64ISD::VSRA_BY_SCALAR", + SDT_ZVecBinaryFp>; + +def z_vcopyf : SDNode<"Sw64ISD::VCOPYF", + SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>, []>; + +def z_v8sll : SDNode<"Sw64ISD::V8SLL", + SDT_ZVecBinaryInt>; + +def z_v8srl : SDNode<"Sw64ISD::V8SRL", + SDT_ZVecBinaryInt>; + +def z_v8sra : SDNode<"Sw64ISD::V8SRA", + SDT_ZVecBinaryInt>; + +def z_vrotr : SDNode<"Sw64ISD::VROTR", + SDT_ZVecBinaryInt>; + +def Sw64VINSECTL : SDNode<"Sw64ISD::VINSECTL", SDT_VINSECTL>; + +// ---- For immediate format. 
+ +def SDT_ZV8X : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; + +def Sw64V8SLL : SDNode<"Sw64ISD::V8SLLi", SDT_ZV8X>; +def Sw64V8SRL : SDNode<"Sw64ISD::V8SRLi", SDT_ZV8X>; +def Sw64V8SRA : SDNode<"Sw64ISD::V8SRAi", SDT_ZV8X>; +def Sw64VROTR : SDNode<"Sw64ISD::VROTRi", SDT_ZV8X>; +def Sw64VROLB : SDNode<"Sw64ISD::VROLBi", SDT_ZV8X>; +def Sw64VROLH : SDNode<"Sw64ISD::VROLHi", SDT_ZV8X>; +def Sw64VROLL : SDNode<"Sw64ISD::VROLLi", SDT_ZV8X>; + +def z_v8slli : PatFrag<(ops node:$vec, node:$val), + (v8i32 (Sw64V8SLL node:$vec, node:$val))>; + +def z_v8srli : PatFrag<(ops node:$vec, node:$val), + (v8i32 (Sw64V8SRL node:$vec, node:$val))>; + +def z_v8srai : PatFrag<(ops node:$vec, node:$val), + (v8i32 (Sw64V8SRA node:$vec, node:$val))>; + +def z_vrotri : PatFrag<(ops node:$vec, node:$val), + (v8i32 (Sw64VROTR node:$vec, node:$val))>; + +def z_vrolbi : PatFrag<(ops node:$vec, node:$val), + (v32i8 (Sw64VROLB node:$vec, node:$val))>; +def z_vrolhi : PatFrag<(ops node:$vec, node:$val), + (v16i16 (Sw64VROLH node:$vec, node:$val))>; +def z_vrolli : PatFrag<(ops node:$vec, node:$val), + (v4i64 (Sw64VROLL node:$vec, node:$val))>; + +def z_vslls : PatFrag<(ops node:$vec, node:$val), + (v4f32 (Sw64V8SLL node:$vec, node:$val))>; + +def z_vslld : PatFrag<(ops node:$vec, node:$val), + (v4f64 (Sw64V8SLL node:$vec, node:$val))>; + +def z_vsrls : PatFrag<(ops node:$vec, node:$val), + (v4f32 (Sw64V8SRL node:$vec, node:$val))>; + +def z_vsrld : PatFrag<(ops node:$vec, node:$val), + (v4f64 (Sw64V8SRL node:$vec, node:$val))>; + +// ---- + +def Sw64VExtractSExt : SDNode<"Sw64ISD::VEXTRACT_SEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +def Sw64VExtractZExt : SDNode<"Sw64ISD::VEXTRACT_ZEXT_ELT", + SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; + +// Pattern fragments +def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, i8)>; +def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, i16)>; +def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, i32)>; +def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, i64)>; + +def vextract_sext_f32 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, f32)>; + +def vextract_sext_f64 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractSExt node:$vec, node:$idx, f64)>; + +def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractZExt node:$vec, node:$idx, i8)>; +def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractZExt node:$vec, node:$idx, i16)>; +def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractZExt node:$vec, node:$idx, i32)>; +def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx), + (Sw64VExtractZExt node:$vec, node:$idx, i64)>; + +def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; +def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; + +class vsetcc_type : + PatFrag<(ops node:$lhs, node:$rhs), + (vsetcc (ResTy node:$lhs), (ResTy node:$rhs), CC)>; + +def SDT_VSetGE : SDTypeProfile<1, 2, [SDTCisInt<0>, + SDTCisVec<1>, + SDTCisSameAs<1, 2>]>; +def vsetge_v8i32 : SDNode<"Sw64ISD::VSETGE", SDT_VSetGE>; + +def z_vsetge : PatFrag<(ops node:$vec, node:$val), + (vsetge_v8i32 node:$vec, node:$val)>; + +class Vector_2Op_Pat : + PatFrag<(ops node:$vec, node:$val), + (OpNode (Ty node:$vec), (Ty node:$val))>; + +class Vector_1Op_Pat : + PatFrag<(ops node:$src), + (OpNode (Ty node:$src))>; + 
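For context on the pattern-fragment helper classes above: a minimal TableGen sketch of how a typed two-operand fragment of this shape is typically parameterized and instantiated. The `_ex` names and the exact parameter list are illustrative assumptions, not records defined by this patch; the patch's real instantiations follow in the MultiVec2OpPat multiclass below.

    // Illustrative sketch only -- parameter names are assumed, not taken from the patch.
    class Vector_2Op_Pat_ex<SDPatternOperator OpNode, ValueType Ty>
        : PatFrag<(ops node:$vec, node:$val),
                  (OpNode (Ty node:$vec), (Ty node:$val))>;

    // One such fragment per lane type, mirroring how MultiVec2OpPat is used for
    // add/sub/and/xor/or below.
    def add_v8i32_ex : Vector_2Op_Pat_ex<add, v8i32>;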
+multiclass MultiVec2OpPat { + def v8i32 : Vector_2Op_Pat; + def v4i64 : Vector_2Op_Pat; + def v4f32 : Vector_2Op_Pat; + def v4f64 : Vector_2Op_Pat; + def v16i16 : Vector_2Op_Pat; + def v32i8 : Vector_2Op_Pat; +} + +defm add : MultiVec2OpPat; +defm sub : MultiVec2OpPat; +defm and : MultiVec2OpPat; +defm xor : MultiVec2OpPat; +defm or : MultiVec2OpPat; + +def vbic : BinOpFrag<(and node:$LHS, (vnot node:$RHS))>; +def vornot : BinOpFrag<(or node:$LHS, (vnot node:$RHS))>; +def veqv : BinOpFrag<(vnot (xor node:$LHS, node:$RHS))>; + +def vseteq_v8i32 : vsetcc_type; +def vsetle_v8i32 : vsetcc_type; +def vsetlt_v8i32 : vsetcc_type; +def vsetule_v8i32 : vsetcc_type; +def vsetult_v8i32 : vsetcc_type; +def vsetueq_v32i8 : vsetcc_type; +def vsetugt_v32i8 : vsetcc_type; + +def SDT_VMAX : SDTypeProfile<1, 2, [SDTCisInt<0>,SDTCisVec<0>, + SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; +def SDT_VFMAX : SDTypeProfile<1, 2, [SDTCisFP<0>,SDTCisVec<0>, + SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; +def vmax : SDNode<"Sw64ISD::VMAX", SDT_VMAX>; +def vmin : SDNode<"Sw64ISD::VMIN", SDT_VMAX>; +def vumax : SDNode<"Sw64ISD::VUMAX", SDT_VMAX>; +def vumin : SDNode<"Sw64ISD::VUMIN", SDT_VMAX>; + +def vmaxf : SDNode<"Sw64ISD::VMAXF", SDT_VFMAX>; +def vminf : SDNode<"Sw64ISD::VMINF", SDT_VFMAX>; + +class vfsetcc_type : + PatFrag<(ops node:$lhs, node:$rhs), + (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>; + +// ISD::SETFALSE cannot occur +def vfseteq_v4f32 : vfsetcc_type; +def vfseteq_v4f64 : vfsetcc_type; +def vfsetge_v4f32 : vfsetcc_type; +def vfsetge_v4f64 : vfsetcc_type; +def vfsetgt_v4f32 : vfsetcc_type; +def vfsetgt_v4f64 : vfsetcc_type; +def vfsetle_v4f32 : vfsetcc_type; +def vfsetle_v4f64 : vfsetcc_type; +def vfsetlt_v4f32 : vfsetcc_type; +def vfsetlt_v4f64 : vfsetcc_type; +def vfsetne_v4f32 : vfsetcc_type; +def vfsetne_v4f64 : vfsetcc_type; + +def vfsetoeq_v4f32 : vfsetcc_type; +def vfsetoeq_v4f64 : vfsetcc_type; +def vfsetoge_v4f32 : vfsetcc_type; +def vfsetoge_v4f64 : vfsetcc_type; +def vfsetogt_v4f32 : vfsetcc_type; +def vfsetogt_v4f64 : vfsetcc_type; +def vfsetole_v4f32 : vfsetcc_type; +def vfsetole_v4f64 : vfsetcc_type; +def vfsetolt_v4f32 : vfsetcc_type; +def vfsetolt_v4f64 : vfsetcc_type; +def vfsetone_v4f32 : vfsetcc_type; +def vfsetone_v4f64 : vfsetcc_type; +def vfsetord_v4f32 : vfsetcc_type; +def vfsetord_v4f64 : vfsetcc_type; +def vfsetun_v4f32 : vfsetcc_type; +def vfsetun_v4f64 : vfsetcc_type; +def vfsetueq_v4f32 : vfsetcc_type; +def vfsetueq_v4f64 : vfsetcc_type; +def vfsetuge_v4f32 : vfsetcc_type; +def vfsetuge_v4f64 : vfsetcc_type; +def vfsetugt_v4f32 : vfsetcc_type; +def vfsetugt_v4f64 : vfsetcc_type; +def vfsetule_v4f32 : vfsetcc_type; +def vfsetule_v4f64 : vfsetcc_type; +def vfsetult_v4f32 : vfsetcc_type; +def vfsetult_v4f64 : vfsetcc_type; +def vfsetune_v4f32 : vfsetcc_type; +def vfsetune_v4f64 : vfsetcc_type; +// ISD::SETTRUE cannot occur +// ISD::SETFALSE2 cannot occur +// ISD::SETTRUE2 cannot occur + +class SplatComplexPattern roots = [], + list props = []> : + ComplexPattern { + Operand OpClass = opclass; +} + +multiclass MultiVec1OpPat { + def v8i32 : Vector_1Op_Pat; + def v4i64 : Vector_1Op_Pat; + def v4f32 : Vector_1Op_Pat; + def v4f64 : Vector_1Op_Pat; + def v16i16 : Vector_1Op_Pat; + def v32i8 : Vector_1Op_Pat; +} + +defm vsplat : MultiVec1OpPat; + +def vsplati64_simm8 : SplatComplexPattern; + +def vsplati64_uimm8 : SplatComplexPattern; + +def vsplati32_simm8 : SplatComplexPattern; + +def vsplati32_uimm8 : SplatComplexPattern; + +def vsplati16_uimm8 : SplatComplexPattern; + +def 
vsplati8_uimm8 : SplatComplexPattern; + +def AddSubImm8Pat : ComplexPattern", []>; +def ComplexImmPat : ComplexPattern; + +def addrimm10 : ComplexPattern; + +def addrimm10lsl1 : ComplexPattern; + +def addrimm16 : ComplexPattern; +def addrimm12 : ComplexPattern; + +def immZExt1Ptr : ImmLeaf(Imm);}]>; +def immZExt2Ptr : ImmLeaf(Imm);}]>; +def immZExt3Ptr : ImmLeaf(Imm);}]>; +def immZExt4Ptr : ImmLeaf(Imm);}]>; +def immZExt5Ptr : ImmLeaf(Imm);}]>; +def immZExt8Ptr : ImmLeaf(Imm);}]>; + +def vinsert_v8i32 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v8i32 (vector_insert node:$vec, node:$val, node:$idx))>; + +def vinsert_v4f32 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v4f32 (vector_insert node:$vec, node:$val, node:$idx))>; + +def vinsert_v4f64 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v4f64 (vector_insert node:$vec, node:$val, node:$idx))>; + +def vinsert_v32i8 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v32i8 (vector_insert node:$vec, node:$val, node:$idx))>; + +def vinsert_v16i16 : PatFrag<(ops node:$vec, node:$val, node:$idx), + (v16i16 (vector_insert node:$vec, node:$val, node:$idx))>; +// Instruction desc. +// 存储器指令格式 +class VectorStoreBASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROWD:$RA, MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$RA, $addr"); + list Pattern = [(OpNode (vt ROWD:$RA), Addr:$addr)]; +} + +class VectorLoadBASE { + dag OutOperandList = (outs ROWD:$RA); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$RA, $addr"); + list Pattern = [(set ROWD:$RA, (vt (OpNode Addr:$addr)))]; +} + +let mayStore = 1 in +class VectorStore opcode, string instr_asm, RegisterOperand ROWD, + ValueType vt, SDPatternOperator OpNode=null_frag> + : MFormV, + VectorStoreBASE; + +let mayLoad = 1 in +class VectorLoad opcode, string instr_asm, RegisterOperand ROWD, + ValueType vt, SDPatternOperator OpNode=null_frag> + : MFormV, + VectorLoadBASE; + +let DecoderMethod = "DecodeFIXMEInstruction" in{ +def VSTS : VectorStore<0x0E, "vsts", V256LOpnd, v4f32, store>; +def VSTD : VectorStore<0x0F, "vstd", V256LOpnd, v4f64, store>; +def VLDS : VectorLoad <0x0C, "vlds", V256LOpnd, v4f32, load>; +def VLDD : VectorLoad <0x0D, "vldd", V256LOpnd, v4f64, load>; +def VLDWE : VectorLoad <0x09, "ldwe", V256LOpnd, v8i32, Sw64VBroadCasti32>; +def VLDSE : VectorLoad <0x0A, "ldse", V256LOpnd, v4f32, Sw64VBroadCastf32>; +def VLDDE : VectorLoad <0x0B, "ldde", V256LOpnd, v4f64, Sw64VBroadCastf64>; +} +multiclass V256Pat { + def v32i8 : PatFrag<(ops node:$src), (v32i8 (OpNode node:$src))>; + def v16i16 : PatFrag<(ops node:$src), (v16i16 (OpNode node:$src))>; + def v8i32 : PatFrag<(ops node:$src), (v8i32 (OpNode node:$src))>; + def v4i64 : PatFrag<(ops node:$src), (v4i64 (OpNode node:$src))>; + def v4f64 : PatFrag<(ops node:$src), (v4f64 (OpNode node:$src))>; +} + +//////////////////////////////////////////// +// Extern Vector Memory Operation +// ///////////////////////////////////////// +// 带功能域的存储器指令格式 +let mayStore = 1 in +class VectorStoreExt func, string instr_asm, ValueType vt, + SDPatternOperator OpNode=null_frag> + : MFuncFormV<0x1C, func>, + VectorStoreBASE; + +let mayLoad = 1 in +class VectorLoadExt func, string instr_asm, ValueType vt, + SDPatternOperator OpNode=null_frag> + : MFuncFormV<0x1C, func>, + VectorLoadBASE; + +let DecoderMethod = "DecodeFIXMEInstruction" in{ +def VLDWU : VectorLoadExt <0x00, "vldw_u" , v8i32>; +def VLDSU : VectorLoadExt <0x02, "vlds_u" , v4f32>; +def VLDDU : VectorLoadExt <0x04, 
"vldd_u" , v4f64>; +def VLDDNC : VectorLoadExt <0x0e, "vldd_nc", v4f64>; +def VSTWU : VectorStoreExt<0x01, "vstw_u" , v8i32>; +def VSTSU : VectorStoreExt<0x03, "vsts_u" , v4f32>; +def VSTDU : VectorStoreExt<0x05, "vstd_u" , v4f64>; +def VSTWUL : VectorStoreExt<0x08, "vstw_ul", v8i32>; +def VSTSUL : VectorStoreExt<0x0a, "vsts_ul", v4f32>; +def VSTDUL : VectorStoreExt<0x0c, "vstd_ul", v4f64>; +def VSTWUH : VectorStoreExt<0x09, "vstw_uh", v8i32>; +def VSTSUH : VectorStoreExt<0x0b, "vsts_uh", v4f32>; +def VSTDUH : VectorStoreExt<0x0d, "vstd_uh", v4f64>; +def VSTDNC : VectorStoreExt<0x0f, "vstd_nc", v4f64>; +} +class vload_pat + : Pat<(Vt (OpNode addrimm16:$src)), (Inst addrimm16:$src)>; + +class vstore_pat + : Pat<(OpNode (Vt V256L:$DST), addrimm16:$src), (Inst $DST, addrimm16:$src)>; + + +// commom pattern for load/store intrinsic +multiclass vector_mem_multipat { +def : vload_pat; +def : vstore_pat; +def : vload_pat; +def : vstore_pat; +} + +multiclass vector_mem_intrpat { +def : vload_pat; +def : vstore_pat; +} + +// extension pattern for load_u/loade/store_u/storeuh/.. +multiclass vector_mem_extension { +def : vload_pat (LoadI#U)>; +def : vload_pat (LoadI#E)>; +def : vstore_pat(StoreI#U)>; +def : vstore_pat(StoreI#UH)>; +def : vstore_pat(StoreI#UL)>; +} + +defm : vector_mem_multipat; +defm : vector_mem_multipat; +defm : vector_mem_multipat; +defm : vector_mem_multipat; + +defm : vector_mem_intrpat; +defm : vector_mem_intrpat; + +defm : vector_mem_extension; +defm : vector_mem_extension; +defm : vector_mem_extension; +defm : vector_mem_extension; + +multiclass vector_mem_nc { +def : vload_pat ; +def : vstore_pat; +} + +defm : vector_mem_nc; +defm : vector_mem_nc; +defm : vector_mem_nc; +defm : vector_mem_nc; +defm : vector_mem_nc; + +def : Pat<(v8i32 (Sw64VBroadCast (i64 (extloadi32 addrimm16:$src)))), + (VLDWE addrimm16:$src)>; +def : Pat<(v4f32 (Sw64VBroadCast (f32 (load addrimm16:$src)))), + (VLDSE addrimm16:$src)>; +def : Pat<(v4i64 (Sw64VBroadCast (i64 (load addrimm16:$src)))), + (VLDDE addrimm16:$src)>; +def : Pat<(v4f64 (Sw64VBroadCast (f64 (load addrimm16:$src)))), + (VLDDE addrimm16:$src)>; + +def : vstore_pat; + +class SIMD_3RR_SAME { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROA:$RA), (Vt ROB:$RB)))]; +} + +class SIMD_3RI_SAME { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, immtype:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RC"); + list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROA:$RA), immtype:$Imm))]; +} + +class SIMD_4RR_SAME { + dag OutOperandList = (outs ROC:$RD); + dag InOperandList = (ins ROC:$RA, ROC:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + list Pattern = [(set (Vt ROC:$RD), + (OpNode (Vt ROC:$RA), (Vt ROC:$RB), (Vt ROC:$RC)))]; +} + +class Vector_2OP_Reg_Pat + : Pat<(OpNode (Vt OR:$RA), (Yt OR:$RB)), + (Inst (Vt OR:$RA), (Yt OR:$RB))>; + +class Vector_1OP_Imm_Pat + : Pat<(Vt (OpNode (Vt OR:$RA), (i64 cpx:$Imm))), + (Inst (Vt OR:$RA), $Imm)>; + +class Vector_2OP_Imm_VB_Pat + : Pat<(Vt (OpNode (Vt OR:$RA), (it (immop (i64 cpx:$Imm))))), + (Inst (Vt OR:$RA), $Imm)>; + +class Vector_2OP_Reg_Scalar + : Pat<(Vt (OpNode (Vt ROA:$RA), (i64 GPRCOpnd:$RB))), + (Inst (Vt ROA:$RA), (i32 (COPY_TO_REGCLASS GPRCOpnd:$RB, ROB)))>; + +class Vector_2OP_Reg_S32 + : Pat<(Vt (OpNode (Vt ROA:$RA), (Vt (Sw64VBroadCast (i64 GPRCOpnd:$RB))))), + 
(Inst (Vt ROA:$RA), (i32 (COPY_TO_REGCLASS GPRCOpnd:$RB, FPRC_lo)))>; + +class Vector_3OP_SameReg_Pat + : Pat<(OpNode (Vt OR:$RA), (Vt OR:$RB), (Vt OR:$RC)), + (Inst OR:$RA, OR:$RB, OR:$RC)>; + +multiclass SIMD_ARITH Opcode, bits<8>func, + string instr_asm, SDPatternOperator OpNode, ValueType Vt, + Operand immtype, RegisterOperand RO, + SDPatternOperator IOp = null_frag, + ComplexPattern cpx = AddSubImm8Pat> { + def rr : FPFormV, SIMD_3RR_SAME; + + def ri : FPFormIV, + SIMD_3RI_SAME; + + def : Vector_2OP_Reg_Pat(NAME # rr)>; + + def : Vector_2OP_Imm_VB_Pat(NAME # ri)>; + + def : Vector_2OP_Imm_VB_Pat(NAME # ri)>; +} + +defm VUCADDv16i16 : SIMD_ARITH<0x1A, 0x42, "vucaddh", add, v16i16, + s8imm, V256LOpnd, int_sw64_vucaddh_v16hi>; +defm VUCSUBv16i16 : SIMD_ARITH<0x1A, 0x43, "vucsubh", sub, v16i16, + s8imm, V256LOpnd, int_sw64_vucsubh_v16hi>; +defm VUCADDv32i8 : SIMD_ARITH<0x1A, 0x44, "vucaddb", add, v32i8, + s8imm, V256LOpnd, int_sw64_vucaddb_v32qi>; +defm VUCSUBv32i8 : SIMD_ARITH<0x1A, 0x45, "vucsubb", sub, v32i8, + s8imm, V256LOpnd, int_sw64_vucsubb_v32qi>; +defm VADDv8i32 : SIMD_ARITH<0x1A, 0x00, "vaddw", add, v8i32, + s8imm, V256LOpnd>; +defm VSUBv8i32 : SIMD_ARITH<0x1A, 0x01, "vsubw", sub, v8i32, + s8imm, V256LOpnd>; +defm VUCADDv8i32 : SIMD_ARITH<0x1A, 0x40, "vucaddw", add, v8i32, + s8imm, V256LOpnd, int_sw64_vucaddw>; +defm VUCSUBv8i32 : SIMD_ARITH<0x1A, 0x41, "vucsubw", sub, v8i32, + s8imm, V256LOpnd, int_sw64_vucsubw>; +defm VADDv4i64 : SIMD_ARITH<0x1A, 0x0E, "vaddl", add, v4i64, + s8imm, V256LOpnd>; +defm VSUBv4i64 : SIMD_ARITH<0x1A, 0x0F, "vsubl", sub, v4i64, + s8imm, V256LOpnd>; + +def : Vector_2OP_Reg_Pat; +def : Vector_2OP_Reg_Pat; +def : Vector_2OP_Reg_Pat; +def : Vector_2OP_Reg_Pat; + +def : Vector_1OP_Imm_Pat; +def : Vector_1OP_Imm_Pat; +def : Vector_1OP_Imm_Pat; +def : Vector_1OP_Imm_Pat; + +class SIMD_3RR_VCMPGEW { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); +} + +class SIMD_3RI_VCMPGEW { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, immtype:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RC"); +} + +def VCMPGEWrr : FPFormV<0x1A, 0x02>, SIMD_3RR_VCMPGEW<"vcmpgew", null_frag, v8i32>; +def VCMPGEWri : FPFormIV<0x1A, 0x02>, SIMD_3RI_VCMPGEW<"vcmpgew", null_frag, v8i32, u8imm>; + +def : Pat<(int_sw64_vcmpgew (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB)), + (i64 (FTOIStmp (VCMPGEWrr (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB))))>; +def : Pat<(int_sw64_vcmpgew (v8i32 V256LOpnd:$RA), (v8i32 (Sw64VBroadCast (i64 AddSubImm8Pat:$Imm)))), + (i64 (FTOIStmp (VCMPGEWri (v8i32 V256LOpnd:$RA), $Imm)))>; + +defm VCMPEQW : SIMD_ARITH<0x1A, 0x03, "vcmpeqw", seteq, v8i32, + u8imm, V256LOpnd, int_sw64_vcmpeqw, AddSubImm8Pat>; +defm VCMPLEW : SIMD_ARITH<0x1A, 0x04, "vcmplew", setle, v8i32, + u8imm, V256LOpnd, int_sw64_vcmplew, AddSubImm8Pat>; +defm VCMPLTW : SIMD_ARITH<0x1A, 0x05, "vcmpltw", setlt, v8i32, + u8imm, V256LOpnd, int_sw64_vcmpltw, AddSubImm8Pat>; +defm VCMPULEW : SIMD_ARITH<0x1A, 0x06, "vcmpulew", setule, v8i32, + u8imm, V256LOpnd, int_sw64_vcmpulew, AddSubImm8Pat>; +defm VCMPULTW : SIMD_ARITH<0x1A, 0x07, "vcmpultw", setult, v8i32, + u8imm, V256LOpnd, int_sw64_vcmpultw, AddSubImm8Pat>; + +defm VCMPUEQB : SIMD_ARITH<0x1A, 0x4B, "vcmpueqb", null_frag, v32i8, + u8imm, V256LOpnd, int_sw64_vcmpueqb, AddSubImm8Pat>; +defm VCMPUGTB : SIMD_ARITH<0x1A, 0x4C, "vcmpugtb", null_frag, v32i8, + u8imm, V256LOpnd, int_sw64_vcmpugtb, AddSubImm8Pat>; + 
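For context on the SIMD_ARITH multiclass above: a minimal, hypothetical sketch of what a single instantiation expands into, following the argument order visible in the VADDv8i32/VSUBv8i32 defm lines. The record name VEXAMPLEv8i32 and the 0x7F function field are placeholders for illustration only, not encodings defined by this patch.

    // Hypothetical illustration -- VEXAMPLE and 0x7F are placeholders.
    defm VEXAMPLEv8i32 : SIMD_ARITH<0x1A, 0x7F, "vexample", add, v8i32,
                                    s8imm, V256LOpnd>;
    // Expands to a register-register record VEXAMPLEv8i32rr (FPFormV encoding with
    // SIMD_3RR_SAME operands and pattern) and a register-immediate record
    // VEXAMPLEv8i32ri (FPFormIV with SIMD_3RI_SAME), plus the accompanying
    // Vector_2OP_Reg_Pat / Vector_2OP_Imm_VB_Pat selection patterns.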
+class SIMD_2RR_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RC"); +} + +def CTPOPOW : FPFormV_CT<0x1A, 0x18>, + SIMD_2RR_BASE<"ctpopow", FPRCOpnd, V256LOpnd>; +def CTLZOW : FPFormV_CT<0x1A, 0x19>, + SIMD_2RR_BASE<"ctlzow", FPRCOpnd, V256LOpnd>; + +def VSUMv8i32 : FPFormV_CT<0x1A, 0x47>, + SIMD_2RR_BASE<"vsumw", FPRCOpnd, V256LOpnd>; +def VSUMv4i64 : FPFormV_CT<0x1A, 0x48>, + SIMD_2RR_BASE<"vsuml", FPRCOpnd, V256LOpnd>; + +def : Pat<(int_sw64_vsumw (v8i32 V256LOpnd:$RA)), + (i64 (FTOIStmp (i64 (VSUMv8i32 (v8i32 V256LOpnd:$RA)))))>; + +def : Pat<(int_sw64_vsuml (v4i64 V256LOpnd:$RA)), + (i64 (FTOITtmp (i64 (VSUMv4i64 (v4i64 V256LOpnd:$RA)))))>; + +def : Pat<(int_sw64_ctpopow (v4i64 V256LOpnd:$RA)), + (i64 (FTOIStmp (i64 (CTPOPOW (v4i64 V256LOpnd:$RA)))))>; + +def : Pat<(int_sw64_ctlzow (v4i64 V256LOpnd:$RA)), + (i64 (FTOIStmp (i64 (CTLZOW (v4i64 V256LOpnd:$RA)))))>; + +class SIMD_3RR_SCALER { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROA:$RA), ROB:$RB))]; +} + +multiclass SIMD_SHIFT Opcode, bits<8>func, + string instr_asm, SDPatternOperator OpNode, ValueType Vt, + Operand immtype, RegisterOperand ROA, RegisterOperand ROB, + SDPatternOperator IOp = null_frag> { + def rr : FPFormV, + SIMD_3RR_SCALER; + + def ri : FPFormIV, + SIMD_3RI_SAME; + + def : Vector_2OP_Reg_Scalar(NAME # rr)>; + + def : Vector_1OP_Imm_Pat(NAME # ri)>; +} + +multiclass SIMD_Shift_Multi funcW, bits<8> funcB,bits<8> funcH, + bits<8> funcL, string instr_asm, RegisterOperand RO, + SDPatternOperator OpNode> { +defm v8i32 : SIMD_SHIFT<0x1A, funcW, instr_asm#w, OpNode, v8i32, + s8imm, V256LOpnd, RO>; +defm v16i16 : SIMD_SHIFT<0x1A, funcH, instr_asm#h, OpNode, v16i16, + s8imm, V256LOpnd, RO>; +defm v32i8 : SIMD_SHIFT<0x1A, funcB, instr_asm#b, OpNode, v32i8, + s8imm, V256LOpnd, RO>; +defm v4i64 : SIMD_SHIFT<0x1A, funcL, instr_asm#l, OpNode, v4i64, + s8imm, V256LOpnd, RO>; + +def : Vector_2OP_Imm_VB_Pat(NAME # v8i32 #ri)>; + +def : Vector_2OP_Imm_VB_Pat(NAME # v16i16 #ri)>; + +def : Vector_2OP_Imm_VB_Pat(NAME # v32i8 #ri)>; + +def : Vector_2OP_Imm_VB_Pat(NAME # v4i64 #ri)>; +} + +defm VSLL : SIMD_Shift_Multi<0x08, 0x10, 0x14, 0x1A, "vsll", + FPRCloOpnd, int_sw64_vsll>; +defm VSRL : SIMD_Shift_Multi<0x09, 0x11, 0x15, 0x1B, "vsrl", + FPRCloOpnd, int_sw64_vsrl>; +defm VSRA : SIMD_Shift_Multi<0x0A, 0x12, 0x16, 0x1C, "vsra", + FPRCloOpnd, int_sw64_vsra>; +defm VROL : SIMD_Shift_Multi<0x0B, 0x13, 0x17, 0x1D, "vrol", + FPRCloOpnd, int_sw64_vrol>; + +multiclass Vector_Shift_VB { +def : Vector_1OP_Imm_Pat(InstName # ri)>; +def : Vector_2OP_Reg_S32(InstName # rr)>; +} + +multiclass Vector_Shift { +defm : Vector_Shift_VB; +defm : Vector_Shift_VB; +defm : Vector_Shift_VB; +defm : Vector_Shift_VB; +} + +defm : Vector_Shift; +defm : Vector_Shift; +defm : Vector_Shift; +defm : Vector_Shift; + +defm VSLLOW : SIMD_SHIFT<0x1A, 0x0C, "sllow", int_sw64_sllow, v4i64, + s8imm, V256LOpnd, FPRCloOpnd>; +defm VSRLOW : SIMD_SHIFT<0x1A, 0x0D, "srlow", int_sw64_srlow, v4i64, + s8imm, V256LOpnd, FPRCloOpnd>; +defm VSRAOW : SIMD_SHIFT<0x1A, 0x46, "sraow", int_sw64_sraow, v4i64, + s8imm, V256LOpnd, FPRCloOpnd>; + +def : Pat<(int_sw64_vslls (v4f32 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), + (VSLLOWri V256LOpnd:$RA, $Imm)>; +def : Pat<(int_sw64_vslld (v4f64 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), + (VSLLOWri 
V256LOpnd:$RA, $Imm)>; + +def : Pat<(int_sw64_vsrls (v4f32 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), + (VSRLOWri V256LOpnd:$RA, $Imm)>; +def : Pat<(int_sw64_vsrld (v4f64 V256LOpnd:$RA), (i64 AddSubImm8Pat:$Imm)), + (VSRLOWri V256LOpnd:$RA, $Imm)>; + +multiclass SIMD_LOGIC OpFunc,string instr_asm, RegisterOperand RO, + SDPatternOperator OpNode> { +def "" : FForm4LVLog<0x5, OpFunc>, + SIMD_3RR_SAME; + +def : Vector_2OP_Reg_Pat(NAME)>; +def : Vector_2OP_Reg_Pat(NAME)>; +def : Vector_2OP_Reg_Pat(NAME)>; +} + +defm VOR : SIMD_LOGIC<0x54, "vbisw", V256LOpnd, or>; +defm VAND : SIMD_LOGIC<0x40, "vandw", V256LOpnd, and>; +defm VXOR : SIMD_LOGIC<0x1c, "vxorw", V256LOpnd, xor>; + +defm VORNOT : SIMD_LOGIC<0x51, "vornotw", V256LOpnd, vornot>; +defm VBIC : SIMD_LOGIC<0x10, "vbicw", V256LOpnd, vbic>; +defm VEQV : SIMD_LOGIC<0x41, "veqvw", V256LOpnd, veqv>; + +def : Pat<(v8i32 immAllZerosV), (VOR (v8i32 V31) , (v8i32 V31))>; +def : Pat<(v32i8 immAllZerosV), (VOR (v32i8 V31) , (v32i8 V31))>; +def : Pat<(v16i16 immAllZerosV), (VOR (v16i16 V31), (v16i16 V31))>; +def : Pat<(v4i64 immAllZerosV), (VOR (v4i64 V31) , (v4i64 V31))>; + +def : Pat<(v8i32 immAllOnesV), (VEQV (v8i32 V31) , (v8i32 V31))>; +def : Pat<(v32i8 immAllOnesV), (VEQV (v32i8 V31) , (v32i8 V31))>; +def : Pat<(v16i16 immAllOnesV), (VEQV (v16i16 V31), (v16i16 V31))>; +def : Pat<(v4i64 immAllOnesV), (VEQV (v4i64 V31) , (v4i64 V31))>; + +class SIMD_INSERT_BASE { + dag OutOperandList = (outs V256LOpnd:$RD); + dag InOperandList = (ins FPO:$RA, V256LOpnd:$RB, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); + + list Pattern = [(set V256LOpnd:$RD, + (vector_insert (vectype V256LOpnd:$RB), + (eltVt FPO:$RA), ImmOp:$Imm))]; + +} + +multiclass SIMD_INSERT_Multi funcB, bits<6> funcH,bits<6> funcW, + bits<6> funcL, string instr_asm> { +def E8 : FForm4LV<0x1B, funcB>, + SIMD_INSERT_BASE; + +def E16 : FForm4LV<0x1B, funcH>, + SIMD_INSERT_BASE; + +def E32 : FForm4LV<0x1B, funcW>, + SIMD_INSERT_BASE; + +def E64 : FForm4LV<0x1B, funcL>, + SIMD_INSERT_BASE; +} + +defm VINS : SIMD_INSERT_Multi<0x2A, 0x2B, 0x20, 0x21, "vins">; + +def : Pat<(vector_insert (v4f32 V256LOpnd:$RB), (f32 FPRCloOpnd:$RA), VectorIndexD:$idx), + (VINSE64 (f64 (COPY_TO_REGCLASS FPRCloOpnd:$RA, FPRC)), (v4f32 V256LOpnd:$RB), VectorIndexD:$idx)>; + +def : Pat<(vector_insert (v4i64 V256LOpnd:$RB), (i64 FPRCOpnd:$RA), VectorIndexD:$idx), + (VINSE64 (i64 FPRCOpnd:$RA), (v4i64 V256LOpnd:$RB), VectorIndexD:$idx)>; + +class vins_pat + : Pat<(OpNode GPRCOpnd:$RA, (vectype V256LOpnd:$RB), ImmOp:$idx), + (vectype (Inst (eltvt (COPY_TO_REGCLASS GPRCOpnd:$RA, RC)), (vectype V256LOpnd:$RB), ImmOp:$idx))>; + +class vinselt + : Pat<(OpNode (vectype V256LOpnd:$RA), GPRCOpnd:$RB, ImmOp:$idx), + (vectype (Inst (eltvt (COPY_TO_REGCLASS GPRCOpnd:$RB, RC)), (vectype V256LOpnd:$RA), ImmOp:$idx))>; + +def : vins_pat; +def : vins_pat; +def : vins_pat; +def : vins_pat; + +def : vinselt; +def : vinselt; +def : vinselt; +def : vinselt; + +def : Pat<(int_sw64_vinsfs (f32 FPRCloOpnd:$RA), + (v4f32 V256LOpnd:$RB), VectorIndexD:$idx), + (v4f32 (VINSE64 (f64 (COPY_TO_REGCLASS FPRCloOpnd:$RA, FPRC)), (v4f32 V256LOpnd:$RB), VectorIndexD:$idx))>; +def : Pat<(int_sw64_vinsfd (f64 FPRCOpnd:$RA), + (v4f64 V256LOpnd:$RB), VectorIndexD:$idx), + (v4f64 (VINSE64 (f64 FPRCOpnd:$RA), (v4f64 V256LOpnd:$RB), VectorIndexD:$idx))>; + +multiclass SIMD_COPY_Multi funcB, bits<6> funcH,bits<6> funcW, + bits<6> funcL, string instr_asm> { +def E8 : FForm2V<0x1B, funcB>, + SIMD_2RR_BASE; + +def E16 : 
FForm2V<0x1B, funcH>, + SIMD_2RR_BASE; + +def E32 : FForm2V<0x1B, funcW>, + SIMD_2RR_BASE; + +def E64 : FForm2V<0x1B, funcL>, + SIMD_2RR_BASE; + +def : Pat <(v32i8 (Sw64VBroadCast GPRCOpnd:$RA)), + (v32i8 (!cast(NAME # E8) + (i64 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC))))>; + +def : Pat <(v16i16 (Sw64VBroadCast GPRCOpnd:$RA)), + (v16i16 (!cast(NAME # E16) + (i64 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC))))>; + +def : Pat <(v8i32 (Sw64VBroadCast GPRCOpnd:$RA)), + (v8i32 (!cast(NAME # E32) + (i32 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC_lo))))>; + +def : Pat <(v4i64 (Sw64VBroadCast GPRCOpnd:$RA)), + (v4i64 (!cast(NAME # E64) + (i64 (COPY_TO_REGCLASS GPRCOpnd:$RA, FPRC))))>; + +def : Pat <(v4f64 (Sw64VBroadCast (f64 FPRCOpnd:$RA))), + (v4f64 (!cast(NAME # E64) (f64 FPRCOpnd:$RA)))>; + +def : Pat <(v4f32 (Sw64VBroadCast (f32 FPRCloOpnd:$RA))), + (v4f32 (!cast(NAME # E64) (f64 (COPY_TO_REGCLASS FPRCloOpnd:$RA, FPRC))))>; +} + +defm VCPY : SIMD_COPY_Multi<0x32, 0x33, 0x24, 0x25, "vcpy">; + +multiclass SIMD_VINSECT_Multi { +def H : FForm4VINSECTL<0x1B, 0x2C>, + SIMD_3RR_SAME; + +def W : FForm4VINSECTL<0x1B, 0x2D>, + SIMD_3RR_SAME; + +def L : FForm4VINSECTL<0x1B, 0x2E>, + SIMD_3RR_SAME; + +def B : FForm4VINSECTL<0x1B, 0x2F>, + SIMD_3RR_SAME; +} + +defm VINSECTL : SIMD_VINSECT_Multi<"vinsectl">; + +def VSHFQB : FForm4VINSECTL<0x1B, 0x31>, + SIMD_3RR_SAME<"vshfqb", int_sw64_vshfqb, v32i8, V256LOpnd>; + +class SIMD_4RI_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); + + list Pattern = [(set ROD:$RD, + (OpNode (Vt ROA:$RA), (Vt ROB:$RB), Imm:$Imm))]; +} + +class SIMD_4RR_BASE { + + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + +} + +def VSHFQ : FForm4LV<0x1B, 0x30>, + SIMD_4RI_BASE<"vshfq", int_sw64_vshfq, v8i32, u5imm, immZExt4Ptr, V256LOpnd>; + +def VCONW : FForm4LV2<0x1B, 0x26>, + SIMD_4RR_BASE<"vconw", FPRCOpnd, V256LOpnd>; +def VCONS : FForm4LV2<0x1B, 0x28>, + SIMD_4RR_BASE<"vcons", FPRCOpnd, V256LOpnd>; +def VCOND : FForm4LV2<0x1B, 0x29>, + SIMD_4RR_BASE<"vcond", FPRCOpnd, V256LOpnd>; +def VSHFW : FForm4LV2<0x1B, 0x27>, + SIMD_4RR_BASE<"vshfw", FPRCOpnd, V256LOpnd>; + +def : Pat<(int_sw64_vshfq (v8i32 V256LOpnd:$RA), + (v8i32 V256LOpnd:$RB), (i64 ComplexImmPat:$imm)), + (VSHFQ V256LOpnd:$RA, V256LOpnd:$RB, $imm)>; + +def : Pat<(int_sw64_vconw (v8i32 V256LOpnd:$RA), + (v8i32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VCONW (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def : Pat<(int_sw64_vcons (v4f32 V256LOpnd:$RA), + (v4f32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VCONS (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def : Pat<(int_sw64_vcond (v4f64 V256LOpnd:$RA), + (v4f64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VCOND (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def : Pat<(int_sw64_vconl (v4i64 V256LOpnd:$RA), + (v4i64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VCOND (v4i64 V256LOpnd:$RA), (v4i64 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def : Pat<(Sw64VSHF (v8i32 V256LOpnd:$RA), + (v8i32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VSHFW (v8i32 V256LOpnd:$RA), (v8i32 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; +def : Pat<(Sw64VSHF (v4i64 V256LOpnd:$RA), + (v4i64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VSHFW (v4i64 V256LOpnd:$RA), (v4i64 
V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; +def : Pat<(Sw64VSHF (v4f32 V256LOpnd:$RA), + (v4f32 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VSHFW (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; +def : Pat<(Sw64VSHF (v4f64 V256LOpnd:$RA), + (v4f64 V256LOpnd:$RB), (i64 GPRCOpnd:$RC)), + (VSHFW (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB), + (i64 (ITOFTtmp GPRCOpnd:$RC)))>; + +def VEXTW : FForm4LVV<0x1B, 0x22>, + SIMD_3RI_SAME<"vextw", null_frag, v8i32, u5imm, FPRCOpnd, V256LOpnd>; +def VEXTF : FForm4LVV<0x1B, 0x23>, + SIMD_3RI_SAME<"vextf", null_frag, v4f32, u5imm, FPRCOpnd, V256LOpnd>; + +multiclass Vector_extract_pat { +def : Pat<(ext_vt (vector_extract (vecty V256LOpnd:$RA), Index:$Idx)), + (ext_vt (TransI (Inst (vecty V256LOpnd:$RA), Index:$Idx)))>; + +def : Pat<(ext_vt (Intr (vecty V256LOpnd:$RA), Index:$Idx)), + (ext_vt (TransI (Inst (vecty V256LOpnd:$RA), Index:$Idx)))>; +} + +defm : Vector_extract_pat; +defm : Vector_extract_pat; + +// TODO: How to Combine it with class pattern? +def : Pat<(f64 (vector_extract (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx)), + (f64 (VEXTF (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx))>; +def : Pat<(f64 (int_sw64_vextfd (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx)), + (f64 (VEXTF (v4f64 V256LOpnd:$RA), VectorIndexD:$Idx))>; +def : Pat<(f32 (vector_extract (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx)), + (f32 (COPY_TO_REGCLASS (VEXTF (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx), FPRC_lo))>; +def : Pat<(f32 (int_sw64_vextfs (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx)), + (f32 (COPY_TO_REGCLASS (VEXTF (v4f32 V256LOpnd:$RA), VectorIndexD:$Idx), FPRC_lo))>; + +class SIMD_VLOGZZ { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROD:$RA, ROD:$RB, ROD:$RC, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm # "$Imm", "\t$RA, $RB, $RC, $RD"); + + list Pattern = [(set ROD:$RD, + (z_vlog (TyNode ROD:$RA), (TyNode ROD:$RB), + (TyNode ROD:$RC), Imm:$Imm))]; +} + +def VLOGZZ : FForm4LVLogZZ<0x5>, + SIMD_VLOGZZ<"vlog", u8immHex, immZExt8Ptr, v4i64, V256LOpnd>; + +multiclass SIMD_Floating_3RR Opcode, bits<8>func, + string instr_asm, SDPatternOperator OpNode> { +def "" : FPFormV, + SIMD_3RR_SAME; + +def : Pat<(v4i64 (OpNode (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB))), + (v4i64 (!cast(NAME) V256LOpnd:$RA, V256LOpnd:$RB))>; + +def : Pat<(v4i64 (OpNode (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB))), + (v4i64 (!cast(NAME) V256LOpnd:$RA, V256LOpnd:$RB))>; +} + +defm VFCMPEQ : SIMD_Floating_3RR<0x1A, 0x8C, "vfcmpeq", setoeq>; +defm VFCMPLE : SIMD_Floating_3RR<0x1A, 0x8D, "vfcmple", setole>; +defm VFCMPLT : SIMD_Floating_3RR<0x1A, 0x8E, "vfcmplt", setolt>; +defm VFCMPUN : SIMD_Floating_3RR<0x1A, 0x8F, "vfcmpun", setuo>; + + +multiclass Vector_compare_pat { +def : Pat <(OpNode (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB)), + (Inst V256LOpnd:$RA, V256LOpnd:$RB)>; +def : Pat <(OpNode (v4f64 V256LOpnd:$RA), (v4f64 V256LOpnd:$RB)), + (Inst V256LOpnd:$RA, V256LOpnd:$RB)>; +} + +defm : Vector_compare_pat; +defm : Vector_compare_pat; +defm : Vector_compare_pat; +defm : Vector_compare_pat; + +def VCPYS : FPFormV<0x1A, 0x90>, + SIMD_3RR_SAME<"vcpys", int_sw64_vcpysd, v4f64, V256LOpnd>; +def VCPYSE : FPFormV<0x1A, 0x91>, + SIMD_3RR_SAME<"vcpyse", int_sw64_vcpysed, v4f64, V256LOpnd>; +def VCPYSN : FPFormV<0x1A, 0x92>, + SIMD_3RR_SAME<"vcpysn", int_sw64_vcpysnd, v4f64, V256LOpnd>; + +def : Pat<(int_sw64_vcpyss V256LOpnd:$RA, V256LOpnd:$RB), + (VCPYS V256LOpnd:$RA, V256LOpnd:$RB)>; +def : Pat<(int_sw64_vcpyses V256LOpnd:$RA, V256LOpnd:$RB), + 
(VCPYSE V256LOpnd:$RA, V256LOpnd:$RB)>; +def : Pat<(int_sw64_vcpysns V256LOpnd:$RA, V256LOpnd:$RB), + (VCPYSN V256LOpnd:$RA, V256LOpnd:$RB)>; + +multiclass SIMD_FMA funcS, bits<6> funcD, + string instr_asm, SDPatternOperator OpNode> { +def S : FForm4V<0x1B, funcS>, + SIMD_4RR_SAME; +def D : FForm4V<0x1B, funcD>, + SIMD_4RR_SAME; +} + +defm VMA : SIMD_FMA<0x00, 0x01, "vma", fma>; +defm VMS : SIMD_FMA<0x02, 0x03, "vms", + ThridOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; +defm VNMA : SIMD_FMA<0x04, 0x05, "vnma", + ThridOpFrag<(fma (fneg node:$LHS), node:$MHS, node:$RHS)> >; +defm VNMS : SIMD_FMA<0x06, 0x07, "vnms", + ThridOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; + +multiclass SIMD_FLOAT_SEL func, string instr_asm, + SDPatternOperator OpNode> { +def "" : FForm4V<0x1B, func>, + SIMD_4RR_SAME; + +def : Vector_3OP_SameReg_Pat(NAME)>; +} + +defm VFSELEQ : SIMD_FLOAT_SEL<0x10, "vfseleq", vfcmoveq>; +defm VFSELLT : SIMD_FLOAT_SEL<0x12, "vfsellt", vfcmovlt>; +defm VFSELLE : SIMD_FLOAT_SEL<0x13, "vfselle", vfcmovle>; + +// 简单运算指令格式: 寄存器格式 +class SIMD_3RV_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RC"; +} + +class SIMD_3RVV_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; +} + +class SIMD_3RV_TY_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RC"; +} + +class SIMD_VFCMPS_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RC"; +} + +class SIMD_2RV_R_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; +} + +class SIMD_VSETGE_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + + bit usesCustomInserter = 1; + InstrItinClass Itinerary = itin; +} + +class SIMD_VSQRT_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RB, $RC"); + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RC"; +} + +class SIMD_POPCNT_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RB, $RC"); + InstrItinClass Itinerary = itin; + + bit usesCustomInserter = 1; +} + +class SIMD_REDUCE_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RC"); + InstrItinClass Itinerary = itin; + + bit usesCustomInserter = 1; // 6A should be extend. 
+} + +// 简单运算指令格式: 立即数格式 +class SIMD_I8_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, SplatImm.OpClass:$imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $imm, $RC"); + InstrItinClass Itinerary = itin; +} + + +// 浮点复核运算指令格式 寄存器格式 +class SIMD_4RV_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RD"; +} + +class SIMD_4RV_DESC_SEL { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + + list Pattern = [(set (Vt ROD:$RD), (OpNode (Vt ROA:$RA), (Vt ROB:$RB), (Vt ROC:$RC)))]; +} + +class SIMD_4RV_DESC_VNMSS { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + + InstrItinClass Itinerary = itin; + + string Constraints = "@earlyclobber $RD"; +} + + +class SIMD_SELECT_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROC:$RC, ROB:$RB, ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RC, $RB, $RA, $RD"); + + InstrItinClass Itinerary = itin; +} + + + +class SIMD_VSETGE_I_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ImmOp:$imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $imm, $RC"); + + bit usesCustomInserter = 1; + InstrItinClass Itinerary = itin; +} + +// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be +// of that type. +def vnot_sw64 : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v8i32 immAllOnesV)))>; + +class SIMD_VBIC_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + + InstrItinClass Itinerary = itin; +} + +class SIMD_VORNOT_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + + InstrItinClass Itinerary = itin; +} + +class SIMD_COPY_DESC_BASE { + dag OutOperandList = (outs ROB:$RB); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB"); + + bit usesCustomInserter = Num; // 6A should be extend. 
+ InstrItinClass Itinerary = itin; +} + +class SIMD_COPYF_DESC_BASE { + dag OutOperandList = (outs ROB:$RB); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB"); + list Pattern = []; + InstrItinClass Itinerary = itin; +} + +class SIMD_COPYF_PSEUDO_BASE : + SIMDPseudo<(outs RCWD:$wd), (ins RCWS:$fs), + [(set RCWD:$wd, (VT (OpNode RCWS:$fs)))]> { + let usesCustomInserter = 1; +} + +class SIMD_VSHIFT_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, F4RCOpnd:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC"); + InstrItinClass Itinerary = itin; + +} + +class SIMD_VINSECTL_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RD"); + InstrItinClass Itinerary = itin; + +} + +class SIMD_INSERT_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROD:$RB, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); + InstrItinClass Itinerary = itin; + + bit usesCustomInserter = Num; +} + +class SIMD_EXTRACT_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RD"); + + bit usesCustomInserter = Num; + InstrItinClass Itinerary = itin; +} + +class SIMD_MIX_DESC_BASE { + + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROA:$RA, ROB:$RB, ROC:$RC); + string AsmString = !strconcat(instr_asm, "\t$RA, $RB, $RC, $RD"); + + bit usesCustomInserter = Num; + InstrItinClass Itinerary = itin; +} + +class VADDWC_DESC : SIMD_3RVV_DESC_BASE<"vaddw", addv8i32, V256LOpnd>, IsCommutable; + +class SIMD_2RV_SRi_DESC_BASE { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RC"); + InstrItinClass Itinerary = itin; +} +class VSUMW_DESC : SIMD_REDUCE_DESC_BASE<"vsumw", vecreduce_add, v8i32, GPRCOpnd, V256LOpnd>; +class VSUML_DESC : SIMD_REDUCE_DESC_BASE<"vsuml", vecreduce_add, v4i64, GPRCOpnd, V256LOpnd>; + +class VADDWC_ENC : FPFormV<0x1A, 0x00>; + +class VSUMW_ENC : FPFormV_2RV<0x1A, 0x47>; +class VSUML_ENC : FPFormV_2RV<0x1A, 0x48>; + +//--------------------------- Instruction defs ----------------------------------------// + +class SIMD_VMAX_VMINfunc, string instr_asm, SDPatternOperator OpNode, + ValueType vt, RegisterOperand RO> + : FPFormV<0x1A, func>, SIMD_3RR_SAME, IsCommutable; + +def VMAXB : SIMD_VMAX_VMIN<0x1E, "vmaxb", vmax, v32i8, V256LOpnd>; +def VMINB : SIMD_VMAX_VMIN<0x1F, "vminb", vmin, v32i8, V256LOpnd>; +def VMAXH : SIMD_VMAX_VMIN<0x50, "vmaxh", vmax, v16i16, V256LOpnd>; +def VMINH : SIMD_VMAX_VMIN<0x51, "vminh", vmin, v16i16, V256LOpnd>; +def VMAXW : SIMD_VMAX_VMIN<0x52, "vmaxw", vmax, v8i32, V256LOpnd>; +def VMINW : SIMD_VMAX_VMIN<0x53, "vminw", vmin, v8i32, V256LOpnd>; +def VMAXL : SIMD_VMAX_VMIN<0x54, "vmaxl", vmax, v4i64, V256LOpnd>; +def VMINL : SIMD_VMAX_VMIN<0x55, "vminl", vmin, v4i64, V256LOpnd>; + +def VUMAXB : SIMD_VMAX_VMIN<0x56, "vumaxb", vumax, v32i8, V256LOpnd>; +def VUMINB : SIMD_VMAX_VMIN<0x57, "vuminb", vumin, v32i8, V256LOpnd>; +def VUMAXH : SIMD_VMAX_VMIN<0x58, "vumaxh", vumax, v16i16, V256LOpnd>; +def VUMINH : SIMD_VMAX_VMIN<0x59, "vuminh", vumin, v16i16, V256LOpnd>; +def VUMAXW : SIMD_VMAX_VMIN<0x5A, "vumaxw", vumax, v8i32, V256LOpnd>; +def VUMINW : SIMD_VMAX_VMIN<0x5B, "vuminw", vumin, v8i32, V256LOpnd>; +def VUMAXL : 
SIMD_VMAX_VMIN<0x5C, "vumaxl", vumax, v4i64, V256LOpnd>; +def VUMINL : SIMD_VMAX_VMIN<0x5D, "vuminl", vumin, v4i64, V256LOpnd>; + +def VMAXS : SIMD_VMAX_VMIN<0xAC, "vmaxs", vmaxf, v4f32, V256LOpnd>; +def VMINS : SIMD_VMAX_VMIN<0xAD, "vmins", vminf, v4f32, V256LOpnd>; +def VMAXD : SIMD_VMAX_VMIN<0xAE, "vmaxd", vmaxf, v4f64, V256LOpnd>; +def VMIND : SIMD_VMAX_VMIN<0xAF, "vmind", vminf, v4f64, V256LOpnd>; + + +// For VSELXX pattern match with imm operand +multiclass SIMD_VSELXX Opcode, bits<6>func, + string instr_asm, SDPatternOperator OpNode, ValueType Vt, + Operand immtype, RegisterOperand RO, + SDPatternOperator IOp = null_frag, + ComplexPattern cpx = ComplexImmPat> { + + def rr : FForm4V, SIMD_4RV_DESC_SEL; + + def ri : FForm4_VSELi, SIMD_4RI_BASE; + + def : Pat<(Vt (OpNode (Vt RO:$RA), (Vt RO:$RB), (Vt (Sw64VBroadCast (i64 cpx:$Imm))))), + (!cast(NAME # ri) (Vt RO:$RA), (Vt RO:$RB), $Imm)>; +} + +defm VSELEQW : SIMD_VSELXX<0x1B, 0x18, "vseleqw", vseleqw, v8i32, u5imm, V256LOpnd>; +defm VSELLBCW : SIMD_VSELXX<0x1B, 0x19, "vsellbcw", vsellbcw, v8i32, u5imm, V256LOpnd>; +defm VSELLTW : SIMD_VSELXX<0x1B, 0x1A, "vselltw", vselltw, v8i32, u5imm, V256LOpnd>; +defm VSELLEW : SIMD_VSELXX<0x1B, 0x1B, "vsellew", vsellew, v8i32, u5imm, V256LOpnd>; + +class SIMD_ARITH_FLOAT Opcode, bits<8>func, + string instr_asm, SDPatternOperator OpNode, + ValueType Vt, RegisterOperand RO> : + FPFormV, SIMD_3RR_SAME; + +def VADDS : SIMD_ARITH_FLOAT<0x1A, 0x80, "vadds", fadd, v4f32, V256LOpnd>; +def VADDD : SIMD_ARITH_FLOAT<0x1A, 0x81, "vaddd", fadd, v4f64, V256LOpnd>; +def VSUBS : SIMD_ARITH_FLOAT<0x1A, 0x82, "vsubs", fsub, v4f32, V256LOpnd>; +def VSUBD : SIMD_ARITH_FLOAT<0x1A, 0x83, "vsubd", fsub, v4f64, V256LOpnd>; +def VMULS : SIMD_ARITH_FLOAT<0x1A, 0x84, "vmuls", fmul, v4f32, V256LOpnd>; +def VMULD : SIMD_ARITH_FLOAT<0x1A, 0x85, "vmuld", fmul, v4f64, V256LOpnd>; +def VDIVS : SIMD_ARITH_FLOAT<0x1A, 0x86, "vdivs", fdiv, v4f32, V256LOpnd>; +def VDIVD : SIMD_ARITH_FLOAT<0x1A, 0x87, "vdivd", fdiv, v4f64, V256LOpnd>; + + +def vsqrt_sw : SDNode<"Sw64ISD::VSQRT", SDT_VSQRT>; + +class SIMD_VSQRT { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROB:$RB); + string AsmString = !strconcat(instr_asm, "\t$RB, $RC"); + list Pattern = [(set (Vt ROC:$RC), (OpNode (Vt ROB:$RB)))]; +} + +def VSQRTS : FPFormV_2RV1<0x1A, 0x88>, SIMD_VSQRT<"vsqrts", vsqrt_sw, v4f32, V256LOpnd>; +def VSQRTD : FPFormV_2RV1<0x1A, 0x89>, SIMD_VSQRT<"vsqrtd", vsqrt_sw, v4f64, V256LOpnd>; + +def Sw64VFREC : SDNode<"Sw64ISD::VFREC", SDT_ZVecFREC>; + +def VFRECS : FPFormV_2RV1<0x1A, 0xAA>, SIMD_VSQRT<"vfrecs", Sw64VFREC, v4f32, V256LOpnd>; +def VFRECD : FPFormV_2RV1<0x1A, 0xAB>, SIMD_VSQRT<"vfrecd", Sw64VFREC, v4f64, V256LOpnd>; + +class SIMD_VSUMF { + dag OutOperandList = (outs ROC:$RC); + dag InOperandList = (ins ROA:$RA); + string AsmString = !strconcat(instr_asm, "\t$RA, $RC"); + list Pattern = [(set (TyC ROC:$RC), (OpNode (TyA ROA:$RA)))]; +} + +def VFCVTSD : FPFormV_2RV<0x1A, 0x95>, SIMD_VSUMF<"vfcvtsd", Sw64VFCVTSD, v4f64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDS : FPFormV_2RV<0x1A, 0x96>, SIMD_VSUMF<"vfcvtds", Sw64VFCVTDS, v4f32, v4f64, V256LOpnd, V256LOpnd>; +def VFCVTLS : FPFormV_2RV<0x1A, 0x99>, SIMD_VSUMF<"vfcvtls", Sw64VFCVTLS, v4f32, v4i64, V256LOpnd, V256LOpnd>; +def VFCVTLD : FPFormV_2RV<0x1A, 0x9A>, SIMD_VSUMF<"vfcvtld", Sw64VFCVTLD, v4f64, v4i64, V256LOpnd, V256LOpnd>; + +class SIMD_FCVTSH_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROD:$RA, ROD:$RB, ImmOp:$Imm); + string AsmString = 
!strconcat(instr_asm, "\t$RA, $RB, $Imm, $RD"); + list Pattern = [(set (v4f64 ROD:$RD), (OpNode (v4f32 ROD:$RA), (v4f32 ROD:$RB), Imm:$Imm))]; +} + +class SIMD_FCVTHS_DESC_BASE { + dag OutOperandList = (outs ROD:$RD); + dag InOperandList = (ins ROD:$RA, ImmOp:$Imm); + string AsmString = !strconcat(instr_asm, "\t$RA, $Imm, $RD"); + list Pattern = [(set (v4f32 ROD:$RD), (OpNode (v4f64 ROD:$RA), Imm:$Imm))]; +} + +def VFCVTSH : FForm4LV<0x1B, 0x35>, SIMD_FCVTSH_DESC_BASE<"vfcvtsh", Sw64VFCVTSH, uimm5, immZExt5Ptr, V256LOpnd>; +def VFCVTHS : FForm4LV1<0x1B, 0x36>, SIMD_FCVTHS_DESC_BASE<"vfcvths", Sw64VFCVTHS, uimm5, immZExt5Ptr, V256LOpnd>; + +def VFCVTDL : FPFormV_2RV<0x1A, 0x9B>, SIMD_VSUMF<"vfcvtdl", Sw64VFCVTDL, v4i64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDLG : FPFormV_2RV<0x1A, 0x9C>, SIMD_VSUMF<"vfcvtdl_g", Sw64VFCVTDLG, v4i64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDLP : FPFormV_2RV<0x1A, 0x9D>, SIMD_VSUMF<"vfcvtdl_p", Sw64VFCVTDLP, v4i64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDLZ : FPFormV_2RV<0x1A, 0x9E>, SIMD_VSUMF<"vfcvtdl_z", Sw64VFCVTDLZ, v4i64, v4f32, V256LOpnd, V256LOpnd>; +def VFCVTDLN : FPFormV_2RV<0x1A, 0x9F>, SIMD_VSUMF<"vfcvtdl_n", Sw64VFCVTDLN, v4i64, v4f32, V256LOpnd, V256LOpnd>; + +def VFRIS : FPFormV_2RV1<0x1A, 0xA0>, SIMD_VSQRT<"vfris", Sw64VFRIS, v4f32, V256LOpnd>; +def VFRISG : FPFormV_2RV1<0x1A, 0xA1>, SIMD_VSQRT<"vfris_g", Sw64VFRISG, v4f32, V256LOpnd>; +def VFRISP : FPFormV_2RV1<0x1A, 0xA2>, SIMD_VSQRT<"vfris_p", Sw64VFRISP, v4f32, V256LOpnd>; +def VFRISZ : FPFormV_2RV1<0x1A, 0xA3>, SIMD_VSQRT<"vfris_z", Sw64VFRISZ, v4f32, V256LOpnd>; +def VFRISN : FPFormV_2RV1<0x1A, 0xA4>, SIMD_VSQRT<"vfris_n", Sw64VFRISN, v4f32, V256LOpnd>; +def VFRID : FPFormV_2RV1<0x1A, 0xA5>, SIMD_VSQRT<"vfrid", Sw64VFRID, v4f64, V256LOpnd>; +def VFRIDG : FPFormV_2RV1<0x1A, 0xA6>, SIMD_VSQRT<"vfrid_g", Sw64VFRIDG, v4f64, V256LOpnd>; +def VFRIDP : FPFormV_2RV1<0x1A, 0xA7>, SIMD_VSQRT<"vfrid_p", Sw64VFRIDP, v4f64, V256LOpnd>; +def VFRIDZ : FPFormV_2RV1<0x1A, 0xA8>, SIMD_VSQRT<"vfrid_z", Sw64VFRIDZ, v4f64, V256LOpnd>; +def VFRIDN : FPFormV_2RV1<0x1A, 0xA9>, SIMD_VSQRT<"vfrid_n", Sw64VFRIDN, v4f64, V256LOpnd>; + +def vsumf : SDNode<"Sw64ISD::VSUMF", SDT_VSUMF>; + +def VSUMS : FPFormV_2RV<0x1A, 0x93>, SIMD_VSUMF<"vsums", vsumf, f32, v4f32, F4RCOpnd, V256LOpnd>; +def VSUMD : FPFormV_2RV<0x1A, 0x94>, SIMD_VSUMF<"vsumd", vsumf, f64, v4f64, F8RCOpnd, V256LOpnd>; + +// Patterns. 
+class SIMDPat pred = [HasSIMD]> : + Pat, Requires; + +// ------------------------ +class Sw64Pat : Pat; + +// TODO: Add support for FPOpFusion::Standard +def AllowFPOpFusion : Predicate<"TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast">; + +class ASE_SIMD { + list ASEPredicate = [HasSIMD]; +} + + +class FPOP_FUSION_FAST { + list AdditionalPredicates = [AllowFPOpFusion]; +} + + +// Additional VNMSX patterns: -a*b + c == -(a*b - c) +multiclass Vecotr_fma_pat { +def : Vector_3OP_SameReg_Pat(Inst#S)>; + +def : Vector_3OP_SameReg_Pat(Inst#D)>; +} + +defm : Vecotr_fma_pat, "VNMA">; +defm : Vecotr_fma_pat, "VNMA">; + +def : Pat<(int_sw64_vnmsd V256LOpnd:$RA, V256LOpnd:$RB, V256LOpnd:$RC), + (VNMSD $RA, $RB, $RC)>; + +def : Pat<(fneg v4f64:$RA), (VCPYSN $RA, $RA)>; +def : Pat<(fneg v4f32:$RA), (VCPYSN $RA, $RA)>; + +def :Pat<(v4f32 (fadd (v4f32 V256LOpnd:$RA), (v4f32 V256LOpnd:$RB))), + (VADDD V256LOpnd:$RA, V256LOpnd:$RB)>; + +class bitconvert_pat + : Pat<(dstTy (bitconvert (srcTy V256LOpnd:$RA))), (dstTy V256LOpnd:$RA)>; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; +def : bitconvert_pat; + diff --git a/llvm/lib/Target/Sw64/Sw64LLRP.cpp b/llvm/lib/Target/Sw64/Sw64LLRP.cpp new file mode 100644 index 000000000000..d7abbd8204a9 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64LLRP.cpp @@ -0,0 +1,475 @@ +//===-- Sw64LLRP.cpp - Sw64 Load Load Replay Trap elimination pass. -- --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Here we check for potential replay traps introduced by the spiller +// We also align some branch targets if we can do so for free. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sw_64-nops" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "Sw64.h" +#include "Sw64FrameLowering.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; +cl::opt Sw64Mieee("mieee", cl::desc("Support the IEEE754"), + cl::init(true)); + +cl::opt Sw64DeleteNop("sw64-delete-nop", cl::desc("Delete NOP"), + cl::init(true)); + +STATISTIC(nopintro, "Number of nops inserted"); +STATISTIC(nopalign, "Number of nops inserted for alignment"); +namespace llvm { +cl::opt AlignAll("sw_64-align-all", cl::Hidden, + cl::desc("Align all blocks")); + +struct Sw64LLRPPass : public MachineFunctionPass { + // Target machine description which we query for reg. names, data + // layout, etc. 
+ // + Sw64TargetMachine &TM; + + static char ID; + Sw64LLRPPass(Sw64TargetMachine &tm) : MachineFunctionPass(ID), TM(tm) {} + + StringRef getPassName() const { return "Sw64 NOP inserter"; } + + bool runOnMachineFunction(MachineFunction &F) { + const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo(); + bool flag = false; // hasJSR ? + bool Changed = false; + MachineInstr *prev[3] = {0, 0, 0}; + unsigned count = 0; + + DebugLoc dl; + const Sw64Subtarget &Subtarget = F.getSubtarget(); + int curgpdist = Subtarget.getCurgpdist(); + + SmallVector Ops; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + ++MII; + if (MII == MIE) + break; + MachineInstr *MINext = &*MII; + if (MINext->getOpcode() == Sw64::FILLCS || + MINext->getOpcode() == Sw64::FILLDE) { + if (MI->getOpcode() == Sw64::LDA && + (MI->getOperand(1).getImm() == MINext->getOperand(0).getImm())) { + bool isRead = false; + for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { + MachineInstr *Mtest = &*M1; + if (Mtest->getOpcode() == Sw64::LDA || + Mtest->getOpcode() == Sw64::LDAH || + Mtest->getOpcode() == Sw64::LDL || + Mtest->getOpcode() == Sw64::LDW || + Mtest->getOpcode() == Sw64::LDHU || + Mtest->getOpcode() == Sw64::LDBU) { + if (Mtest->getOperand(0).getReg() == + MI->getOperand(0).getReg() && + !isRead) { + Ops.push_back(MI); + break; + } + } + if (Mtest->getOpcode() == Sw64::STL || + Mtest->getOpcode() == Sw64::STW || + Mtest->getOpcode() == Sw64::STH || + Mtest->getOpcode() == Sw64::STB) { + if (Mtest->getOperand(2).getReg() == + MI->getOperand(0).getReg() || + Mtest->getOperand(0).getReg() == + MI->getOperand(0).getReg()) { + isRead = true; + } + } + ++M1; + } + } + } + } + for (auto *PrefMI : Ops) + PrefMI->eraseFromParent(); + Ops.clear(); + } + + // Remove all duplicate prefetch instr + SmallVector FILL; + int Dul; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + ++MII; + Dul = 1; + if (MII == MIE) + break; + if (MI->getOpcode() == Sw64::FILLCS || + MI->getOpcode() == Sw64::FILLCS_E || + MI->getOpcode() == Sw64::FILLDE || + MI->getOpcode() == Sw64::FILLDE_E || + MI->getOpcode() == Sw64::S_FILLDE || + MI->getOpcode() == Sw64::S_FILLCS) { + if (!FILL.empty()) { + for (auto *PrefMI : FILL) { + if (PrefMI->getOperand(1).getReg() == + MI->getOperand(1).getReg()) { + Dul = 2; + break; + } + } + } + if (Dul == 1) { + for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { + MachineInstr *Mtest = &*M1; + if (Mtest->getOpcode() == Sw64::FILLCS || + Mtest->getOpcode() == Sw64::FILLCS_E || + Mtest->getOpcode() == Sw64::FILLDE || + Mtest->getOpcode() == Sw64::FILLDE_E || + Mtest->getOpcode() == Sw64::S_FILLCS || + Mtest->getOpcode() == Sw64::S_FILLDE) { + if (Mtest->getOperand(1).getReg() == + MI->getOperand(1).getReg()) { + FILL.push_back(Mtest); + } + } + ++M1; + } + } + } + } + if (!FILL.empty()) { + for (auto *PrefMI1 : FILL) + PrefMI1->eraseFromParent(); + } + FILL.clear(); + } + + // If read and write, use fillde + int N = 0; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE;) { + MachineBasicBlock &MBB = *FI; + ++FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + ++MII; + 
if (MII == MIE) + break; + if (MI->getOpcode() == Sw64::FILLCS || + MI->getOpcode() == Sw64::S_FILLCS) { + for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { + MachineInstr *Mtest = &*M1; + if (Mtest->getOpcode() == Sw64::LDA || + Mtest->getOpcode() == Sw64::LDAH || + Mtest->getOpcode() == Sw64::LDL || + Mtest->getOpcode() == Sw64::LDW || + Mtest->getOpcode() == Sw64::LDHU || + Mtest->getOpcode() == Sw64::LDBU) { + if (Mtest->getOperand(0).getReg() == MI->getOperand(1).getReg()) { + N = 1; + } + } + ++M1; + } + if (FI == FE) + break; + MachineBasicBlock &MBB1 = *FI; + for (MachineBasicBlock::iterator MII1 = MBB1.begin(), + MIE1 = MBB1.end(); + MII1 != MIE1;) { + MachineInstr *MI1 = &*MII1; + if (MI1->getOpcode() == Sw64::STL || + MI1->getOpcode() == Sw64::STW || + MI1->getOpcode() == Sw64::STB || + MI1->getOpcode() == Sw64::STH) { + if (MI1->getOperand(2).getReg() == MI->getOperand(1).getReg() && + N == 0) { + if (MI->getOpcode() == Sw64::FILLCS) + MI->setDesc(TII->get(Sw64::FILLDE)); + if (MI->getOpcode() == Sw64::S_FILLCS) + MI->setDesc(TII->get(Sw64::S_FILLDE)); + N = 0; + } + } + ++MII1; + } + } + } + } + + const TargetRegisterInfo *TRI = F.getSubtarget().getRegisterInfo(); + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + ++MII; + if (MII == MIE) + break; + if (MI->getOpcode() == Sw64::FILLCS || + MI->getOpcode() == Sw64::FILLDE) { + int N = 0; + int isDul = 0; + for (MachineBasicBlock::iterator MIT = MII; MIT != MIE;) { + MachineInstr *MITT = &*MIT; + if (MITT->readsRegister(MI->getOperand(1).getReg(), TRI)) { + N++; + } + if (MITT->getOpcode() == Sw64::FILLCS || + MITT->getOpcode() == Sw64::FILLDE || + MITT->getOpcode() == Sw64::FILLCS_E || + MITT->getOpcode() == Sw64::FILLDE_E) + isDul++; + ++MIT; + } + if (N == 1 && isDul > 0) { + if (MI->getOpcode() == Sw64::FILLCS) + MI->setDesc(TII->get(Sw64::FILLCS_E)); + if (MI->getOpcode() == Sw64::FILLDE) { + MI->setDesc(TII->get(Sw64::FILLDE_E)); + } + } + } + } + } + + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr *MI = &*MII; + if (MI->getOpcode() == Sw64::FILLCS || + MI->getOpcode() == Sw64::S_FILLCS) { + for (MachineBasicBlock::iterator M1 = MII; M1 != MIE;) { + MachineInstr *Mtest = &*M1; + if (Mtest->getOpcode() == Sw64::STL || + Mtest->getOpcode() == Sw64::STW || + Mtest->getOpcode() == Sw64::STH || + Mtest->getOpcode() == Sw64::STB) { + if (Mtest->getOperand(2).getReg() == MI->getOperand(1).getReg()) { + if (MI->getOpcode() == Sw64::FILLCS) + MI->setDesc(TII->get(Sw64::FILLDE)); + if (MI->getOpcode() == Sw64::S_FILLCS) + MI->setDesc(TII->get(Sw64::S_FILLDE)); + } + } + ++M1; + } + } + ++MII; + } + } + + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + + int count = 0; + bool isLable = 0; + if (MBB.getBasicBlock() && MBB.getBasicBlock()->isLandingPad()) { + MachineBasicBlock::iterator MBBI = MBB.begin(); + for (MBBI = MBB.begin(); MBBI != MBB.end(); ++MBBI, ++count) { + if (count == 0 && MBBI->isLabel()) + isLable = true; + if (count == 1 && isLable) { + BuildMI(MBB, MBBI, dl, TII->get(Sw64::MOVaddrPCGp)) + .addGlobalAddress(&(F.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R26); + isLable = false; + } + } 
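// Both the landing-pad case above and the JSR case below deal with the same
// problem: $gp ($29) has no guaranteed value after a call or on an exception
// edge, so it has to be rematerialized from the return address in $26.
// MOVaddrPCGp is the pseudo that does this; each site gets a fresh
// ++curgpdist sequence number so the assembler can pair the high/low !gpdisp
// relocations of one reload without mixing up neighbouring sites. The
// expansion is assumed to look roughly like (not verbatim from this patch):
//   ldah $29, 0($26)   !gpdisp!N
//   lda  $29, 0($29)   !gpdisp!N
// With Sw64Mieee set, a NOP is also kept next to each JSR unless
// Sw64DeleteNop is enabled.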
+ if (count == 1 && isLable) { + BuildMI(MBB, MBBI, dl, TII->get(Sw64::MOVaddrPCGp)) + .addGlobalAddress(&(F.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R26); + isLable = false; + } + } + + MachineBasicBlock::iterator I; + for (I = MBB.begin(); I != MBB.end(); ++I) { + if (flag) { + BuildMI(MBB, I, dl, TII->get(Sw64::MOVaddrPCGp)) + .addGlobalAddress(&(F.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R26); + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, I, dl, TII->get(Sw64::NOP)); + } + flag = false; + } + if (I->getOpcode() == Sw64::JSR || + I->getOpcode() == Sw64::PseudoCallIndirect) { + dl = MBB.findDebugLoc(I); + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, I, dl, TII->get(Sw64::NOP)); + } + flag = true; + } + } + if (flag) { + BuildMI(MBB, I, dl, TII->get(Sw64::MOVaddrPCGp)) + .addGlobalAddress(&(F.getFunction())) + .addImm(++curgpdist) + .addReg(Sw64::R26); + if (Sw64Mieee) { + if (!Sw64DeleteNop) + BuildMI(MBB, I, dl, TII->get(Sw64::NOP)); + } + flag = false; + } + } + + if (!Sw64DeleteNop) { + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; + ++FI) { + MachineBasicBlock &MBB = *FI; + bool ub = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { + if (count % 4 == 0) + prev[0] = prev[1] = prev[2] = 0; // Slots cleared at fetch boundary + ++count; + MachineInstr *MI = &(*I); + I++; + switch (MI->getOpcode()) { + case Sw64::LDL: + case Sw64::LDW: + case Sw64::LDHU: + case Sw64::LDBU: + case Sw64::LDD: + case Sw64::LDS: + case Sw64::STL: + case Sw64::STW: + case Sw64::STH: + case Sw64::STB: + case Sw64::STD: + case Sw64::STS: + dl = MBB.findDebugLoc(MI); + if (MI->getOperand(2).getReg() == Sw64::R30) { + if (prev[0] && + prev[0]->getOperand(2).getReg() == + MI->getOperand(2).getReg() && + prev[0]->getOperand(1).getImm() == + MI->getOperand(1).getImm()) { + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + Changed = true; + nopintro += 1; + count += 1; + } else if (prev[1] && + prev[1]->getOperand(2).getReg() == + MI->getOperand(2).getReg() && + prev[1]->getOperand(1).getImm() == + MI->getOperand(1).getImm()) { + prev[0] = prev[2]; + prev[1] = prev[2] = 0; + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + Changed = true; + nopintro += 2; + count += 2; + } else if (prev[2] && + prev[2]->getOperand(2).getReg() == + MI->getOperand(2).getReg() && + prev[2]->getOperand(1).getImm() == + MI->getOperand(1).getImm()) { + prev[0] = prev[1] = prev[2] = 0; + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + BuildMI(MBB, MI, dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + Changed = true; + nopintro += 3; + count += 3; + } + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = MI; + break; + } + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + break; + case Sw64::ALTENT: + case Sw64::MEMLABEL: + case Sw64::PCLABEL: + --count; + break; + case Sw64::BR: + case Sw64::PseudoBR: + case Sw64::JMP: + ub = true; + // fall through + default: + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + break; + } + } + if (ub || AlignAll) { + // we can align stuff for free at this point + 
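// count tracks how many instructions have been emitted since the last
// 4-instruction fetch boundary (the prev[] conflict slots are likewise
// cleared whenever count % 4 == 0). If the block ends in an unconditional
// branch, or AlignAll is set, the tail is padded with BISr $31,$31,$31 (the
// canonical integer NOP) until count is a multiple of 4 again, so the next
// block starts fetch-aligned. A block whose count ends at 6, for example,
// receives two padding NOPs.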
while (count % 4) { + BuildMI(MBB, MBB.end(), dl, TII->get(Sw64::BISr), Sw64::R31) + .addReg(Sw64::R31) + .addReg(Sw64::R31); + ++count; + ++nopalign; + prev[0] = prev[1]; + prev[1] = prev[2]; + prev[2] = 0; + } + } + } + } + return Changed; + } +}; +char Sw64LLRPPass::ID = 0; +} // namespace llvm + +FunctionPass *llvm::createSw64LLRPPass(Sw64TargetMachine &tm) { + return new Sw64LLRPPass(tm); +} diff --git a/llvm/lib/Target/Sw64/Sw64MCInstLower.cpp b/llvm/lib/Target/Sw64/Sw64MCInstLower.cpp new file mode 100644 index 000000000000..8a839ad57bb9 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64MCInstLower.cpp @@ -0,0 +1,281 @@ +//===-- Sw64MCInstLower.cpp - Convert Sw64 MachineInstr to MCInst -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Sw64 MachineInstrs to their +// corresponding MCInst records. +// +//===----------------------------------------------------------------------===// +#include "Sw64MCInstLower.h" +#include "MCTargetDesc/Sw64BaseInfo.h" +#include "MCTargetDesc/Sw64MCExpr.h" +#include "Sw64.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" + +using namespace llvm; + +#include "Sw64GenInstrInfo.inc" + +namespace llvm { +struct Sw64InstrTable { + MCInstrDesc Insts[1000]; +}; +extern const Sw64InstrTable Sw64Descs; +} // namespace llvm + +Sw64MCInstLower::Sw64MCInstLower(class AsmPrinter &asmprinter) + : Printer(asmprinter) {} + +void Sw64MCInstLower::Initialize(MCContext *C) { Ctx = C; } + +static bool lowerLitUseMOp(const MachineOperand &MO, + Sw64MCExpr::Sw64ExprKind &Kind) { + Sw64MCExpr::Sw64ExprKind TargetKind = Sw64MCExpr::MEK_None; + unsigned flags = MO.getTargetFlags(); + if (flags & Sw64II::MO_LITERAL && flags & Sw64II::MO_LITERAL_BASE) { + TargetKind = Sw64MCExpr::MEK_LITUSE_BASE; + } else if (flags & Sw64II::MO_HINT && flags & Sw64II::MO_LITUSE) { + TargetKind = Sw64MCExpr::MEK_LITUSE_JSRDIRECT; + } else + return false; + + Kind = TargetKind; + return true; +} + +MCOperand Sw64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, + unsigned Offset) const { + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + Sw64MCExpr::Sw64ExprKind TargetKind = Sw64MCExpr::MEK_None; + const MCSymbol *Symbol; + + switch (MO.getTargetFlags()) { + default: + if (lowerLitUseMOp(MO, TargetKind)) + break; + llvm_unreachable("Invalid target flag!"); + case Sw64II::MO_NO_FLAG: + TargetKind = Sw64MCExpr::MEK_None; + break; + case Sw64II::MO_GPDISP_HI: + TargetKind = Sw64MCExpr::MEK_GPDISP_HI16; + break; + case Sw64II::MO_GPDISP_LO: + TargetKind = Sw64MCExpr::MEK_GPDISP_LO16; + break; + case Sw64II::MO_GPREL_HI: + TargetKind = Sw64MCExpr::MEK_GPREL_HI16; + break; + case Sw64II::MO_GPREL_LO: + TargetKind = Sw64MCExpr::MEK_GPREL_LO16; + break; + case Sw64II::MO_ABS_LO: + case Sw64II::MO_LITERAL: + TargetKind = Sw64MCExpr::MEK_ELF_LITERAL; + break; + case Sw64II::MO_LITERAL_GOT: + TargetKind = Sw64MCExpr::MEK_ELF_LITERAL_GOT; + break; + case Sw64II::MO_TPREL_HI: + TargetKind = Sw64MCExpr::MEK_TPREL_HI16; + break; + case Sw64II::MO_TPREL_LO: + 
TargetKind = Sw64MCExpr::MEK_TPREL_LO16; + break; + case Sw64II::MO_TLSGD: + TargetKind = Sw64MCExpr::MEK_TLSGD; + break; + case Sw64II::MO_TLSLDM: + TargetKind = Sw64MCExpr::MEK_TLSLDM; + break; + case Sw64II::MO_GOTTPREL: + TargetKind = Sw64MCExpr::MEK_GOTTPREL16; + break; + case Sw64II::MO_DTPREL_HI: + TargetKind = Sw64MCExpr::MEK_DTPREL_HI16; + break; + case Sw64II::MO_DTPREL_LO: + TargetKind = Sw64MCExpr::MEK_DTPREL_LO16; + break; + case Sw64II::MO_HINT: + TargetKind = Sw64MCExpr::MEK_HINT; + } + + switch (MOTy) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + case MachineOperand::MO_GlobalAddress: + Symbol = Printer.getSymbol(MO.getGlobal()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_BlockAddress: + Symbol = Printer.GetBlockAddressSymbol(MO.getBlockAddress()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_ExternalSymbol: + Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_JumpTableIndex: + Symbol = Printer.GetJTISymbol(MO.getIndex()); + break; + case MachineOperand::MO_ConstantPoolIndex: + Symbol = Printer.GetCPISymbol(MO.getIndex()); + Offset += MO.getOffset(); + break; + default: + llvm_unreachable(""); + } + + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); + + if (Offset) { + // Assume offset is never negative. + assert(Offset > 0); + + Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx), + *Ctx); + } + + if (TargetKind != Sw64MCExpr::MEK_None) + Expr = Sw64MCExpr::create(TargetKind, Expr, *Ctx); + + return MCOperand::createExpr(Expr); +} + +MCOperand Sw64MCInstLower::LowerOperand(const MachineOperand &MO, + unsigned offset) const { + MachineOperandType MOTy = MO.getType(); + + switch (MOTy) { + default: + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. 
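// Implicit register operands only describe dataflow for the register
// allocator and the scheduler (for example a call's implicit def of the
// return-address register $26); they are not part of the printed or encoded
// instruction, so copying them into the MCInst would produce malformed
// output.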
+ if (MO.isImplicit()) + break; + return MCOperand::createReg(MO.getReg()); + case MachineOperand::MO_Immediate: + return MCOperand::createImm(MO.getImm() + offset); + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand(MO, MOTy, offset); + case MachineOperand::MO_RegisterMask: + break; + } + + return MCOperand(); +} + +void Sw64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp = LowerOperand(MO); + + if (MCOp.isValid()) + OutMI.addOperand(MCOp); + } +} + +static MCOperand lowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, unsigned Offset, + const AsmPrinter &AP) { + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + Sw64MCExpr::Sw64ExprKind TargetKind = Sw64MCExpr::MEK_None; + const MCSymbol *Symbol; + MCContext &Ctx = AP.OutContext; + + switch (MOTy) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + case MachineOperand::MO_GlobalAddress: + Symbol = AP.getSymbol(MO.getGlobal()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_BlockAddress: + Symbol = AP.GetBlockAddressSymbol(MO.getBlockAddress()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_ExternalSymbol: + Symbol = AP.GetExternalSymbolSymbol(MO.getSymbolName()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_JumpTableIndex: + Symbol = AP.GetJTISymbol(MO.getIndex()); + break; + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AP.GetCPISymbol(MO.getIndex()); + Offset += MO.getOffset(); + break; + default: + llvm_unreachable(""); + } + + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, Ctx); + + if (Offset) { + // Assume offset is never negative. + assert(Offset > 0); + + Expr = + MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); + } + + if (TargetKind != Sw64MCExpr::MEK_None) + Expr = Sw64MCExpr::create(TargetKind, Expr, Ctx); + + return MCOperand::createExpr(Expr); +} + +bool llvm::LowerSw64MachineOperandToMCOperand(const MachineOperand &MO, + MCOperand &MCOp, + const AsmPrinter &AP) { + switch (MO.getType()) { + default: + report_fatal_error("LowerSw64MachineInstrToMCInst: unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + return false; + MCOp = MCOperand::createReg(MO.getReg()); + break; + case MachineOperand::MO_RegisterMask: + // Regmasks are like implicit defs. 
+ return false; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + return false; + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + MCOp = lowerSymbolOperand(MO, MO.getType(), 0, AP); + return false; + } + return true; +} diff --git a/llvm/lib/Target/Sw64/Sw64MCInstLower.h b/llvm/lib/Target/Sw64/Sw64MCInstLower.h new file mode 100644 index 000000000000..7a8dfee7bb0b --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64MCInstLower.h @@ -0,0 +1,44 @@ +//===-- Sw64MCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64MCINSTLOWER_H +#define LLVM_LIB_TARGET_SW64_SW64MCINSTLOWER_H +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class MCContext; +class MCInst; +class MCOperand; +class MachineInstr; +class MachineFunction; +class Mangler; +class AsmPrinter; + +typedef MachineOperand::MachineOperandType MachineOperandType; +// This class is used to lower an MachineInstr into an MCInst. +class LLVM_LIBRARY_VISIBILITY Sw64MCInstLower { + MCContext *Ctx; + AsmPrinter &Printer; + +public: + Sw64MCInstLower(class AsmPrinter &asmprinter); + void Initialize(MCContext *C); + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + MCOperand LowerOperand(const MachineOperand &MO, unsigned offset = 0) const; + + void lowerMemory(const MachineInstr *MI, MCInst &OutMI) const; + +private: + MCOperand LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, unsigned Offset) const; +}; +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp new file mode 100644 index 000000000000..54a53e2bc589 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.cpp @@ -0,0 +1,33 @@ +//===-- Sw64MachineFunctionInfo.cpp - Sw64 machine function info --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64MachineFunctionInfo.h" +#include "Sw64InstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +void Sw64MachineFunctionInfo::anchor() {} + +bool Sw64MachineFunctionInfo::isLargeFrame(const MachineFunction &MF) const { + if (CachedEStackSize == -1) { + CachedEStackSize = MF.getFrameInfo().estimateStackSize(MF); + } + // isLargeFrame() is used when deciding if spill slots should be added to + // allow eliminateFrameIndex() to scavenge registers. + // This is only required when there is no FP and offsets are greater than + // ~256KB (~64Kwords). Thus only for code run on the emulator! + // + // The arbitrary value of 0xf000 allows frames of up to ~240KB before spill + // slots are added for the use of eliminateFrameIndex() register scavenging. + // For frames less than 240KB, it is assumed that there will be less than + // 16KB of function arguments. 
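// For concreteness: 0xf000 is 61440, and the ~240KB figure above assumes
// 4-byte words (61440 words is about 240KB). MachineFrameInfo::
// estimateStackSize() reports bytes, so on Sw64 this threshold amounts to a
// conservative cutoff of roughly 60KB before the extra scavenging spill
// slots are requested.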
+  return CachedEStackSize > 0xf000;
+}
diff --git a/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h
new file mode 100644
index 000000000000..714ecef2f50d
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64MachineFunctionInfo.h
@@ -0,0 +1,69 @@
+//===- Sw64MachineFunctionInfo.h - Sw64 machine function info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Sw64-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SW64_SW64MACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_SW64_SW64MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include
+#include
+#include
+
+namespace llvm {
+
+// Sw64MachineFunctionInfo - This class is derived from MachineFunctionInfo and
+// contains private Sw64 target-specific information for each MachineFunction.
+class Sw64MachineFunctionInfo : public MachineFunctionInfo {
+private:
+  // GlobalBaseReg - keeps track of the virtual register initialized for
+  // use as the global base register. This is used for PIC in some PIC
+  // relocation models.
+  unsigned GlobalBaseReg;
+
+  // GlobalRetAddr - keeps track of the virtual register initialized for
+  // the return address value.
+  unsigned GlobalRetAddr;
+
+  // VarArgsOffset - What is the offset to the first vaarg
+  int VarArgsOffset;
+  // VarArgsBase - What is the base FrameIndex
+  int VarArgsBase;
+
+  virtual void anchor();
+  mutable int CachedEStackSize = -1;
+
+public:
+  Sw64MachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI)
+      : GlobalBaseReg(0), GlobalRetAddr(0), VarArgsOffset(0), VarArgsBase(0) {}
+
+  //~Sw64MachineFunctionInfo() override;
+
+  bool globalBaseRegSet() const;
+  unsigned getGlobalBaseReg(MachineFunction &MF) const { return GlobalBaseReg; }
+  void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+
+  bool globalRetAddrSet() const;
+  void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; }
+  unsigned getGlobalRetAddr(MachineFunction &MF) const { return GlobalRetAddr; }
+
+  int getVarArgsOffset() const { return VarArgsOffset; }
+  void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; }
+
+  int getVarArgsBase() const { return VarArgsBase; }
+  void setVarArgsBase(int Base) { VarArgsBase = Base; }
+  bool isLargeFrame(const MachineFunction &MF) const;
+};
+} // end namespace llvm
+#endif // LLVM_LIB_TARGET_SW64_SW64MACHINEFUNCTIONINFO_H
diff --git a/llvm/lib/Target/Sw64/Sw64MacroFusion.cpp b/llvm/lib/Target/Sw64/Sw64MacroFusion.cpp
new file mode 100644
index 000000000000..d349665abccd
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64MacroFusion.cpp
@@ -0,0 +1,65 @@
+//===- Sw64MacroFusion.cpp - Sw64 Macro Fusion ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sw64 implementation of the DAG scheduling
+// mutation to pair instructions back to back.
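// Concretely, the mutation keeps a compare and the conditional branch that
// consumes it adjacent in the schedule so the core can fuse them, e.g.
// (operand order sketched, not verbatim):
//   cmpeq $1, $2, $3
//   beq   $3, .LBB0_2
// It only fires on core4 parts (see hasCore4() below) and, as the companion
// header notes, it takes effect once Sw64PassConfig::createMachineScheduler()
// registers it:
//   DAG.addMutation(createSw64MacroFusionDAGMutation());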
+// +//===----------------------------------------------------------------------===// + +#include "Sw64MacroFusion.h" +#include "Sw64Subtarget.h" +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +using namespace llvm; + +// CMPxx followed by BEQ/BNE +static bool isCmpBqPair(const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + if (SecondMI.getOpcode() != Sw64::BEQ && SecondMI.getOpcode() != Sw64::BNE) + return false; + + // Assume the 1st instr to be a wildcard if it is unspecified. + if (FirstMI == nullptr) + return true; + + switch (FirstMI->getOpcode()) { + case Sw64::CMPEQr: + case Sw64::CMPEQi: + case Sw64::CMPLTr: + case Sw64::CMPLTi: + case Sw64::CMPLEr: + case Sw64::CMPLEi: + case Sw64::CMPULTr: + case Sw64::CMPULTi: + case Sw64::CMPULEr: + case Sw64::CMPULEi: + return true; + } + + return false; +} + +// Check if the instr pair, FirstMI and SecondMI, should be fused +// together. Given SecondMI, when FirstMI is unspecified, then check if +// SecondMI may be part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const Sw64Subtarget &ST = static_cast(TSI); + + if (ST.hasCore4() && isCmpBqPair(FirstMI, SecondMI)) + return true; + + return false; +} + +std::unique_ptr llvm::createSw64MacroFusionDAGMutation() { + return createMacroFusionDAGMutation(shouldScheduleAdjacent); +} diff --git a/llvm/lib/Target/Sw64/Sw64MacroFusion.h b/llvm/lib/Target/Sw64/Sw64MacroFusion.h new file mode 100644 index 000000000000..92a6faf1f5bd --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64MacroFusion.h @@ -0,0 +1,28 @@ +//===- Sw64MacroFusion.h - Sw64 Macro Fusion ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 definition of the DAG scheduling +// mutation to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64MACROFUSION_H +#define LLVM_LIB_TARGET_SW64_SW64MACROFUSION_H + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +// Note that you have to add: +// DAG.addMutation(createSw64MacroFusionDAGMutation()); +// to Sw64PassConfig::createMachineScheduler() to have an effect. +std::unique_ptr createSw64MacroFusionDAGMutation(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_SW64MACROFUSION_H diff --git a/llvm/lib/Target/Sw64/Sw64OptionRecord.h b/llvm/lib/Target/Sw64/Sw64OptionRecord.h new file mode 100644 index 000000000000..81a4c4d63c82 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64OptionRecord.h @@ -0,0 +1,67 @@ +//===- Sw64OptionRecord.h - Abstraction for storing information -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Sw64OptionRecord - Abstraction for storing arbitrary information in +// ELF files. Arbitrary information (e.g. register usage) can be stored in Sw64 +// specific ELF sections like .Sw64.options. 
Specific records should subclass +// Sw64OptionRecord and provide an implementation to EmitSw64OptionRecord which +// basically just dumps the information into an ELF section. More information +// about .Sw64.option can be found in the SysV ABI and the 64-bit ELF Object +// specification. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64OPTIONRECORD_H +#define LLVM_LIB_TARGET_SW64_SW64OPTIONRECORD_H + +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include + +namespace llvm { + +class Sw64ELFStreamer; + +class Sw64OptionRecord { +public: + virtual ~Sw64OptionRecord() = default; + + virtual void EmitSw64OptionRecord() = 0; +}; + +class Sw64RegInfoRecord : public Sw64OptionRecord { +public: + Sw64RegInfoRecord(Sw64ELFStreamer *S, MCContext &Context) + : Streamer(S), Context(Context) { + + const MCRegisterInfo *TRI = Context.getRegisterInfo(); + GPRCRegClass = &(TRI->getRegClass(Sw64::GPRCRegClassID)); + F4RCRegClass = &(TRI->getRegClass(Sw64::F4RCRegClassID)); + F8RCRegClass = &(TRI->getRegClass(Sw64::F8RCRegClassID)); + V256LRegClass = &(TRI->getRegClass(Sw64::V256LRegClassID)); + } + + ~Sw64RegInfoRecord() override = default; + + void EmitSw64OptionRecord() override; + void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo); + +private: + Sw64ELFStreamer *Streamer; + MCContext &Context; + const MCRegisterClass *GPRCRegClass; + const MCRegisterClass *F4RCRegClass; + const MCRegisterClass *F8RCRegClass; + const MCRegisterClass *V256LRegClass; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_SW64_SW64OPTIONRECORD_H diff --git a/llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp b/llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp new file mode 100644 index 000000000000..5790ce81fc04 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64PreLegalizerCombiner.cpp @@ -0,0 +1,96 @@ +//=== lib/CodeGen/GlobalISel/Sw64PreLegalizerCombiner.cpp --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass does combining of machine instructions at the generic MI level, +// before the legalizer. 
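// At the moment combine() performs no rewrites (it always returns false), so
// this file is scaffolding that wires a GlobalISel combiner into the Sw64
// pipeline; target-specific combine rules can be added to it later. The
// hookup on the target side is assumed to look like this sketch (not part of
// this file):
//   void Sw64PassConfig::addPreLegalizeMachineIR() {
//     addPass(createSw64PreLegalizeCombiner());
//   }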
+// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetMachine.h" +#include "llvm/CodeGen/GlobalISel/Combiner.h" +#include "llvm/CodeGen/GlobalISel/CombinerInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "sw_64-prelegalizer-combiner" + +using namespace llvm; + +namespace { +class Sw64PreLegalizerCombinerInfo : public CombinerInfo { +public: + Sw64PreLegalizerCombinerInfo() + : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, + /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false, + /*EnableOptSize*/ false, /*EnableMinSize*/ false) {} + + virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, + MachineIRBuilder &B) const override; +}; + +bool Sw64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, + MachineInstr &MI, + MachineIRBuilder &B) const { + return false; +} + +// Pass boilerplate +// ================ + +class Sw64PreLegalizerCombiner : public MachineFunctionPass { +public: + static char ID; + + Sw64PreLegalizerCombiner(); + + StringRef getPassName() const override { return "Sw64PreLegalizerCombiner"; } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; +} // end anonymous namespace + +void Sw64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.setPreservesCFG(); + getSelectionDAGFallbackAnalysisUsage(AU); + MachineFunctionPass::getAnalysisUsage(AU); +} + +Sw64PreLegalizerCombiner::Sw64PreLegalizerCombiner() : MachineFunctionPass(ID) { + initializeSw64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); +} + +bool Sw64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + auto *TPC = &getAnalysis(); + Sw64PreLegalizerCombinerInfo PCInfo; + Combiner C(PCInfo, TPC); + return C.combineMachineInstrs(MF, nullptr); +} + +char Sw64PreLegalizerCombiner::ID = 0; +INITIALIZE_PASS_BEGIN(Sw64PreLegalizerCombiner, DEBUG_TYPE, + "Combine Sw64 machine instrs before legalization", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(Sw64PreLegalizerCombiner, DEBUG_TYPE, + "Combine Sw64 machine instrs before legalization", false, + false) + +namespace llvm { +FunctionPass *createSw64PreLegalizeCombiner() { + return new Sw64PreLegalizerCombiner(); +} +} // end namespace llvm diff --git a/llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp b/llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp new file mode 100644 index 000000000000..ce4be089ba09 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64RegisterInfo.cpp @@ -0,0 +1,296 @@ +//===-- Sw64RegisterInfo.cpp - Sw64 Register Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of the MRegisterInfo class. 
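// Register conventions relied on below: $30 is the stack pointer, $15 the
// frame pointer, $29 the global pointer, $26 the return address, and $31,
// $f31 and $v31 read as zero. getReservedRegs() reserves $29-$31 and the
// zero registers unconditionally, reserves $15 only while a frame pointer is
// needed (unless EnableOptReg is off, in which case $15 and $28 are always
// reserved), and reserves $14 as a base pointer when the stack has to be
// realigned around variable-sized objects. A typical query (sketch):
//   BitVector Reserved = TRI.getReservedRegs(MF);
//   bool FPReserved = Reserved.test(Sw64::R15);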
+// +//===----------------------------------------------------------------------===// + +#include "Sw64RegisterInfo.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "Sw64.h" +#include "Sw64InstrInfo.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/TargetParser/Triple.h" + +using namespace llvm; + +#define DEBUG_TYPE "sw_64-reg-info" + +#define GET_REGINFO_TARGET_DESC +#include "Sw64GenRegisterInfo.inc" + +static cl::opt EnableOptReg("enable-sw64-opt-reg", + cl::desc("Enalbe R15/R28 reg alloc on SW64"), + cl::init(true), cl::Hidden); + +Sw64RegisterInfo::Sw64RegisterInfo() : Sw64GenRegisterInfo(Sw64::R26) {} + +// helper functions +static long getUpper16(long l) { + long y = l / Sw64::IMM_MULT; + if (l % Sw64::IMM_MULT > Sw64::IMM_HIGH) + ++y; + return y; +} + +static long getLower16(long l) { + long h = getUpper16(l); + return l - h * Sw64::IMM_MULT; +} + +const uint16_t * +Sw64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + + return CSR_F64_SaveList; +} + +BitVector Sw64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const Sw64FrameLowering *TFI = getFrameLowering(MF); + if (EnableOptReg) { + if (TFI->hasFP(MF)) + Reserved.set(Sw64::R15); + } else { + Reserved.set(Sw64::R15); + Reserved.set(Sw64::R28); + } + Reserved.set(Sw64::R29); + Reserved.set(Sw64::R30); + Reserved.set(Sw64::R31); + Reserved.set(Sw64::F31); + Reserved.set(Sw64::V31); + for (size_t i = 0; i < Sw64::GPRCRegClass.getNumRegs(); ++i) { + if (MF.getSubtarget().isRegisterReserved(i)) { + StringRef RegName("$" + std::to_string(i)); + Reserved.set( + MF.getSubtarget().getTargetLowering()->MatchRegName( + RegName)); + } + } + + // hasBP + if (hasStackRealignment(MF) && MF.getFrameInfo().hasVarSizedObjects()) + Reserved.set(Sw64::R14); + + return Reserved; +} + +const u_int32_t * +Sw64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const { + return CSR_F64_RegMask; +} + +const TargetRegisterClass * +Sw64RegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + Sw64PtrClass PtrClassKind = static_cast(Kind); + + switch (PtrClassKind) { + case Sw64PtrClass::Default: + return &Sw64::GPRCRegClass; + case Sw64PtrClass::StackPointer: + return &Sw64::SP64RegClass; + case Sw64PtrClass::GlobalPointer: + return &Sw64::GP64RegClass; + } + + llvm_unreachable("Unknown pointer kind"); +} + +bool Sw64RegisterInfo::requiresRegisterScavenging( + const MachineFunction &MF) const { + return true; +} +bool Sw64RegisterInfo::requiresFrameIndexScavenging( + const MachineFunction &MF) const { + return true; +} +bool Sw64RegisterInfo::trackLivenessAfterRegAlloc( + const MachineFunction &MF) const { + return true; +} + +bool Sw64RegisterInfo::useFPForScavengingIndex( + const MachineFunction &MF) const { + return 
false; +} + +void Sw64RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, + unsigned OpNo, int FrameIndex, + uint64_t StackSize, int64_t SPOffset) const { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + const Sw64InstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + const Sw64RegisterInfo *RegInfo = static_cast( + MF.getSubtarget().getRegisterInfo()); + + unsigned i = OpNo; + int MinCSFI = 0; + int MaxCSFI = -1; + + const std::vector &CSI = MFI.getCalleeSavedInfo(); + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) + FrameReg = Sw64::R30; + else if (RegInfo->hasStackRealignment(MF)) { + if (MFI.hasVarSizedObjects() && !MFI.isFixedObjectIndex(FrameIndex)) + FrameReg = Sw64::R14; + else if (MFI.isFixedObjectIndex(FrameIndex)) + FrameReg = getFrameRegister(MF); + else + FrameReg = Sw64::R30; + } else + FrameReg = getFrameRegister(MF); + + // Calculate final offset. + // - There is no need to change the offset if the frame object is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. + int64_t Offset = SPOffset + (int64_t)StackSize; + const MCInstrDesc &MCID = TII.get(MI.getOpcode()); + if (MI.getNumOperands() > 2 && MI.getOperand(2).isImm()) { + if (MCID.mayLoad() || MCID.mayStore()) + Offset += MI.getOperand(2).getImm(); + } + + if (MI.getOperand(1).isImm()) + Offset += MI.getOperand(1).getImm(); + + if (MI.isDebugValue()) + MI.getOperand(i + 1).ChangeToRegister(FrameReg, false); + else + MI.getOperand(2).ChangeToRegister(FrameReg, false); + + LLVM_DEBUG(errs() << "Offset : " << Offset << "\n" + << "<--------->\n"); + + // Now add the frame object offset to the offset from the virtual frame index. 
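// When the folded offset does not fit the signed 16-bit displacement range
// (IMM_LOW .. IMM_HIGH), it is split so that
//   Offset == getUpper16(Offset) * IMM_MULT + getLower16(Offset)
// with the low half kept inside the displacement range. Assuming
// IMM_MULT == 65536 and IMM_HIGH == 32767, an Offset of 0x12345 splits into
// an upper part of 1 and a lower part of 0x2345, i.e. roughly:
//   ldah $tmp, 1($fp)      // $tmp = $fp + 1 * 65536
//   ldl  $dst, 0x2345($tmp)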
+ if (Offset > Sw64::IMM_HIGH || Offset < Sw64::IMM_LOW) { + LLVM_DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: " + << Offset << "\n"); + // so in this case, we need to use a temporary register, and move the + // original inst off the SP/FP + // fix up the old: + MachineInstr *nMI; + bool FrameRegIsKilled = false; + // insert the new + Register vreg = MF.getRegInfo().createVirtualRegister(&Sw64::GPRCRegClass); + if (MI.getOperand(1).getTargetFlags() == 15) { + nMI = BuildMI(MF, MI.getDebugLoc(), TII.get(Sw64::LDAH), vreg) + .addImm(getUpper16(Offset)) + .addReg(FrameReg); + FrameRegIsKilled = true; + } else { + nMI = BuildMI(MF, MI.getDebugLoc(), TII.get(Sw64::LDAH), vreg) + .addImm(getUpper16(Offset)) + .addReg(FrameReg); + FrameRegIsKilled = true; + } + + MBB.insert(II, nMI); + MI.getOperand(2).ChangeToRegister(vreg, false, false, FrameRegIsKilled); + MI.getOperand(1).ChangeToImmediate(getLower16(Offset)); + } else { + if (MI.isDebugValue()) + MI.getOperand(i + 1).ChangeToImmediate(Offset); + else + MI.getOperand(1).ChangeToImmediate(Offset); + } +} + +// FrameIndex represent objects inside a abstract stack. +// We must replace FrameIndex with an stack/frame pointer +// direct reference. +bool Sw64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + + LLVM_DEBUG(errs() << "\nFunction : " << MF.getName() << "\n"; + errs() << "<--------->\n" + << MI); + + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + uint64_t stackSize = MF.getFrameInfo().getStackSize(); + int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex); + + LLVM_DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" + << "spOffset : " << spOffset << "\n" + << "stackSize : " << stackSize << "\n" + << "alignment : " + << DebugStr(MF.getFrameInfo().getObjectAlign(FrameIndex)) + << "\n"); + + eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); + return false; +} + +Register Sw64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const Sw64FrameLowering *TFI = getFrameLowering(MF); + + return TFI->hasFP(MF) ? Sw64::R15 : Sw64::R30; +} + +unsigned Sw64RegisterInfo::getEHExceptionRegister() const { + llvm_unreachable("What is the exception register"); + return 0; +} + +unsigned Sw64RegisterInfo::getEHHandlerRegister() const { + llvm_unreachable("What is the exception handler register"); + return 0; +} + +std::string Sw64RegisterInfo::getPrettyName(unsigned reg) { + std::string s("#reg_#-#"); + return s; +} + +bool Sw64RegisterInfo::needsFrameMoves(const MachineFunction &MF) { + return MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry(); +} diff --git a/llvm/lib/Target/Sw64/Sw64RegisterInfo.h b/llvm/lib/Target/Sw64/Sw64RegisterInfo.h new file mode 100644 index 000000000000..0f0e74f0bbd9 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64RegisterInfo.h @@ -0,0 +1,79 @@ +//===-- Sw64RegisterInfo.h - Sw64 Register Information Impl ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sw64 implementation of the MRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64REGISTERINFO_H +#define LLVM_LIB_TARGET_SW64_SW64REGISTERINFO_H + +#include "Sw64.h" +#include "llvm/CodeGen/MachineBasicBlock.h" + +#define GET_REGINFO_HEADER +#include "Sw64GenRegisterInfo.inc" + +namespace llvm { + +class TargetInstrInfo; +class TargetRegisterClass; + +class Sw64RegisterInfo : public Sw64GenRegisterInfo { +public: + Sw64RegisterInfo(); + enum class Sw64PtrClass { + // The default register class for integer values. + Default = 0, + // The stack pointer only. + StackPointer = 1, + // The global pointer only. + GlobalPointer = 2, + }; + + // Code Generation virtual methods... + + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + + BitVector getReservedRegs(const MachineFunction &MF) const override; + + // Eliminate virtual register which Prologue/Epilogue generate. + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; + bool useFPForScavengingIndex(const MachineFunction &MF) const override; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; + + // Code Generation virtual methods... + const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const override; + + bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + // Debug information queries. + Register getFrameRegister(const MachineFunction &MF) const override; + + const u_int32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; + + // Return whether to emit frame moves + static bool needsFrameMoves(const MachineFunction &MF); + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + static std::string getPrettyName(unsigned reg); + +private: + void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, int64_t SPOffset) const; +}; + +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64RegisterInfo.td b/llvm/lib/Target/Sw64/Sw64RegisterInfo.td new file mode 100644 index 000000000000..2b164147ebfa --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64RegisterInfo.td @@ -0,0 +1,306 @@ +//===- Sw64RegisterInfo.td - The Sw64 Register File ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Sw64 register set. 
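// Layout in brief: 32 general-purpose registers $0-$31 with $31 wired to
// zero, and 32 floating-point registers exposed three ways: F0-F31 as the
// scalar f32/f64 views, Q0-Q31 as the 64-bit views used by FPRC, and V0-V31
// as the 256-bit SIMD views, all tied together through the sub_32
// sub-register index (so, on the C++ side, TRI.getSubReg(Sw64::V0,
// Sw64::sub_32) yields Sw64::Q0). DWARF numbers 0-31 cover the GPRs and
// 32-63 the floating-point/vector registers.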
+// +//===----------------------------------------------------------------------===// +let Namespace = "Sw64" in { +def sub_32: SubRegIndex<32>; +} //Namespace Sw64 + +// For register encoding +class Sw64Reg Enc, string n, list alt= []> : Register { + let HWEncoding = Enc; + let Namespace = "Sw64"; + let AltNames = alt; +} + +class Sw64RegWithSubRegs Enc, string n, list subregs> + : RegisterWithSubRegs { + let HWEncoding = Enc; + let Namespace = "Sw64"; +} + +// GPR - One of the 32 32-bit general-purpose registers +class Sw64GPR Enc, string n, list alt= []> : Sw64Reg; +// FPR - One of the 32 64-bit floating-point registers +class Sw64FPR Enc, string n, list subregs = []> + : Sw64RegWithSubRegs; + +// VEC - One of the 32 256-bit vector registers +class Sw64VEC Enc, string n, list subregs> + : Sw64RegWithSubRegs { + let SubRegIndices = [sub_32]; +} + +class Unallocatable { + bit isAllocatable = 0; +} + +let Namespace = "Sw64" in { + +// General-purpose registers +def R0 : Sw64GPR< 0, "$0">, DwarfRegNum<[0]>; +def R1 : Sw64GPR< 1, "$1">, DwarfRegNum<[1]>; +def R2 : Sw64GPR< 2, "$2">, DwarfRegNum<[2]>; +def R3 : Sw64GPR< 3, "$3">, DwarfRegNum<[3]>; +def R4 : Sw64GPR< 4, "$4">, DwarfRegNum<[4]>; +def R5 : Sw64GPR< 5, "$5">, DwarfRegNum<[5]>; +def R6 : Sw64GPR< 6, "$6">, DwarfRegNum<[6]>; +def R7 : Sw64GPR< 7, "$7">, DwarfRegNum<[7]>; +def R8 : Sw64GPR< 8, "$8">, DwarfRegNum<[8]>; +def R9 : Sw64GPR< 9, "$9">, DwarfRegNum<[9]>; +def R10 : Sw64GPR< 10, "$10">, DwarfRegNum<[10]>; +def R11 : Sw64GPR< 11, "$11">, DwarfRegNum<[11]>; +def R12 : Sw64GPR< 12, "$12">, DwarfRegNum<[12]>; +def R13 : Sw64GPR< 13, "$13">, DwarfRegNum<[13]>; +def R14 : Sw64GPR< 14, "$14">, DwarfRegNum<[14]>; +def R15 : Sw64GPR< 15, "$15", ["$fp"]>, DwarfRegNum<[15]>; +def R16 : Sw64GPR< 16, "$16">, DwarfRegNum<[16]>; +def R17 : Sw64GPR< 17, "$17">, DwarfRegNum<[17]>; +def R18 : Sw64GPR< 18, "$18">, DwarfRegNum<[18]>; +def R19 : Sw64GPR< 19, "$19">, DwarfRegNum<[19]>; +def R20 : Sw64GPR< 20, "$20">, DwarfRegNum<[20]>; +def R21 : Sw64GPR< 21, "$21">, DwarfRegNum<[21]>; +def R22 : Sw64GPR< 22, "$22">, DwarfRegNum<[22]>; +def R23 : Sw64GPR< 23, "$23">, DwarfRegNum<[23]>; +def R24 : Sw64GPR< 24, "$24">, DwarfRegNum<[24]>; +def R25 : Sw64GPR< 25, "$25">, DwarfRegNum<[25]>; +def R26 : Sw64GPR< 26, "$26", ["$ra"]>, DwarfRegNum<[26]>; +def R27 : Sw64GPR< 27, "$27", ["$pv"]>, DwarfRegNum<[27]>; +def R28 : Sw64GPR< 28, "$28", ["$at"]>, DwarfRegNum<[28]>; +def R29 : Sw64GPR< 29, "$29", ["$gp"]>, DwarfRegNum<[29]>; +def R30 : Sw64GPR< 30, "$30", ["$sp"]>, DwarfRegNum<[30]>; +def R31 : Sw64GPR< 31, "$31", ["$zero"]>, DwarfRegNum<[31]>; + +// Floating-point registers +def F0 : Sw64FPR< 0, "$f0">, DwarfRegNum<[32]>; +def F1 : Sw64FPR< 1, "$f1">, DwarfRegNum<[33]>; +def F2 : Sw64FPR< 2, "$f2">, DwarfRegNum<[34]>; +def F3 : Sw64FPR< 3, "$f3">, DwarfRegNum<[35]>; +def F4 : Sw64FPR< 4, "$f4">, DwarfRegNum<[36]>; +def F5 : Sw64FPR< 5, "$f5">, DwarfRegNum<[37]>; +def F6 : Sw64FPR< 6, "$f6">, DwarfRegNum<[38]>; +def F7 : Sw64FPR< 7, "$f7">, DwarfRegNum<[39]>; +def F8 : Sw64FPR< 8, "$f8">, DwarfRegNum<[40]>; +def F9 : Sw64FPR< 9, "$f9">, DwarfRegNum<[41]>; +def F10 : Sw64FPR< 10, "$f10">, DwarfRegNum<[42]>; +def F11 : Sw64FPR< 11, "$f11">, DwarfRegNum<[43]>; +def F12 : Sw64FPR< 12, "$f12">, DwarfRegNum<[44]>; +def F13 : Sw64FPR< 13, "$f13">, DwarfRegNum<[45]>; +def F14 : Sw64FPR< 14, "$f14">, DwarfRegNum<[46]>; +def F15 : Sw64FPR< 15, "$f15">, DwarfRegNum<[47]>; +def F16 : Sw64FPR< 16, "$f16">, DwarfRegNum<[48]>; +def F17 : Sw64FPR< 17, "$f17">, 
DwarfRegNum<[49]>; +def F18 : Sw64FPR< 18, "$f18">, DwarfRegNum<[50]>; +def F19 : Sw64FPR< 19, "$f19">, DwarfRegNum<[51]>; +def F20 : Sw64FPR< 20, "$f20">, DwarfRegNum<[52]>; +def F21 : Sw64FPR< 21, "$f21">, DwarfRegNum<[53]>; +def F22 : Sw64FPR< 22, "$f22">, DwarfRegNum<[54]>; +def F23 : Sw64FPR< 23, "$f23">, DwarfRegNum<[55]>; +def F24 : Sw64FPR< 24, "$f24">, DwarfRegNum<[56]>; +def F25 : Sw64FPR< 25, "$f25">, DwarfRegNum<[57]>; +def F26 : Sw64FPR< 26, "$f26">, DwarfRegNum<[58]>; +def F27 : Sw64FPR< 27, "$f27">, DwarfRegNum<[59]>; +def F28 : Sw64FPR< 28, "$f28">, DwarfRegNum<[60]>; +def F29 : Sw64FPR< 29, "$f29">, DwarfRegNum<[61]>; +def F30 : Sw64FPR< 30, "$f30">, DwarfRegNum<[62]>; +def F31 : Sw64FPR< 31, "$f31">, DwarfRegNum<[63]>; + +// Floating-point registers +let SubRegIndices = [sub_32] in { +def Q0 : Sw64FPR< 0, "$f0", [F0]>, DwarfRegNum<[32]>; +def Q1 : Sw64FPR< 1, "$f1", [F1]>, DwarfRegNum<[33]>; +def Q2 : Sw64FPR< 2, "$f2", [F2]>, DwarfRegNum<[34]>; +def Q3 : Sw64FPR< 3, "$f3", [F3]>, DwarfRegNum<[35]>; +def Q4 : Sw64FPR< 4, "$f4", [F4]>, DwarfRegNum<[36]>; +def Q5 : Sw64FPR< 5, "$f5", [F5]>, DwarfRegNum<[37]>; +def Q6 : Sw64FPR< 6, "$f6", [F6]>, DwarfRegNum<[38]>; +def Q7 : Sw64FPR< 7, "$f7", [F7]>, DwarfRegNum<[39]>; +def Q8 : Sw64FPR< 8, "$f8", [F8]>, DwarfRegNum<[40]>; +def Q9 : Sw64FPR< 9, "$f9", [F9]>, DwarfRegNum<[41]>; +def Q10 : Sw64FPR< 10, "$f10", [F10]>, DwarfRegNum<[42]>; +def Q11 : Sw64FPR< 11, "$f11", [F11]>, DwarfRegNum<[43]>; +def Q12 : Sw64FPR< 12, "$f12", [F12]>, DwarfRegNum<[44]>; +def Q13 : Sw64FPR< 13, "$f13", [F13]>, DwarfRegNum<[45]>; +def Q14 : Sw64FPR< 14, "$f14", [F14]>, DwarfRegNum<[46]>; +def Q15 : Sw64FPR< 15, "$f15", [F15]>, DwarfRegNum<[47]>; +def Q16 : Sw64FPR< 16, "$f16", [F16]>, DwarfRegNum<[48]>; +def Q17 : Sw64FPR< 17, "$f17", [F17]>, DwarfRegNum<[49]>; +def Q18 : Sw64FPR< 18, "$f18", [F18]>, DwarfRegNum<[50]>; +def Q19 : Sw64FPR< 19, "$f19", [F19]>, DwarfRegNum<[51]>; +def Q20 : Sw64FPR< 20, "$f20", [F20]>, DwarfRegNum<[52]>; +def Q21 : Sw64FPR< 21, "$f21", [F21]>, DwarfRegNum<[53]>; +def Q22 : Sw64FPR< 22, "$f22", [F22]>, DwarfRegNum<[54]>; +def Q23 : Sw64FPR< 23, "$f23", [F23]>, DwarfRegNum<[55]>; +def Q24 : Sw64FPR< 24, "$f24", [F24]>, DwarfRegNum<[56]>; +def Q25 : Sw64FPR< 25, "$f25", [F25]>, DwarfRegNum<[57]>; +def Q26 : Sw64FPR< 26, "$f26", [F26]>, DwarfRegNum<[58]>; +def Q27 : Sw64FPR< 27, "$f27", [F27]>, DwarfRegNum<[59]>; +def Q28 : Sw64FPR< 28, "$f28", [F28]>, DwarfRegNum<[60]>; +def Q29 : Sw64FPR< 29, "$f29", [F29]>, DwarfRegNum<[61]>; +def Q30 : Sw64FPR< 30, "$f30", [F30]>, DwarfRegNum<[62]>; +def Q31 : Sw64FPR< 31, "$f31", [F31]>, DwarfRegNum<[63]>; +} + +// Vector registers +def V0 : Sw64VEC< 0, "$f0", [Q0]>, DwarfRegNum<[32]>; +def V1 : Sw64VEC< 1, "$f1", [Q1]>, DwarfRegNum<[33]>; +def V2 : Sw64VEC< 2, "$f2", [Q2]>, DwarfRegNum<[34]>; +def V3 : Sw64VEC< 3, "$f3", [Q3]>, DwarfRegNum<[35]>; +def V4 : Sw64VEC< 4, "$f4", [Q4]>, DwarfRegNum<[36]>; +def V5 : Sw64VEC< 5, "$f5", [Q5]>, DwarfRegNum<[37]>; +def V6 : Sw64VEC< 6, "$f6", [Q6]>, DwarfRegNum<[38]>; +def V7 : Sw64VEC< 7, "$f7", [Q7]>, DwarfRegNum<[39]>; +def V8 : Sw64VEC< 8, "$f8", [Q8]>, DwarfRegNum<[40]>; +def V9 : Sw64VEC< 9, "$f9", [Q9]>, DwarfRegNum<[41]>; +def V10 : Sw64VEC< 10, "$f10", [Q10]>, DwarfRegNum<[42]>; +def V11 : Sw64VEC< 11, "$f11", [Q11]>, DwarfRegNum<[43]>; +def V12 : Sw64VEC< 12, "$f12", [Q12]>, DwarfRegNum<[44]>; +def V13 : Sw64VEC< 13, "$f13", [Q13]>, DwarfRegNum<[45]>; +def V14 : Sw64VEC< 14, "$f14", [Q14]>, DwarfRegNum<[46]>; +def V15 : Sw64VEC< 
15, "$f15", [Q15]>, DwarfRegNum<[47]>; +def V16 : Sw64VEC< 16, "$f16", [Q16]>, DwarfRegNum<[48]>; +def V17 : Sw64VEC< 17, "$f17", [Q17]>, DwarfRegNum<[49]>; +def V18 : Sw64VEC< 18, "$f18", [Q18]>, DwarfRegNum<[50]>; +def V19 : Sw64VEC< 19, "$f19", [Q19]>, DwarfRegNum<[51]>; +def V20 : Sw64VEC< 20, "$f20", [Q20]>, DwarfRegNum<[52]>; +def V21 : Sw64VEC< 21, "$f21", [Q21]>, DwarfRegNum<[53]>; +def V22 : Sw64VEC< 22, "$f22", [Q22]>, DwarfRegNum<[54]>; +def V23 : Sw64VEC< 23, "$f23", [Q23]>, DwarfRegNum<[55]>; +def V24 : Sw64VEC< 24, "$f24", [Q24]>, DwarfRegNum<[56]>; +def V25 : Sw64VEC< 25, "$f25", [Q25]>, DwarfRegNum<[57]>; +def V26 : Sw64VEC< 26, "$f26", [Q26]>, DwarfRegNum<[58]>; +def V27 : Sw64VEC< 27, "$f27", [Q27]>, DwarfRegNum<[59]>; +def V28 : Sw64VEC< 28, "$f28", [Q28]>, DwarfRegNum<[60]>; +def V29 : Sw64VEC< 29, "$f29", [Q29]>, DwarfRegNum<[61]>; +def V30 : Sw64VEC< 30, "$f30", [Q30]>, DwarfRegNum<[62]>; +def V31 : Sw64VEC< 31, "$f31", [Q31]>, DwarfRegNum<[63]>; + +} // Namespace Sw64 + +/// Register classes +def GPRC : RegisterClass<"Sw64", [i64], 64, (add + // Volatile + R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22, + R23, R24, R25, R28, + //Special meaning, but volatile + R27, //procedure address + R26, //return address + R29, //global offset table address + // Non-volatile + R9, R10, R11, R12, R13, R14, + // Don't allocate 15, 30, 31 + R15, R30, R31)>; + +def F4RC : RegisterClass<"Sw64", [f32], 64, (add F0, F1, + F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, + // Saved: + F2, F3, F4, F5, F6, F7, F8, F9, + // zero: + F31)>; + +def F8RC : RegisterClass<"Sw64", [f64], 64, (add F4RC)>; + +// lowest 64bits part for simd vector +def FPRC : RegisterClass<"Sw64", [i64, f64], 64, (sequence "Q%u", 0, 31)>; + +def FPRC_lo : RegisterClass<"Sw64", [i32, f32], 64, (trunc F4RC, 32)>; + +// Stack pointer and global pointer classes for instructions that are limited +// to a single register. +def SP64 : RegisterClass<"Sw64", [i64], 64, (add R30)>, Unallocatable; +def GP64 : RegisterClass<"Sw64", [i64], 64, (add R29)>, Unallocatable; + +def FP30 : RegisterClass<"Sw64", [f32], 64, (add F30)>, Unallocatable; +def FD30 : RegisterClass<"Sw64", [f64], 64, (add F30)>, Unallocatable; +// Register Operands. 
+ +class Sw64AsmRegOperand : AsmOperandClass { + let ParserMethod = "parseAnyRegister"; +} + +def GPRCAsmOperand : Sw64AsmRegOperand { + let Name = "Reg"; +} + +def F4RCAsmOperand : Sw64AsmRegOperand { + let Name = "F4RCAsmReg"; + let PredicateMethod = "isFGRAsmReg"; +} + +def F8RCAsmOperand : Sw64AsmRegOperand { + let Name = "F8RCAsmReg"; + let PredicateMethod = "isFGRAsmReg"; +} + +def V256AsmOperand : Sw64AsmRegOperand { + let Name = "V256AsmReg"; +} + +def V256B : RegisterClass<"Sw64", [v32i8], 256, (add (sequence "V%u", 0, 31))>; +def V256H : RegisterClass<"Sw64", [v16i16], 256, (add (sequence "V%u", 0, 31))>; +def V256W : RegisterClass<"Sw64", [v4f32], 256, (add (sequence "V%u", 0, 31))>; + +def V256L : RegisterClass<"Sw64", [v32i8, v16i16, v8i32, v4i64, v4f64, v4f32], 256, + (add (sequence "V%u", 0, 31))>; + +def V256all : RegisterClass<"Sw64", [v32i8, v16i16, v8i32, v4i64, v4f32 ,v4f64], + 256, (add (sequence "V%u", 0, 31))>; + +// adding a special class for floating selection +def V256Floating : RegisterClass<"Sw64", [v4f32, v4f64], + 256, (add (sequence "V%u", 0, 31))>; +def V256E64 : RegisterClass<"Sw64", [v4i64, v4f32, v4f64], + 256, (add (sequence "V%u", 0, 31))>; + +def GPRCOpnd : RegisterOperand { + let ParserMatchClass = GPRCAsmOperand; +} + +def F4RCOpnd : RegisterOperand { + let ParserMatchClass = F4RCAsmOperand; +} + +def F8RCOpnd : RegisterOperand { + let ParserMatchClass = F8RCAsmOperand; +} + +def FPRCOpnd : RegisterOperand { + let ParserMatchClass = F8RCAsmOperand; +} + +def FPRCloOpnd : RegisterOperand { + let ParserMatchClass = F8RCAsmOperand; +} + +def V256BOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256HOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256WOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256LOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256ALOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} + +def V256FOpnd : RegisterOperand { + let ParserMatchClass = V256AsmOperand; +} diff --git a/llvm/lib/Target/Sw64/Sw64Relocations.h b/llvm/lib/Target/Sw64/Sw64Relocations.h new file mode 100644 index 000000000000..b32f148d7482 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64Relocations.h @@ -0,0 +1,30 @@ +//===- Sw64Relocations.h - Sw64 Code Relocations --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64 target-specific relocation types. +// +//===----------------------------------------------------------------------===// + +#ifndef Sw64RELOCATIONS_H +#define Sw64RELOCATIONS_H + +#include "llvm/CodeGen/MachineRelocation.h" + +namespace llvm { +namespace Sw64 { +enum RelocationType { + reloc_literal, + reloc_gprellow, + reloc_gprelhigh, + reloc_gpdist, + reloc_bsr +}; +} +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64SchedCore3.td b/llvm/lib/Target/Sw64/Sw64SchedCore3.td new file mode 100644 index 000000000000..f8e424d1639d --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64SchedCore3.td @@ -0,0 +1,213 @@ +//===- Sw64SchedCore3.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
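// Reading the model that follows: IssueWidth caps how many micro-ops the
// MachineScheduler assumes can be dispatched per cycle, LoadLatency is the
// default load-to-use latency applied where no InstRW override exists,
// MicroOpBufferSize models the 72-entry reorder buffer (a non-zero value
// makes the scheduler treat the core as out of order), MispredictPenalty
// feeds the branch heuristics, CompleteModel = false keeps unlisted
// instructions from being flagged as errors, and PostRAScheduler turns on
// the post-RA list scheduler for this subtarget.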
+// +//===----------------------------------------------------------------------===// + +// Core3 processor architecture mannual +def GenericSw64Model : SchedMachineModel { +// Core 3 has 4-way decode and 7-way dispatch, +// in a cycle, can maxinum dispatch 3-way to ALU, 2-way to AGU, 2-way to FPU, +// so set the dispatch width to 4 is optitional. + let IssueWidth = 4; + let LoadLatency = 4; // Optimistic load latency + let LoopMicroOpBufferSize = 16; //InsnQueue has 16 entry + let MispredictPenalty = 13; // Fetch + Decode/Rename/Dispatch + Branch + let CompleteModel = false; + let MicroOpBufferSize = 72; // ROB size + + let PostRAScheduler = 1; +} + +let SchedModel = GenericSw64Model in { + // chapter 2.2.1 + // 3 pipeline ALU + def C3PortALU0 : ProcResource<1>; + def C3PortALU1 : ProcResource<1>; + def C3PortALU2 : ProcResource<1>; + + // 2.2.1 + // alu0 has ADD MUL, alu1 has BR/CSR BOP/SHT + // alu2 has CNT BOP/SHT ADD/SEL + def C3PortALU : ProcResGroup<[C3PortALU0, C3PortALU1, C3PortALU2]> { + let BufferSize = 32; + } + def C3PortALU01 : ProcResGroup<[C3PortALU0, C3PortALU1]>; + def C3PortALU12 : ProcResGroup<[C3PortALU1, C3PortALU2]>; + + // 2 pipeline Alu Mem + // 2.2.3 + // Core3a interger has two AGU Unit + // 2 LSU Unit deel with all load/store + def C3LSU : ProcResource<2>; + + def C3PortAGU0 : ProcResource<1>; + def C3PortAGU1 : ProcResource<1>; + + def C3PortAGU01 : ProcResGroup<[C3PortAGU0, C3PortAGU1]>; + + let Super = C3LSU in + def C3Load : ProcResource<2> { + let BufferSize = 32; + } + + def C3LoadQueue : LoadQueue; + + let Super = C3LSU in + def C3Store : ProcResource<1> { + let BufferSize = 16; + } + + def C3StoreQueue : StoreQueue; + + // 2 pipeline FPU-SIMD + def C3PortFPU0 : ProcResource<1>; + def C3PortFPU1 : ProcResource<1>; + + + def C3PortFPU : ProcResGroup<[C3PortFPU0, C3PortFPU1]>; + + def C3GprRF: RegisterFile<105, [GPRC], [1]>; + + def C3FpuRF: RegisterFile<95, [F4RC, F8RC], [1]>; + + def C3RCU : RetireControlUnit<72, 4>; + + class C3WriteRes ExePorts, + int Lat, list Res = [], int UOps = 1> : + WriteRes { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } + + class C3LSWriteRes ExePorts, + int Lat, list Res = [], int UOps = 1> : + WriteRes { + let Latency = !add(Lat, 1); + let ResourceCycles = !if(!empty(Res), [1, 1], !listconcat([1], Res)); + let NumMicroOps = UOps; + } + + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; //nop do not execute in backend + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + def : C3WriteRes ; + + def : C3WriteRes ; + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3LSWriteRes ; + + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3WriteRes ; + + def : C3LSWriteRes; + def : C3LSWriteRes; + + def : C3LSWriteRes; + def : C3LSWriteRes; + + def : C3WriteRes; + def : C3WriteRes; + + def : C3WriteRes; + def : C3WriteRes; + + def : C3WriteRes; + def : C3WriteRes; + + def : InstRW<[WriteIALU], (instrs COPY)>; + + def : InstRW<[WriteBR], (instrs BR, BEQ, BGE, + BGT, BLBC, BLBS, BLE, BLT, BNE, BSR)>; + + def : InstRW<[WriteBR], (instrs SYS_CALL)>; + def : InstRW<[WriteBR], (instrs JMP, JSR, RET)>; + def : InstRW<[WriteFBR], (instregex "^FB(EQ|GE|GT|LE|LT|NE)$")>; + + def : InstRW<[WriteLD], (instregex "^(S_FILL|E_FILL)(CS|DE)$")>; + def : InstRW<[WriteLD], (instregex "^FILL(CS|DE|CS_E|DE_E)$")>; + + def : InstRW<[WriteLD], 
(instregex "^LD(L|W|HU|BU)$")>; + def : InstRW<[WriteFLD], (instregex "^LD(S|D)$")>; + + def : InstRW<[WriteST], (instregex "^ST(L|W|H|B)$")>; + def : InstRW<[WriteFST], (instregex "^ST(S|D)$")>; + + def : InstRW<[WriteImm], (instregex "^LDAH*$")>; + + def : InstRW<[WriteIALU], (instregex "^(ADD|SUB|S(4|8)(ADD|SUB))(L|Q)(r|i)$")>; + def : InstRW<[WriteIMul], (instregex "^(MUL)(L|Q)(r|i)$")>; + + def : InstRW<[WriteCNT], (instrs CTLZ, CTPOP, CTTZ)>; + + def : InstRW<[WriteBOP], (instrs ZAPr, ZAPi, ZAPNOTr, ZAPNOTi, SEXTB, SEXTH)>; + + def : InstRW<[WriteIALU], (instregex "^CMP(EQ|LE|LT|ULE|ULT|BGE)(r|i)*$")>; + def : InstRW<[WriteFPU64], (instregex "^CMP(TEQ|TLE|TLT|TUN)$")>; + + def : InstRW<[WriteIALU], (instregex "^(AND|BIC|BIS|ORNOT|XOR|EQV)(r|i)*$")>; + + def : InstRW<[WriteSHT], (instregex "^(SL|SRA|SRL)(r|i)*$")>; + def : InstRW<[WriteIMul], (instrs UMULHi, UMULHr)>; + + def : InstRW<[WriteSEL], (instregex "^SEL(EQ|NE|LE|LT|GT|GE|LBC|LBS)(r|i)*$")>; + + def : InstRW<[WriteBOP], (instregex "^EXT(BL|WL|LL|LW|HB|HH|HW|HL)(r|i)*$")>; + + def : InstRW<[WriteBOP], (instregex "^MASKL[BHLW](r|i)*$")>; + def : InstRW<[WriteBOP], (instregex "^MASKH[BHLW](r|i)*$")>; + def : InstRW<[WriteBOP], (instregex "^INSL[BHLW](r|i)*$")>; + def : InstRW<[WriteBOP], (instregex "^INSH[BHLW](r|i)*$")>; + + def : InstRW<[WriteFPU32], (instregex "^(ADD|SUB|MUL)(S|D)*$")>; + def : InstRW<[WriteFPU32], (instregex "^CPY(S|SE|SN)(S|D)*$")>; + def : InstRW<[WriteFPU64], (instregex "^SETFPEC(0|1|2|3)*$")>; + def : InstRW<[WriteImm], (instrs NOP)>; + + def : InstRW<[WriteFCvtF64ToF32], (instrs FCVTLW, FCVTWL)>; + def : InstRW<[WriteFCvtF64ToI64], (instrs CVTQS, CVTQT)>; + def : InstRW<[WriteFCvtF64ToI64], (instrs CVTTQ, FCTTDL, FCTTDL_G, FCTTDL_P, FCTTDL_N)>; + def : InstRW<[WriteFCvtF64ToF32], (instrs CVTST, CVTTS)>; + + def : InstRW<[WriteFPU32], (instregex "^(F|FN)M(A|S)S$")>; + def : InstRW<[WriteFPU64], (instregex "^(F|FN)M(A|S)D$")>; + + def : InstRW<[WriteFSEL], (instregex "^FSEL(EQ|GE|GT|LE|LT|NE)S$")>; + def : InstRW<[WriteFSEL], (instregex "^FSEL(EQ|GE|GT|LE|LT|NE)D$")>; + + def : InstRW<[WriteFSqrt32], (instrs SQRTSS)>; + def : InstRW<[WriteFSqrt64], (instrs SQRTSD)>; + + def : InstRW<[WriteFDiv32], (instrs DIVS)>; + def : InstRW<[WriteFDiv64], (instrs DIVD)>; + + def : InstRW<[WriteFPS], (instrs FTOIS, FTOIT, ITOFS, ITOFT)>; + + def : InstRW<[WriteLD], (instrs LDL_L, LDQ_L)>; + def : InstRW<[WriteST], (instrs STL_C, STQ_C)>; + + def : InstRW<[WriteIALU], (instrs RCID, RPCC)>; + def : InstRW<[WriteFPS], (instrs WFPCR, RFPCR)>; +} diff --git a/llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td b/llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td new file mode 100644 index 000000000000..bf34ba940ac8 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64SchedCore3SIMD.td @@ -0,0 +1,57 @@ +//===- Sw64SchedCore3SIMD.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +let SchedModel = GenericSw64Model in { + +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 17; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 3; } + +def : InstRW<[WriteFLDS], (instregex "^(VLD)(W|S|D)(E)$")>; +def : InstRW<[WriteFLDS], (instregex "^(VLD)(S|D)$")>; + +def : InstRW<[WriteFLDS], (instregex "^(VLD)(W|S|D)(U)$")>; +def : InstRW<[WriteFSTDS], (instregex "^(VST)(W|S|D)(U)$")>; +def : InstRW<[WriteFSTDS], (instregex "^(VST)(WU|SU|DU)(L|H)$")>; + +def : InstRW<[WriteFLDS], (instrs VLDDNC)>; +def : InstRW<[WriteFSTDS], (instrs VSTDNC)>; + +def : InstRW<[WriteFMA6], (instregex "^(V)(ADD|SUB|MUL)(S|D)$")>; + +def : InstRW<[WriteFMA6], (instregex "^(VFCMP)(EQ|LE|LT|UN)$")>; +def : InstRW<[WriteFMA2], (instregex "^(VCPY)(S|SE|SN)$")>; +def : InstRW<[WriteFMA2], (instregex "^(V)(M|NM)(A|S)(S|D)$")>; + +def : InstRW<[WriteFMA2], (instregex "^(VFSEL)(EQ|LT|LE)$")>; +def : InstRW<[WriteVPM1], (instregex "^(V)(INS|EXT|CPY)(W|FS|FD)$")>; + +def : InstRW<[WriteVPM1], (instregex "^(VINSECTL)(H|W|L|B)$")>; + +def : InstRW<[WriteVCON1], (instregex "^(VCON)(W|S|D)$")>; +def : InstRW<[WriteVCON2], (instrs VSHFW)>; + +def : InstRW<[WriteVPM2], (instrs VLOGZZ)>; +} diff --git a/llvm/lib/Target/Sw64/Sw64SchedCore4.td b/llvm/lib/Target/Sw64/Sw64SchedCore4.td new file mode 100644 index 000000000000..9972c95699a3 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64SchedCore4.td @@ -0,0 +1,75 @@ +//===- Sw64SchedCore4.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
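+// These definitions extend the same GenericSw64Model declared in
+// Sw64SchedCore3.td; they mainly add write resources and InstRW mappings for
+// the Core4-specific instructions (integer divide/remainder, CRC32, CAS,
+// byte-reverse, post-increment loads/stores, CSR access), while the common
+// instructions keep the Core3 entries.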
+// +//===----------------------------------------------------------------------===// + +let SchedModel = GenericSw64Model in { + // 3 pipeline ALU + +def : WriteRes { let Latency = 1; } + +// FIXME: the latency of div and rem +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } + +def : WriteRes{ let Latency = 3; } + +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } +def : WriteRes{ let Latency = 2; } + +def : InstRW<[WriteLD], (instrs LDL_A, LDW_A, LDHU_A, LDBU_A)>; +def : InstRW<[WriteFLD], (instrs LDS_A, LDD_A)>; + +def : InstRW<[WriteST], (instrs STL_A, STW_A, STH_A, STB_A)>; +def : InstRW<[WriteFST], (instrs STS_A, STD_A)>; + +def : InstRW<[WriteIDiv], (instregex "^(DIV)(L|Q)$")>; +def : InstRW<[WriteIDiv], (instregex "^(UDIV)(L|Q)$")>; +def : InstRW<[WriteIRem], (instregex "^(REM)(L|Q)$")>; +def : InstRW<[WriteIRem], (instregex "^(UREM)(L|Q)$")>; +def : InstRW<[WriteJmp], (instrs ADDPI, ADDPIS)>; +def : InstRW<[WriteImm], (instregex "^(C|S)(BT)(r|i)$")>; + +def : InstRW<[WriteIALU], (instrs REVBH, REVBW, REVBL)>; + +def : InstRW<[WriteIALU], (instregex "^(SLLW|SRAW|SRLW|ROLW|ROLL)(r|i)*$")>; + +def : InstRW<[WriteCrc], (instregex "^(CRC32C)(B|H|W|L)*$")>; +def : InstRW<[WriteCrc], (instregex "^(CRC32)(B|H|W|L)*$")>; + +def : InstRW<[WriteFCvtF64ToI64], (instrs CMOVDL, CMOVDL_G, CMOVDL_P, CMOVDL_Z, CMOVDL_N)>; +def : InstRW<[WriteFCvtF64ToI64], (instrs CMOVDLU, CMOVDLU_G, CMOVDLU_P, CMOVDLU_Z, CMOVDLU_N)>; +def : InstRW<[WriteFCvtF64ToI32], (instrs CMOVDWU, CMOVDWU_G, CMOVDWU_P, CMOVDWU_Z, CMOVDWU_N)>; +def : InstRW<[WriteFCvtF64ToI32], (instrs CMOVDW, CMOVDW_G, CMOVDW_P, CMOVDW_Z, CMOVDW_N)>; +def : InstRW<[WriteFCvtF64ToI32], (instrs FCVTHS, FCVTSH)>; + +def : InstRW<[WriteFCvtI64ToF32], (instrs CMOVLS, CMOVULS)>; +def : InstRW<[WriteFCvtI32ToF32], (instrs CMOVWS, CMOVUWS)>; +def : InstRW<[WriteFCvtI64ToF64], (instrs CMOVLD, CMOVULD)>; +def : InstRW<[WriteFCvtI32ToF64], (instrs CMOVWD, CMOVUWD)>; + +def : InstRW<[WriteFCvtF64ToF64], (instrs FRID, FRID_G, FRID_P, FRID_Z, FRID_N)>; +def : InstRW<[WriteFCvtF32ToF32], (instrs FRIS, FRIS_G, FRIS_P, FRIS_Z, FRIS_N)>; + +def : InstRW<[WriteFREC], (instrs FRECS)>; +def : InstRW<[WriteFREC], (instrs FRECD)>; + +def : InstRW<[WriteST], (instrs CASW, CASL)>; + +def : InstRW<[WriteLD], (instrs DPFHR, DPFHW)>; + +def : InstRW<[WriteCSR], (instrs CSRR, CSRW)>; +def : InstRW<[WriteCSR], (instrs CSRWS, CSRWC)>; + +def : InstRW<[WriteJmp], (instrs LBR)>; +} diff --git a/llvm/lib/Target/Sw64/Sw64Schedule.td b/llvm/lib/Target/Sw64/Sw64Schedule.td new file mode 100644 index 000000000000..c4331abd2058 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64Schedule.td @@ -0,0 +1,86 @@ +//===- Sw64Schedule.td - Sw64 Scheduling Definitions -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Define scheduler resources associated with def operands. 
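+// Each SchedWrite declared below is only a named resource class: the per-core
+// models (Sw64SchedCore3.td, Sw64SchedCore3SIMD.td, Sw64SchedCore4.td) bind
+// the actual latencies and pipeline ports to these names via WriteRes /
+// C3WriteRes and map instructions onto them with InstRW.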
+def WriteIALU : SchedWrite; // 32 or 64-bit integer ALU operations +def WriteSHT : SchedWrite; // 32 or 64-bit integer ALU operations +def WriteIMul : SchedWrite; // 32-bit or 64-bit multiply +def WriteIDiv : SchedWrite; // 32-bit or 64-bit divided +def WriteIRem : SchedWrite; // 32-bit or 64-bit remainder +def WriteImm : SchedWrite; // 32-bit multiply on RV64I +def WriteCrc : SchedWrite; +def WriteJmp : SchedWrite; // Jump +def WriteBR : SchedWrite; // Jump +def WriteFBR : SchedWrite; // float Jump +def WriteNop : SchedWrite; +def WriteLD : SchedWrite; // Load double-word +def WriteFLD : SchedWrite; // Load double-word +def WriteFREC : SchedWrite; +def WriteCSR : SchedWrite; // CSR instructions +def WriteST : SchedWrite; // Store byte +def WriteFST : SchedWrite; // Store byte +def WriteCNT : SchedWrite; //Atomic memory operation word size +def WriteSEL : SchedWrite; // bytes operate and selection operate +def WriteFPS : SchedWrite; //Atomic memory operation double word size +def WriteBOP : SchedWrite; //Atomic memory operation double word size +def WriteAtomicSTW : SchedWrite; // Atomic store word +def WriteAtomicSTD : SchedWrite; // Atomic store double word +def WriteFPU32 : SchedWrite; // FP 32-bit computation +def WriteFPU64 : SchedWrite; // FP 64-bit computation +def WriteFMul32 : SchedWrite; // 32-bit floating point multiply +def WriteFMulAdd32 : SchedWrite; // 32-bit floating point multiply add +def WriteFMulSub32 : SchedWrite; // 32-bit floating point multiply sub +def WriteFMul64 : SchedWrite; // 64-bit floating point multiply +def WriteFMulAdd64 : SchedWrite; // 64-bit floating point multiply add +def WriteFMulSub64 : SchedWrite; // 64-bit floating point multiply sub +def WriteFDiv32 : SchedWrite; // 32-bit floating point divide +def WriteFDiv64 : SchedWrite; // 64-bit floating point divide +def WriteFSqrt32 : SchedWrite; // 32-bit floating point sqrt +def WriteFSqrt64 : SchedWrite; // 64-bit floating point sqrt +def WriteFSEL : SchedWrite; // float selection operate +def WriteNOP : SchedWrite; // float selection operate +def WriteFCvtF32ToI32 : SchedWrite; +def WriteFCvtF32ToI64 : SchedWrite; +def WriteFCvtF64ToI32 : SchedWrite; +def WriteFCvtF64ToI64 : SchedWrite; +def WriteFCvtI32ToF32 : SchedWrite; +def WriteFCvtI32ToF64 : SchedWrite; +def WriteFCvtI64ToF32 : SchedWrite; +def WriteFCvtI64ToF64 : SchedWrite; +def WriteFMovF32ToI32 : SchedWrite; +def WriteFMovI32ToF32 : SchedWrite; +def WriteFMovF64ToI64 : SchedWrite; +def WriteFMovI64ToF64 : SchedWrite; +def WriteFCvtF32ToF64 : SchedWrite; +def WriteFCvtF64ToF32 : SchedWrite; +def WriteFCvtF64ToF64 : SchedWrite; +def WriteFCvtF32ToF32 : SchedWrite; + +def WriteAdrLD : WriteSequence<[WriteImm, WriteLD]>; +def WriteAdrAdr : WriteSequence<[WriteImm, WriteImm]>; + +def WriteFLDS : SchedWrite; +def WriteFSTDS : SchedWrite; +def WriteVEADD : SchedWrite; +def WriteVESHT2 : SchedWrite; +def WriteVESHT3 : SchedWrite; +def WriteVECNT2 : SchedWrite; +def WriteVECNT3 : SchedWrite; +def WriteVESEL : SchedWrite; +def WriteFMA2 : SchedWrite; +def WriteFMA6 : SchedWrite; +def WriteFMA17 : SchedWrite; +def WriteVPM1 : SchedWrite; +def WriteVPM2 : SchedWrite; +def WriteVCON1 : SchedWrite; +def WriteVCON2 : SchedWrite; +def WriteVSUM : SchedWrite; +def WriteVFREC : SchedWrite; +def WriteVFCT : SchedWrite; +def WriteVFRIS : SchedWrite; diff --git a/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp new file mode 100644 index 000000000000..aefaadeb9777 --- /dev/null +++ 
b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.cpp @@ -0,0 +1,54 @@ +//===-- Sw64SelectionDAGInfo.cpp - Sw64 SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64SelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "sw_64-selectiondag-info" + +SDValue Sw64SelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + unsigned SizeBitWidth = Size.getValueSizeInBits(); + // Call __memcpy_4 if the src, dst and size are all 4 byte aligned. + if (!AlwaysInline && Alignment >= Align(4) && + DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) { + const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + Entry.Node = Src; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol( + "memcpy", TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)) + .setDiscardResult(); + + std::pair CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; + } + + // Otherwise have the target-independent code call memcpy. + return SDValue(); +} diff --git a/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h new file mode 100644 index 000000000000..1d242766f5f8 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64SelectionDAGInfo.h @@ -0,0 +1,34 @@ +//===-- Sw64SelectionDAGInfo.h - Sw64 SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64 subclass for SelectionDAGTargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64SELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_SW64_SW64SELECTIONDAGINFO_H + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +class Sw64TargetMachine; + +class Sw64SelectionDAGInfo : public SelectionDAGTargetInfo { +public: + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, Align Alignment, bool isVolatile, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; +}; +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/Sw64Subtarget.cpp b/llvm/lib/Target/Sw64/Sw64Subtarget.cpp new file mode 100644 index 000000000000..96f81e041f47 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64Subtarget.cpp @@ -0,0 +1,117 @@ +//===-- Sw64Subtarget.cpp - Sw64 Subtarget Information ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Sw64 specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#include "Sw64Subtarget.h" +#include "Sw64.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/MC/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "sw_64-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "Sw64GenSubtargetInfo.inc" + +static cl::opt Sw64IntArith("sw-int-divmod", cl::init(true), + cl::desc("Enable sw64 core4 integer" + "arithmetic instructions")); + +static cl::opt Sw64IntShift("sw-shift-word", cl::init(false), + cl::desc("Enable sw64 core4 integer" + "shift instructions")); + +static cl::opt Sw64ByteInst("sw-rev", cl::init(false), + cl::desc("Enable sw64 core4 byte" + "manipulation instructions")); + +static cl::opt Sw64FloatArith("sw-recip", cl::init(true), + cl::desc("Enable sw64 core4 float" + "arithmetic instructions")); + +static cl::opt Sw64FloatRound("sw-fprnd", cl::init(false), + cl::desc("Enable sw64 core4 float" + "round instructions")); + +static cl::opt Sw64FloatCmov("sw-cmov", cl::init(true), + cl::desc("Enable sw64 core4 float" + "cmov instructions")); + +static cl::opt Sw64PostInc("sw-auto-inc-dec", cl::init(false), + cl::desc("Enable sw64 core4 post-inc" + "load and store instructions")); + +static cl::opt + Sw64CasInst("sw-use-cas", cl::init(true), + cl::desc("Enable sw64 core4 cas instructions")); + +static cl::opt + Sw64CrcInst("sw-crc32", cl::init(false), + cl::desc("Enable sw64 core4 crc32 instructions")); + +static cl::opt Sw64SCbtInst("sw-sbt-cbt", cl::init(false), + cl::desc("Enable sw64 core4 integer" + "sbt and cbt instructions")); + +static cl::opt + Sw64WmembInst("sw-wmemb", cl::init(false), + cl::desc("Enable sw64 core4 wmemb instructions")); + +static cl::opt Sw64InstMullShiftAddSub("sw64-inst-mull-shiftaddsub", + cl::init(true), + cl::desc("Inst mull optmize to" + "shift with add or sub")); + +static cl::opt Sw64InstExt("sw64-ext-opt", cl::init(false), + cl::desc("Optimize zext and sext")); + +static cl::opt Sw64InstMemset("sw64-inst-memset", cl::init(true), + cl::desc("Delete part of call memset")); + +cl::opt HasSIMD("msimd", cl::desc("Support the SIMD"), cl::init(false)); + +void 
Sw64Subtarget::anchor() {} + +Sw64Subtarget &Sw64Subtarget::initializeSubtargetDependencies(const Triple &TT, + StringRef CPU, + StringRef FS) { + std::string CPUName = std::string(CPU); + std::string TuneCPUName = std::string(CPU); + ParseSubtargetFeatures(CPUName, /*TuneCPU*/ TuneCPUName, FS); + return *this; +} + +Sw64Subtarget::Sw64Subtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM) + : Sw64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(), + Sw64OptMul(Sw64InstMullShiftAddSub), Sw64OptMemset(Sw64InstMemset), + Sw64OptExt(Sw64InstExt), + ReserveRegister(Sw64::GPRCRegClass.getNumRegs() + + Sw64::F4RCRegClass.getNumRegs() + 1), + Sw64EnableIntAri(Sw64IntArith), Sw64EnableIntShift(Sw64IntShift), + Sw64EnableByteInst(Sw64ByteInst), Sw64EnableFloatAri(Sw64FloatArith), + Sw64EnableFloatRound(Sw64FloatRound), Sw64EnableFloatCmov(Sw64FloatCmov), + Sw64EnablePostInc(Sw64PostInc), Sw64EnableCasInst(Sw64CasInst), + Sw64EnableCrcInst(Sw64CrcInst), Sw64EnableSCbtInst(Sw64SCbtInst), + Sw64EnableWmembInst(Sw64WmembInst), + FrameLowering(initializeSubtargetDependencies(TT, CPU, FS)), + TLInfo(TM, *this), TSInfo(), curgpdist(0) {} + +void Sw64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const { + Policy.OnlyBottomUp = false; + // Spilling is generally expensive on Sw64, so always enable + // register-pressure tracking. + Policy.ShouldTrackPressure = true; +} diff --git a/llvm/lib/Target/Sw64/Sw64Subtarget.h b/llvm/lib/Target/Sw64/Sw64Subtarget.h new file mode 100644 index 000000000000..fc181560f5ba --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64Subtarget.h @@ -0,0 +1,163 @@ +//===-- Sw64Subtarget.h - Define Subtarget for the Sw64 -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Sw64 specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64SUBTARGET_H +#define LLVM_LIB_TARGET_SW64_SW64SUBTARGET_H + +#include "Sw64FrameLowering.h" +#include "Sw64ISelLowering.h" +#include "Sw64InstrInfo.h" +#include "Sw64SelectionDAGInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +#include +using namespace llvm; +extern cl::opt Sw64Mieee; +extern cl::opt Sw64DeleteNop; + +extern cl::opt HasSIMD; + +#define GET_SUBTARGETINFO_HEADER +#include "Sw64GenSubtargetInfo.inc" + +namespace llvm { +class StringRef; + +class Sw64Subtarget : public Sw64GenSubtargetInfo { + virtual void anchor(); + + enum Sw64ArchEnum { sw64 = 0, swTarch, core3b, core4 }; + + bool isCore3b; + bool isCore4; + bool relax; + bool Ev; + + bool Sw64OptMul; + + bool Sw64OptMemset; + + bool Sw64OptExt; + + bool Sw64EnableIntAri; + bool Sw64EnableIntShift; + bool Sw64EnableByteInst; + bool Sw64EnableFloatAri; + bool Sw64EnableFloatRound; + bool Sw64EnableFloatCmov; + bool Sw64EnablePostInc; + bool Sw64EnableCasInst; + bool Sw64EnableCrcInst; + bool Sw64EnableSCbtInst; + bool Sw64EnableWmembInst; + bool Misaligned256StoreIsSlow = false; + uint8_t MaxInterleaveFactor = 2; + unsigned WideningBaseCost = 0; + + Sw64InstrInfo InstrInfo; + // ReserveRegister[i] - #i is not available as a general purpose register. + BitVector ReserveRegister; + Sw64FrameLowering FrameLowering; + Sw64TargetLowering TLInfo; + Sw64SelectionDAGInfo TSInfo; + + bool HasCT; + bool Is64Bit = true; + + Sw64ArchEnum Sw64ArchVersion; + +public: + mutable int curgpdist; + // This constructor initializes the data members to match that + // of the specified triple. + Sw64Subtarget &initializeSubtargetDependencies(const Triple &TT, + StringRef CPU, StringRef FS); + + Sw64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, + const TargetMachine &TM); + + bool hasMieee() const { return Sw64Mieee; } + bool hasDeleteNop() const { return Sw64DeleteNop; } + + int &getCurgpdist() const { return curgpdist; } + void setCurgpdist(int &count) { curgpdist = count; } + bool hasSIMD() const { return HasSIMD; } + + unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } + unsigned getWideningBaseCost() const { return WideningBaseCost; } + bool isMisaligned256StoreSlow() const { return Misaligned256StoreIsSlow; } + + // ParseSubtargetFeatures - Parses features string setting specified + // subtarget options. Definition of function is auto generated by tblgen. 
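+  // The generated body is pulled into Sw64Subtarget.cpp by including
+  // Sw64GenSubtargetInfo.inc with GET_SUBTARGETINFO_CTOR defined.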
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + + const Sw64InstrInfo *getInstrInfo() const override { return &InstrInfo; } + const Sw64FrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const Sw64TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const Sw64SelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + const Sw64RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + AntiDepBreakMode getAntiDepBreakMode() const override { + return TargetSubtargetInfo::ANTIDEP_CRITICAL; + } + + // TODO: enable PostRAscheduler for test + bool enablePostRAScheduler() const { return true; } + + bool enableMachineScheduler() const { return true; } + + bool is64Bit() const { return true; } + bool hasCore3b() const { return Sw64ArchVersion == core3b; } +#ifdef SW64_DEFAULT_ARCH_CORE3 + bool hasCore4() const { return Sw64ArchVersion == core4; } +#else + bool hasCore4() const { return true; } +#endif + bool enRelax() const { return relax; } + bool hasEv() const { return Ev; } + bool hasCT() const { return HasCT; } + bool isRegisterReserved(size_t i) const { return ReserveRegister[i]; } + + bool enOptMul() const { return Sw64OptMul; } + + bool enOptMemset() const { return Sw64OptMemset; } + + bool enOptExt() const { return Sw64OptExt; } + + bool enableIntAri() const { return Sw64EnableIntAri; } + bool enableIntShift() const { return Sw64EnableIntShift; } + bool enableByteInst() const { return Sw64EnableByteInst; } + bool enableFloatAri() const { return Sw64EnableFloatAri; } + bool enableFloatRound() const { return Sw64EnableFloatRound; } + bool enableFloatCmov() const { return Sw64EnableFloatCmov; } + bool enablePostInc() const { return Sw64EnablePostInc; } + bool enableCasInst() const { return Sw64EnableCasInst; } + bool enableCrcInst() const { return Sw64EnableCrcInst; } + bool enableSCbtInst() const { return Sw64EnableSCbtInst; } + bool enableWmembInst() const { return Sw64EnableWmembInst; } + + void overrideSchedPolicy(MachineSchedPolicy &Policy, + unsigned NumRegionInstrs) const; +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64TargetMachine.cpp b/llvm/lib/Target/Sw64/Sw64TargetMachine.cpp new file mode 100644 index 000000000000..d11b61e7dbc4 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetMachine.cpp @@ -0,0 +1,193 @@ +//===-- Sw64TargetMachine.cpp - Define TargetMachine for Sw64 -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetMachine.h" +#include "MCTargetDesc/Sw64MCTargetDesc.h" +#include "Sw64.h" +#include "Sw64MachineFunctionInfo.h" +#include "Sw64MacroFusion.h" +#include "Sw64TargetObjectFile.h" +#include "Sw64TargetTransformInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Transforms/Scalar.h" +#include + +using namespace llvm; + +static cl::opt EnableMCR("sw_64-enable-mcr", + cl::desc("Enable the machine combiner pass"), + cl::init(true), cl::Hidden); + +static cl::opt + EnablePrefetch("enable-sw64-prefetching", + cl::desc("Enable software prefetching on SW64"), + cl::init(true), cl::Hidden); + +cl::opt FS_LOAD("fastload", + cl::desc("Enable fast/load optimize(developing)"), + cl::init(false), cl::Hidden); + +static Reloc::Model getEffectiveRelocModel(const Triple &TT, + std::optional RM) { + if (!RM) + return Reloc::Static; + return *RM; +} + +static CodeModel::Model +getEffectiveSw64CodeModel(std::optional CM) { + if (CM) { + if (*CM != CodeModel::Small && *CM != CodeModel::Medium && + *CM != CodeModel::Large) + report_fatal_error( + "Target only supports CodeModel Small, Medium or Large"); + return *CM; + } + return CodeModel::Small; +} + +// Create an ILP32 architecture model +Sw64TargetMachine::Sw64TargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + std::optional RM, + std::optional CM, + CodeGenOpt::Level OL, bool JIT) + : LLVMTargetMachine( + T, + "e-m:e-p:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n64-S128-v256:256", + TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), + getEffectiveSw64CodeModel(CM), OL), + TLOF(std::make_unique()), + ABI(Sw64ABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)), + Subtarget(TT, std::string(CPU), std::string(FS), *this) { + initAsmInfo(); +} + +Sw64TargetMachine::~Sw64TargetMachine() = default; + +namespace { + +// Sw64 Code Generator Pass Configuration Options. +class Sw64PassConfig : public TargetPassConfig { +public: + Sw64PassConfig(Sw64TargetMachine &TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) { + if (TM.getOptLevel() != CodeGenOpt::None) + substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); + } + + Sw64TargetMachine &getSw64TargetMachine() const { + return getTM(); + } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + DAG->addMutation(createSw64MacroFusionDAGMutation()); + return DAG; + } + + ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMI *DAG = createGenericSchedPostRA(C); + DAG->addMutation(createSw64MacroFusionDAGMutation()); + return DAG; + } + + void addIRPasses() override; + bool addILPOpts() override; + bool addInstSelector() override; + void addPreSched2() override; + void addPreEmitPass() override; + void addPreRegAlloc() override; + void addPreLegalizeMachineIR() override; + // for Inst Selector. 
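+  // GlobalISel hook; the override below simply schedules the generic
+  // InstructionSelect pass.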
+ bool addGlobalInstructionSelect() override; +}; + +} // end anonymous namespace + +TargetPassConfig *Sw64TargetMachine::createPassConfig(PassManagerBase &PM) { + return new Sw64PassConfig(*this, PM); +} + +void Sw64PassConfig::addIRPasses() { + addPass(createAtomicExpandPass()); + + if (EnablePrefetch) + addPass(createLoopDataPrefetchPass()); + + TargetPassConfig::addIRPasses(); +} + +void Sw64PassConfig::addPreLegalizeMachineIR() { + addPass(createSw64PreLegalizeCombiner()); +} + +void Sw64PassConfig::addPreSched2() { addPass(createSw64ExpandPseudo2Pass()); } + +bool Sw64PassConfig::addInstSelector() { + addPass(createSw64ISelDag(getSw64TargetMachine(), getOptLevel())); + return false; +} + +void Sw64PassConfig::addPreRegAlloc() { + addPass(createSw64IEEEConstraintPass()); +} + +void Sw64PassConfig::addPreEmitPass() { + addPass(createSw64BranchSelection()); + addPass(createSw64LLRPPass(getSw64TargetMachine())); + addPass(createSw64ExpandPseudoPass()); +} + +bool Sw64PassConfig::addILPOpts() { + + if (EnableMCR) + addPass(&MachineCombinerID); + + return true; +} + +bool Sw64PassConfig::addGlobalInstructionSelect() { + addPass(new InstructionSelect()); + return false; +} + +// Force static initialization. +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64Target() { + RegisterTargetMachine X(getTheSw64Target()); + + PassRegistry *PR = PassRegistry::getPassRegistry(); + initializeSw64BranchSelectionPass(*PR); + initializeSw64PreLegalizerCombinerPass(*PR); + initializeSw64DAGToDAGISelPass(*PR); +} + +TargetTransformInfo +Sw64TargetMachine::getTargetTransformInfo(const Function &F) const { + return TargetTransformInfo(Sw64TTIImpl(this, F)); +} + +MachineFunctionInfo *Sw64TargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return Sw64MachineFunctionInfo::create(Allocator, F, + STI); +} diff --git a/llvm/lib/Target/Sw64/Sw64TargetMachine.h b/llvm/lib/Target/Sw64/Sw64TargetMachine.h new file mode 100644 index 000000000000..40e34b131a42 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetMachine.h @@ -0,0 +1,61 @@ +//===-- Sw64TargetMachine.h - Define TargetMachine for Sw64 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Sw64 specific subclass of TargetMachine. 
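+// Clients normally do not construct this class directly. Once
+// LLVMInitializeSw64Target() has registered the target, a tool or JIT reaches
+// it through the registry, roughly like the following sketch (illustrative
+// only; TripleStr and the empty CPU/feature strings are placeholders, not
+// values defined by this patch):
+//
+//   std::string Err;
+//   const Target *T = TargetRegistry::lookupTarget(TripleStr, Err);
+//   TargetMachine *TM =
+//       T->createTargetMachine(TripleStr, /*CPU=*/"", /*Features=*/"",
+//                              TargetOptions(), /*RM=*/std::nullopt);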
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETMACHINE_H +#define LLVM_LIB_TARGET_SW64_SW64TARGETMACHINE_H + +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "Sw64Subtarget.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" +#include +#include + +namespace llvm { + +class Sw64TargetMachine : public LLVMTargetMachine { + std::unique_ptr TLOF; + Sw64ABIInfo ABI; + Sw64Subtarget Subtarget; + +public: + Sw64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + std::optional RM, + std::optional CM, CodeGenOpt::Level OL, + bool JIT); + ~Sw64TargetMachine() override; + + const Sw64ABIInfo &getABI() const { return ABI; } + const Sw64Subtarget *getSubtargetImpl() const { return &Subtarget; } + const Sw64Subtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + + // Pass Pipeline Configuration + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; + + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } +}; + +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp new file mode 100644 index 000000000000..545eccc94202 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.cpp @@ -0,0 +1,121 @@ +//===-- Sw64TargetObjectFile.cpp - Sw64 object files --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetObjectFile.h" +#include "Sw64Subtarget.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; +void Sw64TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); + + SmallDataSection = getContext().getELFSection( + ".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC); + SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | ELF::SHF_ALLOC); + // TextSection - see MObjectFileInfo.cpp + // StaticCtorSection - see MObjectFileInfo.cpp + // StaticDtorSection - see MObjectFileInfo.cpp +} +// A address must be loaded from a small section if its size is less than the +// small section size threshold. Data in this section could be addressed by +// using gp_rel operator. +bool Sw64TargetObjectFile::isInSmallSection(uint64_t Size) const { + // gcc has traditionally not treated zero-sized objects as small data, so this + // is effectively part of the ABI. + return Size > 0 && Size <= SSThreshold; +} + +// Return true if this global address should be placed into small data/bss +// section. +bool Sw64TargetObjectFile::isGlobalInSmallSection( + const GlobalObject *GO, const TargetMachine &TM) const { + // Only global variables, not functions. 
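+  // Function symbols never qualify, so anything that is not a GlobalVariable
+  // is rejected before linkage, explicit sections or size are considered.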
+ const GlobalVariable *GVA = dyn_cast(GO); + if (!GVA) + return false; + + // If the variable has an explicit section, it is placed in that section. + if (GVA->hasSection()) { + StringRef Section = GVA->getSection(); + + // Explicitly placing any variable in the small data section overrides + // the global -G value. + if (Section == ".sdata" || Section == ".sbss") + return true; + + // Otherwise reject putting the variable to small section if it has an + // explicit section name. + return false; + } + + if (((GVA->hasExternalLinkage() && GVA->isDeclaration()) || + GVA->hasCommonLinkage())) + return false; + + Type *Ty = GVA->getValueType(); + // It is possible that the type of the global is unsized, i.e. a declaration + // of a extern struct. In this case don't presume it is in the small data + // section. This happens e.g. when building the FreeBSD kernel. + if (!Ty->isSized()) + return false; + + return isInSmallSection( + GVA->getParent()->getDataLayout().getTypeAllocSize(Ty)); +} + +MCSection *Sw64TargetObjectFile::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + // Handle Small Section classification here. + if (Kind.isBSS() && isGlobalInSmallSection(GO, TM)) + return SmallBSSSection; + if (Kind.isData() && isGlobalInSmallSection(GO, TM)) + return SmallDataSection; + if (Kind.isReadOnly()) + return GO->hasLocalLinkage() ? ReadOnlySection : DataRelROSection; + + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM); +} + +void Sw64TargetObjectFile::getModuleMetadata(Module &M) { + SmallVector ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + + for (const auto &MFE : ModuleFlags) { + StringRef Key = MFE.Key->getString(); + if (Key == "SmallDataLimit") { + SSThreshold = mdconst::extract(MFE.Val)->getZExtValue(); + break; + } + } +} + +// Return true if this constant should be placed into small data section. +bool Sw64TargetObjectFile::isConstantInSmallSection(const DataLayout &DL, + const Constant *CN) const { + return isInSmallSection(DL.getTypeAllocSize(CN->getType())); +} + +MCSection *Sw64TargetObjectFile::getSectionForConstant(const DataLayout &DL, + SectionKind Kind, + const Constant *C, + Align &Alignment) const { + if (isConstantInSmallSection(DL, C)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C, + Alignment); +} diff --git a/llvm/lib/Target/Sw64/Sw64TargetObjectFile.h b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.h new file mode 100644 index 000000000000..0bae78a8106b --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetObjectFile.h @@ -0,0 +1,49 @@ +//===-- Sw64TargetObjectFile.h - Sw64 Object Info -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
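+// The small-data threshold defaults to 8 bytes (SSThreshold below) and can be
+// raised or lowered per module through the "SmallDataLimit" module flag read
+// by getModuleMetadata(); a front end would typically emit that flag with
+// Module::addModuleFlag (an assumption about the producer, not something this
+// file checks).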
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_SW64_SW64TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { + +static const unsigned CodeModelLargeSize = 256; + +class Sw64TargetObjectFile : public TargetLoweringObjectFileELF { + MCSection *BSSSectionLarge; + MCSection *DataSectionLarge; + MCSection *ReadOnlySectionLarge; + MCSection *DataRelROSectionLarge; + MCSection *SmallDataSection; + MCSection *SmallBSSSection; + unsigned SSThreshold = 8; + +public: + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + /// Return true if this global address should be placed into small data/bss + /// section. + bool isGlobalInSmallSection(const GlobalObject *GO, + const TargetMachine &TM) const; + + MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, + const TargetMachine &TM) const override; + + /// Return true if this constant should be placed into small data section. + bool isConstantInSmallSection(const DataLayout &DL, const Constant *CN) const; + + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, + const Constant *C, + Align &Alignment) const override; + + void getModuleMetadata(Module &M) override; + + bool isInSmallSection(uint64_t Size) const; +}; +} // end namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64TargetStreamer.h b/llvm/lib/Target/Sw64/Sw64TargetStreamer.h new file mode 100644 index 000000000000..884c03d97eb5 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetStreamer.h @@ -0,0 +1,150 @@ +//===-- Sw64TargetStreamer.h - Sw64 Target Streamer ------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETSTREAMER_H +#define LLVM_LIB_TARGET_SW64_SW64TARGETSTREAMER_H + +#include "MCTargetDesc/Sw64ABIFlagsSection.h" +#include "MCTargetDesc/Sw64ABIInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/FormattedStream.h" +#include + +namespace llvm { + +struct Sw64ABIFlagsSection; + +class Sw64TargetStreamer : public MCTargetStreamer { +public: + Sw64TargetStreamer(MCStreamer &S); + + virtual void setPic(bool Value) {} + + virtual void emitDirectiveSetReorder(); + virtual void emitDirectiveSetNoReorder(); + virtual void emitDirectiveSetMacro(); + virtual void emitDirectiveSetNoMacro(); + virtual void emitDirectiveSetAt(); + virtual void emitDirectiveSetNoAt(); + virtual void emitDirectiveEnd(StringRef Name); + + virtual void emitDirectiveEnt(const MCSymbol &Symbol); + virtual void emitDirectiveNaN2008(); + virtual void emitDirectiveNaNLegacy(); + virtual void emitDirectiveInsn(); + virtual void emitDirectiveSetCore3b(); + virtual void emitDirectiveSetCore4(); + virtual void emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg); + virtual void emitDirectiveSetArch(StringRef Arch); + + void prettyPrintAsm(MCInstPrinter &InstPrinter, uint64_t Address, + const MCInst &Inst, const MCSubtargetInfo &STI, + raw_ostream &OS) override; + + void emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI); + + void forbidModuleDirective() { ModuleDirectiveAllowed = false; } + void reallowModuleDirective() { ModuleDirectiveAllowed = true; } + bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; } + + // This method enables template classes to set internal abi flags + // structure values. 
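+  // PredicateLibrary is whatever object drives the streamer (for example the
+  // assembly parser); the only requirements are a getABI() accessor and the
+  // predicates consumed by Sw64ABIFlagsSection::setAllFromPredicates().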
+ template + void updateABIInfo(const PredicateLibrary &P) { + ABI = P.getABI(); + ABIFlagsSection.setAllFromPredicates(P); + } + + Sw64ABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; } + const Sw64ABIInfo &getABI() const { + assert(ABI && "ABI hasn't been set!"); + return *ABI; + } + +protected: + std::optional ABI; + Sw64ABIFlagsSection ABIFlagsSection; + + bool GPRInfoSet; + unsigned GPRBitMask; + int GPROffset; + + bool FPRInfoSet; + unsigned FPRBitMask; + int FPROffset; + + bool FrameInfoSet; + int FrameOffset; + unsigned FrameReg; + unsigned ReturnReg; + +private: + bool ModuleDirectiveAllowed; +}; + +// This part is for ascii assembly output +class Sw64TargetAsmStreamer : public Sw64TargetStreamer { + formatted_raw_ostream &OS; + +public: + Sw64TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); + + void emitDirectiveSetReorder() override; + void emitDirectiveSetNoReorder() override; + void emitDirectiveSetMacro() override; + void emitDirectiveSetNoMacro() override; + void emitDirectiveSetAt() override; + void emitDirectiveSetNoAt() override; + void emitDirectiveEnd(StringRef Name) override; + + void emitDirectiveEnt(const MCSymbol &Symbol) override; + void emitDirectiveNaN2008() override; + void emitDirectiveNaNLegacy() override; + void emitDirectiveInsn() override; + void emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) override; + void emitDirectiveSetCore3b() override; + void emitDirectiveSetCore4() override; + + void emitDirectiveSetArch(StringRef Arch) override; +}; + +// This part is for ELF object output +class Sw64TargetELFStreamer : public Sw64TargetStreamer { + bool MicroSw64Enabled; + const MCSubtargetInfo &STI; + bool Pic; + +public: + MCELFStreamer &getStreamer(); + Sw64TargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); + + void setPic(bool Value) override { Pic = Value; } + + void emitLabel(MCSymbol *Symbol) override; + void finish() override; + + void emitDirectiveSetNoReorder() override; + + void emitDirectiveEnt(const MCSymbol &Symbol) override; + void emitDirectiveNaN2008() override; + void emitDirectiveNaNLegacy() override; + void emitDirectiveInsn() override; + void emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) override; + + void emitSw64AbiFlags(); +}; +} // namespace llvm +#endif diff --git a/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp new file mode 100644 index 000000000000..44c98b98fa01 --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.cpp @@ -0,0 +1,787 @@ +//===-- Sw64TargetTransformInfo.cpp - Sw64-specific TTI -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a TargetTransformInfo analysis pass specific to the +// Sw64 target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. 
+// +//===----------------------------------------------------------------------===// + +#include "Sw64TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/CodeGen/CostTable.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +#define DEBUG_TYPE "sw64tti" + +//===----------------------------------------------------------------------===// +// +// Sw64 cost model. +// +//===----------------------------------------------------------------------===// + +InstructionCost Sw64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented + // yet. + if (BitSize > 64) + return TTI::TCC_Free; + + if (Imm == 0) + return TTI::TCC_Free; + + if (Imm.getBitWidth() <= 64) { + // Constants loaded via lgfi. + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Basic; + // Constants loaded via llihf: + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Basic; + + return 2 * TTI::TCC_Basic; + } + + return 4 * TTI::TCC_Basic; +} + +InstructionCost Sw64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented + // yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (Opcode) { + default: + return TTI::TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. This prevents the + // creation of new constants for every base constant that gets constant + // folded with the offset. + if (Idx == 0) + return 2 * TTI::TCC_Basic; + return TTI::TCC_Free; + case Instruction::Store: + return TTI::TCC_Basic; + case Instruction::ICmp: + case Instruction::Add: + case Instruction::Sub: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // We use algfi/slgfi to add/subtract 32-bit unsigned immediates. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Or their negation, by swapping addition vs. subtraction. + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Instruction::Mul: + case Instruction::Or: + case Instruction::Xor: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Masks supported by oilf/xilf. + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Masks supported by oihf/xihf. + if ((Imm.getZExtValue() & 0xffffffff) == 0) + return TTI::TCC_Free; + } + break; + case Instruction::And: + if (Idx == 1 && Imm.getBitWidth() <= 64) { + // Any 32-bit AND operation can by implemented via nilf. + if (BitSize <= 32) + return TTI::TCC_Free; + // 64-bit masks supported by nilf. + if (isUInt<32>(~Imm.getZExtValue())) + return TTI::TCC_Free; + // 64-bit masks supported by nilh. 
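+      // i.e. an immediate whose low 32 bits are all ones can only clear bits
+      // in the upper half (for example 0x00ff0000ffffffff).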
+ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff) + return TTI::TCC_Free; + } + break; + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + break; + } + + return Sw64TTIImpl::getIntImmCost(Imm, Ty, CostKind); +} + +InstructionCost Sw64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, + unsigned Idx, const APInt &Imm, + Type *Ty, + TTI::TargetCostKind CostKind) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented + // yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (IID) { + default: + return TTI::TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + // These get expanded to include a normal addition/subtraction. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // These get expanded to include a normal multiplication. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + } + return Sw64TTIImpl::getIntImmCost(Imm, Ty, CostKind); +} + +bool Sw64TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2) { + // check instruction count (first), and don't care about + // ImmCost, since offsets are checked explicitly. + return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, + C1.NumBaseAdds, C1.ScaleCost, C1.SetupCost) < + std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, + C2.NumBaseAdds, C2.ScaleCost, C2.SetupCost); +} + +unsigned Sw64TTIImpl::getNumberOfRegisters(bool Vector) { + if (Vector) { + return 0; + } + return 12; +} + +bool Sw64TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { + EVT VT = TLI->getValueType(DL, DataType); + return (VT.isScalarInteger() && TLI->isTypeLegal(VT)); +} + +void Sw64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { + // Find out if L contains a call, what the machine instruction count + // estimate is, and how many stores there are. 
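+  // The store count matters because the unroll factor chosen below is capped
+  // at roughly 12 / NumStores so the processor does not run out of store
+  // tags; memcpy/memset intrinsics are counted as stores for the same reason.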
+ bool HasCall = false; + InstructionCost NumStores = 0; + for (auto &BB : L->blocks()) + for (auto &I : *BB) { + if (isa(&I) || isa(&I)) { + if (const Function *F = cast(I).getCalledFunction()) { + if (isLoweredToCall(F)) + HasCall = true; + if (F->getIntrinsicID() == Intrinsic::memcpy || + F->getIntrinsicID() == Intrinsic::memset) + NumStores++; + } else { // indirect call. + HasCall = true; + } + } + if (isa(&I)) { + Type *MemAccessTy = I.getOperand(0)->getType(); + NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, + std::nullopt, 0, TTI::TCK_RecipThroughput); + } + } + + // The processor will run out of store tags if too many stores + // are fed into it too quickly. Therefore make sure there are not + // too many stores in the resulting unrolled loop. + unsigned const NumStoresVal = *NumStores.getValue(); + unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX); + + if (HasCall) { + // Only allow full unrolling if loop has any calls. + UP.FullUnrollMaxCount = Max; + UP.MaxCount = 1; + return; + } + + UP.MaxCount = Max; + if (UP.MaxCount <= 1) + return; + + // Allow partial and runtime trip count unrolling. + UP.Partial = UP.Runtime = true; + + UP.PartialThreshold = 75; + if (L->getLoopDepth() > 1) + UP.PartialThreshold *= 2; + + UP.DefaultUnrollRuntimeCount = 4; + + // Allow expensive instructions in the pre-header of the loop. + UP.AllowExpensiveTripCount = true; + UP.UnrollAndJam = true; + + UP.Force = true; +} + +// Return the bit size for the scalar type or vector element +// type. getScalarSizeInBits() returns 0 for a pointer type. +static unsigned getScalarSizeInBits(Type *Ty) { + unsigned Size = (Ty->isPtrOrPtrVectorTy() ? 64U : Ty->getScalarSizeInBits()); + assert(Size > 0 && "Element must have non-zero size."); + return Size; +} + +// getNumberOfParts() calls getTypeLegalizationCost() which splits the vector +// type until it is legal. This would e.g. return 4 for <6 x i64>, instead of +// 3. +static unsigned getNumVectorRegs(Type *Ty) { return 0; } + +unsigned Sw64TTIImpl::getMaxInterleaveFactor(ElementCount VF) { + return ST->getMaxInterleaveFactor(); +} + +TypeSize Sw64TTIImpl::getRegisterBitWidth(bool Vector) const { + return TypeSize::getFixed(64); +} + +unsigned Sw64TTIImpl::getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind, + const Instruction *I) { + if (CostKind != TTI::TCK_RecipThroughput) + return Opcode == Instruction::PHI ? 0 : 1; + assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind"); + // Branches are assumed to be predicted. + return 0; +} + +bool Sw64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, + ArrayRef Args) { + + // A helper that returns a vector type from the given type. The number of + // elements in type Ty determine the vector width. + auto toVectorTy = [&](Type *ArgTy) { + return FixedVectorType::get(ArgTy->getScalarType(), + cast(DstTy)->getNumElements()); + }; + + // Exit early if DstTy is not a vector type whose elements are at least + // 16-bits wide. + if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16) + return false; + + // Determine if the operation has a widening variant. We consider both the + // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the + // instructions. + // + // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we + // verify that their extending operands are eliminated during code + // generation. + switch (Opcode) { + case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2). 
+ case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2). + break; + default: + return false; + } + + // To be a widening instruction (either the "wide" or "long" versions), the + // second operand must be a sign- or zero extend having a single user. We + // only consider extends having a single user because they may otherwise not + // be eliminated. + if (Args.size() != 2 || + (!isa(Args[1]) && !isa(Args[1])) || + !Args[1]->hasOneUse()) + return false; + auto *Extend = cast(Args[1]); + + // Legalize the destination type and ensure it can be used in a widening + // operation. + auto DstTyL = getTypeLegalizationCost(DstTy); + unsigned DstElTySize = DstTyL.second.getScalarSizeInBits(); + if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits()) + return false; + + // Legalize the source type and ensure it can be used in a widening + // operation. + auto *SrcTy = toVectorTy(Extend->getSrcTy()); + auto SrcTyL = getTypeLegalizationCost(SrcTy); + unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits(); + if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits()) + return false; + + // Get the total number of vector elements in the legalized types. + InstructionCost NumDstEls = + DstTyL.first * DstTyL.second.getVectorMinNumElements(); + InstructionCost NumSrcEls = + SrcTyL.first * SrcTyL.second.getVectorMinNumElements(); + + // Return true if the legalized types have the same number of vector elements + // and the destination element type size is twice that of the source type. + return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize; +} + +InstructionCost Sw64TTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, + ArrayRef Args, const Instruction *CxtI) { + // TODO: Handle more cost kinds. + if (CostKind != TTI::TCK_RecipThroughput) + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, + Args, CxtI); + + // Legalize the type. + std::pair LT = getTypeLegalizationCost(Ty); + + // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.), + // add in the widening overhead specified by the sub-target. Since the + // extends feeding widening instructions are performed automatically, they + // aren't present in the generated code and have a zero cost. By adding a + // widening overhead here, we attach the total cost of the combined operation + // to the widening instruction. + InstructionCost Cost = 0; + if (isWideningInstruction(Ty, Opcode, Args)) + Cost += ST->getWideningBaseCost(); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + + switch (ISD) { + default: + return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info); + case ISD::SDIV: + if (Op2Info.isConstant() && Op2Info.isUniform() && Op2Info.isPowerOf2()) { + // On Sw64, scalar signed division by constants power-of-two are + // normally expanded to the sequence ADD + CMP + SELECT + SRA. + // The OperandValue properties many not be same as that of previous + // operation; conservatively assume OP_None. 
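+      // Worked example (illustrative): x sdiv 8 becomes
+      //   x < 0 ? (x + 7) >> 3 : x >> 3
+      // i.e. compare the sign, select the biased value, then shift right
+      // arithmetically; the scalar operation costs summed below stand for
+      // that sequence.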
+ Cost += + getArithmeticInstrCost(Instruction::Add, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + Cost += + getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + Cost += + getArithmeticInstrCost(Instruction::Select, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + Cost += + getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + return Cost; + } + [[fallthrough]]; + case ISD::UDIV: + if (Op2Info.isConstant() && Op2Info.isUniform()) { + auto VT = TLI->getValueType(DL, Ty); + if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) { + // Vector signed division by constant are expanded to the + // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division + // to MULHS + SUB + SRL + ADD + SRL. + InstructionCost MulCost = + getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + InstructionCost AddCost = + getArithmeticInstrCost(Instruction::Add, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + InstructionCost ShrCost = + getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, + Op1Info.getNoProps(), Op2Info.getNoProps()); + return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1; + } + } + + Cost += + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); + if (Ty->isVectorTy()) { + // On Sw64, vector divisions are not supported natively and are + // expanded into scalar divisions of each pair of elements. + Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind, + Op1Info, Op2Info); + Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind, + Op1Info, Op2Info); + // TODO: if one of the arguments is scalar, then it's not necessary to + // double the cost of handling the vector elements. + Cost += Cost; + } + return Cost; + + case ISD::ADD: + case ISD::MUL: + case ISD::XOR: + case ISD::OR: + case ISD::AND: + // These nodes are marked as 'custom' for combining purposes only. + // We know that they are legal. See LowerAdd in ISelLowering. + return (Cost + 1) * LT.first; + + case ISD::FADD: + // These nodes are marked as 'custom' just to lower them to SVE. + // We know said lowering will incur no additional cost. + if (isa(Ty) && !Ty->getScalarType()->isFP128Ty()) + return (Cost + 2) * LT.first; + + return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info); + } +} +InstructionCost Sw64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, + VectorType *Tp, ArrayRef Mask, + TTI::TargetCostKind CostKind, + int Index, VectorType *SubTp, + ArrayRef Args) { + Kind = improveShuffleKindFromMask(Kind, Mask); + return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); +} +// Return the log2 difference of the element sizes of the two vector types. +static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) { + unsigned Bits0 = Ty0->getScalarSizeInBits(); + unsigned Bits1 = Ty1->getScalarSizeInBits(); + + if (Bits1 > Bits0) + return (Log2_32(Bits1) - Log2_32(Bits0)); + + return (Log2_32(Bits0) - Log2_32(Bits1)); +} + +// Return the number of instructions needed to truncate SrcTy to DstTy. +unsigned Sw64TTIImpl::getVectorTruncCost(Type *SrcTy, Type *DstTy) { return 1; } + +// Return the cost of converting a vector bitmask produced by a compare +// (SrcTy), to the type of the select or extend instruction (DstTy). 
+unsigned Sw64TTIImpl::getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
+  assert(SrcTy->isVectorTy() && DstTy->isVectorTy() &&
+         "Should only be called with vector types.");
+
+  unsigned PackCost = 0;
+  unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
+  unsigned DstScalarBits = DstTy->getScalarSizeInBits();
+  unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+  if (SrcScalarBits > DstScalarBits)
+    // The bitmask will be truncated.
+    PackCost = getVectorTruncCost(SrcTy, DstTy);
+  else if (SrcScalarBits < DstScalarBits) {
+    unsigned DstNumParts = getNumVectorRegs(DstTy);
+    // Each vector select needs its part of the bitmask unpacked.
+    PackCost = Log2Diff * DstNumParts;
+    // Extra cost for moving part of mask before unpacking.
+    PackCost += DstNumParts - 1;
+  }
+
+  return PackCost;
+}
+
+// Return the type of the compared operands. This is needed to compute the
+// cost for a Select / ZExt or SExt instruction.
+static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
+  Type *OpTy = nullptr;
+  if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
+    OpTy = CI->getOperand(0)->getType();
+  else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
+    if (LogicI->getNumOperands() == 2)
+      if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
+        if (isa<CmpInst>(LogicI->getOperand(1)))
+          OpTy = CI0->getOperand(0)->getType();
+
+  return OpTy;
+}
+
+unsigned Sw64TTIImpl::getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
+                                                    const Instruction *I) {
+  unsigned Cost = 0;
+  return Cost;
+}
+
+InstructionCost Sw64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+                                              Type *Src,
+                                              TTI::CastContextHint CCH,
+                                              TTI::TargetCostKind CostKind,
+                                              const Instruction *I) {
+  // FIXME: Can the logic below also be used for these cost kinds?
+  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
+    auto BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+    return BaseCost == 0 ? BaseCost : 1;
+  }
+
+  unsigned DstScalarBits = Dst->getScalarSizeInBits();
+  unsigned SrcScalarBits = Src->getScalarSizeInBits();
+
+  if (!Src->isVectorTy()) {
+    assert(!Dst->isVectorTy());
+
+    if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
+      if (SrcScalarBits >= 32 ||
+          (I != nullptr && isa<LoadInst>(I->getOperand(0))))
+        return 1;
+      return SrcScalarBits > 1 ? 2 /*i8/i16 extend*/ : 5 /*branch seq.*/;
+    }
+
+    if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+        Src->isIntegerTy(1)) {
+
+      // This should be extension of a compare i1 result, which is done with
+      // ipm and a varying sequence of instructions.
+      unsigned Cost = 0;
+      if (Opcode == Instruction::SExt)
+        Cost = (DstScalarBits < 64 ? 3 : 4);
+      if (Opcode == Instruction::ZExt)
+        Cost = 3;
+      Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+      if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+        // If operands of an fp-type were compared, this costs +1.
+        Cost++;
+      return Cost;
+    }
+  }
+
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+}
+
+// Scalar i8 / i16 operations will typically be made after first extending
+// the operands to i32.
+static unsigned getOperandsExtensionCost(const Instruction *I) {
+  unsigned ExtCost = 0;
+  for (Value *Op : I->operands())
+    // A load of i8 or i16 sign/zero extends to i32.
+ if (!isa(Op) && !isa(Op)) + ExtCost++; + + return ExtCost; +} + +InstructionCost Sw64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) { + if (CostKind != TTI::TCK_RecipThroughput) + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); + + if (!ValTy->isVectorTy()) { + switch (Opcode) { + case Instruction::ICmp: { + // A loaded value compared with 0 with multiple users becomes Load and + // Test. The load is then not foldable, so return 0 cost for the ICmp. + unsigned ScalarBits = ValTy->getScalarSizeInBits(); + if (I != nullptr && ScalarBits >= 32) + if (LoadInst *Ld = dyn_cast(I->getOperand(0))) + if (const ConstantInt *C = dyn_cast(I->getOperand(1))) + if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() && + C->isZero()) + return 0; + + unsigned Cost = 1; + if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16) + Cost += (I != nullptr ? getOperandsExtensionCost(I) : 2); + return Cost; + } + case Instruction::Select: + if (ValTy->isFloatingPointTy()) + return 4; // No load on condition for FP - costs a conditional jump. + return 1; // Load On Condition / Select Register. + } + } + + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); +} + +// Check if a load may be folded as a memory operand in its user. +bool Sw64TTIImpl::isFoldableLoad(const LoadInst *Ld, + const Instruction *&FoldedValue) { + if (!Ld->hasOneUse()) + return false; + FoldedValue = Ld; + const Instruction *UserI = cast(*Ld->user_begin()); + unsigned LoadedBits = getScalarSizeInBits(Ld->getType()); + unsigned TruncBits = 0; + unsigned SExtBits = 0; + unsigned ZExtBits = 0; + if (UserI->hasOneUse()) { + unsigned UserBits = UserI->getType()->getScalarSizeInBits(); + if (isa(UserI)) + TruncBits = UserBits; + else if (isa(UserI)) + SExtBits = UserBits; + else if (isa(UserI)) + ZExtBits = UserBits; + } + if (TruncBits || SExtBits || ZExtBits) { + FoldedValue = UserI; + UserI = cast(*UserI->user_begin()); + // Load (single use) -> trunc/extend (single use) -> UserI + } + if ((UserI->getOpcode() == Instruction::Sub || + UserI->getOpcode() == Instruction::SDiv || + UserI->getOpcode() == Instruction::UDiv) && + UserI->getOperand(1) != FoldedValue) + return false; // Not commutative, only RHS foldable. + // LoadOrTruncBits holds the number of effectively loaded bits, but 0 if an + // extension was made of the load. + unsigned LoadOrTruncBits = + ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits)); + switch (UserI->getOpcode()) { + case Instruction::Add: // SE: 16->32, 16/32->64, z14:16->64. ZE: 32->64 + case Instruction::Sub: + case Instruction::ICmp: + if (LoadedBits == 32 && ZExtBits == 64) + return true; + LLVM_FALLTHROUGH; + case Instruction::Mul: // SE: 16->32, 32->64, z14:16->64 + if (UserI->getOpcode() != Instruction::ICmp) { + if (LoadedBits == 16 && SExtBits == 32) + return true; + if (LoadOrTruncBits == 16) + return true; + } + LLVM_FALLTHROUGH; + case Instruction::SDiv: // SE: 32->64 + if (LoadedBits == 32 && SExtBits == 64) + return true; + LLVM_FALLTHROUGH; + case Instruction::UDiv: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // All possible extensions of memory checked above. + // Comparison between memory and immediate. 
+    if (UserI->getOpcode() == Instruction::ICmp)
+      if (ConstantInt *CI = dyn_cast<ConstantInt>(UserI->getOperand(1)))
+        if (CI->getValue().isIntN(16))
+          return true;
+    return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
+    break;
+  }
+  return false;
+}
+
+static bool isBswapIntrinsicCall(const Value *V) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    if (auto *CI = dyn_cast<CallInst>(I))
+      if (auto *F = CI->getCalledFunction())
+        if (F->getIntrinsicID() == Intrinsic::bswap)
+          return true;
+  return false;
+}
+
+InstructionCost Sw64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
+                                             MaybeAlign Alignment,
+                                             unsigned AddressSpace,
+                                             TTI::TargetCostKind CostKind,
+                                             TTI::OperandValueInfo OpInfo,
+                                             const Instruction *I) {
+  assert(!Ty->isVoidTy() && "Invalid type");
+
+  // TODO: Handle other cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return 1;
+
+  // Type legalization can't handle structs.
+  if (TLI->getValueType(DL, Ty, true) == MVT::Other)
+    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
+                                  CostKind);
+
+  auto LT = getTypeLegalizationCost(Ty);
+
+  if (ST->isMisaligned256StoreSlow() && Opcode == Instruction::Store &&
+      LT.second.is256BitVector() && (!Alignment || *Alignment < Align(32))) {
+    // Unaligned stores are extremely inefficient. We don't split all
+    // unaligned 256-bit stores because of the negative impact that has been
+    // shown in practice on inlined block copy code.
+    // We make such stores expensive so that we will only vectorize if there
+    // are 6 other instructions getting vectorized.
+    const int AmortizationCost = 6;
+
+    return LT.first * 2 * AmortizationCost;
+  }
+
+  if (Ty->isVectorTy() &&
+      cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
+    unsigned ProfitableNumElements;
+    if (Opcode == Instruction::Store)
+      // We use a custom trunc store lowering so v.4b should be profitable.
+      ProfitableNumElements = 4;
+    else
+      // We scalarize the loads because there is no v.4b register and we
+      // have to promote the elements to v.2.
+      ProfitableNumElements = 8;
+
+    if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
+      unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
+      unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
+      // We generate 2 instructions per vector element.
+      return NumVectorizableInstsToAmortize * NumVecElts * 2;
+    }
+  }
+  return LT.first;
+}
+
+TargetTransformInfo::PopcntSupportKind
+Sw64TTIImpl::getPopcntSupport(unsigned TyWidth) {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  // Sw64 only supports a 64-bit popcount instruction.
+  if (TyWidth == 32 || TyWidth == 64)
+    return TTI::PSK_FastHardware;
+  return TTI::PSK_Software;
+}
diff --git a/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h
new file mode 100644
index 000000000000..cd1b8f2f2f3d
--- /dev/null
+++ b/llvm/lib/Target/Sw64/Sw64TargetTransformInfo.h
@@ -0,0 +1,137 @@
+//===-- Sw64TargetTransformInfo.h - Sw64 specific TTI ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific
+/// to the Sw64 target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SW64_SW64TARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_SW64_SW64TARGETTRANSFORMINFO_H + +#include "Sw64.h" +#include "Sw64TargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/CodeGen/TargetLowering.h" + +namespace llvm { + +class Sw64TTIImpl : public BasicTTIImplBase { + typedef BasicTTIImplBase BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const Sw64Subtarget *ST; + const Sw64TargetLowering *TLI; + + const Sw64Subtarget *getST() const { return ST; } + const Sw64TargetLowering *getTLI() const { return TLI; } + + unsigned const LIBCALL_COST = 30; + + bool isWideningInstruction(Type *Ty, unsigned Opcode, + ArrayRef Args); + +public: + explicit Sw64TTIImpl(const Sw64TargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + unsigned getNumberOfRegisters(unsigned ClassID) const { + bool Vector = (ClassID == 1); + if (Vector) { + if (ST->hasSIMD()) + return 32; + return 0; + } + return 32; + } + + unsigned getMaxInterleaveFactor(ElementCount VF); + bool enableInterleavedAccessVectorization() { return true; } + TypeSize getRegisterBitWidth(bool Vector) const; + + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, + const Instruction *I); + + InstructionCost getMemoryOpCost( + unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, + const Instruction *I = nullptr); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); + + InstructionCost getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, + TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr); + + InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); + + InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr); + InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); + + bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, + const TargetTransformInfo::LSRCost &C2); + + unsigned getNumberOfRegisters(bool Vector); + + unsigned getCacheLineSize() const override { return 128; } + unsigned getPrefetchDistance() const override { return 524; } + unsigned getMinPrefetchStride(unsigned NumMemAccesses, + unsigned NumStridedMemAccesses, + unsigned NumPrefetches, + bool HasCall) const override { + return 1; + } + + bool hasDivRemOp(Type *DataType, bool IsSigned); + bool prefersVectorizedAddressing() { return false; } + bool LSRWithInstrQueries() { return true; } + bool supportsEfficientVectorElementLoadStore() { return true; } + + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, + ArrayRef Mask, + TTI::TargetCostKind CostKind, int Index, + VectorType *SubTp, + ArrayRef Args = std::nullopt); + unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); + unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); + unsigned 
getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, + const Instruction *I); + InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); + bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); + + TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/Sw64/Sw64VectorVarDefine.td b/llvm/lib/Target/Sw64/Sw64VectorVarDefine.td new file mode 100644 index 000000000000..f6996237dbfe --- /dev/null +++ b/llvm/lib/Target/Sw64/Sw64VectorVarDefine.td @@ -0,0 +1,317 @@ +//===- Sw64InstrInfo.td - The Sw64 Instruction Set -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Sw64 Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +class ConstantSImmAsmOperandClass Supers = [], + int Offset = 0> : AsmOperandClass { + let Name = "ConstantSImm" # Bits # "_" # Offset; + let RenderMethod = "addConstantSImmOperands<" # Bits # ", " # Offset # ">"; + let PredicateMethod = "isConstantSImm<" # Bits # ", " # Offset # ">"; + let SuperClasses = Supers; + let DiagnosticType = "SImm" # Bits # "_" # Offset; +} + +class ConstantUImmAsmOperandClass Supers = [], + int Offset = 0> : AsmOperandClass { + let Name = "ConstantUImm" # Bits # "_" # Offset; + let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">"; + let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImm" # Bits # "_" # Offset; +} + +def ConstantUImm7Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm7Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<7, 2>"; +} + +def ConstantSImm8AsmOperandClass + : ConstantSImmAsmOperandClass<8, [ConstantUImm7Lsl2AsmOperandClass]>; + +def ConstantUImm8AsmOperandClass + : ConstantUImmAsmOperandClass<8, [ConstantUImm7Lsl2AsmOperandClass]>; + +foreach I = {8} in + def vsplat_simm # I : Operand { + let ParserMatchClass = + !cast("ConstantSImm" # I # "AsmOperandClass"); + } + +foreach I = {8} in + def vsplat_uimm # I : Operand { + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } + +// Generic case - only to support certain assembly pseudo instructions. 
+class UImmAnyAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "ImmAny"; + let RenderMethod = "addConstantUImmOperands<32>"; + let PredicateMethod = "isSImm<" # Bits # ">"; + let SuperClasses = Supers; + let DiagnosticType = "ImmAny"; +} + +class SImmAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "SImm" # Bits; + let RenderMethod = "addSImmOperands<" # Bits # ">"; + let PredicateMethod = "isSImm<" # Bits # ">"; + let SuperClasses = Supers; + let DiagnosticType = "SImm" # Bits; +} + +class UImmAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "UImm" # Bits; + let RenderMethod = "addUImmOperands<" # Bits # ">"; + let PredicateMethod = "isUImm<" # Bits # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImm" # Bits; +} + +def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> { + let Name = "UImm32_Coerced"; + let DiagnosticType = "UImm32_Coerced"; +} + +def SImm32RelaxedAsmOperandClass + : SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> { + let Name = "SImm32_Relaxed"; + let PredicateMethod = "isAnyImm<33>"; + let DiagnosticType = "SImm32_Relaxed"; +} + +def SImm32AsmOperandClass + : SImmAsmOperandClass<32, [SImm32RelaxedAsmOperandClass]>; +def ConstantUImm26AsmOperandClass + : ConstantUImmAsmOperandClass<26, [SImm32AsmOperandClass]>; +def ConstantUImm20AsmOperandClass + : ConstantUImmAsmOperandClass<20, [ConstantUImm26AsmOperandClass]>; + +def UImm16RelaxedAsmOperandClass + : UImmAsmOperandClass<16, [ConstantUImm20AsmOperandClass]> { + let Name = "UImm16_Relaxed"; + let PredicateMethod = "isAnyImm<16>"; + let DiagnosticType = "UImm16_Relaxed"; +} + +// FIXME: One of these should probably have UImm16AsmOperandClass as the +// superclass instead of UImm16RelaxedasmOPerandClass. +def UImm16AsmOperandClass + : UImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]>; +def SImm16RelaxedAsmOperandClass + : SImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]> { + let Name = "SImm16_Relaxed"; + let PredicateMethod = "isAnyImm<16>"; + let DiagnosticType = "SImm16_Relaxed"; +} + +def SImm16AsmOperandClass + : SImmAsmOperandClass<16, [SImm16RelaxedAsmOperandClass]>; + +def ConstantSImm10Lsl3AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl3"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 3>"; + let SuperClasses = [SImm16AsmOperandClass]; + let DiagnosticType = "SImm10_Lsl3"; +} + +def Sw64MemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let ParserMethod = "parseMemOperand"; +} + +foreach I = {16, 32} in + def simm # I : Operand { + let DecoderMethod = "DecodeSImmWithOffsetAndScale<" # I # ">"; + let ParserMatchClass = !cast("SImm" # I # "AsmOperandClass"); + } + +foreach I = {1, 2, 3} in + def Sw64MemSimm16Lsl # I # AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm16_" # I; + let SuperClasses = [Sw64MemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<10, " # I # ">"; + let DiagnosticType = "MemSImm10Lsl" # I; + } + +class mem_generic : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops ptr_rc, simm16); + let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = Sw64MemAsmOperand; + let OperandType = "OPERAND_MEMORY"; +} + +def ConstantSImm10Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 2>"; + let SuperClasses = [ConstantSImm10Lsl3AsmOperandClass]; + 
let DiagnosticType = "SImm10_Lsl2"; +} + +foreach I = {2, 3} in + def simm16_ # I : Operand { + let DecoderMethod = "DecodeSImmWithOffsetAndScale<10, " # I # ">"; + let ParserMatchClass = + !cast("ConstantSImm10Lsl" # I # "AsmOperandClass"); + } + + def mem_simm16 : mem_generic { + let MIOperandInfo = (ops ptr_rc, !cast("simm16_2")); + let EncoderMethod = "getMemEncoding<2>"; + let ParserMatchClass = + !cast("Sw64MemSimm16Lsl2AsmOperand"); + } + + def mem_simm12 : mem_generic { + let MIOperandInfo = (ops ptr_rc, !cast("simm16_3")); + let EncoderMethod = "getMemEncoding<3>"; + let ParserMatchClass = + !cast("Sw64MemSimm16Lsl3AsmOperand"); + } + +class ConstantUImmRangeAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "ConstantUImmRange" # Bottom # "_" # Top; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isConstantUImmRange<" # Bottom # ", " # Top # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImmRange" # Bottom # "_" # Top; +} + +def ConstantSImm19Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm19Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<19, 2>"; + let SuperClasses = [ConstantUImm20AsmOperandClass]; + let DiagnosticType = "SImm19_Lsl2"; +} + +def ConstantSImm11AsmOperandClass + : ConstantSImmAsmOperandClass<11, [ConstantSImm10Lsl2AsmOperandClass]>; +def ConstantSImm10Lsl1AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl1"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 1>"; + let SuperClasses = [ConstantSImm11AsmOperandClass]; + let DiagnosticType = "SImm10_Lsl1"; +} + +def ConstantUImm10AsmOperandClass + : ConstantUImmAsmOperandClass<10, [ConstantSImm10Lsl1AsmOperandClass]>; +def ConstantSImm10AsmOperandClass + : ConstantSImmAsmOperandClass<10, [ConstantUImm10AsmOperandClass]>; +def ConstantSImm9AsmOperandClass + : ConstantSImmAsmOperandClass<9, [ConstantSImm10AsmOperandClass]>; +def ConstantSImm7Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm7Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<7, 2>"; + let SuperClasses = [ConstantSImm9AsmOperandClass]; + let DiagnosticType = "SImm7_Lsl2"; +} + +def ConstantUImm7Sub1AsmOperandClass + : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass], -1> { + // Specify the names since the -1 offset causes invalid identifiers otherwise. 
+ let Name = "UImm7_N1"; + let DiagnosticType = "UImm7_N1"; +} +def ConstantUImm7AsmOperandClass + : ConstantUImmAsmOperandClass<7, [ConstantUImm7Sub1AsmOperandClass]>; +def ConstantUImm6Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm6Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledUImm<6, 2>"; + let SuperClasses = [ConstantUImm7AsmOperandClass]; + let DiagnosticType = "UImm6_Lsl2"; +} + +def ConstantUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<6, [ConstantUImm6Lsl2AsmOperandClass]>; +def ConstantSImm6AsmOperandClass + : ConstantSImmAsmOperandClass<6, [ConstantUImm6AsmOperandClass]>; + +def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm5Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledUImm<5, 2>"; + let SuperClasses = [ConstantSImm6AsmOperandClass]; + let DiagnosticType = "UImm5_Lsl2"; +} +def ConstantUImm5_Range2_64AsmOperandClass + : ConstantUImmRangeAsmOperandClass<2, 64, [ConstantUImm5Lsl2AsmOperandClass]>; +def ConstantUImm5Plus33AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5_Range2_64AsmOperandClass], + 33>; +def ConstantUImm5ReportUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus33AsmOperandClass]> { + let Name = "ConstantUImm5_0_Report_UImm6"; + let DiagnosticType = "UImm5_0_Report_UImm6"; +} +def ConstantUImm5Plus32AsmOperandClass + : ConstantUImmAsmOperandClass< + 5, [ConstantUImm5ReportUImm6AsmOperandClass], 32>; +def ConstantUImm5Plus32NormalizeAsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus32AsmOperandClass], 32> { + let Name = "ConstantUImm5_32_Norm"; + // We must also subtract 32 when we render the operand. + let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; +} + +def ConstantUImm5Plus1ReportUImm6AsmOperandClass + : ConstantUImmAsmOperandClass< + 5, [ConstantUImm5Plus32NormalizeAsmOperandClass], 1>{ + let Name = "ConstantUImm5_Plus1_Report_UImm6"; +} + +def ConstantUImm5Plus1AsmOperandClass + : ConstantUImmAsmOperandClass< + 5, [ConstantUImm5Plus1ReportUImm6AsmOperandClass], 1>; +def ConstantUImm5AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus1AsmOperandClass]>; +def ConstantSImm5AsmOperandClass + : ConstantSImmAsmOperandClass<5, [ConstantUImm5AsmOperandClass]>; +def ConstantUImm4AsmOperandClass + : ConstantUImmAsmOperandClass<4, [ConstantSImm5AsmOperandClass]>; +def ConstantSImm4AsmOperandClass + : ConstantSImmAsmOperandClass<4, [ConstantUImm4AsmOperandClass]>; +def ConstantUImm3AsmOperandClass + : ConstantUImmAsmOperandClass<3, [ConstantSImm4AsmOperandClass]>; +def ConstantUImm2Plus1AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass], 1>; +def ConstantUImm2AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>; +def ConstantUImm1AsmOperandClass + : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>; + +// Unsigned Operands +foreach I = {1, 2, 3, 4, 5, 6, 7, 8, 10, 20, 26} in + def uimm # I : Operand { + let PrintMethod = "printUImm<" # I # ">"; + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } + +foreach I = {1, 2, 3, 4} in + def uimm # I # _ptr : Operand { + let PrintMethod = "printUImm<" # I # ">"; + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } diff --git a/llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt b/llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt new file mode 100644 index 000000000000..cf9ad922078a --- /dev/null +++ 
b/llvm/lib/Target/Sw64/TargetInfo/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_component_library(LLVMSw64Info + Sw64TargetInfo.cpp + + LINK_COMPONENTS + Support + + ADD_TO_COMPONENT + Sw64 + + ) diff --git a/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp new file mode 100644 index 000000000000..02a741e72e15 --- /dev/null +++ b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.cpp @@ -0,0 +1,24 @@ +//===-- Sw64TargetInfo.cpp - Sw64 Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "TargetInfo/Sw64TargetInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/TargetRegistry.h" +using namespace llvm; + +Target &llvm::getTheSw64Target() { + static Target TheSw64Target; + return TheSw64Target; +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSw64TargetInfo() { + RegisterTarget + X(getTheSw64Target(), "sw_64", "Sw64", "Sw64"); +} diff --git a/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h new file mode 100644 index 000000000000..723121a752b1 --- /dev/null +++ b/llvm/lib/Target/Sw64/TargetInfo/Sw64TargetInfo.h @@ -0,0 +1,12 @@ +#ifndef LLVM_LIB_TARGET_SW_64_TARGETINFO_SW64TARGETINFO_H +#define LLVM_LIB_TARGET_SW_64_TARGETINFO_SW64TARGETINFO_H + +namespace llvm { + +class Target; + +Target &getTheSw64Target(); + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_RISCV_TARGETINFO_RISCVTARGETINFO_H diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 9358b1365958..1a65edd32065 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -486,6 +486,10 @@ StringRef sys::detail::getHostCPUNameForBPF() { #endif } +StringRef sys::detail::getHostCPUNameForSW64(StringRef ProcCpuinfoContent) { + return "sw_64"; +} + #if defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64__) || defined(_M_X64) @@ -1460,6 +1464,12 @@ StringRef sys::getHostCPUName() { return "generic"; } } +#elif defined(__linux__) && defined(__sw_64__) +StringRef sys::getHostCPUName() { + std::unique_ptr P = getProcCpuinfoContent(); + StringRef Content = P ? P->getBuffer() : ""; + return detail::getHostCPUNameForSW64(Content); +} #elif defined(__loongarch__) StringRef sys::getHostCPUName() { // Use processor id to detect cpu name. 
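A minimal sketch of how a client would locate the newly registered target through the
registry, assuming LLVMInitializeSw64TargetInfo() has run and the Sw64 components are
linked in (the triple string and helper name below are only illustrative, not part of
the patch):

    #include "llvm/MC/TargetRegistry.h"
    #include <string>

    // Returns the Sw64 target, or nullptr (with Error filled in) if it is
    // not available in this build.
    const llvm::Target *findSw64Target(std::string &Error) {
      return llvm::TargetRegistry::lookupTarget("sw_64-unknown-linux-gnu", Error);
    }
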
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 2d61113f32a8..f25b31f92da8 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -72,6 +72,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) { case spir: return "spir"; case spirv32: return "spirv32"; case spirv64: return "spirv64"; + case sw_64: return "sw_64"; case systemz: return "s390x"; case tce: return "tce"; case tcele: return "tcele"; @@ -131,6 +132,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { case sparcel: case sparc: return "sparc"; + case sw_64: return "sw64"; + case systemz: return "s390"; case x86: @@ -364,6 +367,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("sparc", sparc) .Case("sparcel", sparcel) .Case("sparcv9", sparcv9) + .Case("sw_64", sw_64) .Case("s390x", systemz) .Case("systemz", systemz) .Case("tce", tce) @@ -511,6 +515,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("sparc", Triple::sparc) .Case("sparcel", Triple::sparcel) .Cases("sparcv9", "sparc64", Triple::sparcv9) + .Cases("sw", "sw_64", "sw6a", "sw6b", "sw4d", "sw8a", Triple::sw_64) .Case("tce", Triple::tce) .Case("tcele", Triple::tcele) .Case("xcore", Triple::xcore) @@ -690,6 +695,17 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) { (SubArchName.endswith("r6el") || SubArchName.endswith("r6"))) return Triple::MipsSubArch_r6; + if (SubArchName.startswith("sw")) { + if (SubArchName.endswith("6a")) + return Triple::Sw64SubArch_6a; + else if (SubArchName.endswith("6b")) + return Triple::Sw64SubArch_6b; + else if (SubArchName.endswith("4d")) + return Triple::Sw64SubArch_4d; + else if (SubArchName.endswith("8a")) + return Triple::Sw64SubArch_8a; + } + if (SubArchName == "powerpcspe") return Triple::PPCSubArch_spe; @@ -853,6 +869,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::sparcv9: case Triple::spir64: case Triple::spir: + case Triple::sw_64: case Triple::tce: case Triple::tcele: case Triple::thumbeb: @@ -1461,6 +1478,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::sparcv9: case llvm::Triple::spir64: case llvm::Triple::spirv64: + case llvm::Triple::sw_64: case llvm::Triple::systemz: case llvm::Triple::ve: case llvm::Triple::wasm64: @@ -1491,6 +1509,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::bpfeb: case Triple::bpfel: case Triple::msp430: + case Triple::sw_64: case Triple::systemz: case Triple::ve: T.setArch(UnknownArch); @@ -1604,6 +1623,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::sparcv9: case Triple::spir64: case Triple::spirv64: + case Triple::sw_64: case Triple::systemz: case Triple::ve: case Triple::wasm64: @@ -1675,6 +1695,7 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::spir: case Triple::spirv32: case Triple::spirv64: + case Triple::sw_64: case Triple::wasm32: case Triple::wasm64: case Triple::x86: @@ -1784,6 +1805,7 @@ bool Triple::isLittleEndian() const { case Triple::spir: case Triple::spirv32: case Triple::spirv64: + case Triple::sw_64: case Triple::tcele: case Triple::thumb: case Triple::ve: diff --git a/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll index 80c90cbf5be3..4aea4f945183 100644 --- a/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll +++ b/llvm/test/ExecutionEngine/MCJIT/eh-lg-pic.ll @@ -1,7 +1,7 @@ ; REQUIRES: cxx-shared-library ; RUN: %lli -jit-kind=mcjit -relocation-model=pic -code-model=large %s ; XFAIL: 
target={{.*-(cygwin|windows-msvc|windows-gnu)}} -; XFAIL: target={{(mips|mipsel)-.*}}, target={{(i686|i386).*}}, target={{(aarch64|arm).*}} +; XFAIL: target={{(mips|mipsel)-.*}}, target={{(i686|i386).*}}, target={{(aarch64|arm).*}}, target={{(sw_64).*}} declare ptr @__cxa_allocate_exception(i64) declare void @__cxa_throw(ptr, ptr, ptr) declare i32 @__gxx_personality_v0(...) diff --git a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg index b6874dd86ead..4af68aa23a5a 100644 --- a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg +++ b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg @@ -7,6 +7,7 @@ if ( | ("Mips" in targets) | ("PowerPC" in targets) | ("SystemZ" in targets) + | ("Sw64" in targets) ): config.unsupported = False else: @@ -25,6 +26,7 @@ if root.host_arch not in [ "PowerPC", "ppc64", "ppc64le", + "sw_64", "SystemZ", ]: config.unsupported = True diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg b/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg index 5095d98a5fd6..d584f3aa69c4 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg +++ b/llvm/test/ExecutionEngine/MCJIT/remote/lit.local.cfg @@ -1,6 +1,10 @@ if "armv4" in config.root.target_triple or "armv5" in config.root.target_triple: config.unsupported = True +# Remote MCJIT is not supported on sw_64 now. +if 'sw_64' in config.root.target_triple: + config.unsupported = True + # This is temporary, until Remote MCJIT works on ARM # See http://llvm.org/bugs/show_bug.cgi?id=18057 # if 'armv7' in config.root.target_triple: diff --git a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg index cbd7c544065d..4503e5fbf303 100644 --- a/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg +++ b/llvm/test/ExecutionEngine/OrcLazy/lit.local.cfg @@ -10,6 +10,7 @@ if config.root.host_arch not in [ "mips64", "mips64el", "loongarch64", + "sw_64", ]: config.unsupported = True diff --git a/llvm/test/tools/llvm-reduce/file-output-type.test b/llvm/test/tools/llvm-reduce/file-output-type.test index 93b9ca4acb08..b6a4347af425 100644 --- a/llvm/test/tools/llvm-reduce/file-output-type.test +++ b/llvm/test/tools/llvm-reduce/file-output-type.test @@ -1,4 +1,5 @@ # REQUIRES: default_triple +# XFAIL: target={{(sw_64).*}} # RUN: rm -rf %t.dir && mkdir %t.dir && cd %t.dir # RUN: llvm-as -o test-output-format.bc %p/Inputs/test-output-format.ll diff --git a/third-party/benchmark/src/cycleclock.h b/third-party/benchmark/src/cycleclock.h index d65d32a39d3d..e0eb7f3edbb9 100644 --- a/third-party/benchmark/src/cycleclock.h +++ b/third-party/benchmark/src/cycleclock.h @@ -173,6 +173,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#elif defined(__sw_64__) + // FIXME: SW CPU get cycle time + struct timeval tv; + gettimeofday(&tv, nullptr); + return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; #elif defined(__loongarch__) struct timeval tv; gettimeofday(&tv, nullptr); -- Gitee From 75d00b6292113b3e513ca4a13d5a982eb997b8bc Mon Sep 17 00:00:00 2001 From: swcompiler Date: Thu, 23 Jan 2025 14:50:58 +0800 Subject: [PATCH 2/3] [Sw64] Add Sw64 target support for clang --- clang/include/clang/Basic/Attr.td | 12 + clang/include/clang/Basic/AttrDocs.td | 11 + clang/include/clang/Basic/BuiltinsSw64.def | 249 +++ .../clang/Basic/DiagnosticDriverKinds.td | 2 + .../clang/Basic/DiagnosticSemaKinds.td | 4 + clang/include/clang/Basic/TargetBuiltins.h | 13 +- 
clang/include/clang/Basic/TargetCXXABI.def | 3 + clang/include/clang/Basic/TargetCXXABI.h | 6 + clang/include/clang/Basic/TargetInfo.h | 3 + clang/include/clang/Driver/Options.td | 43 + clang/include/clang/Sema/Sema.h | 3 + clang/lib/AST/ASTContext.cpp | 56 + clang/lib/Basic/CMakeLists.txt | 1 + clang/lib/Basic/Targets.cpp | 4 + clang/lib/Basic/Targets/Sw64.cpp | 125 ++ clang/lib/Basic/Targets/Sw64.h | 141 ++ clang/lib/CodeGen/CGBuiltin.cpp | 108 ++ clang/lib/CodeGen/CMakeLists.txt | 1 + clang/lib/CodeGen/CodeGenFunction.h | 2 + clang/lib/CodeGen/CodeGenModule.cpp | 3 + clang/lib/CodeGen/ItaniumCXXABI.cpp | 3 + clang/lib/CodeGen/TargetInfo.h | 3 + clang/lib/CodeGen/Targets/Sw64.cpp | 545 ++++++ clang/lib/Driver/CMakeLists.txt | 2 + clang/lib/Driver/Driver.cpp | 4 + clang/lib/Driver/ToolChains/Arch/Sw64.cpp | 94 + clang/lib/Driver/ToolChains/Arch/Sw64.h | 34 + clang/lib/Driver/ToolChains/Clang.cpp | 89 + clang/lib/Driver/ToolChains/Clang.h | 2 + clang/lib/Driver/ToolChains/CommonArgs.cpp | 7 + clang/lib/Driver/ToolChains/Gnu.cpp | 23 + clang/lib/Driver/ToolChains/Linux.cpp | 16 + clang/lib/Driver/ToolChains/Sw64Toolchain.cpp | 184 ++ clang/lib/Driver/ToolChains/Sw64Toolchain.h | 79 + clang/lib/Driver/XRayArgs.cpp | 1 + clang/lib/Frontend/CompilerInvocation.cpp | 3 +- clang/lib/Headers/CMakeLists.txt | 12 + clang/lib/Headers/sw64intrin.h | 1590 +++++++++++++++++ clang/lib/Sema/SemaChecking.cpp | 136 ++ clang/lib/Sema/SemaDeclAttr.cpp | 16 + 40 files changed, 3631 insertions(+), 2 deletions(-) create mode 100644 clang/include/clang/Basic/BuiltinsSw64.def create mode 100644 clang/lib/Basic/Targets/Sw64.cpp create mode 100644 clang/lib/Basic/Targets/Sw64.h create mode 100644 clang/lib/CodeGen/Targets/Sw64.cpp create mode 100644 clang/lib/Driver/ToolChains/Arch/Sw64.cpp create mode 100644 clang/lib/Driver/ToolChains/Arch/Sw64.h create mode 100644 clang/lib/Driver/ToolChains/Sw64Toolchain.cpp create mode 100644 clang/lib/Driver/ToolChains/Sw64Toolchain.h create mode 100644 clang/lib/Headers/sw64intrin.h diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index d5204b286966..6ea5e5ee98b9 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -428,6 +428,7 @@ def TargetX86 : TargetArch<["x86"]>; def TargetAnyX86 : TargetArch<["x86", "x86_64"]>; def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>; def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>; +def TargetSw64 : TargetArch<["sw_64"]>; def TargetWindows : TargetSpec { let OSes = ["Win32"]; } @@ -891,6 +892,17 @@ def AVRSignal : InheritableAttr, TargetSpecificAttr { let Documentation = [AVRSignalDocs]; } +def Sw64Interrupt : InheritableAttr, TargetSpecificAttr { + let Spellings = [GCC<"interrupt">]; + let Subjects = SubjectList<[Function]>; + let Args = [EnumArgument<"Interrupt", "InterruptType", + ["user", "supervisor", "machine"], + ["user", "supervisor", "machine"], + 1>]; + let ParseKind = "Interrupt"; + let Documentation = [Sw64InterruptDocs]; +} + def AsmLabel : InheritableAttr { let Spellings = [CustomKeyword<"asm">, CustomKeyword<"__asm__">]; let Args = [ diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2c950231255d..c59c6efd1982 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2375,6 +2375,17 @@ of the type before passing to the attribute. 
}]; } +def Sw64InterruptDocs : Documentation { + let Category = DocCatFunction; + let Heading = "interrupt (SW64)"; + let Content = [{ +Clang supports the GNU style ``__attribute__((interrupt))`` attribute on SW64 +targets. This attribute may be attached to a function definition and instructs +the backend to generate appropriate function entry/exit code so that it can be +used directly as an interrupt service routine. + }]; +} + def AVRInterruptDocs : Documentation { let Category = DocCatFunction; let Heading = "interrupt (AVR)"; diff --git a/clang/include/clang/Basic/BuiltinsSw64.def b/clang/include/clang/Basic/BuiltinsSw64.def new file mode 100644 index 000000000000..d3e85bf6c876 --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsSw64.def @@ -0,0 +1,249 @@ +//===--- BuiltinsSw64.def - Sw64 Builtin function database ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sw64-specific builtin function database. Users of +// this file must define the BUILTIN macro to make use of this information. +// +//===----------------------------------------------------------------------===// + +// The format of this database matches clang/Basic/Builtins.def. + +BUILTIN(__builtin_bitrev, "UiUi", "nc") +BUILTIN(__builtin_getid, "Si", "nc") +BUILTIN(__builtin_getps, "UiUi", "n") +BUILTIN(__builtin_setps, "vUiUi", "n") + +BUILTIN(__builtin_sw64_crc32b, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32h, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32w, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32l, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32cb, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32ch, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32cw, "LiLiLi", "n") +BUILTIN(__builtin_sw64_crc32cl, "LiLiLi", "n") + +BUILTIN(__builtin_sw64_sbt, "LiLiLi", "n") +BUILTIN(__builtin_sw64_cbt, "LiLiLi", "n") + +BUILTIN(__builtin_sw_vaddw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vsubw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucaddw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucsubw, "V8iV8iV8i", "n") + +BUILTIN(__builtin_sw_vaddl, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vsubl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vucaddh, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucsubh, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucaddb, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucsubb, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vucaddhi, "V8iV8iLi", "n") +BUILTIN(__builtin_sw_vucsubhi, "V8iV8iLi", "n") +BUILTIN(__builtin_sw_vucaddbi, "V8iV8iLi", "n") +BUILTIN(__builtin_sw_vucsubbi, "V8iV8iLi", "n") + +BUILTIN(__builtin_sw_vucaddh_v16hi, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vucsubh_v16hi, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vucaddb_v32qi, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vucsubb_v32qi, "V32cV32cV32c", "n") + +BUILTIN(__builtin_sw_vsumw, "LiV8i", "n") +BUILTIN(__builtin_sw_vsuml, "LiV4Li", "n") +BUILTIN(__builtin_sw_ctpopow, "LiV8i", "n") +BUILTIN(__builtin_sw_ctlzow, "LiV8i", "n") + +BUILTIN(__builtin_sw_vsll, "v.", "t") +BUILTIN(__builtin_sw_vsrl, "v.", "t") +BUILTIN(__builtin_sw_vsra, "v.", "t") +BUILTIN(__builtin_sw_vrol, "v.", "t") + +BUILTIN(__builtin_sw_vsllw, "V8iV8iLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrlw, "V8iV8iLi", "ncV:256:") +BUILTIN(__builtin_sw_vsraw, "V8iV8iLi", "ncV:256:") +BUILTIN(__builtin_sw_vrolw, "V8iV8iLi", "ncV:256:") + +BUILTIN(__builtin_sw_vsllb, 
"V32cV32cLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrlb, "V32cV32cLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrab, "V32cV32cLi", "ncV:256:") +BUILTIN(__builtin_sw_vrolb, "V32cV32cLi", "ncV:256:") + +BUILTIN(__builtin_sw_vslll, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrll, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_vsral, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_vroll, "V4LiV4LiLi", "ncV:256:") + +BUILTIN(__builtin_sw_vsllh, "V16sV16sLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrlh, "V16sV16sLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrah, "V16sV16sLi", "ncV:256:") +BUILTIN(__builtin_sw_vrolh, "V16sV16sLi", "ncV:256:") + +BUILTIN(__builtin_sw_sllow, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_srlow, "V4LiV4LiLi", "ncV:256:") +BUILTIN(__builtin_sw_sraow, "V4LiV4LiLi", "ncV:256:") + +BUILTIN(__builtin_sw_vslls, "V4fV4fLi", "ncV:256:") +BUILTIN(__builtin_sw_vslld, "V4dV4dLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrls, "V4fV4fLi", "ncV:256:") +BUILTIN(__builtin_sw_vsrld, "V4dV4dLi", "ncV:256:") + +BUILTIN(__builtin_sw_vcmpgew, "LiV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpeqw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmplew, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpltw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpulew, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpultw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vcmpueqb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vcmpugtb, "V32cV32cV32c", "n") + +BUILTIN(__builtin_sw_vmaxb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vmaxh, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vmaxw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vmaxl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vumaxb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vumaxh, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vumaxw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vumaxl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vminb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vminh, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vminw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vminl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vuminb, "V32cV32cV32c", "n") +BUILTIN(__builtin_sw_vuminh, "V16sV16sV16s", "n") +BUILTIN(__builtin_sw_vuminw, "V8iV8iV8i", "n") +BUILTIN(__builtin_sw_vuminl, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vseleqw, "V8iV8iV8iV8i", "n") +BUILTIN(__builtin_sw_vsellew, "V8iV8iV8iV8i", "n") +BUILTIN(__builtin_sw_vselltw, "V8iV8iV8iV8i", "n") +BUILTIN(__builtin_sw_vsellbcw, "V8iV8iV8iV8i", "n") + +BUILTIN(__builtin_sw_vseleqwi, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vsellewi, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vselltwi, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vsellbcwi, "V8iV8iV8iLi", "n") + +BUILTIN(__builtin_sw_vxor, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vnot, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vorr, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vbic, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_vornot, "V4LiV4LiV4Li", "n") +BUILTIN(__builtin_sw_veqv, "V4LiV4LiV4Li", "n") + +BUILTIN(__builtin_sw_vsqrts, "V4fV4f", "n") +BUILTIN(__builtin_sw_vsqrtd, "V4dV4d", "n") + +BUILTIN(__builtin_sw_vsums, "fV4f", "n") +BUILTIN(__builtin_sw_vsumd, "dV4d", "n") + +BUILTIN(__builtin_sw_vfrecs, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfrecd, "V4dV4d", "n") + +BUILTIN(__builtin_sw_vfcmpeqs, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfcmplts, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfcmples, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfcmpuns, "V4fV4fV4f", "n") + +BUILTIN(__builtin_sw_vfcmpeqd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfcmpltd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfcmpled, 
"V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfcmpund, "V4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vfcvtsd, "V4dV4f", "n") +BUILTIN(__builtin_sw_vfcvtds, "V4fV4d", "n") +BUILTIN(__builtin_sw_vfcvtld, "V4dV4Li", "n") +BUILTIN(__builtin_sw_vfcvtls, "V4fV4Li", "n") +BUILTIN(__builtin_sw_vfcvtsh, "V4dV4fV4fLi", "n") +BUILTIN(__builtin_sw_vfcvths, "V4fV4dLi", "n") + +BUILTIN(__builtin_sw_vfcvtdl, "V4LiV4d", "n") +BUILTIN(__builtin_sw_vfcvtdl_g, "V4LiV4d", "n") +BUILTIN(__builtin_sw_vfcvtdl_p, "V4LiV4d", "n") +BUILTIN(__builtin_sw_vfcvtdl_z, "V4LiV4d", "n") +BUILTIN(__builtin_sw_vfcvtdl_n, "V4LiV4d", "n") + +BUILTIN(__builtin_sw_vfris, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfris_g, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfris_p, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfris_z, "V4fV4f", "n") +BUILTIN(__builtin_sw_vfris_n, "V4fV4f", "n") + +BUILTIN(__builtin_sw_vfrid, "V4dV4d", "n") +BUILTIN(__builtin_sw_vfrid_g, "V4dV4d", "n") +BUILTIN(__builtin_sw_vfrid_p, "V4dV4d", "n") +BUILTIN(__builtin_sw_vfrid_z, "V4dV4d", "n") +BUILTIN(__builtin_sw_vfrid_n, "V4dV4d", "n") + +BUILTIN(__builtin_sw_vmaxs, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vmaxd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vmins, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vmind, "V4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vcpyss, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vcpyses, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vcpysns, "V4fV4fV4f", "n") + +BUILTIN(__builtin_sw_vcpysd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vcpysed, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vcpysnd, "V4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vfseleqs, "V4fV4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfsellts, "V4fV4fV4fV4f", "n") +BUILTIN(__builtin_sw_vfselles, "V4fV4fV4fV4f", "n") + +BUILTIN(__builtin_sw_vfseleqd, "V4dV4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfselltd, "V4dV4dV4dV4d", "n") +BUILTIN(__builtin_sw_vfselled, "V4dV4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vmas, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vmss, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vnmas, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vnmss, "V4fV4fV4f", "n") +BUILTIN(__builtin_sw_vmad, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vmsd, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vnmad, "V4dV4dV4d", "n") +BUILTIN(__builtin_sw_vnmsd, "V4dV4dV4d", "n") + +BUILTIN(__builtin_sw_vinsb, "V32cLiV32cLi", "n") +BUILTIN(__builtin_sw_vinsh, "V16sLiV16sLi", "n") +BUILTIN(__builtin_sw_vinsw, "V8iLiV8iLi", "n") +BUILTIN(__builtin_sw_vinsl, "V4LiLiV4LiLi", "n") +BUILTIN(__builtin_sw_vinsfs, "V4ffV4fLi", "n") +BUILTIN(__builtin_sw_vinsfd, "V4ddV4dLi", "n") + +BUILTIN(__builtin_sw_vextw, "LiV8iLi", "n") +BUILTIN(__builtin_sw_vextl, "LiV4LiLi", "n") +BUILTIN(__builtin_sw_vextfs, "fV4fLi", "n") +BUILTIN(__builtin_sw_vextfd, "dV4dLi", "n") + +BUILTIN(__builtin_sw_vshfw, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vshfq, "V8iV8iV8iLi", "n") +BUILTIN(__builtin_sw_vshfqb, "V32cV32cV32c", "n") + +BUILTIN(__builtin_sw_vconw, "V8iV8iV8iv*", "n") +BUILTIN(__builtin_sw_vconl, "V4LiV4LiV4Liv*", "n") +BUILTIN(__builtin_sw_vcons, "V4fV4fV4fv*", "n") +BUILTIN(__builtin_sw_vcond, "V4dV4dV4dv*", "n") + +BUILTIN(__builtin_sw_vlogzz, "V4LiV4LiV4LiV4LiLi", "n") +BUILTIN(__builtin_sw_vload, "v.", "t") +BUILTIN(__builtin_sw_vloadu, "v.", "t") +BUILTIN(__builtin_sw_vload_u, "v.", "t") +BUILTIN(__builtin_sw_vloade, "v.", "t") +BUILTIN(__builtin_sw_vloadnc, "v.", "t") +BUILTIN(__builtin_sw_vstore, "v.", "t") +BUILTIN(__builtin_sw_vstoreu, "v.", "t") +BUILTIN(__builtin_sw_vstore_u, "v.", "t") +BUILTIN(__builtin_sw_vstoreuh, "v.", "t") +BUILTIN(__builtin_sw_vstoreul, "v.", "t") 
+BUILTIN(__builtin_sw_vstorenc, "v.", "t") + +#undef BUILTIN diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 060f96118364..736260b17322 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -31,6 +31,8 @@ def err_drv_invalid_riscv_arch_name : Error< "invalid arch name '%0', %1">; def err_drv_invalid_riscv_cpu_name_for_target : Error< "cpu '%0' does not support rv%select{32|64}1">; +def err_drv_invalid_sw64_ext_arch_name : Error< + "invalid arch name '%0', %1 '%2'">; def warn_drv_invalid_arch_name_with_suggestion : Warning< "ignoring invalid /arch: argument '%0'; for %select{64|32}1-bit expected one of %2">, InGroup; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 0e97620945af..b74b381b374b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11909,4 +11909,8 @@ def err_wasm_builtin_arg_must_match_table_element_type : Error < "%ordinal0 argument must match the element type of the WebAssembly table in the %ordinal1 argument">; def err_wasm_builtin_arg_must_be_integer_type : Error < "%ordinal0 argument must be an integer">; + +// Sw64-specific Diagnostics +def err_invalid_sw64_type_code : Error< + "incompatible type for this __builtin_sw64 function">; } // end of sema component. diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 8f7881abf26f..59487eb04140 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -174,6 +174,16 @@ namespace clang { }; } // namespace LoongArch + /// Sw64 builtins + namespace Sw64 { + enum { + LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1, +#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#include "clang/Basic/BuiltinsSw64.def" + LastTSBuiltin + }; + } // namespace Sw64 + /// Flags to identify the types for overloaded Neon builtins. /// /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. @@ -369,7 +379,8 @@ namespace clang { PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, - SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); + SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, + Sw64::LastTSBuiltin}); } // end namespace clang. diff --git a/clang/include/clang/Basic/TargetCXXABI.def b/clang/include/clang/Basic/TargetCXXABI.def index 9501cca76094..70573e5864a0 100644 --- a/clang/include/clang/Basic/TargetCXXABI.def +++ b/clang/include/clang/Basic/TargetCXXABI.def @@ -88,6 +88,9 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") /// - representation of member function pointers adjusted as in ARM. ITANIUM_CXXABI(GenericMIPS, "mips") +/// The generic Sw64 ABI is a modified version of the Itanium ABI. +ITANIUM_CXXABI(GenericSW64, "sw_64") + /// The WebAssembly ABI is a modified version of the Itanium ABI. 
/// /// The changes from the Itanium ABI are: diff --git a/clang/include/clang/Basic/TargetCXXABI.h b/clang/include/clang/Basic/TargetCXXABI.h index c113a6a048ad..b62f97be512b 100644 --- a/clang/include/clang/Basic/TargetCXXABI.h +++ b/clang/include/clang/Basic/TargetCXXABI.h @@ -103,6 +103,9 @@ public: case GenericMIPS: return T.isMIPS(); + case GenericSW64: + return T.isSw64(); + case WebAssembly: return T.isWasm(); @@ -165,6 +168,7 @@ public: case GenericARM: case GenericAArch64: case GenericMIPS: + case GenericSW64: // TODO: ARM-style pointers to member functions put the discriminator in // the this adjustment, so they don't require functions to have any // special alignment and could therefore also return false. @@ -249,6 +253,7 @@ public: case iOS: // old iOS compilers did not follow this rule case Microsoft: case GenericMIPS: + case GenericSW64: case XL: return true; } @@ -287,6 +292,7 @@ public: case GenericARM: case iOS: case GenericMIPS: + case GenericSW64: case XL: return UseTailPaddingUnlessPOD03; diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index aeadb7273799..b2575eb6c334 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -332,6 +332,9 @@ public: // } va_list[1]; SystemZBuiltinVaList, + // __builtin_va_list as defined by the Sw64 ABI + Sw64ABIBuiltinVaList, + // typedef struct __va_list_tag { // void *__current_saved_reg_area_pointer; // void *__saved_reg_area_end_pointer; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c109d7a8fcab..b8971182ae76 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -194,6 +194,8 @@ def m_riscv_Features_Group : OptionGroup<"">, Group, DocName<"RISC-V">; def m_loongarch_Features_Group : OptionGroup<"">, Group, DocName<"LoongArch">; +def m_sw_64_Features_Group : OptionGroup<"">, + Group, DocName<"SW64">; def m_libc_Group : OptionGroup<"">, Group, Flags<[HelpHidden]>; @@ -4247,6 +4249,41 @@ def mno_lasx : Flag<["-"], "mno-lasx">, Group, def msimd_EQ : Joined<["-"], "msimd=">, Group, Flags<[TargetSpecific]>, HelpText<"Select the SIMD extension(s) to be enabled in LoongArch either 'none', 'lsx', 'lasx'.">; +def mieee : Flag<["-"], "mieee">, Group, + HelpText<"Use mieee to set setfpec (SW64 only)">; +foreach i = {0-31} in + def ffixed_sw_#i : Flag<["-"], "ffixed-sw-"#i>, Group, + HelpText<"Reserve the "#i#" register (SW64 only)">; +def FS_LOAD : Flag<["-"], "fastload">, Group, Flags<[CC1Option]>, + HelpText<"enable fast load/store instrs in sw_64 target.(Development)">; +def fsw_int_divmod : Flag<["-"], "fsw-int-divmod">, Group, + HelpText<"Enable sw64 core4 int-div/rem instructions">, Flags<[CC1Option]>; +def fsw_shift_word : Flag<["-"], "fsw-shift-word">, Group, + HelpText<"Enable sw64 core4 int-shift instructions">, Flags<[CC1Option]>; +def fsw_rev : Flag<["-"], "fsw-rev">, Group, + HelpText<"Enable sw64 core4 byte-rev instructions">, Flags<[CC1Option]>; +def fsw_recip : Flag<["-"], "fsw-recip">, Group, + HelpText<"Enable sw64 core4 fp-rec instructions">, Flags<[CC1Option]>; +def fsw_fprnd : Flag<["-"], "fsw-fprnd">, Group, + HelpText<"Enable sw64 core4 fp-round instructions">, Flags<[CC1Option]>; +def fsw_cmov : Flag<["-"], "fsw-cmov">, Group, + HelpText<"Enable sw64 core4 fp-cmov instructions">, Flags<[CC1Option]>; +def fsw_auto_inc_dec : Flag<["-"], "fsw-auto-inc-dec">, Group, + HelpText<"Enable sw64 core4 post-inc instructions">, Flags<[CC1Option]>; +def 
fsw_use_cas : Flag<["-"], "fsw-use-cas">, Group, + HelpText<"Enable sw64 core4 atomic-cas instructions">, Flags<[CC1Option]>; +def msw64_relax : Flag<["-"], "sw64-mrelax">, Group, + HelpText<"Enable linker relaxation">; +def msw64_no_relax : Flag<["-"], "sw64-mno-relax">, Group, + HelpText<"Disable linker relaxation">; +def msw6a : Flag<["-"], "sw6a">, + Alias, AliasArgs<["sw6a"]>, Group, + HelpText<"sw6a">, Flags<[HelpHidden]>; +def msw6b : Flag<["-"], "sw6b">, + Alias, AliasArgs<["sw6b"]>, Group, + HelpText<"sw6b">, Flags<[HelpHidden]>; +def mswEv : Flag<["-"], "mswEv">, Group; + def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">, Flags<[CC1Option]>, Group, MarshallingInfoFlag>; @@ -4297,6 +4334,12 @@ def mmsa : Flag<["-"], "mmsa">, Group, HelpText<"Enable MSA ASE (MIPS only)">; def mno_msa : Flag<["-"], "mno-msa">, Group, HelpText<"Disable MSA ASE (MIPS only)">; + +def msimd : Flag<["-"], "msimd">, Group, + HelpText<"Enable SIMD (SW64 only)">; +def mno_simd : Flag<["-"], "mno-simd">, Group, + HelpText<"Disable SIMD (SW64 only)">; + def mmt : Flag<["-"], "mmt">, Group, HelpText<"Enable MT ASE (MIPS only)">; def mno_mt : Flag<["-"], "mno-mt">, Group, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b2ab6d0f8445..b41933afb5a4 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13621,6 +13621,9 @@ private: bool CheckMipsBuiltinCpu(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall); bool CheckMipsBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall); + bool CheckSw64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool CheckSw64VectorMemoryIntr(unsigned BuiltinID, CallExpr *TheCall); + bool CheckSw64VectorShift(unsigned BuiltinID, CallExpr *TheCall); bool CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 76000156fece..62a8c227a3a4 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -884,6 +884,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { case TargetCXXABI::WatchOS: case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericSW64: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: case TargetCXXABI::XL: @@ -9009,6 +9010,57 @@ CreateAAPCSABIBuiltinVaListDecl(const ASTContext *Context) { return Context->buildImplicitTypedef(T, "__builtin_va_list"); } +static TypedefDecl *CreateSw64ABIBuiltinVaListDecl(const ASTContext *Context) { + // struct __va_list { + RecordDecl *VaListTagDecl = Context->buildImplicitRecord("__va_list"); + + if (Context->getLangOpts().CPlusPlus) { + + // namespace std { + // struct __va_list { + NamespaceDecl *NS; + NS = NamespaceDecl::Create(const_cast(*Context), + Context->getTranslationUnitDecl(), + /*Inline*/ false, SourceLocation(), + SourceLocation(), &Context->Idents.get("std"), + /*PrevDecl*/ nullptr, /*Nested=*/false); + NS->setImplicit(); + VaListTagDecl->setDeclContext(NS); + } + + VaListTagDecl->startDefinition(); + + const size_t NumFields = 2; + QualType FieldTypes[NumFields]; + const char *FieldNames[NumFields]; + + // unsigned gp_offset; + FieldTypes[0] = Context->getPointerType(Context->VoidTy); + FieldNames[0] = "__stack"; + + // 
unsigned fp_offset; + FieldTypes[1] = Context->IntTy; + FieldNames[1] = "__offs"; + + // Create fields + for (unsigned i = 0; i < NumFields; ++i) { + FieldDecl *Field = FieldDecl::Create( + const_cast(*Context), VaListTagDecl, SourceLocation(), + SourceLocation(), &Context->Idents.get(FieldNames[i]), FieldTypes[i], + /*TInfo=*/nullptr, + /*BitWidth=*/nullptr, + /*Mutable=*/false, ICIS_NoInit); + Field->setAccess(AS_public); + VaListTagDecl->addDecl(Field); + } + VaListTagDecl->completeDefinition(); + Context->VaListTagDecl = VaListTagDecl; + QualType VaListTagType = Context->getRecordType(VaListTagDecl); + + // }; + return Context->buildImplicitTypedef(VaListTagType, "__builtin_va_list"); +} + static TypedefDecl * CreateSystemZBuiltinVaListDecl(const ASTContext *Context) { // struct __va_list_tag { @@ -9136,6 +9188,8 @@ static TypedefDecl *CreateVaListDecl(const ASTContext *Context, return CreateSystemZBuiltinVaListDecl(Context); case TargetInfo::HexagonBuiltinVaList: return CreateHexagonBuiltinVaListDecl(Context); + case TargetInfo::Sw64ABIBuiltinVaList: + return CreateSw64ABIBuiltinVaListDecl(Context); } llvm_unreachable("Unhandled __builtin_va_list type kind"); @@ -12041,6 +12095,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericSW64: case TargetCXXABI::iOS: case TargetCXXABI::WebAssembly: case TargetCXXABI::WatchOS: @@ -12062,6 +12117,7 @@ MangleContext *ASTContext::createDeviceMangleContext(const TargetInfo &T) { case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericSW64: case TargetCXXABI::iOS: case TargetCXXABI::WebAssembly: case TargetCXXABI::WatchOS: diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index caa1b6002e6f..e830db015d0c 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -109,6 +109,7 @@ add_clang_library(clangBasic Targets/RISCV.cpp Targets/SPIR.cpp Targets/Sparc.cpp + Targets/Sw64.cpp Targets/SystemZ.cpp Targets/TCE.cpp Targets/VE.cpp diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index b14d11333412..432f34f94414 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -35,6 +35,7 @@ #include "Targets/RISCV.h" #include "Targets/SPIR.h" #include "Targets/Sparc.h" +#include "Targets/Sw64.h" #include "Targets/SystemZ.h" #include "Targets/TCE.h" #include "Targets/VE.h" @@ -132,6 +133,9 @@ std::unique_ptr AllocateTarget(const llvm::Triple &Triple, case llvm::Triple::lanai: return std::make_unique(Triple, Opts); + case llvm::Triple::sw_64: + return std::make_unique(Triple, Opts); + case llvm::Triple::aarch64_32: if (Triple.isOSDarwin()) return std::make_unique(Triple, Opts); diff --git a/clang/lib/Basic/Targets/Sw64.cpp b/clang/lib/Basic/Targets/Sw64.cpp new file mode 100644 index 000000000000..c622a4b7a4e1 --- /dev/null +++ b/clang/lib/Basic/Targets/Sw64.cpp @@ -0,0 +1,125 @@ +//===--- Sw64.cpp - Implement Sw64 target feature support ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements Sw64 TargetInfo objects. 
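The __builtin_va_list that CreateSw64ABIBuiltinVaListDecl builds above is a plain two-field record; note that the "gp_offset"/"fp_offset" comments in that hunk are leftovers and do not match the fields actually created (__stack and __offs). A rough C equivalent, for illustration only and not part of the patch:

    /* Illustrative sketch of the va_list record created above. */
    typedef struct __va_list {
      void *__stack;   /* pointer into the argument save area       */
      int   __offs;    /* byte offset of the next variadic argument */
    } sw64_va_list;    /* "sw64_va_list" is a hypothetical name     */
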
+// +//===----------------------------------------------------------------------===// + +#include "Sw64.h" +#include "Targets.h" +#include "clang/Basic/Builtins.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/MacroBuilder.h" +#include "clang/Basic/TargetBuiltins.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Sw64TargetParser.h" + +using namespace clang; +using namespace clang::targets; + +ArrayRef Sw64TargetInfo::getGCCRegNames() const { + static const char *const GCCRegNames[] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", + "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", + "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", + "$24", "$25", "$26", "$27", "$28", "$29", "$30", "$31", + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", + "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", + "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", + "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31"}; + return llvm::makeArrayRef(GCCRegNames); +} + +ArrayRef Sw64TargetInfo::getGCCRegAliases() const { + static const TargetInfo::GCCRegAlias GCCRegAliases[] = { + {{"v0"}, "$0"}, {{"t0"}, "$1"}, {{"t1"}, "$2"}, {{"t2"}, "$3"}, + {{"t3"}, "$4"}, {{"t4"}, "$5"}, {{"t5"}, "$6"}, {{"t6"}, "$7"}, + {{"t7"}, "$8"}, {{"s0"}, "$9"}, {{"s1"}, "$10"}, {{"s2"}, "$11"}, + {{"s3"}, "$12"}, {{"s4"}, "$13"}, {{"s5"}, "$14"}, {{"fp"}, "$15"}, + {{"a0"}, "$16"}, {{"a1"}, "$17"}, {{"a2"}, "$18"}, {{"a3"}, "$19"}, + {{"a4"}, "$20"}, {{"a5"}, "$21"}, {{"t8"}, "$22"}, {{"t9"}, "$23"}, + {{"t10"}, "$24"}, {{"t11"}, "$25"}, {{"ra"}, "$26"}, {{"t12"}, "$27"}, + {{"at"}, "$28"}, {{"gp"}, "$29"}, {{"sp"}, "$30"}, {{"zero"}, "$31"}}; + return llvm::makeArrayRef(GCCRegAliases); +} + +const Builtin::Info Sw64TargetInfo::BuiltinInfo[] = { +#define BUILTIN(ID, TYPE, ATTRS) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ + {#ID, TYPE, ATTRS, HEADER, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#include "clang/Basic/BuiltinsSw64.def" +}; + +void Sw64TargetInfo::fillValidCPUList( + SmallVectorImpl &Values) const { + llvm::Sw64::fillValidCPUArchList(Values, true); +} + +bool Sw64TargetInfo::isValidTuneCPUName(StringRef Name) const { + return llvm::Sw64::checkTuneCPUKind(llvm::Sw64::parseTuneCPUKind(Name, true), + /*Is64Bit=*/true); +} + +void Sw64TargetInfo::fillValidTuneCPUList( + SmallVectorImpl &Values) const { + llvm::Sw64::fillValidTuneCPUArchList(Values, true); +} + +bool Sw64TargetInfo::isValidCPUName(StringRef Name) const { + return llvm::Sw64::parseCPUArch(Name) != llvm::Sw64::CK_INVALID; +} + +bool Sw64TargetInfo::setCPU(const std::string &Name) { + return isValidCPUName(Name); +} + +void Sw64TargetInfo::getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + DefineStd(Builder, "sw_64", Opts); + + Builder.defineMacro("__REGISTER_PREFIX__", ""); + Builder.defineMacro("__LONG_DOUBLE_128__"); + + Builder.defineMacro("__ELF__"); + Builder.defineMacro("__sw_64__"); + Builder.defineMacro("__sw_64_sw6a__"); + Builder.defineMacro("__sw_64"); + // Consistent with GCC + Builder.defineMacro("__gnu_linux__"); + + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); + Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); + + DefineStd(Builder, "unix", Opts); + DefineStd(Builder, "linux", Opts); + + if (HasCore4) + 
Builder.defineMacro("__sw_64_sw8a__"); + + if (Opts.CPlusPlus) + Builder.defineMacro("_GNU_SOURCE"); +} + +/// Return true if has this feature, need to sync with handleTargetFeatures. +bool Sw64TargetInfo::hasFeature(StringRef Feature) const { + return llvm::StringSwitch(Feature) + .Case("sw_64", true) + .Case("core3b", HasCore3) + .Case("core4", HasCore4) + .Case("simd", HasSIMD) + .Default(false); +} + +ArrayRef Sw64TargetInfo::getTargetBuiltins() const { + return llvm::makeArrayRef(BuiltinInfo, clang::Sw64::LastTSBuiltin - + Builtin::FirstTSBuiltin); +} diff --git a/clang/lib/Basic/Targets/Sw64.h b/clang/lib/Basic/Targets/Sw64.h new file mode 100644 index 000000000000..791d893a7ea3 --- /dev/null +++ b/clang/lib/Basic/Targets/Sw64.h @@ -0,0 +1,141 @@ +//===--- Sw64.h - Declare Sw64 target feature support ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares Sw64 TargetInfo objects. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_SW64_H +#define LLVM_CLANG_LIB_BASIC_TARGETS_SW64_H + +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Sw64TargetParser.h" +#include "llvm/TargetParser/Triple.h" + +namespace clang { +namespace targets { + +class LLVM_LIBRARY_VISIBILITY Sw64TargetInfo : public TargetInfo { + static const Builtin::Info BuiltinInfo[]; + bool HasCore3 = false; + bool HasCore4 = false; + + // for futrure update + // change data length + void setDataLayout() { + StringRef Layout; + Layout = + "e-m:e-p:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n64-S128-v256:256"; + resetDataLayout(Layout.str()); + } + + bool HasSIMD; + +public: + Sw64TargetInfo(const llvm::Triple &Triple, const TargetOptions &) + : TargetInfo(Triple), HasSIMD(false) { + NoAsmVariants = true; + MCountName = ""; + setABI("sw_64"); + UseZeroLengthBitfieldAlignment = false; + IntMaxType = SignedLong; + } + + bool setABI(const std::string &Name) override { + set64ABITypes(); + return true; + } + + void set64ABITypes(void) { + LongWidth = LongAlign = 64; + PointerWidth = PointerAlign = 64; + LongDoubleWidth = LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); + MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; + DoubleAlign = LongLongAlign = 64; + SuitableAlign = 128; + MaxVectorAlign = 256; + SizeType = UnsignedLong; + PtrDiffType = SignedLong; + IntPtrType = SignedLong; + WCharType = SignedInt; + WIntType = UnsignedInt; + } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override; + + ArrayRef getTargetBuiltins() const override; + + BuiltinVaListKind getBuiltinVaListKind() const override { + return TargetInfo::Sw64ABIBuiltinVaList; + } + + ArrayRef getGCCRegNames() const override; + + ArrayRef getGCCRegAliases() const override; + + std::string_view getClobbers() const override { return ""; } + + bool hasFeature(StringRef Feature) const override; + bool handleTargetFeatures(std::vector &Features, + DiagnosticsEngine &Diags) override { + for (const auto &Feature : Features) { + if (Feature == "+simd") + HasSIMD = true; + if (Feature == "+core3b") + HasCore3 = true; + if (Feature == "+core4") + HasCore4 = true; + } + setDataLayout(); + return true; + }; + + bool 
isValidCPUName(StringRef Name) const override; + bool setCPU(const std::string &Name) override; + void fillValidCPUList(SmallVectorImpl &Values) const override; + bool isValidTuneCPUName(StringRef Name) const override; + void fillValidTuneCPUList(SmallVectorImpl &Values) const override; + bool validateAsmConstraint(const char *&Name, + TargetInfo::ConstraintInfo &Info) const override { + switch (*Name) { + default: + return false; + case 'I': // Signed 16-bit constant + case 'J': // Integer 0 + case 'K': // Unsigned 16-bit constant + case 'L': // Signed 32-bit constant, lower 16-bit zeros (for lui) + case 'M': // Constants not loadable via lui, addiu, or ori + case 'N': // Constant -1 to -65535 + case 'O': // A signed 15-bit constant + case 'P': // A constant between 1 go 65535 + return true; + } + } + // Return the register number that __builtin_eh_return_regno would return with + // the specified argument. + // + // This corresponds with TargetLowering's getExceptionPointerRegister and + // getExceptionSelectorRegister in the backend. + int getEHDataRegisterNumber(unsigned RegNo) const override { + if (RegNo == 0) + return 16; + if (RegNo == 1) + return 17; + return -1; + } + + bool allowsLargerPreferedTypeAlignment() const override { return false; } + bool hasBitIntType() const override { return true; } +}; +} // namespace targets +} // namespace clang +#endif diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 8f87c4d46109..f63fac117516 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -48,6 +48,7 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsSw64.h" #include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" @@ -5601,6 +5602,8 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); + case llvm::Triple::sw_64: + return CGF->EmitSw64BuiltinExpr(BuiltinID, E, ReturnValue); default: return nullptr; } @@ -20428,3 +20431,108 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); return Builder.CreateCall(F, Ops, ""); } + +Value *CodeGenFunction::EmitSw64BuiltinExpr(unsigned BuiltinID, + const CallExpr *E, + ReturnValueSlot ReturnValue) { + SmallVector Ops; + llvm::Type *ResultType = ConvertType(E->getType()); + Intrinsic::ID ID = Intrinsic::not_intrinsic; + + switch (BuiltinID) { + default: + llvm_unreachable("unexpected builtin ID"); + case Sw64::BI__builtin_sw_vload: + ID = Intrinsic::sw64_vload; + break; + case Sw64::BI__builtin_sw_vloade: + ID = Intrinsic::sw64_vloade; + break; + case Sw64::BI__builtin_sw_vloadu: + ID = Intrinsic::sw64_vloadu; + break; + case Sw64::BI__builtin_sw_vload_u: + ID = Intrinsic::sw64_vload_u; + break; + case Sw64::BI__builtin_sw_vloadnc: + ID = Intrinsic::sw64_vloadnc; + break; + case Sw64::BI__builtin_sw_vstore: + ID = Intrinsic::sw64_vstore; + break; + case Sw64::BI__builtin_sw_vstoreu: + ID = Intrinsic::sw64_vstoreu; + break; + case Sw64::BI__builtin_sw_vstore_u: + ID = Intrinsic::sw64_vstore_u; + break; + case Sw64::BI__builtin_sw_vstoreul: + ID = Intrinsic::sw64_vstoreul; + break; + case Sw64::BI__builtin_sw_vstoreuh: + ID = Intrinsic::sw64_vstoreuh; + break; + case Sw64::BI__builtin_sw_vstorenc: + ID = Intrinsic::sw64_vstorenc; + break; + case 
Sw64::BI__builtin_sw_vsll: + ID = Intrinsic::sw64_vsll; + break; + case Sw64::BI__builtin_sw_vsrl: + ID = Intrinsic::sw64_vsrl; + break; + case Sw64::BI__builtin_sw_vsra: + ID = Intrinsic::sw64_vsra; + break; + case Sw64::BI__builtin_sw_vrol: + ID = Intrinsic::sw64_vrol; + break; + } + + if (BuiltinID == Sw64::BI__builtin_sw_vload || + BuiltinID == Sw64::BI__builtin_sw_vloade || + BuiltinID == Sw64::BI__builtin_sw_vloadu || + BuiltinID == Sw64::BI__builtin_sw_vload_u || + BuiltinID == Sw64::BI__builtin_sw_vloadnc) { + bool isLoadExt = BuiltinID == Sw64::BI__builtin_sw_vloade; + + Value *LoadAddr = EmitScalarExpr(E->getArg(0)); + QualType Ty = E->getType(); + llvm::Type *ArgTy = LoadAddr->getType(); + llvm::Type *RealResTy = ConvertType(Ty); + llvm::Type *ResPTy = RealResTy->getPointerTo(); + // if target is Load duplicated in vector, do not emit BitCast + ResPTy = isLoadExt ? LoadAddr->getType() : ResPTy; + if (!isLoadExt) { + LoadAddr = Builder.CreateBitCast(LoadAddr, ResPTy); + } + llvm::Type *Tys[2] = {RealResTy, ResPTy}; + Function *F = CGM.getIntrinsic(ID, Tys); + return Builder.CreateCall(F, LoadAddr, "vload"); + } else if (BuiltinID == Sw64::BI__builtin_sw_vstore || + BuiltinID == Sw64::BI__builtin_sw_vstoreu || + BuiltinID == Sw64::BI__builtin_sw_vstore_u || + BuiltinID == Sw64::BI__builtin_sw_vstoreuh || + BuiltinID == Sw64::BI__builtin_sw_vstoreul || + BuiltinID == Sw64::BI__builtin_sw_vstorenc) { + Value *StoreVal = EmitScalarExpr(E->getArg(0)); + Value *StoreAddr = EmitScalarExpr(E->getArg(1)); + QualType Ty = E->getArg(0)->getType(); + llvm::Type *StoreTy = StoreVal->getType(); + StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); + Function *F = + CGM.getIntrinsic(ID, {StoreVal->getType(), StoreAddr->getType()}); + return Builder.CreateCall(F, {StoreVal, StoreAddr}, ""); + } else if (BuiltinID == Sw64::BI__builtin_sw_vsll || + BuiltinID == Sw64::BI__builtin_sw_vsra || + BuiltinID == Sw64::BI__builtin_sw_vsrl || + BuiltinID == Sw64::BI__builtin_sw_vrol) { + Value *ShiftVal = EmitScalarExpr(E->getArg(0)); + Value *ShiftImm = EmitScalarExpr(E->getArg(1)); + QualType Ty = E->getArg(0)->getType(); + + Function *F = + CGM.getIntrinsic(ID, {ShiftVal->getType(), ShiftImm->getType()}); + return Builder.CreateCall(F, {ShiftVal, ShiftImm}, ""); + } +} diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index 1debeb6d9cce..a575aa57d75e 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -108,6 +108,7 @@ add_clang_library(clangCodeGen Targets/RISCV.cpp Targets/SPIR.cpp Targets/Sparc.cpp + Targets/Sw64.cpp Targets/SystemZ.cpp Targets/TCE.cpp Targets/VE.cpp diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 143e0707b942..afcc0a5b927c 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4310,6 +4310,8 @@ public: llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitSw64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue); llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index eabc4aabea06..4651228817b5 100644 --- 
a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -90,6 +90,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { case TargetCXXABI::iOS: case TargetCXXABI::WatchOS: case TargetCXXABI::GenericMIPS: + case TargetCXXABI::GenericSW64: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: case TargetCXXABI::XL: @@ -268,6 +269,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) { return createX86_64TargetCodeGenInfo(CGM, AVXLevel); } } + case llvm::Triple::sw_64: + return createSw64TargetCodeGenInfo(CGM); case llvm::Triple::hexagon: return createHexagonTargetCodeGenInfo(CGM); case llvm::Triple::lanai: diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index ede9efb019ce..32b441813bb9 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -570,6 +570,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { case TargetCXXABI::GenericMIPS: return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + case TargetCXXABI::GenericSW64: + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + case TargetCXXABI::WebAssembly: return new WebAssemblyCXXABI(CGM); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 14ed5e5d2d2c..a1fc372d46f6 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -459,6 +459,9 @@ std::unique_ptr createLoongArchTargetCodeGenInfo(CodeGenModule &CGM, unsigned GRLen, unsigned FLen); +std::unique_ptr +createSw64TargetCodeGenInfo(CodeGenModule &CGM); + std::unique_ptr createM68kTargetCodeGenInfo(CodeGenModule &CGM); diff --git a/clang/lib/CodeGen/Targets/Sw64.cpp b/clang/lib/CodeGen/Targets/Sw64.cpp new file mode 100644 index 000000000000..0752efaef3c6 --- /dev/null +++ b/clang/lib/CodeGen/Targets/Sw64.cpp @@ -0,0 +1,545 @@ +//===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These classes wrap the information about a call or function +// definition used to handle ABI compliancy. +// +//===----------------------------------------------------------------------===// + +#include "TargetInfo.h" +#include "ABIInfoImpl.h" +#include "clang/Basic/DiagnosticFrontend.h" +#include "llvm/ADT/SmallBitVector.h" + +using namespace clang; +using namespace clang::CodeGen; + +//===----------------------------------------------------------------------===// +// SW64 ABI Implementation. +//===----------------------------------------------------------------------===// + +namespace { +class Sw64ABIInfo : public ABIInfo { + /// Similar to llvm::CCState, but for Clang. 
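The EmitSw64BuiltinExpr hunk above (in CGBuiltin.cpp) handles the new builtins in three groups: vector loads, vector stores, and vector shifts, each lowered to the corresponding llvm.sw64.* intrinsic. As a usage sketch only: the authoritative prototypes live in BuiltinsSw64.def, which is not shown in this excerpt, so the vector width and argument shapes below are inferred from the code-generation logic and should be read as assumptions.

    /* Hypothetical usage; the vector type and prototypes are assumptions. */
    typedef int v8si __attribute__((vector_size(32)));   /* 256-bit vector */

    v8si demo(const int *src, int *dst) {
      v8si v = __builtin_sw_vload(src);   /* lowered to llvm.sw64.vload  */
      v = __builtin_sw_vsll(v, 3);        /* lowered to llvm.sw64.vsll   */
      __builtin_sw_vstore(v, dst);        /* lowered to llvm.sw64.vstore */
      return v;
    }
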
+ struct CCState { + CCState(CGFunctionInfo &FI) + : IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()), + Required(FI.getRequiredArgs()), IsDelegateCall(FI.isDelegateCall()) {} + + llvm::SmallBitVector IsPreassigned; + unsigned CC = CallingConv::CC_C; + unsigned FreeRegs = 0; + unsigned FreeSSERegs = 0; + RequiredArgs Required; + bool IsDelegateCall = false; + }; + unsigned MinABIStackAlignInBytes, StackAlignInBytes; + void CoerceToIntArgs(uint64_t TySize, + SmallVectorImpl &ArgList) const; + llvm::Type *HandleAggregates(QualType Ty, uint64_t TySize) const; + llvm::Type *returnAggregateInRegs(QualType RetTy, uint64_t Size) const; + llvm::Type *getPaddingType(uint64_t Align, uint64_t Offset) const; + +public: + Sw64ABIInfo(CodeGenTypes &CGT) + : ABIInfo(CGT), MinABIStackAlignInBytes(8), StackAlignInBytes(16) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset, + CCState &State) const; + ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; + void computeInfo(CGFunctionInfo &FI) const override; + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + ABIArgInfo extendType(QualType Ty) const; +}; + +class Sw64TargetCodeGenInfo : public TargetCodeGenInfo { + unsigned SizeOfUnwindException; + +public: + Sw64TargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique(CGT)), + SizeOfUnwindException(32) {} + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { + return 30; + } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const FunctionDecl *FD = dyn_cast_or_null(D); + if (!FD) + return; + + // Other attributes do not have a meaning for declarations. + if (GV->isDeclaration()) + return; + + // FIXME:Interrupte Attr doesn`t write in SW64. + // const auto *attr = FD->getAttr(); + // if(!attr) + // return + // const char *Kind; + // ... + // + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override; + + unsigned getSizeOfUnwindException() const override { + return SizeOfUnwindException; + } +}; +} // namespace + +void Sw64ABIInfo::CoerceToIntArgs( + uint64_t TySize, SmallVectorImpl &ArgList) const { + llvm::IntegerType *IntTy = + llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); + + // Add (TySize / MinABIStackAlignInBytes) args of IntTy. + for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) + ArgList.push_back(IntTy); + + // If necessary, add one more integer type to ArgList. + unsigned R = TySize % (MinABIStackAlignInBytes * 8); + + if (R) + ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); +} + +// In N32/64, an aligned double precision floating point field is passed in +// a register. +llvm::Type *Sw64ABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { + SmallVector ArgList, IntArgList; + + if (Ty->isComplexType()) + return CGT.ConvertType(Ty); + + const RecordType *RT = Ty->getAs(); + + // Unions/vectors are passed in integer registers. 
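Sw64ABIInfo::CoerceToIntArgs above simply slices an aggregate into 64-bit integer pieces plus one smaller integer for any remainder. For example (illustrative only), a 20-byte aggregate is coerced by that helper into the LLVM type { i64, i64, i32 }:

    /* Illustrative only: 20 bytes = 160 bits -> two i64 pieces + one i32. */
    struct S { int a[5]; };   /* sizeof(struct S) == 20 on this ABI */
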
+ if (!RT || !RT->isStructureOrClassType()) { + CoerceToIntArgs(TySize, ArgList); + return llvm::StructType::get(getVMContext(), ArgList); + } + + const RecordDecl *RD = RT->getDecl(); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + assert(!(TySize % 8) && "Size of structure must be multiple of 8."); + + uint64_t LastOffset = 0; + unsigned idx = 0; + llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); + + // Iterate over fields in the struct/class and check if there are any aligned + // double fields. + for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); + i != e; ++i, ++idx) { + const QualType Ty = i->getType(); + const BuiltinType *BT = Ty->getAs(); + + if (!BT || BT->getKind() != BuiltinType::Double) + continue; + + uint64_t Offset = Layout.getFieldOffset(idx); + if (Offset % 64) // Ignore doubles that are not aligned. + continue; + + // Add ((Offset - LastOffset) / 64) args of type i64. + for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j) + ArgList.push_back(I64); + + // Add double type. + // ArgList.push_back(llvm::Type::getDoubleTy(getVMContext())); + ArgList.push_back(llvm::Type::getInt64Ty(getVMContext())); + LastOffset = Offset + 64; + } + + CoerceToIntArgs(TySize - LastOffset, IntArgList); + ArgList.append(IntArgList.begin(), IntArgList.end()); + + return llvm::StructType::get(getVMContext(), ArgList); +} + +llvm::Type *Sw64ABIInfo::getPaddingType(uint64_t OrigOffset, + uint64_t Offset) const { + if (OrigOffset + MinABIStackAlignInBytes > Offset) + return nullptr; + + return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8); +} + +ABIArgInfo Sw64ABIInfo::classifyArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + if (isAggregateTypeForABI(Ty)) { + // Records with non trivial destructors/constructors should not be passed + // by value. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + return getNaturalAlignIndirect(Ty); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs()) + Ty = EnumTy->getDecl()->getIntegerType(); + + if (const BuiltinType *BuiltinTy = Ty->getAs()) { + if (BuiltinTy->getKind() == BuiltinType::LongDouble && + getContext().getTypeSize(Ty) == 128) + return getNaturalAlignIndirect(Ty, false); + } + return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect(); +} +ABIArgInfo Sw64ABIInfo::getIndirectResult(QualType Ty, bool ByVal, + CCState &State) const { + if (!ByVal) { + if (State.FreeRegs) { + --State.FreeRegs; // Non-byval indirects just use one pointer. + return getNaturalAlignIndirectInReg(Ty); + } + return getNaturalAlignIndirect(Ty, false); + } + + // Compute the byval alignment. + unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + /*Realign=*/TypeAlign > + MinABIStackAlignInBytes); +} + +ABIArgInfo Sw64ABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset, + CCState &State) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + // Check with the C++ ABI first. 
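The one-argument classifyArgumentType overload above boils down to a few rules: aggregates and 128-bit long double are passed indirectly, promotable integers are extended, and everything else is passed directly. In C terms (illustrative only, not part of the patch):

    /* Illustrative mapping of parameter types to the rules above. */
    struct Big { char buf[64]; };
    void f(struct Big b);     /* aggregate            -> passed indirectly  */
    void g(long double x);    /* 128-bit long double  -> passed indirectly  */
    void h(short s);          /* promotable integer   -> extended (promoted) */
    void k(long n);           /* anything else        -> passed directly    */
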
+ const RecordType *RT = Ty->getAs(); + if (RT) { + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) { + return getIndirectResult(Ty, /*ByVal=*/false, State); + } else if (RAA == CGCXXABI::RAA_DirectInMemory) { + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + } + } + + if (Ty->isVectorType()) { + uint64_t Size = getContext().getTypeSize(Ty); + if (Size > 256) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + else if (Size < 128) { + llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + + if (Ty->isAnyComplexType()) { + if (getContext().getTypeSize(Ty) <= 128) { + return ABIArgInfo::getDirect(); + } else { + return getNaturalAlignIndirect(Ty, false); + } + } + + uint64_t OrigOffset = Offset; + uint64_t TySize = getContext().getTypeSize(Ty); + uint64_t Align = getContext().getTypeAlign(Ty) / 8; + + Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), + (uint64_t)StackAlignInBytes); + unsigned CurrOffset = llvm::alignTo(Offset, Align); + Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; + + if (isAggregateTypeForABI(Ty)) { + // Ignore empty aggregates. + if (TySize == 0) + return ABIArgInfo::getIgnore(); + + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { + Offset = OrigOffset + MinABIStackAlignInBytes; + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + } + llvm::LLVMContext &LLVMContext = getVMContext(); + unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 63) / 64; + if (SizeInRegs <= State.FreeRegs) { + llvm::IntegerType *Int64 = llvm::Type::getInt64Ty(LLVMContext); + SmallVector Elements(SizeInRegs, Int64); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + return ABIArgInfo::getDirectInReg(Result); + } else { + // If we have reached here, aggregates are passed directly by coercing to + // another structure type. Padding is inserted if the offset of the + // aggregate is unaligned. + ABIArgInfo ArgInfo = + ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, + getPaddingType(OrigOffset, CurrOffset)); + ArgInfo.setInReg(true); + return ArgInfo; + } + } + + if (const BuiltinType *BuiltinTy = Ty->getAs()) { + if (BuiltinTy->getKind() == BuiltinType::LongDouble && + getContext().getTypeSize(Ty) == 128) + return getNaturalAlignIndirect(Ty, false); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // All integral types are promoted to the GPR width. + if (Ty->isIntegralOrEnumerationType()) + return extendType(Ty); + + return ABIArgInfo::getDirect(nullptr, 0, + getPaddingType(OrigOffset, CurrOffset)); +} + +llvm::Type *Sw64ABIInfo::returnAggregateInRegs(QualType RetTy, + uint64_t Size) const { + const RecordType *RT = RetTy->getAs(); + SmallVector RTList; + + if (RT && RT->isStructureOrClassType()) { + const RecordDecl *RD = RT->getDecl(); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + unsigned FieldCnt = Layout.getFieldCount(); + + // N32/64 returns struct/classes in floating point registers if the + // following conditions are met: + // 1. The size of the struct/class is no larger than 128-bit. + // 2. The struct/class has one or two fields all of which are floating + // point types. + // 3. The offset of the first field is zero (this follows what gcc does). + // + // Any other composite results are returned in integer registers. 
+ // + if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) { + RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end(); + for (; b != e; ++b) { + const BuiltinType *BT = b->getType()->getAs(); + + if (!BT || !BT->isFloatingPoint()) + break; + + RTList.push_back(CGT.ConvertType(b->getType())); + } + if (b == e) + return llvm::StructType::get(getVMContext(), RTList, + RD->hasAttr()); + + RTList.clear(); + } + } + + CoerceToIntArgs(Size, RTList); + return llvm::StructType::get(getVMContext(), RTList); +} + +ABIArgInfo Sw64ABIInfo::classifyReturnType(QualType RetTy) const { + uint64_t Size = getContext().getTypeSize(RetTy); + + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + // However, N32/N64 ignores zero sized return values. + if (Size == 0) + return ABIArgInfo::getIgnore(); + + // Large vector types should be returned via memory. + if (RetTy->isVectorType() && Size == 256) + return ABIArgInfo::getDirect(); + + if (const auto *BT = RetTy->getAs()) + if (BT->getKind() == BuiltinType::LongDouble || Size >= 128) + return getNaturalAlignIndirect(RetTy); + + if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { + if ((RetTy->hasFloatingRepresentation() && Size <= 128) || + (!RetTy->hasFloatingRepresentation() && Size <= 64)) { + if (RetTy->isComplexType()) + return ABIArgInfo::getDirect(); + + if (RetTy->isComplexIntegerType() || + (RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) { + ABIArgInfo ArgInfo = + ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); + ArgInfo.setInReg(true); + return ArgInfo; + } + } + + return getNaturalAlignIndirect(RetTy); + } + + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = RetTy->getAs()) + RetTy = EnumTy->getDecl()->getIntegerType(); + + if (isPromotableIntegerTypeForABI(RetTy)) + return ABIArgInfo::getExtend(RetTy); + + if ((RetTy->isUnsignedIntegerOrEnumerationType() || + RetTy->isSignedIntegerOrEnumerationType()) && + Size == 32) + return ABIArgInfo::getSignExtend(RetTy); + + return ABIArgInfo::getDirect(); +} + +void Sw64ABIInfo::computeInfo(CGFunctionInfo &FI) const { + + CCState State(FI); + if (FI.getHasRegParm()) { + State.FreeRegs = FI.getRegParm(); + } else { + State.FreeRegs = 6; + } + + ABIArgInfo &RetInfo = FI.getReturnInfo(); + if (!getCXXABI().classifyReturnType(FI)) + RetInfo = classifyReturnType(FI.getReturnType()); + + // Check if a pointer to an aggregate is passed as a hidden argument. + uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0; + + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, Offset, State); +} + +Address Sw64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType OrigTy) const { + + QualType Ty = OrigTy; + auto TyAlign = getContext().getTypeInfoInChars(Ty).Align; + if (!Ty->isStructureOrClassType() && (TyAlign.getQuantity() <= 8)) { + ABIArgInfo AI = classifyArgumentType(Ty); + return EmitVAArgInstr(CGF, VAListAddr, OrigTy, AI); + } + + bool DidPromote = false; + auto TyInfo = getContext().getTypeInfoInChars(Ty); + + // The alignment of things in the argument area is never larger than + // StackAlignInBytes. + TyInfo.Align = + std::min(TyInfo.Align, CharUnits::fromQuantity(StackAlignInBytes)); + + bool IsIndirect = false; + bool AllowHigherAlign = true; + + CharUnits DirectSize, DirectAlign; + if (IsIndirect) { + DirectAlign = CGF.getPointerAlign(); + } else { + DirectAlign = TyInfo.Align; + } + // Cast the address we've calculated to the right type. 
+ llvm::Type *DirectTy = CGF.ConvertTypeForMem(Ty), *ElementTy = DirectTy; + if (IsIndirect) + DirectTy = DirectTy->getPointerTo(0); + + CharUnits SlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes); + + // Handle vaList specified on Sw64, struct{char *ptr, int offset} + Address vaList_ptr_p = CGF.Builder.CreateStructGEP(VAListAddr, 0); + llvm::Value *vaList_ptr = CGF.Builder.CreateLoad(vaList_ptr_p); + Address vaList_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1); + llvm::Value *vaList_offset = CGF.Builder.CreateLoad(vaList_offset_p); + + uint64_t TySize = TyInfo.Width.getQuantity(); + llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, TySize); + CGF.Builder.CreateStore(CGF.Builder.CreateAdd(vaList_offset, Offset), + vaList_offset_p); + + llvm::Value *GPAddr = + CGF.Builder.CreateGEP(CGF.Int8Ty, vaList_ptr, vaList_offset); + + // If the CC aligns values higher than the slot size, do so if needed. + Address Addr = Address::invalid(); + if (AllowHigherAlign && DirectAlign > SlotSize) { + Addr = Address(emitRoundPointerUpToAlignment(CGF, GPAddr, DirectAlign), + CGF.Int8Ty, DirectAlign); + } else { + Addr = Address(GPAddr, CGF.Int8Ty, SlotSize); + } + + Addr = Addr.withElementType(DirectTy); + + if (IsIndirect) { + Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, TyInfo.Align); + } + + // If there was a promotion, "unpromote" into a temporary. + // TODO: can we just use a pointer into a subset of the original slot? + if (DidPromote) { + Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp"); + llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr); + + // Truncate down to the right width. + llvm::Type *IntTy = + (OrigTy->isIntegerType() ? Temp.getElementType() : CGF.IntPtrTy); + llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy); + if (OrigTy->isPointerType()) + V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType()); + + CGF.Builder.CreateStore(V, Temp); + Addr = Temp; + } + + return Addr; +} + +ABIArgInfo Sw64ABIInfo::extendType(QualType Ty) const { + int TySize = getContext().getTypeSize(Ty); + + // SW64 ABI requires unsigned 32 bit integers to be sign extended. + if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32) + return ABIArgInfo::getSignExtend(Ty); + + return ABIArgInfo::getExtend(Ty); +} + +bool Sw64TargetCodeGenInfo::initDwarfEHRegSizeTable( + CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { + // SW have much different from Mips. This should be rewrite. + + // This information comes from gcc's implementation, which seems to + // as canonical as it gets. + + // Everything on Sw64 is 4 bytes. Double-precision FP registers + // are aliased to pairs of single-precision FP registers. + llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); + + // 0-31 are the general purpose registers, $0 - $31. + // 32-63 are the floating-point registers, $f0 - $f31. + // 64 and 65 are the multiply/divide registers, $hi and $lo. + // 66 is the (notional, I think) register for signal-handler return. + AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65); + + // 67-74 are the floating-point status registers, $fcc0 - $fcc7. + // They are one bit wide and ignored here. + + // 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31. + // (coprocessor 1 is the FP unit) + // 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31. + // 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31. + // 176-181 are the DSP accumulator registers. 
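Taken together, the EmitVAArg lowering above walks the two-field va_list by loading the save-area pointer and the current offset, bumping the stored offset by the argument's size, and reading the argument at the old offset (re-aligning the address when the type needs more than the 8-byte slot). A minimal C sketch of that walk, for illustration only and ignoring the alignment and promotion corner cases:

    /* Illustrative sketch only; mirrors the { void *__stack; int __offs; }
       va_list and the load/advance sequence emitted above. */
    typedef struct { char *__stack; int __offs; } sw64_va_list;

    static void *sw64_va_next(sw64_va_list *ap, int size) {
      void *arg = ap->__stack + ap->__offs;  /* argument lives at ptr + offset   */
      ap->__offs += size;                    /* advance offset for the next read */
      return arg;
    }
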
+ AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181); + return false; +} + +std::unique_ptr +CodeGen::createSw64TargetCodeGenInfo(CodeGenModule &CGM) { + return std::make_unique(CGM.getTypes()); +} diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index ac30007588b1..a1757224b2d8 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -44,6 +44,7 @@ add_clang_library(clangDriver ToolChains/Arch/PPC.cpp ToolChains/Arch/RISCV.cpp ToolChains/Arch/Sparc.cpp + ToolChains/Arch/Sw64.cpp ToolChains/Arch/SystemZ.cpp ToolChains/Arch/VE.cpp ToolChains/Arch/X86.cpp @@ -94,6 +95,7 @@ add_clang_library(clangDriver ToolChains/XCore.cpp ToolChains/PPCLinux.cpp ToolChains/PPCFreeBSD.cpp + ToolChains/Sw64Toolchain.cpp ToolChains/InterfaceStubs.cpp ToolChains/ZOS.cpp Types.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 28b33c8862e4..6a1c6ca2c867 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -49,6 +49,7 @@ #include "ToolChains/RISCVToolchain.h" #include "ToolChains/SPIRV.h" #include "ToolChains/Solaris.h" +#include "ToolChains/Sw64Toolchain.h" #include "ToolChains/TCE.h" #include "ToolChains/VEToolchain.h" #include "ToolChains/WebAssembly.h" @@ -6497,6 +6498,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::csky: TC = std::make_unique(*this, Target, Args); break; + case llvm::Triple::sw_64: + TC = std::make_unique(*this, Target, Args); + break; default: if (Target.getVendor() == llvm::Triple::Myriad) TC = std::make_unique(*this, Target, diff --git a/clang/lib/Driver/ToolChains/Arch/Sw64.cpp b/clang/lib/Driver/ToolChains/Arch/Sw64.cpp new file mode 100644 index 000000000000..895175d223ad --- /dev/null +++ b/clang/lib/Driver/ToolChains/Arch/Sw64.cpp @@ -0,0 +1,94 @@ +//===--------- Sw64.cpp - Sw64 Helpers for Tools ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sw64.h" +#include "ToolChains/CommonArgs.h" +#include "clang/Driver/Options.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/Sw64TargetParser.h" + +using namespace clang::driver; +using namespace clang::driver::tools; +using namespace clang; +using namespace llvm::opt; + +const char *Sw64::getSw64TargetCPU(const ArgList &Args) { + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ)) { + StringRef Mcpu = llvm::Sw64::getMcpuFromMArch(A->getValue()); + if (Mcpu != "") + return Mcpu.data(); + else + return A->getValue(); + } + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) + return A->getValue(); + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) + return A->getValue(); + return "sw6b"; +} + +void Sw64::getSw64TargetFeatures(const Driver &D, const ArgList &Args, + std::vector &Features) { + // -m(no-)simd overrides use of the vector facility. 
+ AddTargetFeature(Args, Features, options::OPT_msimd, options::OPT_mno_simd, + "simd"); + + if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { + StringRef Mcpu = A->getValue(); + if (Mcpu.startswith("sw6b") || Mcpu.startswith("sw4d")) + Features.push_back("+core3b"); + else if (Mcpu.startswith("sw8a")) + Features.push_back("+core4"); + } + + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) { + StringRef March = A->getValue(); + if (March.startswith("core3b")) + Features.push_back("+core3b"); + else if (March.startswith("core4")) + Features.push_back("+core4"); + } + + if (Args.hasArg(options::OPT_ffixed_sw_1)) + Features.push_back("+reserve-r1"); + if (Args.hasArg(options::OPT_ffixed_sw_2)) + Features.push_back("+reserve-r2"); + if (Args.hasArg(options::OPT_ffixed_sw_3)) + Features.push_back("+reserve-r3"); + if (Args.hasArg(options::OPT_ffixed_sw_4)) + Features.push_back("+reserve-r4"); + if (Args.hasArg(options::OPT_ffixed_sw_5)) + Features.push_back("+reserve-r5"); + if (Args.hasArg(options::OPT_ffixed_sw_6)) + Features.push_back("+reserve-r6"); + if (Args.hasArg(options::OPT_ffixed_sw_7)) + Features.push_back("+reserve-r7"); + if (Args.hasArg(options::OPT_ffixed_sw_8)) + Features.push_back("+reserve-r8"); + if (Args.hasArg(options::OPT_ffixed_sw_9)) + Features.push_back("+reserve-r9"); + if (Args.hasArg(options::OPT_ffixed_sw_10)) + Features.push_back("+reserve-r10"); + if (Args.hasArg(options::OPT_ffixed_sw_11)) + Features.push_back("+reserve-r11"); + if (Args.hasArg(options::OPT_ffixed_sw_12)) + Features.push_back("+reserve-r12"); + if (Args.hasArg(options::OPT_ffixed_sw_13)) + Features.push_back("+reserve-r13"); + if (Args.hasArg(options::OPT_ffixed_sw_14)) + Features.push_back("+reserve-r14"); + if (Args.hasArg(options::OPT_ffixed_sw_22)) + Features.push_back("+reserve-r22"); + if (Args.hasArg(options::OPT_ffixed_sw_23)) + Features.push_back("+reserve-r23"); + if (Args.hasArg(options::OPT_ffixed_sw_24)) + Features.push_back("+reserve-r24"); + if (Args.hasArg(options::OPT_ffixed_sw_25)) + Features.push_back("+reserve-r25"); +} diff --git a/clang/lib/Driver/ToolChains/Arch/Sw64.h b/clang/lib/Driver/ToolChains/Arch/Sw64.h new file mode 100644 index 000000000000..cc319026b2d1 --- /dev/null +++ b/clang/lib/Driver/ToolChains/Arch/Sw64.h @@ -0,0 +1,34 @@ +//===--- Sw64.h - Sw64-specific Tool Helpers --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
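Tying the driver and target pieces together: getSw64TargetFeatures above maps -mcpu=sw8a (or -march=core4) to the +core4 feature, handleTargetFeatures in Sw64TargetInfo sets HasCore4 from it, and getTargetDefines (earlier in this patch) then predefines __sw_64_sw8a__, so source code can be conditionalized in the usual way. A small illustrative snippet, not part of the patch:

    /* Illustrative only; the macros are predefined by getTargetDefines above. */
    #if defined(__sw_64__) && defined(__sw_64_sw8a__)
      /* path that may rely on core4 (sw8a) features */
    #else
      /* generic sw_64 path */
    #endif
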
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_SW64_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_SW64_H + +#include "clang/Driver/Driver.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/Option.h" +#include "llvm/TargetParser/Triple.h" +#include + +namespace clang { +namespace driver { +namespace tools { +namespace Sw64 { + +const char *getSw64TargetCPU(const llvm::opt::ArgList &Args); + +void getSw64TargetFeatures(const Driver &D, const llvm::opt::ArgList &Args, + std::vector &Features); + +} // end namespace Sw64 +} // end namespace tools +} // end namespace driver +} // end namespace clang + +#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_SW64_H diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7e78e4d8d351..fd441db9b1c9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -17,6 +17,7 @@ #include "Arch/PPC.h" #include "Arch/RISCV.h" #include "Arch/Sparc.h" +#include "Arch/Sw64.h" #include "Arch/SystemZ.h" #include "Arch/VE.h" #include "Arch/X86.h" @@ -53,6 +54,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/RISCVISAInfo.h" +#include "llvm/Support/Sw64TargetParser.h" #include "llvm/Support/YAMLParser.h" #include "llvm/TargetParser/ARMTargetParserCommon.h" #include "llvm/TargetParser/Host.h" @@ -478,6 +480,7 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args, case llvm::Triple::mips64el: case llvm::Triple::mips: case llvm::Triple::mipsel: + case llvm::Triple::sw_64: case llvm::Triple::systemz: case llvm::Triple::x86: case llvm::Triple::x86_64: @@ -1735,6 +1738,10 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, AddSparcTargetArgs(Args, CmdArgs); break; + case llvm::Triple::sw_64: + AddSw64TargetArgs(Args, CmdArgs); + break; + case llvm::Triple::systemz: AddSystemZTargetArgs(Args, CmdArgs); break; @@ -2233,6 +2240,34 @@ void Clang::AddSparcTargetArgs(const ArgList &Args, } } +void Clang::AddSw64TargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + std::string TuneCPU; + + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { + StringRef Name = A->getValue(); + + Name = llvm::Sw64::resolveTuneCPUAlias(Name, true); + TuneCPU = std::string(Name); + } + if (!TuneCPU.empty()) { + CmdArgs.push_back("-tune-cpu"); + CmdArgs.push_back(Args.MakeArgString(TuneCPU)); + } + + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + StringRef OOpt; + if (A->getOption().matches(options::OPT_O)) + OOpt = A->getValue(); + + if (A->getOption().matches(options::OPT_O0) || OOpt == "1" || OOpt == "s") + return; + + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-loop-prefetch-writes=true"); + } +} + void Clang::AddSystemZTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { @@ -5096,6 +5131,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_Wa_COMMA, options::OPT_Xassembler, options::OPT_mllvm, + options::OPT_fsw_int_divmod, + options::OPT_fsw_shift_word, + options::OPT_fsw_rev, + options::OPT_fsw_recip, + options::OPT_fsw_fprnd, + options::OPT_fsw_cmov, + options::OPT_fsw_auto_inc_dec, + options::OPT_fsw_use_cas, }; for (const auto &A : Args) if (llvm::is_contained(kBitcodeOptionIgnorelist, A->getOption().getID())) @@ -5287,6 +5330,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, 
unsigned PICLevel; bool IsPIE; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(TC, Args); + if (TC.getArch() == llvm::Triple::sw_64 && + RelocationModel != llvm::Reloc::PIC_) + RelocationModel = llvm::Reloc::PIC_; + Arg *LastPICDataRelArg = Args.getLastArg(options::OPT_mno_pic_data_is_text_relative, options::OPT_mpic_data_is_text_relative); @@ -5649,6 +5696,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, else if (TC.getTriple().isPPC() && (A->getOption().getID() != options::OPT_mlong_double_80)) A->render(Args, CmdArgs); + else if (TC.getTriple().isSw64()) + A->render(Args, CmdArgs); else D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getAsString(Args) << TripleStr; @@ -6623,6 +6672,46 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_ftrap_function_EQ); + if (Args.getLastArg(options::OPT_fsw_int_divmod)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-int-divmod"); + } + + if (Args.getLastArg(options::OPT_fsw_shift_word)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-shift-word"); + } + + if (Args.getLastArg(options::OPT_fsw_rev)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-rev"); + } + + if (Args.getLastArg(options::OPT_fsw_recip)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-recip"); + } + + if (Args.getLastArg(options::OPT_fsw_fprnd)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-fprnd"); + } + + if (Args.getLastArg(options::OPT_fsw_cmov)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-cmov"); + } + + if (Args.getLastArg(options::OPT_fsw_auto_inc_dec)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-auto-inc-dec"); + } + + if (Args.getLastArg(options::OPT_fsw_use_cas)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sw-use-cas"); + } + // -fno-strict-overflow implies -fwrapv if it isn't disabled, but // -fstrict-overflow won't turn off an explicitly enabled -fwrapv. 
if (Arg *A = Args.getLastArg(options::OPT_fwrapv, options::OPT_fno_wrapv)) { diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index 64fc86b6b0a7..667fe246d80d 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -69,6 +69,8 @@ private: llvm::opt::ArgStringList &CmdArgs) const; void AddSparcTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + void AddSw64TargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; void AddSystemZTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; void AddX86TargetArgs(const llvm::opt::ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index e01b21e102b1..0f599fdbeef3 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -16,6 +16,7 @@ #include "Arch/PPC.h" #include "Arch/RISCV.h" #include "Arch/Sparc.h" +#include "Arch/Sw64.h" #include "Arch/SystemZ.h" #include "Arch/VE.h" #include "Arch/X86.h" @@ -514,6 +515,9 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, case llvm::Triple::loongarch32: case llvm::Triple::loongarch64: return loongarch::getLoongArchTargetCPU(Args, T); + + case llvm::Triple::sw_64: + return Sw64::getSw64TargetCPU(Args); } } @@ -610,6 +614,9 @@ void tools::getTargetFeatureList(const Driver &D, case llvm::Triple::loongarch64: loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); break; + case llvm::Triple::sw_64: + Sw64::getSw64TargetFeatures(D, Args, Features); + break; } #ifdef ENABLE_CLASSIC_FLANG } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 243724ef528f..20900620a80c 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -14,6 +14,7 @@ #include "Arch/PPC.h" #include "Arch/RISCV.h" #include "Arch/Sparc.h" +#include "Arch/Sw64.h" #include "Arch/SystemZ.h" #include "CommonArgs.h" #include "Linux.h" @@ -287,6 +288,8 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { return "elf64ve"; case llvm::Triple::csky: return "cskyelf_linux"; + case llvm::Triple::sw_64: + return "elf64sw_64"; default: return nullptr; } @@ -974,6 +977,11 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, CmdArgs.push_back(Args.MakeArgString("-mmsa")); } + if (Arg *A = Args.getLastArg(options::OPT_msimd, options::OPT_mno_simd)) { + if (A->getOption().matches(options::OPT_msimd)) + CmdArgs.push_back(Args.MakeArgString("-msimd")); + } + Args.AddLastArg(CmdArgs, options::OPT_mhard_float, options::OPT_msoft_float); @@ -2495,6 +2503,12 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", "s390x-suse-linux", "s390x-redhat-linux"}; + static const char *const Sw64LibDirs[] = {"/lib64", "/lib", + "/lib/gcc/sw_64-sunway-linux-gnu/", + "/sw_64-sunway-linux-gnu/lib"}; + static const char *const Sw64Triples[] = { + "sw_64-sunway-linux-gnu", "sw_64-unknown-linux-gnu", "sw_64-linux-gnu", + "sw_64-openEuler-linux"}; using std::begin; using std::end; @@ -2748,6 +2762,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( LibDirs.append(begin(SystemZLibDirs), end(SystemZLibDirs)); TripleAliases.append(begin(SystemZTriples), end(SystemZTriples)); break; + case llvm::Triple::sw_64: + LibDirs.append(begin(Sw64LibDirs), end(Sw64LibDirs)); + 
TripleAliases.append(begin(Sw64Triples), end(Sw64Triples)); + break; default: // By default, just rely on the standard lib directories and the original // triple. @@ -3364,4 +3382,9 @@ void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs, if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, true)) CC1Args.push_back("-fno-use-init-array"); + if (getTriple().getArch() == llvm::Triple::sw_64 && + DriverArgs.hasArg(options::OPT_mieee)) { + CC1Args.push_back("-mllvm"); + CC1Args.push_back("-mieee"); + } } diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 186b45e3c437..9557e5f1e348 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -12,6 +12,7 @@ #include "Arch/Mips.h" #include "Arch/PPC.h" #include "Arch/RISCV.h" +#include "Arch/Sw64.h" #include "CommonArgs.h" #include "clang/Config/config.h" #include "clang/Driver/Distro.h" @@ -164,6 +165,8 @@ std::string Linux::getMultiarchTriple(const Driver &D, return "sparc64-linux-gnu"; case llvm::Triple::systemz: return "s390x-linux-gnu"; + case llvm::Triple::sw_64: + return "sw_64-linux-gnu"; } return TargetTriple.str(); } @@ -256,6 +259,10 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) const bool IsHexagon = Arch == llvm::Triple::hexagon; const bool IsRISCV = Triple.isRISCV(); const bool IsCSKY = Triple.isCSKY(); + const bool IsSw64 = Triple.isSw64(); + + if (IsSw64 && !SysRoot.empty()) + ExtraOpts.push_back("--sysroot=" + SysRoot); if (IsCSKY && !SelectedMultilibs.empty()) SysRoot = SysRoot + SelectedMultilibs.back().osSuffix(); @@ -330,6 +337,11 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) addPathIfExists(D, concat(SysRoot, "/usr", OSLibDir, ABIName), Paths); } + if (IsSw64) { + addPathIfExists(D, SysRoot + "/usr/lib/gcc/sw_64-sunway-linux-gnu/", Paths); + addPathIfExists(D, SysRoot + "/usr/sw_64-sunway-linux-gnu/lib", Paths); + } + Generic_GCC::AddMultiarchPaths(D, SysRoot, OSLibDir, Paths); addPathIfExists(D, concat(SysRoot, "/lib"), Paths); @@ -645,6 +657,10 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { LibDir = "lib64"; Loader = "ld-linux.so.2"; break; + case llvm::Triple::sw_64: + LibDir = "lib"; + Loader = "ld-linux.so.2"; + break; case llvm::Triple::systemz: LibDir = "lib"; Loader = "ld64.so.1"; diff --git a/clang/lib/Driver/ToolChains/Sw64Toolchain.cpp b/clang/lib/Driver/ToolChains/Sw64Toolchain.cpp new file mode 100644 index 000000000000..9992b350dda5 --- /dev/null +++ b/clang/lib/Driver/ToolChains/Sw64Toolchain.cpp @@ -0,0 +1,184 @@ +//===--- Sw64Toolchain.cpp - Sw64 ToolChain Implementations -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Sw64Toolchain.h" +#include "CommonArgs.h" +#include "Gnu.h" +#include "clang/Config/config.h" +#include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/VirtualFileSystem.h" + +using namespace clang; +using namespace clang::driver; +using namespace clang::driver::tools; +using namespace clang::driver::toolchains; +using namespace llvm::opt; + +void Sw64::Assembler::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + claimNoWarnArgs(Args); + ArgStringList CmdArgs; + + Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); + + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + + for (const auto &II : Inputs) + CmdArgs.push_back(II.getFilename()); + + const char *Exec = Args.MakeArgString( + getToolChain().GetProgramPath("sw_64-sunway-linux-gnu-as")); + C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), + Exec, CmdArgs, Inputs, Output)); +} + +void Sw64::Linker::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + const Driver &D = getToolChain().getDriver(); + ArgStringList CmdArgs; + + if (Output.isFilename()) { + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + } else { + assert(Output.isNothing() && "Invalid output."); + } + + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { + CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crt1.o"))); + CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o"))); + CmdArgs.push_back( + Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); + CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); + } + + Args.AddAllArgs(CmdArgs, + {options::OPT_L, options::OPT_T_Group, options::OPT_e}); + + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); + + getToolChain().addProfileRTLibs(Args, CmdArgs); + + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { + if (D.CCCIsCXX()) { + if (getToolChain().ShouldLinkCXXStdlib(Args)) + getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); + CmdArgs.push_back("-lm"); + } + } + + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { + if (Args.hasArg(options::OPT_pthread)) + CmdArgs.push_back("-lpthread"); + CmdArgs.push_back("-lc"); + CmdArgs.push_back("-lgcc"); + CmdArgs.push_back("-lgcc_s"); + CmdArgs.push_back( + Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); + } + + const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath()); + C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), + Exec, CmdArgs, Inputs, Output)); +} + +/// Sw64Toolchain - Sw64 tool chain which can call as(1) and ld(1) directly. 
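+///
+/// For example (an illustrative sketch only; the exact command lines depend on
+/// the installed toolchain and the driver options), a build such as
+///   clang --target=sw_64-sunway-linux-gnu -fno-integrated-as hello.c -o hello
+/// is expected to assemble through sw_64-sunway-linux-gnu-as and then link the
+/// crt1.o/crti.o/crtbegin.o startup files, the object files, and
+/// -lc/-lgcc/-lgcc_s, as constructed by Sw64::Assembler::ConstructJob and
+/// Sw64::Linker::ConstructJob above.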
+ +Sw64Toolchain::Sw64Toolchain(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args) + : Generic_ELF(D, Triple, Args) { + getFilePaths().push_back(getDriver().Dir + "/../lib"); + getFilePaths().push_back("/usr/lib"); +} + +Tool *Sw64Toolchain::buildAssembler() const { + return new Sw64::Assembler(*this); +} + +Tool *Sw64Toolchain::buildLinker() const { return new Sw64::Linker(*this); } + +void Sw64Toolchain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + const Driver &D = getDriver(); + + if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) + return; + + if (!DriverArgs.hasArg(options::OPT_nostdlibinc)) + addSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/local/include"); + + if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { + SmallString<128> P(D.ResourceDir); + llvm::sys::path::append(P, "include"); + addSystemInclude(DriverArgs, CC1Args, P); + } + if (DriverArgs.hasArg(options::OPT_nostdlibinc)) + return; + + // Check for configure-time C include directories. + StringRef CIncludeDirs(C_INCLUDE_DIRS); + if (CIncludeDirs != "") { + SmallVector dirs; + CIncludeDirs.split(dirs, ":"); + for (StringRef dir : dirs) { + StringRef Prefix = + llvm::sys::path::is_absolute(dir) ? StringRef(D.SysRoot) : ""; + addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir); + } + return; + } + + // Add include directories specific to the selected multilib set and multilib. + if (GCCInstallation.isValid()) { + const MultilibSet::IncludeDirsFunc &Callback = + Multilibs.includeDirsCallback(); + if (Callback) { + for (const auto &Path : Callback(GCCInstallation.getMultilib())) + addExternCSystemIncludeIfExists( + DriverArgs, CC1Args, GCCInstallation.getInstallPath() + Path); + } + } + + addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include"); +} + +void Sw64Toolchain::addLibStdCxxIncludePaths( + const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const { + // We need a detected GCC installation on Sw64 (similar to Linux) + // to provide libstdc++'s headers. + if (!GCCInstallation.isValid()) + return; + + // By default, look for the C++ headers in an include directory adjacent to + // the lib directory of the GCC installation. + // On Sw64 this usually looks like /usr/gcc/X.Y/include/c++/X.Y.Z + StringRef LibDir = GCCInstallation.getParentLibPath(); + StringRef TripleStr = GCCInstallation.getTriple().str(); + const Multilib &Multilib = GCCInstallation.getMultilib(); + const GCCVersion &Version = GCCInstallation.getVersion(); + + // The primary search for libstdc++ supports multiarch variants. + addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text, + TripleStr, Multilib.includeSuffix(), DriverArgs, + CC1Args); +} diff --git a/clang/lib/Driver/ToolChains/Sw64Toolchain.h b/clang/lib/Driver/ToolChains/Sw64Toolchain.h new file mode 100644 index 000000000000..c32f628b812d --- /dev/null +++ b/clang/lib/Driver/ToolChains/Sw64Toolchain.h @@ -0,0 +1,79 @@ +//===--- Sw64Toolchain.h - Sw64 ToolChain Implementations -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SW64TOOLCHAIN_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SW64TOOLCHAIN_H + +#include "Gnu.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Option/Option.h" + +namespace clang { +namespace driver { +namespace toolchains { + +class LLVM_LIBRARY_VISIBILITY Sw64Toolchain : public Generic_ELF { +public: + Sw64Toolchain(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args); + void + AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + + void + addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + + unsigned GetDefaultDwarfVersion() const override { return 2; } + +protected: + Tool *buildAssembler() const override; + Tool *buildLinker() const override; +}; + +} // end namespace toolchains + +/// Sw64 -- Directly call GNU Binutils assembler and linker +namespace tools { +namespace Sw64 { +class LLVM_LIBRARY_VISIBILITY Assembler : public Tool { +public: + Assembler(const ToolChain &TC) : Tool("sw_64::Assembler", "assembler", TC) {} + + bool hasIntegratedCPP() const override { return false; } + + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + +class LLVM_LIBRARY_VISIBILITY Linker : public Tool { +public: + Linker(const ToolChain &TC) : Tool("sw_64::Linker", "linker", TC) {} + + bool hasIntegratedCPP() const override { return false; } + bool isLinkJob() const override { return true; } + + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; +} // end namespace Sw64 +} // end namespace tools + +} // end namespace driver +} // end namespace clang + +#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SW64TOOLCHAIN_H diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp index 8c5134e25013..75bf3bc28b51 100644 --- a/clang/lib/Driver/XRayArgs.cpp +++ b/clang/lib/Driver/XRayArgs.cpp @@ -53,6 +53,7 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: + case llvm::Triple::sw_64: break; default: D.Diag(diag::err_drv_unsupported_opt_for_target) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 6ec2e3fc2af0..04d0f1c9f7a7 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1733,7 +1733,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, const llvm::Triple::ArchType DebugEntryValueArchs[] = { llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64, llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips, - llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el}; + llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el, + llvm::Triple::sw_64}; if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() && llvm::is_contained(DebugEntryValueArchs, T.getArch())) diff --git a/clang/lib/Headers/CMakeLists.txt 
b/clang/lib/Headers/CMakeLists.txt index 356009ae9157..abba9f45221c 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -108,6 +108,10 @@ set(sifive_files sifive_vector.h ) +set(sw64_files + sw64intrin.h + ) + set(systemz_files s390intrin.h vecintrin.h @@ -260,6 +264,7 @@ set(files ${ppc_htm_files} ${riscv_files} ${sifive_files} + ${sw64_files} ${systemz_files} ${ve_files} ${x86_files} @@ -463,6 +468,7 @@ add_header_target("mips-resource-headers" "${mips_msa_files}") add_header_target("ppc-resource-headers" "${ppc_files};${ppc_wrapper_files}") add_header_target("ppc-htm-resource-headers" "${ppc_htm_files}") add_header_target("riscv-resource-headers" "${riscv_files};${riscv_generated_files}") +add_header_target("sw64-resource-headers" "${sw64_files}") add_header_target("systemz-resource-headers" "${systemz_files}") add_header_target("ve-resource-headers" "${ve_files}") add_header_target("webassembly-resource-headers" "${webassembly_files}") @@ -608,6 +614,12 @@ install( EXCLUDE_FROM_ALL COMPONENT riscv-resource-headers) +install( + FILES ${sw64_files} + DESTINATION ${header_install_dir} + EXCLUDE_FROM_ALL + COMPONENT sw64-resource-headers) + install( FILES ${systemz_files} DESTINATION ${header_install_dir} diff --git a/clang/lib/Headers/sw64intrin.h b/clang/lib/Headers/sw64intrin.h new file mode 100644 index 000000000000..86a20c53a7ac --- /dev/null +++ b/clang/lib/Headers/sw64intrin.h @@ -0,0 +1,1590 @@ + +#ifndef __SW64INTRIN_H +#define __SW64INTRIN_H + +#include <stdint.h> +#include <stdio.h> +#include + +typedef int8_t charv32 __attribute__((__vector_size__(32), __aligned__(32))); +typedef uint8_t ucharv32 __attribute__((__vector_size__(32), __aligned__(32))); +typedef int16_t shortv16 __attribute__((__vector_size__(32), __aligned__(32))); +typedef uint16_t ushortv16 + __attribute__((__vector_size__(32), __aligned__(32))); +typedef int32_t intv8 __attribute__((__vector_size__(32), __aligned__(32))); +typedef uint32_t uintv8 __attribute__((__vector_size__(32), __aligned__(32))); +typedef int64_t longv4 __attribute__((__vector_size__(32), __aligned__(32))); +typedef uint64_t ulongv4 __attribute__((__vector_size__(32), __aligned__(32))); + +// As the sw64 floatv4 type is a very special case, we leave it like this for now.
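+// Unlike the 32-byte integer and double vector types above, floatv4 below is
+// declared with __vector_size__(16): four 32-bit floats in 16 bytes, so
+// sizeof(floatv4) == 16 while sizeof(doublev4) == 32.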
+typedef float floatv4 __attribute__((__vector_size__(16), __aligned__(16))); +typedef double doublev4 __attribute__((__vector_size__(32), __aligned__(32))); +// special case for int256 +typedef long long int256 __attribute__((__vector_size__(32), __aligned__(32))); +typedef unsigned long long uint256 + __attribute__((__vector_size__(32), __aligned__(32))); + +// special case for bytes compare +typedef int32_t int1v32_t; +// special case for half transform +typedef unsigned short float16v4_t + __attribute__((__vector_size__(8), __aligned__(8))); +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("simd"), \ + __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS_CORE4 \ + __attribute__((__always_inline__, __nodebug__, __target__("core4,simd"), \ + __min_vector_width__(256))) + +static __inline void simd_fprint_charv32(FILE *fp, charv32 a) { + union { + char __a[32]; + charv32 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[31], __u.__a[30], + __u.__a[29], __u.__a[28], __u.__a[27], __u.__a[26], __u.__a[25], + __u.__a[24]); + fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[23], __u.__a[22], + __u.__a[21], __u.__a[20], __u.__a[19], __u.__a[18], __u.__a[17], + __u.__a[16]); + fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[15], __u.__a[14], + __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9], + __u.__a[8]); + fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_ucharv32(FILE *fp, ucharv32 a) { + union { + unsigned char __a[32]; + ucharv32 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[31], __u.__a[30], + __u.__a[29], __u.__a[28], __u.__a[27], __u.__a[26], __u.__a[25], + __u.__a[24]); + fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[23], __u.__a[22], + __u.__a[21], __u.__a[20], __u.__a[19], __u.__a[18], __u.__a[17], + __u.__a[16]); + fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[15], __u.__a[14], + __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9], + __u.__a[8]); + fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_shortv16(FILE *fp, shortv16 a) { + union { + short __a[16]; + shortv16 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %d, %d, %d, %d, %d, %d, %d, %d \n", __u.__a[15], __u.__a[14], + __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9], + __u.__a[8]); + fprintf(fp, " %d, %d, %d, %d, %d, %d, %d, %d ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_ushortv16(FILE *fp, ushortv16 a) { + union { + unsigned short __a[16]; + ushortv16 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %u, %u, %u, %u, %u, %u, %u, %u \n", __u.__a[15], __u.__a[14], + __u.__a[13], __u.__a[12], __u.__a[11], __u.__a[10], __u.__a[9], + __u.__a[8]); + fprintf(fp, " %u, %u, %u, %u, %u, %u, %u, %u ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_intv8(FILE *fp, intv8 a) { + union { + int __a[8]; + intv8 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %d, %d, %d, %d, %d, %d, %d, %d ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], 
__u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_uintv8(FILE *fp, uintv8 a) { + union { + unsigned int __a[8]; + uintv8 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %u, %u, %u, %u, %u, %u, %u, %u ]\n", __u.__a[7], __u.__a[6], + __u.__a[5], __u.__a[4], __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_longv4(FILE *fp, longv4 a) { + union { + long __a[4]; + longv4 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %ld, %ld, %ld, %ld ]\n", __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_ulongv4(FILE *fp, ulongv4 a) { + union { + unsigned long __a[4]; + ulongv4 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %lu, %lu, %lu, %lu ]\n", __u.__a[3], __u.__a[2], __u.__a[1], + __u.__a[0]); +} + +static __inline void simd_fprint_floatv4(FILE *fp, floatv4 a) { + union { + float __a[4]; + floatv4 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %.8e, %.8e, %.8e, %.8e ]\n", __u.__a[3], __u.__a[2], + __u.__a[1], __u.__a[0]); +} + +static __inline void simd_fprint_doublev4(FILE *fp, doublev4 a) { + union { + double __a[4]; + doublev4 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ %.16e, %.16e, %.16e, %.16e ]\n", __u.__a[3], __u.__a[2], + __u.__a[1], __u.__a[0]); +} + +static __inline void simd_fprint_int256(FILE *fp, int256 a) { + volatile union { + long __a[4]; + int256 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ 0x%lx, 0x%lx, 0x%lx, 0x%lx ]\n", __u.__a[3], __u.__a[2], + __u.__a[1], __u.__a[0]); +} + +static __inline void simd_fprint_uint256(FILE *fp, uint256 a) { + volatile union { + unsigned long __a[4]; + uint256 __v; + } __u; + __u.__v = a; + fprintf(fp, "[ 0x%lx, 0x%lx, 0x%lx, 0x%lx ]\n", __u.__a[3], __u.__a[2], + __u.__a[1], __u.__a[0]); +} + +static __inline void simd_print_charv32(charv32 arg) { + simd_fprint_charv32(stdout, arg); +} +static __inline void simd_print_ucharv32(ucharv32 arg) { + simd_fprint_ucharv32(stdout, arg); +} +static __inline void simd_print_shortv16(shortv16 arg) { + simd_fprint_shortv16(stdout, arg); +} +static __inline void simd_print_ushortv16(ushortv16 arg) { + simd_fprint_ushortv16(stdout, arg); +} +static __inline void simd_print_intv8(intv8 arg) { + simd_fprint_intv8(stdout, arg); +} +static __inline void simd_print_uintv8(uintv8 arg) { + simd_fprint_uintv8(stdout, arg); +} +static __inline void simd_print_longv4(longv4 arg) { + simd_fprint_longv4(stdout, arg); +} +static __inline void simd_print_ulongv4(ulongv4 arg) { + simd_fprint_ulongv4(stdout, arg); +} +static __inline void simd_print_floatv4(floatv4 arg) { + simd_fprint_floatv4(stdout, arg); +} +static __inline void simd_print_doublev4(doublev4 arg) { + simd_fprint_doublev4(stdout, arg); +} +static __inline void simd_print_int256(int256 arg) { + simd_fprint_int256(stdout, arg); +} +static __inline void simd_print_uint256(uint256 arg) { + simd_fprint_uint256(stdout, arg); +} + +// Vector Load Intrinsic + +#define simd_load(dest, src) \ + do { \ + (dest) = __builtin_sw_vload(src); \ + } while (0) + +#define simd_loadu(dest, src) \ + do { \ + (dest) = __builtin_sw_vloadu(src); \ + } while (0) + +#define simd_load_u(dest, src) \ + do { \ + (dest) = __builtin_sw_vload_u(src); \ + } while (0) + +#define simd_loade(dest, src) \ + do { \ + (dest) = __builtin_sw_vloade(src); \ + } while (0) + +#define simd_vload_nc(dest, src) \ + do { \ + (dest) = __builtin_sw_vloadnc(src); \ + } while (0) + +#define simd_store(src, dest) \ + do { \ + __builtin_sw_vstore(src, dest); \ + } while (0) + +#define simd_storeu(src, dest) \ + do { \ + 
__builtin_sw_vstoreu(src, dest); \ + } while (0) + +#define simd_store_u(src, dest) \ + do { \ + __builtin_sw_vstore_u(src, dest); \ + } while (0) + +#define simd_storeuh(src, dest) \ + do { \ + uint64_t __ptr = (uint64_t)dest + (uint64_t)sizeof(src); \ + __builtin_sw_vstoreuh(src, (__typeof__(dest))__ptr); \ + } while (0) + +#define simd_storeul(src, dest) \ + do { \ + __builtin_sw_vstoreul(src, dest); \ + } while (0) + +#define simd_vstore_nc(src, dest) \ + do { \ + __builtin_sw_vstorenc(src, dest); \ + } while (0) + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_loads(const float *__ptr) { + return *(floatv4 *)__ptr; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_loadd(const double *__ptr) { + return *(doublev4 *)__ptr; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_stores(const float *__ptr, + floatv4 a) { + *(floatv4 *)__ptr = a; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_stored(const double *__ptr, + doublev4 a) { + *(doublev4 *)__ptr = a; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_loadew(const int32_t *__ptr) { + int32_t __a = *__ptr; + return __extension__(intv8){__a, __a, __a, __a, __a, __a, __a, __a}; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_loadel(const int64_t *__ptr) { + int64_t __a = *__ptr; + return __extension__(longv4){__a, __a, __a, __a}; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_loades(const float *__ptr) { + float __a = *__ptr; + return __extension__(floatv4){__a, __a, __a, __a}; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_loaded(const double *__ptr) { + double __a = *__ptr; + return __extension__(doublev4){__a, __a, __a, __a}; +} + +// Vector Setting Intrinsic Sw64 + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_set_charv32( + int8_t __b31, int8_t __b30, int8_t __b29, int8_t __b28, int8_t __b27, + int8_t __b26, int8_t __b25, int8_t __b24, int8_t __b23, int8_t __b22, + int8_t __b21, int8_t __b20, int8_t __b19, int8_t __b18, int8_t __b17, + int8_t __b16, int8_t __b15, int8_t __b14, int8_t __b13, int8_t __b12, + int8_t __b11, int8_t __b10, int8_t __b09, int8_t __b08, int8_t __b07, + int8_t __b06, int8_t __b05, int8_t __b04, int8_t __b03, int8_t __b02, + int8_t __b01, int8_t __b00) { + return __extension__(charv32){__b31, __b30, __b29, __b28, __b27, __b26, __b25, + __b24, __b23, __b22, __b21, __b20, __b19, __b18, + __b17, __b16, __b15, __b14, __b13, __b12, __b11, + __b10, __b09, __b08, __b07, __b06, __b05, __b04, + __b03, __b02, __b01, __b00}; +} +#define simd_set_ucharv32 simd_set_charv32 + +static __inline__ shortv16 __DEFAULT_FN_ATTRS +simd_set_shortv16(int16_t __b15, int16_t __b14, int16_t __b13, int16_t __b12, + int16_t __b11, int16_t __b10, int16_t __b09, int16_t __b08, + int16_t __b07, int16_t __b06, int16_t __b05, int16_t __b04, + int16_t __b03, int16_t __b02, int16_t __b01, int16_t __b00) { + return __extension__(shortv16){__b15, __b14, __b13, __b12, __b11, __b10, + __b09, __b08, __b07, __b06, __b05, __b04, + __b03, __b02, __b01, __b00}; +} +#define simd_set_ushortv16 simd_set_shortv16 + +static __inline__ intv8 __DEFAULT_FN_ATTRS +simd_set_intv8(int32_t __b07, int32_t __b06, int32_t __b05, int32_t __b04, + int32_t __b03, int32_t __b02, int32_t __b01, int32_t __b00) { + return __extension__(intv8){__b07, __b06, __b05, __b04, + __b03, __b02, __b01, __b00}; +} +#define simd_set_uintv8 simd_set_intv8 + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_set_longv4(int64_t __b03, + int64_t __b02, + int64_t __b01, + int64_t __b00) { + return __extension__(longv4){__b03, __b02, 
__b01, __b00}; +} +#define simd_set_ulongv4 simd_set_longv4 +#define simd_set_int256 simd_set_longv4 +#define simd_set_uint256 simd_set_longv4 + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_set_floatv4(float __b03, + float __b02, + float __b01, + float __b00) { + return __extension__(floatv4){__b03, __b02, __b01, __b00}; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_set_doublev4(double __b03, + double __b02, + double __b01, + double __b00) { + return __extension__(doublev4){__b03, __b02, __b01, __b00}; +} + +// Integer Araith Intrinsic Sw64 +// Caculate adden for given vector as int32_tx8, +// it isn't normal overflow result. +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vaddw(intv8 a, intv8 b) { + return a + b; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vaddwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return a + tmp; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsubw(intv8 a, intv8 b) { + return a - b; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsubwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return a - tmp; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddw(intv8 a, intv8 b) { + return __builtin_sw_vucaddw(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vucaddw(a, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubw(intv8 a, intv8 b) { + return __builtin_sw_vucsubw(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vucsubw(a, tmp); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vaddl(longv4 a, longv4 b) { + return a + b; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vaddli(longv4 a, + const int64_t __b) { + longv4 __tmp = __extension__(longv4){__b, __b, __b, __b}; + return a + __tmp; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vsubl(longv4 a, longv4 b) { + return a - b; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vsubli(longv4 a, + const int64_t __b) { + longv4 __tmp = __extension__(longv4){__b, __b, __b, __b}; + return a - __tmp; +} + +// for core3 simd doesn't support v16i16, v32i8 +// it must use v8i32 instead. 
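+// A minimal usage sketch for the load/arithmetic/store intrinsics above
+// (illustrative only; the buffers and values are made up for the example):
+//   int32_t src[8], dst[8];
+//   intv8 va, vb;
+//   simd_load(va, src);        // va = vector loaded from src
+//   vb = simd_vaddwi(va, 10);  // element-wise va + 10
+//   vb = simd_vucaddw(va, vb); // "uc" add variant; not a plain wrapping add (see above)
+//   simd_store(vb, dst);       // write vb back to dst
+// The *i immediate forms simply splat the scalar operand into a temporary
+// vector and reuse the corresponding vector operation, as defined above.
+// The __sw_64_sw8a__ block below adds shortv16/charv32 variants of the
+// vucadd/vucsub operations; on core3 the intv8 forms after #else are used instead.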
+#ifdef __sw_64_sw8a__ +static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucaddh(shortv16 a, + shortv16 b) { + return __builtin_sw_vucaddh_v16hi(a, b); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucaddhi(shortv16 a, + const int b) { + int16_t __b = (int16_t)b; + shortv16 tmp = + __extension__(shortv16){__b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b}; + return __builtin_sw_vucaddh_v16hi(a, tmp); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucsubh(shortv16 a, + shortv16 b) { + return __builtin_sw_vucsubh_v16hi(a, b); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS simd_vucsubhi(shortv16 a, + const int b) { + int16_t __b = (int16_t)b; + shortv16 tmp = + __extension__(shortv16){__b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b}; + return __builtin_sw_vucsubh_v16hi(a, tmp); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucaddb(charv32 a, + charv32 b) { + return __builtin_sw_vucaddb_v32qi(a, b); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucaddbi(charv32 a, + const int b) { + int8_t __b = (int8_t)b; + charv32 tmp = __extension__(charv32){__b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b}; + return __builtin_sw_vucaddb_v32qi(a, tmp); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucsubb(charv32 a, + charv32 b) { + charv32 tmp = + __extension__(charv32){b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b}; + return __builtin_sw_vucsubb_v32qi(a, b); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS simd_vucsubbi(charv32 a, + const int b) { + int8_t __b = (int8_t)b; + charv32 tmp = __extension__(charv32){__b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b}; + return __builtin_sw_vucsubb_v32qi(a, tmp); +} +#else +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddh(intv8 a, intv8 b) { + return __builtin_sw_vucaddh(a, b); +} + +#define simd_vucaddhi __builtin_sw_vucaddhi + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubh(intv8 a, intv8 b) { + return __builtin_sw_vucsubh(a, b); +} + +#define simd_vucsubhi __builtin_sw_vucsubhi + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucaddb(intv8 a, intv8 b) { + return __builtin_sw_vucaddb(a, b); +} + +#define simd_vucaddbi __builtin_sw_vucaddbi + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vucsubb(intv8 a, intv8 b) { + return __builtin_sw_vucsubb(a, b); +} + +#define simd_vucsubbi __builtin_sw_vucsubbi +#endif + +static __inline__ int32_t __DEFAULT_FN_ATTRS_CORE4 simd_vsumw(intv8 a) { + return __builtin_sw_vsumw(a); +} + +static __inline__ int64_t __DEFAULT_FN_ATTRS_CORE4 simd_vsuml(longv4 a) { + return __builtin_sw_vsuml(a); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_ctpopow(int256 a) { + return __builtin_sw_ctpopow(a); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_ctlzow(int256 a) { + return __builtin_sw_ctlzow(a); +} + +// Vector Shift intrinsics +// Gerate vsll(b|h|w|l) instruction due to Type define + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vsllw(uintv8 a, int i) { + return __builtin_sw_vsll(a, (int64_t)i); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vsrlw(uintv8 a, int i) { + return __builtin_sw_vsrl(a, (int64_t)i); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsraw(intv8 a, int i) { + 
return __builtin_sw_vsra(a, (int64_t)i); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vrolw(intv8 a, int i) { + return __builtin_sw_vrol(a, (int64_t)i); +} + +#define simd_vsllwi simd_vsllw +#define simd_vsrlwi simd_vsrlw +#define simd_vsrawi simd_vsraw +#define simd_vrolwi simd_vrolw + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vsllb(charv32 a, + int i) { + return __builtin_sw_vsll(a, (int64_t)i); +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vsrlb(ucharv32 a, + int i) { + return __builtin_sw_vsrl(a, (int64_t)i); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vsrab(charv32 a, + int i) { + return __builtin_sw_vsra(a, (int64_t)i); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vrolb(charv32 a, + int i) { + return __builtin_sw_vrol(a, (int64_t)i); +} + +#define simd_vsllbi simd_vsllb +#define simd_vsrlbi simd_vsrlb +#define simd_vsrabi simd_vsrab +#define simd_vrolbi simd_vrolb + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vslll(longv4 a, int i) { + return __builtin_sw_vsll(a, (int64_t)i); +} + +static __inline__ ulongv4 __DEFAULT_FN_ATTRS_CORE4 simd_vsrll(ulongv4 a, + int i) { + return __builtin_sw_vsrl(a, (int64_t)i); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vsral(longv4 a, int i) { + return __builtin_sw_vsra(a, (int64_t)i); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vroll(longv4 a, int i) { + return __builtin_sw_vrol(a, (int64_t)i); +} + +#define simd_vsllli simd_vslll +#define simd_vsrlli simd_vsrll +#define simd_vsrali simd_vsral +#define simd_vrolli simd_vroll + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vsllh(shortv16 a, + int i) { + return __builtin_sw_vsll(a, (int64_t)i); +} + +static __inline__ ushortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vsrlh(ushortv16 a, + int i) { + return __builtin_sw_vsrl(a, (int64_t)i); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vsrah(shortv16 a, + int i) { + return __builtin_sw_vsra(a, (int64_t)i); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vrolh(shortv16 a, + int i) { + return __builtin_sw_vrol(a, (int64_t)i); +} + +#define simd_vsllhi simd_vsllh +#define simd_vsrlhi simd_vsrlh +#define simd_vsrahi simd_vsrah +#define simd_vrolhi simd_vrolh + +static __inline__ int256 __DEFAULT_FN_ATTRS simd_srlow(int256 a, int i) { + return __builtin_sw_srlow(a, (int64_t)i); +} + +static __inline__ int256 __DEFAULT_FN_ATTRS simd_sllow(int256 a, int i) { + return __builtin_sw_sllow(a, (int64_t)i); +} + +static __inline__ int256 __DEFAULT_FN_ATTRS simd_sraow(int256 a, int i) { + return __builtin_sw_sraow(a, (int64_t)i); +} + +#define simd_srlowi simd_srlow +#define simd_sllowi simd_sllow +#define simd_sraowi simd_sraow + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vslls1(floatv4 a) { + return __builtin_sw_vslls(a, 64); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vslls2(floatv4 a) { + return __builtin_sw_vslls(a, 128); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vslls3(floatv4 a) { + return __builtin_sw_vslls(a, 192); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vslld1(doublev4 a) { + return __builtin_sw_vslld(a, 64); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vslld2(doublev4 a) { + return __builtin_sw_vslld(a, 128); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vslld3(doublev4 a) { + return __builtin_sw_vslld(a, 192); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsrls1(floatv4 a) { + return __builtin_sw_vsrls(a, 64); +} + +static 
__inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsrls2(floatv4 a) { + return __builtin_sw_vsrls(a, 128); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsrls3(floatv4 a) { + return __builtin_sw_vsrls(a, 192); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsrld1(doublev4 a) { + return __builtin_sw_vsrld(a, 64); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsrld2(doublev4 a) { + return __builtin_sw_vsrld(a, 128); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsrld3(doublev4 a) { + return __builtin_sw_vsrld(a, 192); +} + +// Integer Compare Inst + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_vcmpgew(intv8 a, intv8 b) { + return __builtin_sw_vcmpgew(a, b); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_vcmpgewi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpgew(a, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpeqw(intv8 a, intv8 b) { + return __builtin_sw_vcmpeqw(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpeqwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpeqw(a, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmplew(intv8 a, intv8 b) { + return __builtin_sw_vcmplew(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmplewi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmplew(a, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpltw(intv8 a, intv8 b) { + return __builtin_sw_vcmpltw(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcmpltwi(intv8 a, + const int32_t b) { + intv8 tmp = __extension__(intv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpltw(a, tmp); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpulew(uintv8 a, uintv8 b) { + return __builtin_sw_vcmpulew(a, b); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpulewi(uintv8 a, + const uint32_t b) { + uintv8 tmp = __extension__(uintv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpulew(a, tmp); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpultw(uintv8 a, uintv8 b) { + return __builtin_sw_vcmpultw(a, b); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS simd_vcmpultwi(uintv8 a, + const uint32_t b) { + uintv8 tmp = __extension__(uintv8){b, b, b, b, b, b, b, b}; + return __builtin_sw_vcmpultw(a, tmp); +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vcmpueqb(ucharv32 a, + ucharv32 b) { + ucharv32 res = (ucharv32)__builtin_sw_vcmpueqb(a, b); + return res; +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 +simd_vcmpueqbi(ucharv32 a, const uint32_t b) { + uint8_t __b = (uint8_t)b; + ucharv32 tmp = __extension__(ucharv32){ + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b}; + ucharv32 res = (ucharv32)__builtin_sw_vcmpueqb(a, tmp); + return res; +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vcmpugtb(ucharv32 a, + ucharv32 b) { + ucharv32 res = (ucharv32)__builtin_sw_vcmpugtb(a, b); + return res; +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 +simd_vcmpugtbi(ucharv32 a, const uint32_t b) { + uint8_t __b = (uint8_t)b; + ucharv32 tmp = __extension__(ucharv32){ + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, + __b, __b, __b, __b, __b, __b, 
__b, __b, __b, __b}; + ucharv32 res = (ucharv32)__builtin_sw_vcmpugtb(a, tmp); + return res; +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxb(charv32 a, + charv32 b) { + return __builtin_sw_vmaxb(a, b); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxh(shortv16 a, + shortv16 b) { + return __builtin_sw_vmaxh(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxw(intv8 a, intv8 b) { + return __builtin_sw_vmaxw(a, b); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxl(longv4 a, + longv4 b) { + return __builtin_sw_vmaxl(a, b); +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxb(ucharv32 a, + ucharv32 b) { + return __builtin_sw_vumaxb(a, b); +} + +static __inline__ ushortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxh(ushortv16 a, + ushortv16 b) { + return __builtin_sw_vumaxh(a, b); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxw(uintv8 a, + uintv8 b) { + return __builtin_sw_vumaxw(a, b); +} + +static __inline__ ulongv4 __DEFAULT_FN_ATTRS_CORE4 simd_vumaxl(ulongv4 a, + ulongv4 b) { + return __builtin_sw_vumaxl(a, b); +} + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vminb(charv32 a, + charv32 b) { + return __builtin_sw_vminb(a, b); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vminh(shortv16 a, + shortv16 b) { + return __builtin_sw_vminh(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS_CORE4 simd_vminw(intv8 a, intv8 b) { + return __builtin_sw_vminw(a, b); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vminl(longv4 a, + longv4 b) { + return __builtin_sw_vminl(a, b); +} + +static __inline__ ucharv32 __DEFAULT_FN_ATTRS_CORE4 simd_vuminb(ucharv32 a, + ucharv32 b) { + return __builtin_sw_vuminb(a, b); +} + +static __inline__ ushortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vuminh(ushortv16 a, + ushortv16 b) { + return __builtin_sw_vuminh(a, b); +} + +static __inline__ uintv8 __DEFAULT_FN_ATTRS_CORE4 simd_vuminw(uintv8 a, + uintv8 b) { + return __builtin_sw_vuminw(a, b); +} + +static __inline__ ulongv4 __DEFAULT_FN_ATTRS_CORE4 simd_vuminl(ulongv4 a, + ulongv4 b) { + return __builtin_sw_vuminl(a, b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vseleqw(intv8 a, intv8 b, + intv8 c) { + return __builtin_sw_vseleqw(a, b, c); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellew(intv8 a, intv8 b, + intv8 c) { + return __builtin_sw_vsellew(a, b, c); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vselltw(intv8 a, intv8 b, + intv8 c) { + return __builtin_sw_vselltw(a, b, c); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellbcw(intv8 a, intv8 b, + intv8 c) { + return __builtin_sw_vsellbcw(a, b, c); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vseleqwi(intv8 a, intv8 b, + int32_t c) { + intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; + return __builtin_sw_vseleqw(a, b, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellewi(intv8 a, intv8 b, + int32_t c) { + intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; + return __builtin_sw_vsellew(a, b, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vselltwi(intv8 a, intv8 b, + int32_t c) { + intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; + return __builtin_sw_vselltw(a, b, tmp); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vsellbcwi(intv8 a, intv8 b, + int32_t c) { + intv8 tmp = __extension__(intv8){c, c, c, c, c, c, c, c}; + return __builtin_sw_vsellbcw(a, b, tmp); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS 
simd_vseleql(longv4 a, longv4 b, + longv4 c) { + doublev4 tmp_a = (doublev4)a; + doublev4 tmp_b = (doublev4)b; + doublev4 tmp_c = (doublev4)c; + return (longv4)__builtin_sw_vfseleqd(tmp_a, tmp_b, tmp_c); +} + +// Vector Logic Operation + +#define simd_vlog(a, b, c, opcode) __builtin_sw_vlogzz(a, b, c, opcode) + +#define simd_vand(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vand##SUFFIX(TYPE a, \ + TYPE b) { \ + return a & b; \ + } + +simd_vand(b, charv32) +simd_vand(h, shortv16) +simd_vand(w, intv8) +simd_vand(l, longv4) + +#define simd_vbic(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vbic##SUFFIX(TYPE a, \ + TYPE b) { \ + return a & ~b; \ + } + +simd_vbic(b, charv32) +simd_vbic(h, shortv16) +simd_vbic(w, intv8) +simd_vbic(l, longv4) + +#define simd_vbis(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vbis##SUFFIX(TYPE a, \ + TYPE b) { \ + return a | b; \ + } + +simd_vbis(b, charv32) +simd_vbis(h, shortv16) +simd_vbis(w, intv8) +simd_vbis(l, longv4) + +#define simd_vornot(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vornot##SUFFIX(TYPE a, \ + TYPE b) { \ + return a | ~b; \ + } + +simd_vornot(b, charv32) +simd_vornot(h, shortv16) +simd_vornot(w, intv8) +simd_vornot(l, longv4) + +#define simd_vxor(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_vxor##SUFFIX(TYPE a, \ + TYPE b) { \ + return a ^ b; \ + } + +simd_vxor(b, charv32) +simd_vxor(h, shortv16) +simd_vxor(w, intv8) +simd_vxor(l, longv4) + +#define simd_veqv(SUFFIX, TYPE) \ + static __inline__ TYPE __DEFAULT_FN_ATTRS simd_veqv##SUFFIX(TYPE a, \ + TYPE b) { \ + return ~(a ^ b); \ + } + +simd_veqv(b, charv32) +simd_veqv(h, shortv16) +simd_veqv(w, intv8) +simd_veqv(l, longv4) + +// float arithmetic Operation + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vadds(floatv4 a, floatv4 b) { + return a + b; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vaddd(doublev4 a, + doublev4 b) { + return a + b; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsubs(floatv4 a, floatv4 b) { + return a - b; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsubd(doublev4 a, + doublev4 b) { + return a - b; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vmuls(floatv4 a, floatv4 b) { + return a * b; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vmuld(doublev4 a, + doublev4 b) { + return a * b; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vdivs(floatv4 a, floatv4 b) { + return a / b; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vdivd(doublev4 a, + doublev4 b) { + return a / b; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vsqrts(floatv4 a) { + return __builtin_sw_vsqrts(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vsqrtd(doublev4 a) { + return __builtin_sw_vsqrtd(a); +} + +static __inline__ float __DEFAULT_FN_ATTRS_CORE4 simd_vsums(floatv4 a) { + return __builtin_sw_vsums(a); +} + +static __inline__ double __DEFAULT_FN_ATTRS_CORE4 simd_vsumd(doublev4 a) { + return __builtin_sw_vsumd(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrecs(floatv4 a) { + return __builtin_sw_vfrecs(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrecd(doublev4 a) { + return __builtin_sw_vfrecd(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmpeqs(floatv4 a, + floatv4 b) { + return __builtin_sw_vfcmpeqs(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmples(floatv4 a, + floatv4 b) { + return __builtin_sw_vfcmples(a, b); +} + 
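+// The vfcmp* intrinsics produce a per-element mask that is normally consumed
+// by the vfsel* intrinsics defined further below. For example (an illustrative
+// sketch), an element-wise maximum can be written as:
+//   floatv4 m  = simd_vfcmplts(a, b);     // mask of the elements where a < b
+//   floatv4 mx = simd_vfseleqs(m, a, b);  // keep a where the mask is clear, else b
+// This is the same compare-then-select pattern used by simd_reduc_smaxs near
+// the end of this header.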
+static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmplts(floatv4 a, + floatv4 b) { + return __builtin_sw_vfcmplts(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfcmpuns(floatv4 a, + floatv4 b) { + return __builtin_sw_vfcmpuns(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpeqd(doublev4 a, + doublev4 b) { + return __builtin_sw_vfcmpeqd(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpled(doublev4 a, + doublev4 b) { + return __builtin_sw_vfcmpled(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpltd(doublev4 a, + doublev4 b) { + return __builtin_sw_vfcmpltd(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfcmpund(doublev4 a, + doublev4 b) { + return __builtin_sw_vfcmpund(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtsd(floatv4 a) { + return __builtin_sw_vfcvtsd(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtds(doublev4 a) { + return __builtin_sw_vfcvtds(a); +} + +#define simd_vfcvtsh(a, b, c) __builtin_sw_vfcvtsh(a, b, c) +#define simd_vfcvths(a, b) __builtin_sw_vfcvths(a, b) + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtls(longv4 a) { + return __builtin_sw_vfcvtls(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtld(longv4 a) { + return __builtin_sw_vfcvtld(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtsl(floatv4 a) { + doublev4 tmp = __builtin_sw_vfcvtsd(a); + return __builtin_sw_vfcvtdl(tmp); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl(doublev4 a) { + return __builtin_sw_vfcvtdl(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_g(doublev4 a) { + return __builtin_sw_vfcvtdl_g(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_p(doublev4 a) { + return __builtin_sw_vfcvtdl_p(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_z(doublev4 a) { + return __builtin_sw_vfcvtdl_z(a); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfcvtdl_n(doublev4 a) { + return __builtin_sw_vfcvtdl_n(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris(floatv4 a) { + return __builtin_sw_vfris(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_g(floatv4 a) { + return __builtin_sw_vfris_g(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_p(floatv4 a) { + return __builtin_sw_vfris_p(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_z(floatv4 a) { + return __builtin_sw_vfris_z(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vfris_n(floatv4 a) { + return __builtin_sw_vfris_n(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid(doublev4 a) { + return __builtin_sw_vfrid(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_g(doublev4 a) { + return __builtin_sw_vfrid_g(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_p(doublev4 a) { + return __builtin_sw_vfrid_p(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_z(doublev4 a) { + return __builtin_sw_vfrid_z(a); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vfrid_n(doublev4 a) { + return __builtin_sw_vfrid_n(a); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxs(floatv4 a, + floatv4 b) { + return __builtin_sw_vmaxs(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vmaxd(doublev4 a, + doublev4 b) { + return 
__builtin_sw_vmaxd(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS_CORE4 simd_vmins(floatv4 a, + floatv4 b) { + return __builtin_sw_vmins(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS_CORE4 simd_vmind(doublev4 a, + doublev4 b) { + return __builtin_sw_vmind(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpyss(floatv4 a, floatv4 b) { + return __builtin_sw_vcpyss(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpyses(floatv4 a, + floatv4 b) { + return __builtin_sw_vcpyses(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpysns(floatv4 a, + floatv4 b) { + return __builtin_sw_vcpysns(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpysd(doublev4 a, + doublev4 b) { + return __builtin_sw_vcpysd(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpysed(doublev4 a, + doublev4 b) { + return __builtin_sw_vcpysed(a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpysnd(doublev4 a, + doublev4 b) { + return __builtin_sw_vcpysnd(a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfseleqs(floatv4 cond, + floatv4 a, + floatv4 b) { + return __builtin_sw_vfseleqs(cond, a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfsellts(floatv4 cond, + floatv4 a, + floatv4 b) { + return __builtin_sw_vfsellts(cond, a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vfselles(floatv4 cond, + floatv4 a, + floatv4 b) { + return __builtin_sw_vfselles(cond, a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfseleqd(doublev4 cond, + doublev4 a, + doublev4 b) { + return __builtin_sw_vfseleqd(cond, a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfselltd(doublev4 cond, + doublev4 a, + doublev4 b) { + return __builtin_sw_vfselltd(cond, a, b); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vfselled(doublev4 cond, + doublev4 a, + doublev4 b) { + return __builtin_sw_vfselled(cond, a, b); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vmas(floatv4 a, floatv4 b, + floatv4 c) { + return a * b + c; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vmss(floatv4 a, floatv4 b, + floatv4 c) { + return a * b - c; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vnmas(floatv4 a, floatv4 b, + floatv4 c) { + return -a * b + c; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vnmss(floatv4 a, floatv4 b, + floatv4 c) { + return -(a * b + c); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vmad(doublev4 a, doublev4 b, + doublev4 c) { + return a * b + c; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vmsd(doublev4 a, doublev4 b, + doublev4 c) { + return a * b - c; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vnmad(doublev4 a, doublev4 b, + doublev4 c) { + return -a * b + c; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vnmsd(doublev4 a, doublev4 b, + doublev4 c) { + return -(a * b + c); +} + +// SIMD element Operation + +#ifdef __sw_64_sw8a__ +#define simd_vinsb(elt, vect, num) __builtin_sw_vinsb(elt, vect, num) +#define simd_vinsh(elt, vect, num) __builtin_sw_vinsh(elt, vect, num) +#endif + +#define simd_vinsw(elt, vect, num) __builtin_sw_vinsw(elt, vect, num) +#define simd_vinsl(elt, vect, num) __builtin_sw_vinsl(elt, vect, num) +#define simd_vinsfs(elt, vect, num) __builtin_sw_vinsfs(elt, vect, num) +#define simd_vinsfd(elt, vect, num) __builtin_sw_vinsfd(elt, vect, num) + +#define simd_vinsw0(elt, vect) simd_vinsw(elt, vect, 0) +#define simd_vinsw1(elt, vect) simd_vinsw(elt, vect, 1) 
+#define simd_vinsw2(elt, vect) simd_vinsw(elt, vect, 2) +#define simd_vinsw3(elt, vect) simd_vinsw(elt, vect, 3) +#define simd_vinsw4(elt, vect) simd_vinsw(elt, vect, 4) +#define simd_vinsw5(elt, vect) simd_vinsw(elt, vect, 5) +#define simd_vinsw6(elt, vect) simd_vinsw(elt, vect, 6) +#define simd_vinsw7(elt, vect) simd_vinsw(elt, vect, 7) + +#define simd_vinsl0(elt, vect) simd_vinsl(elt, vect, 0) +#define simd_vinsl1(elt, vect) simd_vinsl(elt, vect, 1) +#define simd_vinsl2(elt, vect) simd_vinsl(elt, vect, 2) +#define simd_vinsl3(elt, vect) simd_vinsl(elt, vect, 3) + +#define simd_vinsfs0(elt, vect) simd_vinsfs(elt, vect, 0) +#define simd_vinsfs1(elt, vect) simd_vinsfs(elt, vect, 1) +#define simd_vinsfs2(elt, vect) simd_vinsfs(elt, vect, 2) +#define simd_vinsfs3(elt, vect) simd_vinsfs(elt, vect, 3) + +#define simd_vinsfd0(elt, vect) simd_vinsfd(elt, vect, 0) +#define simd_vinsfd1(elt, vect) simd_vinsfd(elt, vect, 1) +#define simd_vinsfd2(elt, vect) simd_vinsfd(elt, vect, 2) +#define simd_vinsfd3(elt, vect) simd_vinsfd(elt, vect, 3) + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vinsectlb(charv32 __a, + charv32 __b) { + return __builtin_shufflevector( + __a, __b, 0, 0 + 32, 1, 1 + 32, 2, 2 + 32, 3, 3 + 32, 4, 4 + 32, 5, + 5 + 32, 6, 6 + 32, 7, 7 + 32, 8, 8 + 32, 9, 9 + 32, 10, 10 + 32, 11, + 11 + 32, 12, 12 + 32, 13, 13 + 32, 14, 14 + 32, 15, 15 + 32); +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 +simd_vinsectlh(shortv16 __a, shortv16 __b) { + return __builtin_shufflevector(__a, __b, 0, 0 + 16, 1, 1 + 16, 2, 2 + 16, 3, + 3 + 16, 4, 4 + 16, 5, 5 + 16, 6, 6 + 16, 7, + 7 + 16); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS_CORE4 simd_vinsectlw(intv8 __a, + intv8 __b) { + return __builtin_shufflevector(__a, __b, 0, 0 + 8, 1, 1 + 8, 2, 2 + 8, 3, + 3 + 8); +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS_CORE4 simd_vinsectll(longv4 __a, + longv4 __b) { + return __builtin_shufflevector(__a, __b, 0, 0 + 4, 1, 1 + 4); +} + +#ifdef __sw_64_sw8a__ +#define simd_vshfq(__a, __b, idx) __builtin_sw_vshfq(__a, __b, idx) +#endif + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vshfqb(charv32 __a, + charv32 __b) { + return __builtin_sw_vshfqb(__a, __b); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vshfw(intv8 __a, intv8 __b, + int64_t idx) { + return __builtin_sw_vshfw(__a, __b, idx); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vconw(intv8 __a, intv8 __b, + void *ptr) { + return __builtin_sw_vconw(__a, __b, ptr); +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vconl(intv8 __a, intv8 __b, + void *ptr) { + return __builtin_sw_vconl(__a, __b, ptr); +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcons(floatv4 __a, + floatv4 __b, + void *ptr) { + return __builtin_sw_vcons(__a, __b, ptr); +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcond(doublev4 __a, + doublev4 __b, + void *ptr) { + return __builtin_sw_vcond(__a, __b, ptr); +} + +#define simd_vextw(vect, num) __builtin_sw_vextw(vect, num) +#define simd_vextl(vect, num) __builtin_sw_vextl(vect, num) +#define simd_vextfs(vect, num) __builtin_sw_vextfs(vect, num) +#define simd_vextfd(vect, num) __builtin_sw_vextfd(vect, num) + +#define simd_vextw0(args) simd_vextw(args, 0) +#define simd_vextw1(args) simd_vextw(args, 1) +#define simd_vextw2(args) simd_vextw(args, 2) +#define simd_vextw3(args) simd_vextw(args, 3) +#define simd_vextw4(args) simd_vextw(args, 4) +#define simd_vextw5(args) simd_vextw(args, 5) +#define simd_vextw6(args) simd_vextw(args, 6) +#define simd_vextw7(args) 
simd_vextw(args, 7) + +#define simd_vextl0(args) simd_vextl(args, 0) +#define simd_vextl1(args) simd_vextl(args, 1) +#define simd_vextl2(args) simd_vextl(args, 2) +#define simd_vextl3(args) simd_vextl(args, 3) + +#define simd_vextfs0(args) simd_vextfs(args, 0) +#define simd_vextfs1(args) simd_vextfs(args, 1) +#define simd_vextfs2(args) simd_vextfs(args, 2) +#define simd_vextfs3(args) simd_vextfs(args, 3) + +#define simd_vextfd0(args) simd_vextfd(args, 0) +#define simd_vextfd1(args) simd_vextfd(args, 1) +#define simd_vextfd2(args) simd_vextfd(args, 2) +#define simd_vextfd3(args) simd_vextfd(args, 3) + +static __inline__ charv32 __DEFAULT_FN_ATTRS_CORE4 simd_vcpyb(int8_t b) { + return __extension__(charv32){b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b}; +} + +static __inline__ shortv16 __DEFAULT_FN_ATTRS_CORE4 simd_vcpyh(int16_t b) { + return __extension__(shortv16){b, b, b, b, b, b, b, b, + b, b, b, b, b, b, b, b}; +} + +static __inline__ intv8 __DEFAULT_FN_ATTRS simd_vcpyw(int32_t b) { + return __extension__(intv8){b, b, b, b, b, b, b, b}; +} + +static __inline__ longv4 __DEFAULT_FN_ATTRS simd_vcpyl(int64_t __a) { + return __extension__(longv4){__a, __a, __a, __a}; +} + +static __inline__ floatv4 __DEFAULT_FN_ATTRS simd_vcpyfs(float __a) { + return __extension__(floatv4){__a, __a, __a, __a}; +} + +static __inline__ doublev4 __DEFAULT_FN_ATTRS simd_vcpyfd(double __a) { + return __extension__(doublev4){__a, __a, __a, __a}; +} + +// Test for core3 + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_reduc_plusw(intv8 __a) { + intv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + __a = __a + __shf; + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __a = __a + __shf; + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __a = __a + __shf; + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ float __DEFAULT_FN_ATTRS simd_reduc_pluss(floatv4 __a) { + floatv4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + __a = __a + __shf; + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __a = __a + __shf; + return __builtin_sw_vextfs(__a, 0); +} + +static __inline__ double __DEFAULT_FN_ATTRS simd_reduc_plusd(doublev4 __a) { + doublev4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + __a = __a + __shf; + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __a = __a + __shf; + return __builtin_sw_vextfd(__a, 0); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_reduc_smaxw(intv8 __a) { + intv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + intv8 __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS simd_reduc_umaxw(uintv8 __a) { + uintv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + uintv8 __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __cmp = simd_vcmpultw(__a, __shf); + __a = 
simd_vseleqw(__cmp, __a, __shf); + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ int32_t __DEFAULT_FN_ATTRS simd_reduc_sminw(intv8 __a) { + intv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + intv8 __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __cmp = simd_vcmpltw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS simd_reduc_uminw(uintv8 __a) { + uintv8 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3, 5, 5, 7, 7); + uintv8 __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2, 6, 6, 6, 6); + __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 4, 4, 4, 4, 4, 4, 4, 4); + __cmp = simd_vcmpultw(__a, __shf); + __a = simd_vseleqw(__cmp, __shf, __a); + return __builtin_sw_vextw(__a, 0); +} + +static __inline__ float __DEFAULT_FN_ATTRS simd_reduc_smaxs(floatv4 __a) { + floatv4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + floatv4 __cmp = simd_vfcmplts(__a, __shf); + __a = simd_vfseleqs(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __cmp = simd_vfcmplts(__a, __shf); + __a = simd_vfseleqs(__cmp, __a, __shf); + return __builtin_sw_vextfs(__a, 0); +} + +static __inline__ double __DEFAULT_FN_ATTRS simd_reduc_smaxd(doublev4 __a) { + doublev4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + doublev4 __cmp = simd_vfcmpltd(__a, __shf); + __a = simd_vfseleqd(__cmp, __a, __shf); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __cmp = simd_vfcmpltd(__a, __shf); + __a = simd_vfseleqd(__cmp, __a, __shf); + return __builtin_sw_vextfd(__a, 0); +} + +static __inline__ float __DEFAULT_FN_ATTRS simd_reduc_smins(floatv4 __a) { + floatv4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + floatv4 __cmp = simd_vfcmplts(__a, __shf); + __a = simd_vfseleqs(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __cmp = simd_vfcmplts(__a, __shf); + __a = simd_vfseleqs(__cmp, __shf, __a); + return __builtin_sw_vextfs(__a, 0); +} + +static __inline__ double __DEFAULT_FN_ATTRS simd_reduc_smind(doublev4 __a) { + doublev4 __shf = __builtin_shufflevector(__a, __a, 1, 1, 3, 3); + doublev4 __cmp = simd_vfcmpltd(__a, __shf); + __a = simd_vfseleqd(__cmp, __shf, __a); + __shf = __builtin_shufflevector(__a, __a, 2, 2, 2, 2); + __cmp = simd_vfcmpltd(__a, __shf); + __a = simd_vfseleqd(__cmp, __shf, __a); + return __builtin_sw_vextfd(__a, 0); +} +#endif diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 5ee20554c4cf..d8c57afe8523 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2008,6 +2008,8 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case llvm::Triple::mips64: case llvm::Triple::mips64el: return CheckMipsBuiltinFunctionCall(TI, BuiltinID, TheCall); + case llvm::Triple::sw_64: + return CheckSw64BuiltinFunctionCall(BuiltinID, TheCall); case llvm::Triple::systemz: return CheckSystemZBuiltinFunctionCall(BuiltinID, TheCall); case llvm::Triple::x86: @@ -5799,6 +5801,140 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
return SemaBuiltinConstantArgRange(TheCall, i, l, u); } +bool Sema::CheckSw64VectorMemoryIntr(unsigned BuiltinID, CallExpr *TheCall) { + DeclRefExpr *DRE = + cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); + if (BuiltinID == Sw64::BI__builtin_sw_vload || + BuiltinID == Sw64::BI__builtin_sw_vloadu || + BuiltinID == Sw64::BI__builtin_sw_vload_u || + BuiltinID == Sw64::BI__builtin_sw_vloade || + BuiltinID == Sw64::BI__builtin_sw_vloadnc || + BuiltinID == Sw64::BI__builtin_sw_vstore || + BuiltinID == Sw64::BI__builtin_sw_vstoreu || + BuiltinID == Sw64::BI__builtin_sw_vstore_u || + BuiltinID == Sw64::BI__builtin_sw_vstoreuh || + BuiltinID == Sw64::BI__builtin_sw_vstoreul || + BuiltinID == Sw64::BI__builtin_sw_vstorenc) { + + bool isLoad = BuiltinID == Sw64::BI__builtin_sw_vload || + BuiltinID == Sw64::BI__builtin_sw_vloadu || + BuiltinID == Sw64::BI__builtin_sw_vload_u || + BuiltinID == Sw64::BI__builtin_sw_vloade || + BuiltinID == Sw64::BI__builtin_sw_vloadnc; + + bool isLoadExt = BuiltinID == Sw64::BI__builtin_sw_vloade; + + bool isExtMem = BuiltinID == Sw64::BI__builtin_sw_vloadu || + BuiltinID == Sw64::BI__builtin_sw_vload_u || + BuiltinID == Sw64::BI__builtin_sw_vloade || + BuiltinID == Sw64::BI__builtin_sw_vstoreu || + BuiltinID == Sw64::BI__builtin_sw_vstore_u || + BuiltinID == Sw64::BI__builtin_sw_vstoreuh || + BuiltinID == Sw64::BI__builtin_sw_vstoreul; + + if (checkArgCount(*this, TheCall, isLoad ? 1 : 2)) + return true; + + Expr *PointerArg = TheCall->getArg(isLoad ? 0 : 1); + ExprResult PointerArgRes = DefaultFunctionArrayLvalueConversion(PointerArg); + if (PointerArgRes.isInvalid()) + return true; + PointerArg = PointerArgRes.get(); + TheCall->setArg(isLoad ? 0 : 1, PointerArg); + + const PointerType *pointerType = + PointerArg->getType()->getAs<PointerType>(); + QualType ValType = pointerType->getPointeeType(); + QualType VecTy; + bool isVoidPtr = pointerType->isVoidPointerType(); + if (isExtMem) { + if (Context.getTypeSize(ValType) < 32 && !isVoidPtr) { + Diag(DRE->getBeginLoc(), diag::err_invalid_sw64_type_code); + return true; + } + } + + if (ValType->isFloatingType() && + (BuiltinID == Sw64::BI__builtin_sw_vloadnc)) { + if (Context.getTypeSize(ValType) <= 32) { + Diag(DRE->getBeginLoc(), diag::err_invalid_sw64_type_code); + return true; + } + } + + // If the builtin is a store, it has no return value; do nothing.
+ if (!isLoad) + return false; + + if (ValType->isIntegerType()) + VecTy = + Context.getExtVectorType(ValType, 256 / Context.getTypeSize(ValType)); + else { + assert(ValType->isFloatingType() && + "Builtin Value should be Integer or Floating type!"); + VecTy = Context.getExtVectorType(ValType, 4); + } + if (isLoad) { + TheCall->setType(VecTy); + return false; + } + } + return true; +} + +bool Sema::CheckSw64VectorShift(unsigned BuiltinID, CallExpr *TheCall) { + DeclRefExpr *DRE = + cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); + if (BuiltinID == Sw64::BI__builtin_sw_vsll || + BuiltinID == Sw64::BI__builtin_sw_vsrl || + BuiltinID == Sw64::BI__builtin_sw_vsra || + BuiltinID == Sw64::BI__builtin_sw_vrol) { + Expr *ShiftArg = TheCall->getArg(0); + Expr *ShiftImm = TheCall->getArg(1); + QualType ValType = ShiftArg->getType(); + QualType Imm = ShiftImm->getType(); + + if (checkArgCount(*this, TheCall, 2)) + return true; + + if (ValType->isFloatingType() || + !(ValType->isVectorType() && Imm->isIntegerType())) { + Diag(DRE->getBeginLoc(), diag::err_invalid_sw64_type_code); + return true; + } + + TheCall->setType(ValType); + return false; + } + return true; +} + +bool Sema::CheckSw64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + DeclRefExpr *DRE = + cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); + switch (BuiltinID) { + case Sw64::BI__builtin_sw_vload: + case Sw64::BI__builtin_sw_vloadu: + case Sw64::BI__builtin_sw_vload_u: + case Sw64::BI__builtin_sw_vloade: + case Sw64::BI__builtin_sw_vloadnc: + case Sw64::BI__builtin_sw_vstore: + case Sw64::BI__builtin_sw_vstoreu: + case Sw64::BI__builtin_sw_vstore_u: + case Sw64::BI__builtin_sw_vstoreuh: + case Sw64::BI__builtin_sw_vstoreul: + case Sw64::BI__builtin_sw_vstorenc: + return CheckSw64VectorMemoryIntr(BuiltinID, TheCall); + case Sw64::BI__builtin_sw_vsll: + case Sw64::BI__builtin_sw_vsrl: + case Sw64::BI__builtin_sw_vsra: + case Sw64::BI__builtin_sw_vrol: + return CheckSw64VectorShift(BuiltinID, TheCall); + } + + return false; +} + bool Sema::CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index ed69e802c95d..0d1e28fa9dce 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7744,6 +7744,19 @@ handleWebAssemblyImportNameAttr(Sema &S, Decl *D, const ParsedAttr &AL) { FD->addAttr(::new (S.Context) WebAssemblyImportNameAttr(S.Context, AL, Str)); } +static void handleSw64InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (!isFunctionOrMethod(D)) { + S.Diag(D->getLocation(), diag::warn_attribute_wrong_decl_type) + << AL << AL.isRegularKeywordAttribute() << ExpectedFunction; + return; + } + + if (!AL.checkExactlyNumArgs(S, 0)) + return; + + handleSimpleAttribute<Sw64InterruptAttr>(S, D, AL); +} + static void handleRISCVInterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // Warn about repeated attributes.
@@ -7826,6 +7839,9 @@ static void handleInterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { case llvm::Triple::riscv64: handleRISCVInterruptAttr(S, D, AL); break; + case llvm::Triple::sw_64: + handleSw64InterruptAttr(S, D, AL); + break; default: handleARMInterruptAttr(S, D, AL); break; -- Gitee From 5298e93560dddcd7570a5c9d47625526081bd2f1 Mon Sep 17 00:00:00 2001 From: swcompiler Date: Thu, 23 Jan 2025 14:51:38 +0800 Subject: [PATCH 3/3] [Sw64] Add Sw64 target support for openmp --- openmp/README.rst | 2 +- openmp/runtime/CMakeLists.txt | 9 +- openmp/runtime/README.txt | 1 + .../runtime/cmake/LibompGetArchitecture.cmake | 2 + openmp/runtime/cmake/LibompMicroTests.cmake | 3 + openmp/runtime/cmake/LibompUtils.cmake | 2 + openmp/runtime/cmake/config-ix.cmake | 3 +- openmp/runtime/src/kmp_affinity.h | 11 ++ openmp/runtime/src/kmp_os.h | 8 +- openmp/runtime/src/kmp_platform.h | 6 +- openmp/runtime/src/kmp_runtime.cpp | 3 +- openmp/runtime/src/z_Linux_asm.S | 157 +++++++++++++++++- openmp/runtime/src/z_Linux_util.cpp | 2 +- openmp/runtime/test/ompt/callback.h | 10 ++ openmp/runtime/tools/lib/Platform.pm | 7 +- openmp/runtime/tools/lib/Uname.pm | 2 + 16 files changed, 215 insertions(+), 13 deletions(-) diff --git a/openmp/README.rst b/openmp/README.rst index 2cdd38220d52..103cc0dd5f19 100644 --- a/openmp/README.rst +++ b/openmp/README.rst @@ -137,7 +137,7 @@ Options for all Libraries Options for ``libomp`` ---------------------- -**LIBOMP_ARCH** = ``aarch64|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64`` +**LIBOMP_ARCH** = ``aarch64|arm|i386|loongarch64|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|sw_64`` The default value for this option is chosen based on probing the compiler for architecture macros (e.g., is ``__x86_64__`` predefined by compiler?). diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index 2b7a3eb5bfce..58265a9eaaa2 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD}) # If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake libomp_get_architecture(LIBOMP_DETECTED_ARCH) set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING - "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64).") + "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64/sw_64).") # Should assertions be enabled? They are on by default. 
set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") @@ -63,6 +63,8 @@ else() # Part of LLVM build set(LIBOMP_ARCH riscv64) elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64") set(LIBOMP_ARCH loongarch64) + elseif(LIBOMP_NATIVE_ARCH MATCHES "sw_64") + set(LIBOMP_ARCH sw_64) else() # last ditch effort libomp_get_architecture(LIBOMP_ARCH) @@ -83,7 +85,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64") endif() endif() -libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64) +libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64 sw_64) set(LIBOMP_LIB_TYPE normal CACHE STRING "Performance,Profiling,Stubs library (normal/profile/stubs)") @@ -162,6 +164,7 @@ set(MIPS64 FALSE) set(MIPS FALSE) set(RISCV64 FALSE) set(LOONGARCH64 FALSE) +set(SW64 FALSE) if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture set(IA32 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture @@ -188,6 +191,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture set(RISCV64 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture set(LOONGARCH64 TRUE) +elseif("${LIBOMP_ARCH}" STREQUAL "sw_64") # SW64 architecture + set(SW64 TRUE) endif() # Set some flags based on build_type diff --git a/openmp/runtime/README.txt b/openmp/runtime/README.txt index ddd8b0e4282d..2ecc429d92d5 100644 --- a/openmp/runtime/README.txt +++ b/openmp/runtime/README.txt @@ -55,6 +55,7 @@ Architectures Supported * MIPS and MIPS64 architecture * RISCV64 architecture * LoongArch64 architecture +* SW64 architecture Supported RTL Build Configurations ================================== diff --git a/openmp/runtime/cmake/LibompGetArchitecture.cmake b/openmp/runtime/cmake/LibompGetArchitecture.cmake index c338493bad53..9ca2dfc5d7c7 100644 --- a/openmp/runtime/cmake/LibompGetArchitecture.cmake +++ b/openmp/runtime/cmake/LibompGetArchitecture.cmake @@ -49,6 +49,8 @@ function(libomp_get_architecture return_arch) #error ARCHITECTURE=riscv64 #elif defined(__loongarch__) && __loongarch_grlen == 64 #error ARCHITECTURE=loongarch64 + #elif defined(__sw_64__) + #error ARCHITECTURE=sw_64 #else #error ARCHITECTURE=UnknownArchitecture #endif diff --git a/openmp/runtime/cmake/LibompMicroTests.cmake b/openmp/runtime/cmake/LibompMicroTests.cmake index 88deb461dbaf..ff911af4b0b5 100644 --- a/openmp/runtime/cmake/LibompMicroTests.cmake +++ b/openmp/runtime/cmake/LibompMicroTests.cmake @@ -217,6 +217,9 @@ else() elseif(${LOONGARCH64}) libomp_append(libomp_expected_library_deps libc.so.6) libomp_append(libomp_expected_library_deps ld.so.1) + elseif(${SW64}) + libomp_append(libomp_expected_library_deps libc.so.6.1) + libomp_append(libomp_expected_library_deps ld-linux.so.2) endif() libomp_append(libomp_expected_library_deps libpthread.so.0 IF_FALSE STUBS_LIBRARY) libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC) diff --git a/openmp/runtime/cmake/LibompUtils.cmake b/openmp/runtime/cmake/LibompUtils.cmake index b5ffc97fca3d..ce62b077bd22 100644 --- a/openmp/runtime/cmake/LibompUtils.cmake +++ b/openmp/runtime/cmake/LibompUtils.cmake @@ -111,6 +111,8 @@ function(libomp_get_legal_arch return_arch_string) set(${return_arch_string} "RISCV64" PARENT_SCOPE) elseif(${LOONGARCH64}) set(${return_arch_string} "LOONGARCH64" PARENT_SCOPE) + elseif(${SW64}) + set(${return_arch_string} "SW64" 
PARENT_SCOPE) else() set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE) libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}") diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake index 9869aeab0354..6cbac229eaed 100644 --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -325,7 +325,8 @@ else() (LIBOMP_ARCH STREQUAL ppc64le) OR (LIBOMP_ARCH STREQUAL ppc64) OR (LIBOMP_ARCH STREQUAL riscv64) OR - (LIBOMP_ARCH STREQUAL loongarch64)) + (LIBOMP_ARCH STREQUAL loongarch64) OR + (LIBOMP_ARCH STREQUAL sw_64)) AND # OS supported? ((WIN32 AND LIBOMP_HAVE_PSAPI) OR APPLE OR (NOT WIN32 AND LIBOMP_HAVE_WEAK_ATTRIBUTE))) set(LIBOMP_HAVE_OMPT_SUPPORT TRUE) diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h index f27dd9a5339e..bb9fdc410a73 100644 --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -281,6 +281,17 @@ public: #elif __NR_sched_getaffinity != 123 #error Wrong code for getaffinity system call. #endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_SW64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 395 +#elif __NR_sched_setaffinity != 395 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 396 +#elif __NR_sched_getaffinity != 396 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ #else #error Unknown or unsupported architecture #endif /* KMP_ARCH_* */ diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index fec589ab6018..ee1d2c7e3fd5 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -178,7 +178,8 @@ typedef unsigned long long kmp_uint64; #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ + KMP_ARCH_SW64 #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC #else #error "Can't determine size_t printf format specifier."
@@ -213,7 +214,7 @@ typedef kmp_uint32 kmp_uint; #define KMP_INT_MIN ((kmp_int32)0x80000000) // stdarg handling -#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && \ +#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_SW64) && \ (KMP_OS_FREEBSD || KMP_OS_LINUX) typedef va_list *kmp_va_list; #define kmp_va_deref(ap) (*(ap)) @@ -1043,7 +1044,8 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); #endif /* KMP_OS_WINDOWS */ #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ + KMP_ARCH_SW64 #if KMP_OS_WINDOWS #undef KMP_MB #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index fcfd8bc5d8d9..780ff3b185b3 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -93,6 +93,7 @@ #define KMP_ARCH_MIPS64 0 #define KMP_ARCH_RISCV64 0 #define KMP_ARCH_LOONGARCH64 0 +#define KMP_ARCH_SW64 0 #if KMP_OS_WINDOWS #if defined(_M_AMD64) || defined(__x86_64) @@ -142,6 +143,9 @@ #elif defined __loongarch__ && __loongarch_grlen == 64 #undef KMP_ARCH_LOONGARCH64 #define KMP_ARCH_LOONGARCH64 1 +#elif defined __sw_64__ +#undef KMP_ARCH_SW64 +#define KMP_ARCH_SW64 1 #endif #endif @@ -206,7 +210,7 @@ // TODO: Fixme - This is clever, but really fugly #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ - KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64) + KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64 + KMP_ARCH_SW64) #error Unknown or unsupported architecture #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index c63bd1c63bfd..e86d132fee0f 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -8827,7 +8827,8 @@ __kmp_determine_reduction_method( int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ + KMP_ARCH_SW64 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index 27b063f09e7a..be3494f9589f 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -2060,6 +2060,159 @@ __kmp_invoke_microtask: #endif /* KMP_ARCH_LOONGARCH64 */ +#if KMP_ARCH_SW64 + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)(int *gtid, int *tid, ...); +// +// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, +// void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)(&gtid, &tid, argv[0], ...); +// +// return 1; +// } +// +// Parameters: +// $16: pkfn +// $17: gtid +// $18: tid +// $19: argc +// $20: p_argv +// $21: exit_frame_ptr +// +// Locals: +// __gtid: gtid param pushed on stack so can pass &gtid to pkfn +// __tid: tid param pushed on stack so can pass &tid to pkfn +// +// Temp.
registers: +// +// $1: used to calculate the dynamic stack size / used to hold pkfn address +// $2: used as temporary for stack placement calculation +// $3: used as temporary for stack arguments +// $4: used as temporary for number of remaining pkfn parms +// $5: used to traverse p_argv array +// +// return: $0 (always 1/TRUE) + +__gtid = -20 +__tid = -24 +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + .globl __kmp_invoke_microtask + .p2align 1 + .type __kmp_invoke_microtask,@function +__kmp_invoke_microtask: + .cfi_startproc + + // First, save $26 and $15 + ldi $30,-16($30) + stl $26, 8($30) + stl $15, 0($30) + ldi $15,16($30) + .cfi_def_cfa $15, 0 + .cfi_offset $26, -8 + .cfi_offset $15, -16 + + // Compute the dynamic stack size: + // + // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by + // reference + // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' + // function by register. Given that we have 6 of such registers (a[0-5]) + // and two + 'argc' arguments (consider &gtid and &tid), we need to + // reserve max(0, argc - 4)*8 extra bytes + // + // The total number of bytes is then max(0, argc - 4)*8 + 8 + + // Compute max(0, argc - 4) using the following bithack: + // max(0, x) = x - (x & (x >> 31)), where x := argc - 4 + // Source: http://graphics.stanford.edu/~seander/bithacks.html//IntegerMinOrMax + subw $19, 4, $1 + sellt $1, 0, $1, $1 + + ldi $1,1($1) + s8addl $1,0,$1 + subl $30, $1, $30 + + // Align the stack to 16 bytes + bic $30, 0xf, $30 + mov $16, $27 + mov $19, $4 + mov $20, $5 + +#if OMPT_SUPPORT + // Save frame pointer into exit_frame + stl $15, 0($21) +#endif + + // Prepare arguments for the pkfn function (first 6 using $16-$21 registers) + + stw $17, __gtid($15) + stw $18, __tid($15) + + ldi $16, __gtid($15) + ldi $17, __tid($15) + + beq $4, .L_kmp_3 + ldl $18, 0($5) + + ldi $4,-1($4) + beq $4, .L_kmp_3 + ldl $19, 8($5) + + ldi $4,-1($4) + beq $4, .L_kmp_3 + ldl $20, 16($5) + + ldi $4,-1($4) + beq $4, .L_kmp_3 + ldl $21, 24($5) + + // Prepare any additional argument passed through the stack + ldi $5, 32($5) + mov $30, $2 + br $31, .L_kmp_2 +.L_kmp_1: + ldl $3, 0($5) + stl $3, 0($2) + ldi $5, 8($5) + ldi $2, 8($2) +.L_kmp_2: + ldi $4, -1($4) + bne $4, .L_kmp_1 + +.L_kmp_3: + // Call pkfn function + call $26, ($27), 0 + + // Restore stack and return + + ldi $0, 1($31) + + ldi $30,-16($15) + ldl $15, 0($30) + ldl $26, 8($30) + ldi $30,16($30) + ret $31,($26),1 +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask + .cfi_endproc + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_SW64 */ + #if KMP_ARCH_ARM || KMP_ARCH_MIPS .data COMMON .gomp_critical_user_, 32, 3 @@ -2073,7 +2226,7 @@ __kmp_unnamed_critical_addr: #endif #endif /* KMP_ARCH_ARM */ -#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 +#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_SW64 #ifndef KMP_PREFIX_UNDERSCORE # define KMP_PREFIX_UNDERSCORE(x) x #endif @@ -2088,7 +2241,7 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 #endif #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || - KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */ + KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_SW64 */ #if KMP_OS_LINUX # if KMP_ARCH_ARM || KMP_ARCH_AARCH64 diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 260b982af200..cdfb14687500 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -2452,7 +2452,7 @@ finish: // Clean up and exit. #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ - KMP_ARCH_ARM) + KMP_ARCH_ARM || KMP_ARCH_SW64) // we really only need the case with 1 argument, because CLANG always build // a struct of pointers to shared variables referenced in the outlined function diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h index 8180b3d2663f..32af133ade73 100644 --- a/openmp/runtime/test/ompt/callback.h +++ b/openmp/runtime/test/ompt/callback.h @@ -212,6 +212,16 @@ ompt_label_##id: printf("%" PRIu64 ": current_address=%p or %p\n", \ ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12) #endif +#elif KMP_ARCH_SW64 +// On SW64 the NOP instruction is 4 bytes long, can be followed by some other +// instructions (more bytes). +#define print_possible_return_addresses(addr) \ + printf("%" PRIu64 ": current_address=%p or %p or %p or %p or %p or %p or " \ + "%p or %p or %p or %p\n", \ + ompt_get_thread_data()->value, ((char *)addr) - 16, \ + ((char *)addr) - 20, ((char *)addr) - 24, ((char *)addr) - 28, \ + ((char *)addr) - 32, ((char *)addr) - 36, ((char *)addr) - 40, \ + ((char *)addr) - 44, ((char *)addr) - 48, ((char *)addr) - 52) #elif KMP_ARCH_LOONGARCH64 // On LoongArch64 the NOP instruction is 4 bytes long, can be followed by // inserted jump instruction (another 4 bytes long). And an additional jump diff --git a/openmp/runtime/tools/lib/Platform.pm b/openmp/runtime/tools/lib/Platform.pm index d62d450e9e5d..c7da02499c00 100644 --- a/openmp/runtime/tools/lib/Platform.pm +++ b/openmp/runtime/tools/lib/Platform.pm @@ -65,6 +65,8 @@ sub canon_arch($) { $arch = "riscv64"; } elsif ( $arch =~ m{\Aloongarch64} ) { $arch = "loongarch64"; + } elsif ( $arch =~ m{\Asw_64} ) { + $arch = "sw_64"; } else { $arch = undef; }; # if @@ -100,6 +102,7 @@ sub canon_mic_arch($) { "mips" => "MIPS", "mips64" => "MIPS64", "riscv64" => "RISC-V (64-bit)", + "sw_64" => "SW64", ); sub legal_arch($) { @@ -230,6 +233,8 @@ sub target_options() { $_host_arch = "riscv64"; } elsif ( $hardware_platform eq "loongarch64" ) { $_host_arch = "loongarch64"; + } elsif ( $hardware_platform eq "sw_64" ) { + $_host_arch = "sw_64"; } else { die "Unsupported host hardware platform: \"$hardware_platform\"; stopped"; }; # if @@ -419,7 +424,7 @@ the script assumes host architecture is target one. Input string is an architecture name to canonize. The function recognizes many variants, for example: C<32e>, C, C, etc. Returned string is a canonized architecture name, -one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C or C is input string is not recognized. +one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C, C or C is input string is not recognized. 
=item B diff --git a/openmp/runtime/tools/lib/Uname.pm b/openmp/runtime/tools/lib/Uname.pm index 8a976addcff0..d21550711cc3 100644 --- a/openmp/runtime/tools/lib/Uname.pm +++ b/openmp/runtime/tools/lib/Uname.pm @@ -160,6 +160,8 @@ if ( 0 ) { $values{ hardware_platform } = "riscv64"; } elsif ( $values{ machine } =~ m{\Aloongarch64\z} ) { $values{ hardware_platform } = "loongarch64"; + } elsif ( $values{ machine } =~ m{\Asw_64\z} ) { + $values{ hardware_platform } = "sw_64"; } else { die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped"; }; # if -- Gitee
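
Note (not part of the patch series): the Sw64 reductions in the simd.h hunk above lower an 8 x i32 sum to three shuffle-and-add steps. The following portable C sketch models that tree reduction under the assumption that it mirrors simd_reduc_plusw's lane arithmetic; reduc_plusw_scalar and the main driver are hypothetical names used only for illustration and do not require the Sw64 toolchain.

/* Scalar model of the pairwise tree reduction done by simd_reduc_plusw:
 * fold lane i+1 into the even lanes, then lane i+2 into lanes 0 and 4,
 * then lane 4 into lane 0; lane 0 then holds the total, which the header
 * extracts with __builtin_sw_vextw(__a, 0). */
#include <stdint.h>
#include <stdio.h>

static int32_t reduc_plusw_scalar(const int32_t v[8]) {
  int32_t t[8];
  for (int i = 0; i < 8; ++i)
    t[i] = v[i];
  for (int i = 0; i < 8; i += 2) /* shuffle {1,1,3,3,5,5,7,7} + add */
    t[i] += t[i + 1];
  for (int i = 0; i < 8; i += 4) /* shuffle {2,2,2,2,6,6,6,6} + add */
    t[i] += t[i + 2];
  t[0] += t[4];                  /* shuffle {4,4,4,4,4,4,4,4} + add */
  return t[0];
}

int main(void) {
  const int32_t v[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  printf("%d\n", reduc_plusw_scalar(v)); /* prints 36 */
  return 0;
}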