From e1ae877d32bfbc80403d8d63fa0a883fc33f1af6 Mon Sep 17 00:00:00 2001
From: n002761
Date: Mon, 29 Apr 2024 08:49:31 +0800
Subject: [PATCH 1/6] Add LoongArch architecture compilation support

Signed-off-by: n002761
---
 clang/bindings/python/tests/CMakeLists.txt | 2 +-
 .../include/clang/Basic/BuiltinsLoongArch.def | 1990 ++++
 clang/include/clang/Basic/TargetBuiltins.h | 13 +-
 clang/include/clang/Basic/TargetCXXABI.def | 6 +
 clang/include/clang/Basic/TargetCXXABI.h | 6 +
 clang/include/clang/Driver/Options.td | 19 +
 clang/include/clang/Sema/Sema.h | 3 +
 clang/include/clang/module.modulemap | 1 +
 clang/lib/AST/ASTContext.cpp | 2 +
 clang/lib/Basic/CMakeLists.txt | 1 +
 clang/lib/Basic/Targets.cpp | 25 +
 clang/lib/Basic/Targets/LoongArch.cpp | 184 +
 clang/lib/Basic/Targets/LoongArch.h | 402 +
 clang/lib/CodeGen/CodeGenModule.cpp | 1 +
 clang/lib/CodeGen/ItaniumCXXABI.cpp | 3 +
 clang/lib/CodeGen/TargetInfo.cpp | 595 +-
 clang/lib/Driver/CMakeLists.txt | 1 +
 clang/lib/Driver/Driver.cpp | 23 +
 .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 211 +
 clang/lib/Driver/ToolChains/Arch/LoongArch.h | 49 +
 clang/lib/Driver/ToolChains/Clang.cpp | 77 +
 clang/lib/Driver/ToolChains/Clang.h | 4 +
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 21 +
 clang/lib/Driver/ToolChains/Gnu.cpp | 73 +
 clang/lib/Driver/ToolChains/Linux.cpp | 23 +-
 clang/lib/Driver/ToolChains/OHOS.cpp | 20 +-
 clang/lib/Driver/XRayArgs.cpp | 2 +
 clang/lib/Headers/CMakeLists.txt | 15 +
 clang/lib/Headers/larchintrin.h | 338 +
 clang/lib/Headers/lasxintrin.h | 5337 ++++++++++
 clang/lib/Headers/lsxintrin.h | 5162 ++++++++++
 clang/lib/Sema/SemaChecking.cpp | 556 +
 .../LoongArch/abi-lp64d-empty-structs.c | 144 +
 .../LoongArch/abi-lp64d-empty-unions.c | 25 +
 clang/test/CodeGen/LoongArch/abi-lp64d.c | 501 +
 .../LoongArch/inlineasm-float-double-in-gpr.c | 49 +
 clang/test/CodeGen/builtins-loongarch-base.c | 409 +
 .../CodeGen/builtins-loongarch-lasx-error.c | 266 +
 clang/test/CodeGen/builtins-loongarch-lasx.c | 3772 +++++++
 .../CodeGen/builtins-loongarch-lsx-error.c | 250 +
 clang/test/CodeGen/builtins-loongarch-lsx.c | 3645 +++++++
 .../CodeGen/loongarch-inline-asm-modifiers.c | 50 +
 clang/test/CodeGen/loongarch-inline-asm.c | 31 +
 .../CodeGenCXX/LoongArch/abi-lp64d-D91269.cpp | 35 +
 .../CodeGenCXX/LoongArch/abi-lp64d-D91278.cpp | 26 +
 .../LoongArch/abi-lp64d-struct-inherit.cpp | 95 +
 clang/test/Driver/baremetal.cpp | 2 +-
 clang/test/Driver/hexagon-toolchain-linux.c | 4 +-
 .../test/Driver/loongarch-alignment-feature.c | 8 +
 clang/test/Driver/loongarch-march.c | 15 +
 clang/test/Preprocessor/init-loongarch.c | 10 +
 .../InterpreterExceptionTest.cpp | 5 +
 .../cmake/Modules/AllSupportedArchDefs.cmake | 25 +-
 .../cmake/Modules/CompilerRTUtils.cmake | 9 +
 compiler-rt/cmake/base-config-ix.cmake | 4 +
 compiler-rt/cmake/builtin-config-ix.cmake | 3 +-
 compiler-rt/cmake/crt-config-ix.cmake | 3 +-
 compiler-rt/lib/asan/asan_interceptors.cpp | 2 +-
 compiler-rt/lib/asan/asan_mapping.h | 4 +
 compiler-rt/lib/asan/tests/asan_test.cpp | 14 +-
 compiler-rt/lib/builtins/CMakeLists.txt | 10 +-
 compiler-rt/lib/builtins/clear_cache.c | 2 +
 compiler-rt/lib/builtins/loongarch/fp_mode.c | 59 +
 compiler-rt/lib/fuzzer/FuzzerTracePC.cpp | 4 +-
 .../lib/interception/tests/CMakeLists.txt | 2 +-
 compiler-rt/lib/lsan/lsan_allocator.cpp | 2 +-
 compiler-rt/lib/lsan/lsan_common.cpp | 2 +
 compiler-rt/lib/lsan/lsan_common.h | 2 +-
 compiler-rt/lib/msan/msan.h | 28 +-
 compiler-rt/lib/msan/msan_allocator.cpp | 2 +-
 compiler-rt/lib/msan/msan_interceptors.cpp | 2 +-
 compiler-rt/lib/msan/tests/msan_test.cpp | 2 +
...ommon_interceptors_vfork_loongarch64.inc.S | 6 +- .../sanitizer_common_syscalls.inc | 4 +- .../lib/sanitizer_common/sanitizer_linux.cpp | 431 +- .../lib/sanitizer_common/sanitizer_linux.h | 9 +- .../sanitizer_linux_libcdep.cpp | 19 +- .../lib/sanitizer_common/sanitizer_platform.h | 25 +- .../sanitizer_platform_interceptors.h | 7 +- .../sanitizer_platform_limits_posix.cpp | 47 +- .../sanitizer_platform_limits_posix.h | 22 +- .../sanitizer_common/sanitizer_ring_buffer.h | 9 +- .../sanitizer_common/sanitizer_stacktrace.cpp | 4 +- .../sanitizer_common/sanitizer_stacktrace.h | 4 +- .../sanitizer_stoptheworld_linux_libcdep.cpp | 74 +- .../sanitizer_tls_get_addr.cpp | 6 +- .../lib/sanitizer_common/tests/CMakeLists.txt | 2 +- .../tests/sanitizer_allocator_test.cpp | 6 +- .../tests/sanitizer_ring_buffer_test.cpp | 5 +- .../tests/sanitizer_stacktrace_test.cpp | 2 +- compiler-rt/lib/tsan/rtl/CMakeLists.txt | 3 + .../lib/tsan/rtl/tsan_interceptors_posix.cpp | 4 +- compiler-rt/lib/tsan/rtl/tsan_platform.h | 39 + .../lib/tsan/rtl/tsan_platform_linux.cpp | 12 +- compiler-rt/lib/tsan/rtl/tsan_rtl.cpp | 4 +- compiler-rt/lib/tsan/rtl/tsan_rtl.h | 3 +- .../lib/tsan/rtl/tsan_rtl_loongarch64.S | 149 + compiler-rt/lib/xray/CMakeLists.txt | 6 + compiler-rt/lib/xray/tests/CMakeLists.txt | 1 + compiler-rt/lib/xray/xray_interface.cpp | 2 + compiler-rt/lib/xray/xray_loongarch.cpp | 173 + .../lib/xray/xray_trampoline_loongarch.S | 126 + compiler-rt/lib/xray/xray_tsc.h | 2 +- compiler-rt/test/asan/CMakeLists.txt | 2 +- .../asan/TestCases/Linux/leak_check_segv.cpp | 13 +- .../test/asan/TestCases/Linux/ptrace.cpp | 1 + .../asan/TestCases/Linux/segv_read_write.c | 2 +- compiler-rt/test/asan/lit.cfg.py | 2 +- compiler-rt/test/builtins/Unit/addtf3_test.c | 3 +- compiler-rt/test/builtins/Unit/subtf3_test.c | 3 +- compiler-rt/test/fuzzer/disable-leaks.test | 2 +- compiler-rt/test/fuzzer/exit_on_src_pos.test | 1 + compiler-rt/test/fuzzer/fork-ubsan.test | 2 +- .../test/lsan/TestCases/strace_test.cpp | 1 + .../test/lsan/TestCases/swapcontext.cpp | 2 +- .../test/lsan/TestCases/use_registers.cpp | 4 + compiler-rt/test/lsan/lit.common.cfg.py | 2 +- compiler-rt/test/msan/allocator_mapping.cpp | 2 +- compiler-rt/test/msan/lit.cfg.py | 2 +- compiler-rt/test/msan/mmap.cpp | 4 +- compiler-rt/test/msan/mmap_below_shadow.cpp | 2 +- compiler-rt/test/msan/param_tls_limit.cpp | 4 +- compiler-rt/test/msan/poison_in_signal.cpp | 3 +- compiler-rt/test/msan/strlen_of_shadow.cpp | 2 +- compiler-rt/test/msan/vararg.cpp | 3 +- compiler-rt/test/msan/vector_select.cpp | 2 +- .../TestCases/Linux/ptrace.cpp | 21 + .../test/sanitizer_common/print_address.h | 2 +- compiler-rt/test/tsan/map32bit.cpp | 1 + compiler-rt/test/tsan/mmap_large.cpp | 2 + compiler-rt/test/tsan/test.h | 2 + .../TestCases/Posix/arg1-arg0-logging.cpp | 2 +- .../test/xray/TestCases/Posix/arg1-logger.cpp | 2 +- .../Posix/arg1-logging-implicit-this.cpp | 2 +- .../TestCases/Posix/argv0-log-file-name.cpp | 1 + .../xray/TestCases/Posix/coverage-sample.cpp | 1 + .../TestCases/Posix/fixedsize-logging.cpp | 1 + .../xray/TestCases/Posix/func-id-utils.cpp | 1 + .../xray/TestCases/Posix/logging-modes.cpp | 1 + .../TestCases/Posix/optional-inmemory-log.cpp | 1 + .../TestCases/Posix/patching-unpatching.cpp | 1 + .../test/xray/TestCases/Posix/pic_test.cpp | 1 + .../string.capacity/max_size.pass.cpp | 2 + libcxxabi/src/demangle/ItaniumDemangle.h | 2 +- libunwind/include/__libunwind_config.h | 12 + libunwind/include/libunwind.h | 68 + libunwind/src/Registers.hpp | 266 + 
libunwind/src/UnwindCursor.hpp | 16 + libunwind/src/UnwindRegistersRestore.S | 189 +- libunwind/src/UnwindRegistersSave.S | 186 +- libunwind/src/config.h | 2 +- libunwind/src/libunwind.cpp | 2 + libunwind/test/unw_resume.pass.cpp | 34 + lld/ELF/Arch/LoongArch.cpp | 473 + lld/ELF/CMakeLists.txt | 1 + lld/ELF/Driver.cpp | 3 +- lld/ELF/InputFiles.cpp | 3 + lld/ELF/InputSection.cpp | 6 +- lld/ELF/Relocations.cpp | 49 +- lld/ELF/Relocations.h | 2 + lld/ELF/ScriptParser.cpp | 1 + lld/ELF/Target.cpp | 4 + lld/ELF/Target.h | 2 + lld/ELF/Writer.cpp | 14 +- lld/test/ELF/Inputs/loongarch.s | 3 + lld/test/ELF/loongarch-branch.s | 42 + .../ELF/loongarch-eflags-diff-abi-err.test | 24 + lld/test/ELF/loongarch-eflags-lp32.test | 29 + lld/test/ELF/loongarch-eflags-lp64.s | 8 + lld/test/ELF/loongarch-eflags-lpx32.test | 29 + lld/test/ELF/loongarch-got-reloc.s | 67 + lld/test/ELF/loongarch-ifunc.s | 35 + lld/test/ELF/loongarch-plt.s | 83 + lld/test/ELF/loongarch-relative.s | 59 + lld/test/ELF/loongarch-tls-gd.s | 137 + lld/test/ELF/loongarch-tls-ie.s | 61 + lld/test/ELF/loongarch-tls-le.s | 130 + lld/test/ELF/loongarch_eh_frame.s | 23 + .../ELF/loongarch_not_relative_eh_frame.test | 49 + lld/test/ELF/loongarch_preempt_hidden_sym.s | 12 + .../ELF/lto/loongarch-global-offset-table.ll | 26 + lld/test/ELF/lto/loongarch.ll | 10 + lld/test/ELF/x86-64-dyn-rel-error5.s | 7 +- lld/test/lit.cfg.py | 1 + llvm-build/Makefile | 6 + llvm-build/build.py | 28 +- llvm-build/build_musl.sh | 5 +- llvm/CMakeLists.txt | 1 + llvm/cmake/config-ix.cmake | 2 + llvm/cmake/config.guess | 3 + llvm/include/llvm/ADT/Triple.h | 11 + llvm/include/llvm/BinaryFormat/ELF.h | 23 +- .../llvm/BinaryFormat/ELFRelocs/LoongArch.def | 6 + llvm/include/llvm/Demangle/ItaniumDemangle.h | 2 +- .../llvm/ExecutionEngine/Orc/OrcABISupport.h | 36 + llvm/include/llvm/IR/CMakeLists.txt | 1 + llvm/include/llvm/IR/InlineAsm.h | 1 + llvm/include/llvm/IR/Intrinsics.td | 1 + llvm/include/llvm/IR/IntrinsicsLoongArch.td | 3657 +++++++ llvm/include/llvm/Object/ELFObjectFile.h | 3 +- .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 3 + .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 10 + .../Orc/EPCIndirectionUtils.cpp | 3 + .../ExecutionEngine/Orc/IndirectionUtils.cpp | 5 + .../lib/ExecutionEngine/Orc/LazyReexports.cpp | 4 + .../lib/ExecutionEngine/Orc/OrcABISupport.cpp | 200 + .../RuntimeDyld/RuntimeDyldELF.cpp | 241 + .../RuntimeDyld/RuntimeDyldELF.h | 10 + llvm/lib/IR/Function.cpp | 1 + llvm/lib/MC/MCObjectFileInfo.cpp | 6 + llvm/lib/Object/RelocationResolver.cpp | 26 + llvm/lib/ObjectYAML/ELFYAML.cpp | 13 +- llvm/lib/Support/Triple.cpp | 1 + llvm/lib/Support/Unix/Memory.inc | 5 +- .../Target/LoongArch/AsmParser/CMakeLists.txt | 4 +- .../AsmParser/LoongArchAsmParser.cpp | 2530 ++++- llvm/lib/Target/LoongArch/CMakeLists.txt | 13 +- .../LoongArch/Disassembler/CMakeLists.txt | 4 +- .../Disassembler/LoongArchDisassembler.cpp | 921 +- llvm/lib/Target/LoongArch/LoongArch.h | 37 +- llvm/lib/Target/LoongArch/LoongArch.td | 166 +- .../Target/LoongArch/LoongArch32InstrInfo.td | 741 ++ .../Target/LoongArch/LoongArchAsmPrinter.cpp | 614 +- .../Target/LoongArch/LoongArchAsmPrinter.h | 115 +- .../lib/Target/LoongArch/LoongArchCCState.cpp | 165 + llvm/lib/Target/LoongArch/LoongArchCCState.h | 165 + .../Target/LoongArch/LoongArchCallingConv.td | 309 +- .../LoongArch/LoongArchExpandPseudo.cpp | 2482 +++++ .../LoongArch/LoongArchFloat32InstrInfo.td | 229 - .../LoongArch/LoongArchFloat64InstrInfo.td | 242 - 
.../LoongArch/LoongArchFloatInstrFormats.td | 241 - .../LoongArch/LoongArchFrameLowering.cpp | 610 +- .../Target/LoongArch/LoongArchFrameLowering.h | 61 +- .../LoongArch/LoongArchISelDAGToDAG.cpp | 946 +- .../Target/LoongArch/LoongArchISelDAGToDAG.h | 143 +- .../LoongArch/LoongArchISelLowering.cpp | 8936 +++++++++++++++-- .../Target/LoongArch/LoongArchISelLowering.h | 630 +- .../Target/LoongArch/LoongArchInstrFormats.td | 842 +- .../Target/LoongArch/LoongArchInstrInfo.cpp | 1067 +- .../lib/Target/LoongArch/LoongArchInstrInfo.h | 218 +- .../Target/LoongArch/LoongArchInstrInfo.td | 2705 +++-- .../Target/LoongArch/LoongArchInstrInfoF.td | 630 ++ .../LoongArch/LoongArchLASXInstrFormats.td | 448 + .../LoongArch/LoongArchLASXInstrInfo.td | 5499 ++++++++++ .../LoongArch/LoongArchLSXInstrFormats.td | 449 + .../Target/LoongArch/LoongArchLSXInstrInfo.td | 5747 +++++++++++ .../Target/LoongArch/LoongArchMCInstLower.cpp | 357 +- .../Target/LoongArch/LoongArchMCInstLower.h | 55 + .../LoongArch/LoongArchMachineFunction.cpp | 58 + .../LoongArch/LoongArchMachineFunction.h | 103 + .../LoongArch/LoongArchMachineFunctionInfo.h | 57 - .../LoongArch/LoongArchModuleISelDAGToDAG.cpp | 53 + .../LoongArch/LoongArchRegisterInfo.cpp | 376 +- .../Target/LoongArch/LoongArchRegisterInfo.h | 62 +- .../Target/LoongArch/LoongArchRegisterInfo.td | 484 +- .../Target/LoongArch/LoongArchSubtarget.cpp | 99 +- .../lib/Target/LoongArch/LoongArchSubtarget.h | 146 +- .../LoongArch/LoongArchTargetMachine.cpp | 170 +- .../Target/LoongArch/LoongArchTargetMachine.h | 34 +- .../LoongArch/LoongArchTargetObjectFile.cpp | 26 + .../LoongArch/LoongArchTargetObjectFile.h | 24 + .../LoongArch/LoongArchTargetStreamer.h | 130 + .../LoongArchTargetTransformInfo.cpp | 325 + .../LoongArch/LoongArchTargetTransformInfo.h | 91 + .../LoongArch/MCTargetDesc/CMakeLists.txt | 11 +- .../MCTargetDesc/LoongArchABIInfo.cpp | 106 + .../LoongArch/MCTargetDesc/LoongArchABIInfo.h | 76 + .../LoongArchAnalyzeImmediate.cpp | 64 + ...chMatInt.h => LoongArchAnalyzeImmediate.h} | 15 +- .../MCTargetDesc/LoongArchAsmBackend.cpp | 219 +- .../MCTargetDesc/LoongArchAsmBackend.h | 79 +- .../MCTargetDesc/LoongArchBaseInfo.cpp | 40 - .../MCTargetDesc/LoongArchBaseInfo.h | 134 +- .../MCTargetDesc/LoongArchELFObjectWriter.cpp | 173 +- .../MCTargetDesc/LoongArchELFStreamer.cpp | 138 + .../MCTargetDesc/LoongArchELFStreamer.h | 53 + .../MCTargetDesc/LoongArchFixupKinds.h | 90 + .../MCTargetDesc/LoongArchInstPrinter.cpp | 237 +- .../MCTargetDesc/LoongArchInstPrinter.h | 110 +- .../MCTargetDesc/LoongArchMCAsmInfo.cpp | 33 +- .../MCTargetDesc/LoongArchMCAsmInfo.h | 9 +- .../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 1480 ++- .../MCTargetDesc/LoongArchMCCodeEmitter.h | 146 + .../MCTargetDesc/LoongArchMCExpr.cpp | 158 + .../LoongArch/MCTargetDesc/LoongArchMCExpr.h | 97 + .../MCTargetDesc/LoongArchMCTargetDesc.cpp | 144 +- .../MCTargetDesc/LoongArchMCTargetDesc.h | 28 +- .../MCTargetDesc/LoongArchMatInt.cpp | 51 - .../MCTargetDesc/LoongArchTargetStreamer.cpp | 330 + .../LoongArch/TargetInfo/CMakeLists.txt | 1 - .../TargetInfo/LoongArchTargetInfo.cpp | 18 +- .../TargetInfo/LoongArchTargetInfo.h | 4 +- llvm/lib/Target/Mips/MipsExpandPseudo.cpp | 35 + .../Instrumentation/AddressSanitizer.cpp | 6 +- .../Instrumentation/MemorySanitizer.cpp | 130 + llvm/lib/Transforms/Scalar/SROA.cpp | 61 +- llvm/lib/XRay/InstrumentationMap.cpp | 1 + llvm/test/CodeGen/LoongArch/1ri.mir | 96 - llvm/test/CodeGen/LoongArch/2r.mir | 230 - llvm/test/CodeGen/LoongArch/2ri.mir | 432 - 
llvm/test/CodeGen/LoongArch/3r.mir | 995 -- llvm/test/CodeGen/LoongArch/3ri.mir | 69 - llvm/test/CodeGen/LoongArch/align.ll | 8 + llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll | 902 ++ .../CodeGen/LoongArch/atomic-operand-imm0.ll | 17 + llvm/test/CodeGen/LoongArch/atomic_16_8.ll | 785 ++ llvm/test/CodeGen/LoongArch/atomic_64_32.ll | 323 + llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll | 1776 ++++ .../CodeGen/LoongArch/atomicrmw-minmax.ll | 1882 ++++ llvm/test/CodeGen/LoongArch/atomicrmw.ll | 3652 +++++++ llvm/test/CodeGen/LoongArch/bss.ll | 5 + llvm/test/CodeGen/LoongArch/bstrins_d.ll | 254 +- llvm/test/CodeGen/LoongArch/bstrins_w.ll | 238 +- llvm/test/CodeGen/LoongArch/bstrpick_d.ll | 121 +- llvm/test/CodeGen/LoongArch/bstrpick_w.ll | 101 +- .../LoongArch/builtins-loongarch-base.ll | 752 ++ llvm/test/CodeGen/LoongArch/const-mult.ll | 245 + .../CodeGen/LoongArch/disable-tail-calls.ll | 94 + llvm/test/CodeGen/LoongArch/divrem.ll | 68 + llvm/test/CodeGen/LoongArch/double-imm.ll | 89 - llvm/test/CodeGen/LoongArch/dup-tail.ll | 45 + llvm/test/CodeGen/LoongArch/eliminateFI.ll | 106 + .../CodeGen/LoongArch/emergency-spill-slot.ll | 103 + llvm/test/CodeGen/LoongArch/fcopysign.ll | 17 + .../CodeGen/LoongArch/fence-singlethread.ll | 11 + .../LoongArch/{ir-instruction => }/fence.ll | 38 +- llvm/test/CodeGen/LoongArch/float-imm.ll | 85 - llvm/test/CodeGen/LoongArch/frame-info.ll | 132 + llvm/test/CodeGen/LoongArch/frame.ll | 29 - llvm/test/CodeGen/LoongArch/fsel.ll | 47 + llvm/test/CodeGen/LoongArch/imm.ll | 165 - llvm/test/CodeGen/LoongArch/immediate.ll | 2542 +++++ .../CodeGen/LoongArch/inlineasm/extra-code.ll | 8 + .../inlineasm/floating-point-in-gpr.ll | 31 + .../non-native-value-type-registers-error.ll | 8 + .../non-native-value-type-registers.ll | 42 + .../test/CodeGen/LoongArch/inlineasm/preld.ll | 8 + .../CodeGen/LoongArch/ir-instruction/add.ll | 183 - .../CodeGen/LoongArch/ir-instruction/and.ll | 266 - .../CodeGen/LoongArch/ir-instruction/ashr.ll | 168 - .../CodeGen/LoongArch/ir-instruction/br.ll | 358 - .../CodeGen/LoongArch/ir-instruction/call.ll | 88 - .../ir-instruction/double-convert.ll | 329 - .../CodeGen/LoongArch/ir-instruction/fadd.ll | 32 - .../LoongArch/ir-instruction/fcmp-dbl.ll | 257 - .../LoongArch/ir-instruction/fcmp-flt.ll | 257 - .../CodeGen/LoongArch/ir-instruction/fdiv.ll | 32 - .../LoongArch/ir-instruction/float-convert.ll | 650 -- .../CodeGen/LoongArch/ir-instruction/fmul.ll | 32 - .../CodeGen/LoongArch/ir-instruction/fneg.ll | 32 - .../CodeGen/LoongArch/ir-instruction/fsub.ll | 60 - .../CodeGen/LoongArch/ir-instruction/icmp.ll | 244 - .../LoongArch/ir-instruction/indirectbr.ll | 30 - .../ir-instruction/load-store-atomic.ll | 143 - .../LoongArch/ir-instruction/load-store.ll | 406 - .../CodeGen/LoongArch/ir-instruction/lshr.ll | 160 - .../CodeGen/LoongArch/ir-instruction/mul.ll | 287 - .../CodeGen/LoongArch/ir-instruction/or.ll | 264 - .../ir-instruction/sdiv-udiv-srem-urem.ll | 685 -- .../ir-instruction/select-bare-dbl.ll | 23 - .../ir-instruction/select-bare-flt.ll | 23 - .../ir-instruction/select-bare-int.ll | 107 - .../ir-instruction/select-fpcc-dbl.ll | 272 - .../ir-instruction/select-fpcc-flt.ll | 272 - .../ir-instruction/select-fpcc-int.ll | 704 -- .../ir-instruction/select-icc-dbl.ll | 206 - .../ir-instruction/select-icc-flt.ll | 206 - .../ir-instruction/select-icc-int.ll | 226 - .../ir-instruction/sext-zext-trunc.ll | 418 - .../CodeGen/LoongArch/ir-instruction/shl.ll | 156 - .../CodeGen/LoongArch/ir-instruction/sub.ll | 93 - 
.../CodeGen/LoongArch/ir-instruction/xor.ll | 264 - llvm/test/CodeGen/LoongArch/jirl-verify.ll | 34 + llvm/test/CodeGen/LoongArch/lasx/VExtend.ll | 54 + .../CodeGen/LoongArch/lasx/imm_vector_lasx.ll | 176 + .../test/CodeGen/LoongArch/lasx/inline-asm.ll | 55 + .../CodeGen/LoongArch/lasx/insert-lasx.ll | 98 + .../CodeGen/LoongArch/lasx/intrinsic-lasx.ll | 70 + llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll | 106 + llvm/test/CodeGen/LoongArch/lasx/lasxvclr.ll | 46 + .../test/CodeGen/LoongArch/lasx/logic-lasx.ll | 130 + llvm/test/CodeGen/LoongArch/lasx/set-lasx.ll | 38 + .../LoongArch/lasx/shuffle_v4i64_1032.ll | 19 + .../CodeGen/LoongArch/lasx/v32i8-bswap.ll | 26 + llvm/test/CodeGen/LoongArch/lasx/vext2xv.ll | 65 + llvm/test/CodeGen/LoongArch/lasx/xvabsd.ll | 106 + llvm/test/CodeGen/LoongArch/lasx/xvadda.ll | 62 + llvm/test/CodeGen/LoongArch/lasx/xvaddsub.ll | 98 + llvm/test/CodeGen/LoongArch/lasx/xvfcvt.ll | 14 + llvm/test/CodeGen/LoongArch/lasx/xvhadd.ll | 21 + llvm/test/CodeGen/LoongArch/lasx/xvilvh.ll | 32 + llvm/test/CodeGen/LoongArch/lasx/xvilvl.ll | 32 + llvm/test/CodeGen/LoongArch/ldptr.ll | 70 + llvm/test/CodeGen/LoongArch/lit.local.cfg | 12 +- .../CodeGen/LoongArch/load-store-atomic.ll | 310 + llvm/test/CodeGen/LoongArch/logic-op.ll | 171 + llvm/test/CodeGen/LoongArch/lshr.ll | 12 + .../CodeGen/LoongArch/lsx/imm_vector_lsx.ll | 176 + llvm/test/CodeGen/LoongArch/lsx/inline-asm.ll | 34 + .../CodeGen/LoongArch/lsx/intrinsic-lsx.ll | 92 + llvm/test/CodeGen/LoongArch/lsx/logic-lsx.ll | 132 + llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll | 106 + llvm/test/CodeGen/LoongArch/lsx/lsxvclr.ll | 50 + llvm/test/CodeGen/LoongArch/lsx/set-lsx.ll | 38 + .../test/CodeGen/LoongArch/lsx/v16i8-bswap.ll | 20 + llvm/test/CodeGen/LoongArch/lsx/vabsd.ll | 262 + llvm/test/CodeGen/LoongArch/lsx/vadda.ll | 62 + llvm/test/CodeGen/LoongArch/lsx/vfcvt.ll | 14 + llvm/test/CodeGen/LoongArch/lu12i.ll | 7 + llvm/test/CodeGen/LoongArch/mcpu_load.ll | 72 + llvm/test/CodeGen/LoongArch/misc.mir | 200 - llvm/test/CodeGen/LoongArch/named-register.ll | 29 + llvm/test/CodeGen/LoongArch/nomerge.ll | 35 + llvm/test/CodeGen/LoongArch/noti32.ll | 143 + .../LoongArch/peephole-load-store-addi.ll | 100 + .../CodeGen/LoongArch/shift-masked-shamt.ll | 255 - llvm/test/CodeGen/LoongArch/signext.ll | 37 + llvm/test/CodeGen/LoongArch/stptr.ll | 52 + llvm/test/CodeGen/LoongArch/tailcall-R.ll | 62 + llvm/test/CodeGen/LoongArch/tailcall-check.ll | 155 + llvm/test/CodeGen/LoongArch/tailcall-mem.ll | 35 + llvm/test/CodeGen/LoongArch/tailcall.ll | 13 + llvm/test/CodeGen/LoongArch/target_support.ll | 3 - llvm/test/CodeGen/LoongArch/thread-pointer.ll | 9 + llvm/test/CodeGen/LoongArch/trap.ll | 13 + llvm/test/CodeGen/LoongArch/trunc.ll | 108 + llvm/test/CodeGen/LoongArch/unalignment.ll | 72 + .../CodeGen/Mips/atomic-fix-loongson3-llsc.ll | 7548 ++++++++++++++ llvm/test/CodeGen/Mips/atomic.ll | 30 +- .../Mips/atomic64-fix-loongson3-llsc.ll | 1377 +++ llvm/test/CodeGen/Mips/atomic64.ll | 18 +- .../ELF/loongarch-empty-name-symbol.s | 26 + llvm/test/DebugInfo/X86/dbg-value-no-crash.ll | 39 + .../X86/MachO_GOTAndStubsOptimization.s | 1 + llvm/test/ExecutionEngine/MCJIT/lit.local.cfg | 5 +- .../MCJIT/remote/cross-module-a.ll | 1 + llvm/test/ExecutionEngine/MCJIT/remote/eh.ll | 1 + .../MCJIT/remote/simpletest-remote.ll | 1 + .../MCJIT/remote/stubs-remote.ll | 1 + .../remote/test-common-symbols-remote.ll | 1 + .../test-fp-no-external-funcs-remote.ll | 1 + .../remote/test-global-init-nonzero-remote.ll | 1 + .../remote/test-global-init-nonzero-sm-pic.ll | 1 + 
.../MCJIT/remote/test-ptr-reloc-remote.ll | 1 + .../MCJIT/remote/test-ptr-reloc-sm-pic.ll | 1 + .../RuntimeDyld/LoongArch/hello-g.ll | 33 + .../RuntimeDyld/LoongArch/lit.local.cfg | 2 + .../MC/Disassembler/LoongArch/lit.local.cfg | 3 + llvm/test/MC/Disassembler/LoongArch/simd.txt | 1361 +++ llvm/test/MC/LoongArch/Basic/Float/d-arith.s | 99 - .../MC/LoongArch/Basic/Float/d-bound-check.s | 31 - llvm/test/MC/LoongArch/Basic/Float/d-branch.s | 15 - llvm/test/MC/LoongArch/Basic/Float/d-comp.s | 103 - llvm/test/MC/LoongArch/Basic/Float/d-conv.s | 99 - .../test/MC/LoongArch/Basic/Float/d-invalid.s | 7 - llvm/test/MC/LoongArch/Basic/Float/d-memory.s | 31 - llvm/test/MC/LoongArch/Basic/Float/d-move.s | 39 - llvm/test/MC/LoongArch/Basic/Float/f-arith.s | 94 - .../MC/LoongArch/Basic/Float/f-bound-check.s | 26 - llvm/test/MC/LoongArch/Basic/Float/f-branch.s | 18 - llvm/test/MC/LoongArch/Basic/Float/f-comp.s | 98 - llvm/test/MC/LoongArch/Basic/Float/f-conv.s | 38 - .../test/MC/LoongArch/Basic/Float/f-invalid.s | 4 - llvm/test/MC/LoongArch/Basic/Float/f-memory.s | 26 - llvm/test/MC/LoongArch/Basic/Float/f-move.s | 74 - llvm/test/MC/LoongArch/Basic/Integer/arith.s | 212 - llvm/test/MC/LoongArch/Basic/Integer/atomic.s | 185 - .../test/MC/LoongArch/Basic/Integer/barrier.s | 19 - .../MC/LoongArch/Basic/Integer/bit-manipu.s | 136 - .../MC/LoongArch/Basic/Integer/bit-shift.s | 88 - .../MC/LoongArch/Basic/Integer/bound-check.s | 71 - llvm/test/MC/LoongArch/Basic/Integer/branch.s | 55 - llvm/test/MC/LoongArch/Basic/Integer/crc.s | 39 - .../MC/LoongArch/Basic/Integer/invalid-dis.s | 10 - .../test/MC/LoongArch/Basic/Integer/invalid.s | 191 - .../MC/LoongArch/Basic/Integer/invalid64.s | 77 - llvm/test/MC/LoongArch/Basic/Integer/memory.s | 132 - llvm/test/MC/LoongArch/Basic/Integer/misc.s | 56 - .../test/MC/LoongArch/Basic/Integer/pseudos.s | 18 - .../MC/LoongArch/Basic/Privilege/invalid.s | 14 - .../test/MC/LoongArch/Basic/Privilege/valid.s | 118 - llvm/test/MC/LoongArch/Directives/cfi.s | 34 - llvm/test/MC/LoongArch/Directives/data.s | 102 - llvm/test/MC/LoongArch/Misc/aligned-nops.s | 15 - llvm/test/MC/LoongArch/aligned-nops.s | 25 + llvm/test/MC/LoongArch/atomic-error.s | 7 + llvm/test/MC/LoongArch/atomic.s | 12 + llvm/test/MC/LoongArch/cgprofile.ll | 63 + llvm/test/MC/LoongArch/cgprofile.s | 30 + llvm/test/MC/LoongArch/data_half.s | 13 + llvm/test/MC/LoongArch/fixups-expr.s | 40 + llvm/test/MC/LoongArch/invalid.s | 50 + llvm/test/MC/LoongArch/lit.local.cfg | 1 + llvm/test/MC/LoongArch/macro-la.s | 168 + llvm/test/MC/LoongArch/macro-li.s | 773 ++ llvm/test/MC/LoongArch/reloc-directive-err.s | 7 + llvm/test/MC/LoongArch/reloc-directive.s | 177 + .../MC/LoongArch/{Misc => }/unaligned-nops.s | 2 +- llvm/test/MC/LoongArch/valid_12imm.s | 33 + llvm/test/MC/LoongArch/valid_4operands.s | 53 + llvm/test/MC/LoongArch/valid_bigimm.s | 33 + llvm/test/MC/LoongArch/valid_branch.s | 155 + llvm/test/MC/LoongArch/valid_float.s | 297 + llvm/test/MC/LoongArch/valid_integer.s | 369 + llvm/test/MC/LoongArch/valid_memory.s | 405 + llvm/test/MC/LoongArch/valid_priv.s | 125 + llvm/test/MC/LoongArch/valid_simd.s | 5437 ++++++++++ llvm/test/Object/LoongArch/elf-flags.yaml | 22 + .../Object/LoongArch/elf-loongarch64-rel.yaml | 193 + llvm/test/Object/LoongArch/lit.local.cfg | 2 + .../AtomicExpand/LoongArch/lit.local.cfg | 5 - .../LoongArch/load-store-atomic.ll | 121 - llvm/test/Transforms/SROA/pr57796.ll | 41 + llvm/test/Transforms/SROA/vector-promotion.ll | 194 +- .../Inputs/loongarch_function_name.ll | 2 +- 
.../loongarch_function_name.ll.expected | 6 +- .../Inputs/loongarch_generated_funcs.ll | 63 - ...arch_generated_funcs.ll.generated.expected | 148 - ...ch_generated_funcs.ll.nogenerated.expected | 147 - .../loongarch_generated_funcs.test | 17 - llvm/test/tools/llvm-profgen/lit.local.cfg | 2 +- .../llvm-readobj/ELF/loongarch-eflags.test | 64 - .../tools/obj2yaml/ELF/loongarch-eflags.yaml | 29 - llvm/tools/llvm-profgen/ProfiledBinary.cpp | 3 - llvm/tools/llvm-readobj/ELFDumper.cpp | 19 +- .../lib/Target/LoongArch/AsmParser/BUILD.gn | 24 + .../llvm/lib/Target/LoongArch/BUILD.gn | 102 + .../Target/LoongArch/Disassembler/BUILD.gn | 23 + .../Target/LoongArch/MCTargetDesc/BUILD.gn | 74 + .../lib/Target/LoongArch/TargetInfo/BUILD.gn | 9 + openmp/README.rst | 4 +- openmp/runtime/CMakeLists.txt | 9 +- openmp/runtime/README.txt | 1 + .../runtime/cmake/LibompGetArchitecture.cmake | 2 + openmp/runtime/cmake/LibompMicroTests.cmake | 3 + openmp/runtime/cmake/LibompUtils.cmake | 2 + openmp/runtime/src/kmp_affinity.h | 12 + openmp/runtime/src/kmp_csupport.cpp | 2 +- openmp/runtime/src/kmp_os.h | 4 +- openmp/runtime/src/kmp_platform.h | 6 +- openmp/runtime/src/kmp_runtime.cpp | 2 +- .../thirdparty/ittnotify/ittnotify_config.h | 6 + openmp/runtime/src/z_Linux_asm.S | 156 +- openmp/runtime/src/z_Linux_util.cpp | 2 +- openmp/runtime/tools/lib/Platform.pm | 8 +- openmp/runtime/tools/lib/Uname.pm | 2 + 547 files changed, 109095 insertions(+), 19590 deletions(-) create mode 100644 clang/include/clang/Basic/BuiltinsLoongArch.def create mode 100644 clang/lib/Basic/Targets/LoongArch.cpp create mode 100644 clang/lib/Basic/Targets/LoongArch.h create mode 100644 clang/lib/Driver/ToolChains/Arch/LoongArch.cpp create mode 100644 clang/lib/Driver/ToolChains/Arch/LoongArch.h create mode 100644 clang/lib/Headers/larchintrin.h create mode 100644 clang/lib/Headers/lasxintrin.h create mode 100644 clang/lib/Headers/lsxintrin.h create mode 100644 clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c create mode 100644 clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c create mode 100644 clang/test/CodeGen/LoongArch/abi-lp64d.c create mode 100644 clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c create mode 100644 clang/test/CodeGen/builtins-loongarch-base.c create mode 100644 clang/test/CodeGen/builtins-loongarch-lasx-error.c create mode 100644 clang/test/CodeGen/builtins-loongarch-lasx.c create mode 100644 clang/test/CodeGen/builtins-loongarch-lsx-error.c create mode 100644 clang/test/CodeGen/builtins-loongarch-lsx.c create mode 100644 clang/test/CodeGen/loongarch-inline-asm-modifiers.c create mode 100644 clang/test/CodeGen/loongarch-inline-asm.c create mode 100644 clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91269.cpp create mode 100644 clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91278.cpp create mode 100644 clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp create mode 100644 clang/test/Driver/loongarch-alignment-feature.c create mode 100644 clang/test/Driver/loongarch-march.c create mode 100644 clang/test/Preprocessor/init-loongarch.c create mode 100644 compiler-rt/lib/builtins/loongarch/fp_mode.c create mode 100644 compiler-rt/lib/tsan/rtl/tsan_rtl_loongarch64.S create mode 100644 compiler-rt/lib/xray/xray_loongarch.cpp create mode 100644 compiler-rt/lib/xray/xray_trampoline_loongarch.S create mode 100644 libunwind/test/unw_resume.pass.cpp create mode 100644 lld/ELF/Arch/LoongArch.cpp create mode 100644 lld/test/ELF/Inputs/loongarch.s create mode 100644 lld/test/ELF/loongarch-branch.s create mode 100644 
lld/test/ELF/loongarch-eflags-diff-abi-err.test create mode 100644 lld/test/ELF/loongarch-eflags-lp32.test create mode 100644 lld/test/ELF/loongarch-eflags-lp64.s create mode 100644 lld/test/ELF/loongarch-eflags-lpx32.test create mode 100644 lld/test/ELF/loongarch-got-reloc.s create mode 100644 lld/test/ELF/loongarch-ifunc.s create mode 100644 lld/test/ELF/loongarch-plt.s create mode 100644 lld/test/ELF/loongarch-relative.s create mode 100644 lld/test/ELF/loongarch-tls-gd.s create mode 100644 lld/test/ELF/loongarch-tls-ie.s create mode 100644 lld/test/ELF/loongarch-tls-le.s create mode 100644 lld/test/ELF/loongarch_eh_frame.s create mode 100644 lld/test/ELF/loongarch_not_relative_eh_frame.test create mode 100644 lld/test/ELF/loongarch_preempt_hidden_sym.s create mode 100644 lld/test/ELF/lto/loongarch-global-offset-table.ll create mode 100644 lld/test/ELF/lto/loongarch.ll create mode 100644 llvm/include/llvm/IR/IntrinsicsLoongArch.td create mode 100644 llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchCCState.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchCCState.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp delete mode 100644 llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td delete mode 100644 llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td delete mode 100644 llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td create mode 100644 llvm/lib/Target/LoongArch/LoongArchMCInstLower.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchMachineFunction.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchMachineFunction.h delete mode 100644 llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchTargetStreamer.h create mode 100644 llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp create mode 100644 llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp rename llvm/lib/Target/LoongArch/MCTargetDesc/{LoongArchMatInt.h => LoongArchAnalyzeImmediate.h} (62%) delete mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp create mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h delete mode 100644 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp create mode 100644 
llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp delete mode 100644 llvm/test/CodeGen/LoongArch/1ri.mir delete mode 100644 llvm/test/CodeGen/LoongArch/2r.mir delete mode 100644 llvm/test/CodeGen/LoongArch/2ri.mir delete mode 100644 llvm/test/CodeGen/LoongArch/3r.mir delete mode 100644 llvm/test/CodeGen/LoongArch/3ri.mir create mode 100644 llvm/test/CodeGen/LoongArch/align.ll create mode 100644 llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll create mode 100644 llvm/test/CodeGen/LoongArch/atomic-operand-imm0.ll create mode 100644 llvm/test/CodeGen/LoongArch/atomic_16_8.ll create mode 100644 llvm/test/CodeGen/LoongArch/atomic_64_32.ll create mode 100644 llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll create mode 100644 llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll create mode 100644 llvm/test/CodeGen/LoongArch/atomicrmw.ll create mode 100644 llvm/test/CodeGen/LoongArch/bss.ll create mode 100644 llvm/test/CodeGen/LoongArch/builtins-loongarch-base.ll create mode 100644 llvm/test/CodeGen/LoongArch/const-mult.ll create mode 100644 llvm/test/CodeGen/LoongArch/disable-tail-calls.ll create mode 100644 llvm/test/CodeGen/LoongArch/divrem.ll delete mode 100644 llvm/test/CodeGen/LoongArch/double-imm.ll create mode 100644 llvm/test/CodeGen/LoongArch/dup-tail.ll create mode 100644 llvm/test/CodeGen/LoongArch/eliminateFI.ll create mode 100644 llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll create mode 100644 llvm/test/CodeGen/LoongArch/fcopysign.ll create mode 100644 llvm/test/CodeGen/LoongArch/fence-singlethread.ll rename llvm/test/CodeGen/LoongArch/{ir-instruction => }/fence.ll (42%) delete mode 100644 llvm/test/CodeGen/LoongArch/float-imm.ll create mode 100644 llvm/test/CodeGen/LoongArch/frame-info.ll delete mode 100644 llvm/test/CodeGen/LoongArch/frame.ll create mode 100644 llvm/test/CodeGen/LoongArch/fsel.ll delete mode 100644 llvm/test/CodeGen/LoongArch/imm.ll create mode 100644 llvm/test/CodeGen/LoongArch/immediate.ll create mode 100644 llvm/test/CodeGen/LoongArch/inlineasm/extra-code.ll create mode 100644 llvm/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll create mode 100644 llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll create mode 100644 llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll create mode 100644 llvm/test/CodeGen/LoongArch/inlineasm/preld.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/add.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/and.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/br.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/call.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll delete mode 100644 
llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/or.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-dbl.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll delete mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll create mode 100644 llvm/test/CodeGen/LoongArch/jirl-verify.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/VExtend.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/inline-asm.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/intrinsic-lasx.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/lasxvclr.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/logic-lasx.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/set-lasx.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/shuffle_v4i64_1032.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/vext2xv.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/xvabsd.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/xvadda.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/xvaddsub.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/xvfcvt.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/xvhadd.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/xvilvh.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/xvilvl.ll create mode 100644 llvm/test/CodeGen/LoongArch/ldptr.ll create mode 100644 llvm/test/CodeGen/LoongArch/load-store-atomic.ll create mode 100644 llvm/test/CodeGen/LoongArch/logic-op.ll create mode 100644 llvm/test/CodeGen/LoongArch/lshr.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/inline-asm.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/intrinsic-lsx.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/logic-lsx.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/lsxvclr.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/set-lsx.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll create mode 100644 
llvm/test/CodeGen/LoongArch/lsx/vabsd.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/vadda.ll create mode 100644 llvm/test/CodeGen/LoongArch/lsx/vfcvt.ll create mode 100644 llvm/test/CodeGen/LoongArch/lu12i.ll create mode 100644 llvm/test/CodeGen/LoongArch/mcpu_load.ll delete mode 100644 llvm/test/CodeGen/LoongArch/misc.mir create mode 100644 llvm/test/CodeGen/LoongArch/named-register.ll create mode 100644 llvm/test/CodeGen/LoongArch/nomerge.ll create mode 100644 llvm/test/CodeGen/LoongArch/noti32.ll create mode 100644 llvm/test/CodeGen/LoongArch/peephole-load-store-addi.ll delete mode 100644 llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll create mode 100644 llvm/test/CodeGen/LoongArch/signext.ll create mode 100644 llvm/test/CodeGen/LoongArch/stptr.ll create mode 100644 llvm/test/CodeGen/LoongArch/tailcall-R.ll create mode 100644 llvm/test/CodeGen/LoongArch/tailcall-check.ll create mode 100644 llvm/test/CodeGen/LoongArch/tailcall-mem.ll create mode 100644 llvm/test/CodeGen/LoongArch/tailcall.ll delete mode 100644 llvm/test/CodeGen/LoongArch/target_support.ll create mode 100644 llvm/test/CodeGen/LoongArch/thread-pointer.ll create mode 100644 llvm/test/CodeGen/LoongArch/trap.ll create mode 100644 llvm/test/CodeGen/LoongArch/trunc.ll create mode 100644 llvm/test/CodeGen/LoongArch/unalignment.ll create mode 100644 llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll create mode 100644 llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll create mode 100644 llvm/test/DebugInfo/Symbolize/ELF/loongarch-empty-name-symbol.s create mode 100644 llvm/test/DebugInfo/X86/dbg-value-no-crash.ll create mode 100644 llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/hello-g.ll create mode 100644 llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg create mode 100644 llvm/test/MC/Disassembler/LoongArch/lit.local.cfg create mode 100644 llvm/test/MC/Disassembler/LoongArch/simd.txt delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/d-arith.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/d-branch.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/d-comp.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/d-conv.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/d-invalid.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/d-memory.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/d-move.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/f-arith.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/f-branch.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/f-comp.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/f-conv.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/f-invalid.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/f-memory.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Float/f-move.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/arith.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/atomic.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/barrier.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/bit-manipu.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/bit-shift.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/bound-check.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/branch.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/crc.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/invalid-dis.s delete mode 
100644 llvm/test/MC/LoongArch/Basic/Integer/invalid.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/invalid64.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/memory.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/misc.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Integer/pseudos.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Privilege/invalid.s delete mode 100644 llvm/test/MC/LoongArch/Basic/Privilege/valid.s delete mode 100644 llvm/test/MC/LoongArch/Directives/cfi.s delete mode 100644 llvm/test/MC/LoongArch/Directives/data.s delete mode 100644 llvm/test/MC/LoongArch/Misc/aligned-nops.s create mode 100644 llvm/test/MC/LoongArch/aligned-nops.s create mode 100644 llvm/test/MC/LoongArch/atomic-error.s create mode 100644 llvm/test/MC/LoongArch/atomic.s create mode 100644 llvm/test/MC/LoongArch/cgprofile.ll create mode 100644 llvm/test/MC/LoongArch/cgprofile.s create mode 100644 llvm/test/MC/LoongArch/data_half.s create mode 100644 llvm/test/MC/LoongArch/fixups-expr.s create mode 100644 llvm/test/MC/LoongArch/invalid.s create mode 100644 llvm/test/MC/LoongArch/macro-la.s create mode 100644 llvm/test/MC/LoongArch/macro-li.s create mode 100644 llvm/test/MC/LoongArch/reloc-directive-err.s create mode 100644 llvm/test/MC/LoongArch/reloc-directive.s rename llvm/test/MC/LoongArch/{Misc => }/unaligned-nops.s (54%) create mode 100644 llvm/test/MC/LoongArch/valid_12imm.s create mode 100644 llvm/test/MC/LoongArch/valid_4operands.s create mode 100644 llvm/test/MC/LoongArch/valid_bigimm.s create mode 100644 llvm/test/MC/LoongArch/valid_branch.s create mode 100644 llvm/test/MC/LoongArch/valid_float.s create mode 100644 llvm/test/MC/LoongArch/valid_integer.s create mode 100644 llvm/test/MC/LoongArch/valid_memory.s create mode 100644 llvm/test/MC/LoongArch/valid_priv.s create mode 100644 llvm/test/MC/LoongArch/valid_simd.s create mode 100644 llvm/test/Object/LoongArch/elf-flags.yaml create mode 100644 llvm/test/Object/LoongArch/elf-loongarch64-rel.yaml create mode 100644 llvm/test/Object/LoongArch/lit.local.cfg delete mode 100644 llvm/test/Transforms/AtomicExpand/LoongArch/lit.local.cfg delete mode 100644 llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll create mode 100644 llvm/test/Transforms/SROA/pr57796.ll delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected delete mode 100644 llvm/test/tools/UpdateTestChecks/update_llc_test_checks/loongarch_generated_funcs.test delete mode 100644 llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test delete mode 100644 llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml create mode 100644 llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn create mode 100644 llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn create mode 100644 llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn create mode 100644 llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn create mode 100644 llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn diff --git a/clang/bindings/python/tests/CMakeLists.txt b/clang/bindings/python/tests/CMakeLists.txt index 5127512fe312..8383e6faefda 100644 --- a/clang/bindings/python/tests/CMakeLists.txt +++ 
b/clang/bindings/python/tests/CMakeLists.txt
@@ -40,7 +40,7 @@ endif()
 # addressed.
 # SystemZ has broken Python/FFI interface:
 # https://reviews.llvm.org/D52840#1265716
-if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|Sparc|SystemZ)$")
+if(${LLVM_NATIVE_ARCH} MATCHES "^(AArch64|Hexagon|LoongArch|Sparc|SystemZ)$")
   set(RUN_PYTHON_TESTS FALSE)
 endif()
diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def
new file mode 100644
index 000000000000..75d7e77c12cf
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsLoongArch.def
@@ -0,0 +1,1990 @@
+//===-- BuiltinsLoongArch.def - LoongArch Builtin function database --*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
+// Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoongArch-specific builtin function database. Users of
+// this file must define the BUILTIN macro to make use of this information;
+// an illustrative sketch of that expansion appears further below, after the
+// first block of vaddwev/vsubwod entries.
+//
+//===----------------------------------------------------------------------===//
+
+// The format of this database matches clang/Basic/Builtins.def.
+
+// LoongArch LSX
+
+BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc")
+BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc")
+BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc")
+BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc")
+BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc")
+BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc")
+BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc")
+BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc")
+BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc")
+BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc")
+BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc")
"V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc") +BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc") + +BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc") + +BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc") + +BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc") +BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc") +BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc") +BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc") + +BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc") +BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc") +BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc") +BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc") + +BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc") +BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc") + +BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc") +BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc") +BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc") +BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc") +BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc") +BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc") +BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc") +BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc") +BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc") + 
+BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc")
+BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc")
+BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc")
+BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc")
+BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc")
+BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc")
+BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc")
+BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc")
+BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc")
+BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc")
+BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc")
+BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc")
"V8sV16UcV16c", "nc") +BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc") +BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc") +BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc") +BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc") +BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") +BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc") +BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc") +BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") + +BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc") +BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc") +BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc") +BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc") + +BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") +BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc") +BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc") +BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc") +BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc") +BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc") +BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc") +BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc") +BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc") +BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc") +BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc") +BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc") +BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc") +BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc") +BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc") +BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc") + +BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc") +BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc") +BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc") + +BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc") +BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc") +BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc") + +BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc") +BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc") 
+BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc")
+
+BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc")
+
+BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc")
+
+BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc")
+BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc")
+BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc")
+BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc")
+BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc")
+BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc")
+
+BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc")
+
+BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc")
+BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc")
+BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc")
+BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc")
+BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc")
+
+BUILTIN(__builtin_lsx_vfrintrm_s, "V4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfrintrm_d, "V2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfrintrp_s, "V4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfrintrp_d, "V2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfrintrz_s, "V4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfrintrz_d, "V2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfrintrne_s, "V4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfrintrne_d, "V2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc")
+BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc")
+
+BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc")
+
+BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc")
+
+BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc")
+
+BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc")
+
+BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc")
+
+BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc")
+
+BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc")
+
+BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc")
+
+BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc")
+
+BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc")
+BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc")
+BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc")
+BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc")
+BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc")
+BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc")
+BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc")
+
+BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc")
+BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc")
+BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc")
+
+BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc")
+BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc")
+BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc")
+BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc")
+BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc")
+BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc")
+BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc")
+BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc")
+BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc")
+BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc")
+
+BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc")
+
+BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc")
+
+BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc")
+
+BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc")
+
+BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc")
+BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc")
+BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc")
+BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc")
+BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc")
+
+BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc")
+
+BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc")
+
+BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc")
+
+BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc")
+BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc")
+BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc")
+
+BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc")
+BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc")
+
+BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc")
+BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc")
+
+BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc")
+BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc")
+
+BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc")
+BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc")
+BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc")
+BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc")
+BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc")
+BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc")
+BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIi", "nc")
+BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIi", "nc")
+BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIi", "nc")
+BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIi", "nc")
+BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIi", "nc")
+BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIi", "nc")
+BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIi", "nc")
+BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIi", "nc")
+BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIi", "nc")
+BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIi", "nc")
+BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIi", "nc")
+BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIi", "nc")
+BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIi", "nc")
+BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIi", "nc")
+BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIi", "nc")
+BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIi", "nc")
+BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIi", "nc")
+BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIi", "nc")
+BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIi", "nc")
+BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIi", "nc")
+BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIi", "nc")
+BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIi", "nc")
+BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIi", "nc")
+BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIi", "nc")
+BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc")
+
+BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc")
+
+BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc")
+BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc")
+BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc")
+BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc")
+BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc")
+BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc")
+BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc")
+
+BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc")
+
+BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc")
+
+BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc")
+BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc")
+BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc")
+BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc")
+
+BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc")
+BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc")
+BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc")
+BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc")
+
+BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc")
+BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc")
+BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc")
+BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc")
+BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc")
+BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc")
+BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc")
+
+BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc")
+BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc")
+BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc")
+BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc")
+BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc")
+BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc")
+BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc")
+BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc")
+BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc")
+BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIi", "nc")
+BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIi", "nc")
+BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIi", "nc")
+BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIi", "nc")
+BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIi", "nc")
+BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIi", "nc")
+BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc")
+BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc")
+BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc")
+BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc")
+BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc")
+
+BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc")
+BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc")
+BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc")
+
+BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc")
+BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc")
+BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc")
+BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc")
+
+BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc")
+BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc")
+BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc")
+BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc")
+BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc")
+
+BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc")
+BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc")
+BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc")
+BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc")
+
+BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc")
+BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc")
+BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc")
+BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc")
+
+BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIi", "nc")
+BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIi", "nc")
+BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIi", "nc")
+BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIi", "nc")
+
+BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc")
+
+BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc")
+
+BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc")
+
+BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc")
+
+BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc")
+BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc")
+BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc")
+BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc")
+
+BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc")
+BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc")
+BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc")
+BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc")
+
+//LoongArch LASX
+
+BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc")
+
+
+BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc")
+BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc")
+BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc")
+BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc")
+BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc")
+BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc")
+BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc")
+BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc")
+BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc")
+BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc")
+BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc")
+BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc")
+BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc")
+
+BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIi", "nc")
+BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIi", "nc")
+BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIi", "nc")
+BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIi", "nc")
+
+BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc")
+BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc")
+BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc")
+BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc")
+
+BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIi", "nc")
+BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIi", "nc")
+BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIi", "nc")
+BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIi", "nc")
+
+BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc")
+BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc")
+BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc")
+BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc")
+
+BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc")
+BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc")
+BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc")
+BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc")
+
+BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc")
+BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc")
+BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc")
+BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc")
+BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc")
+BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc")
+BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc")
+
+BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc")
+BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc")
+BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc")
+BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc")
+BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc")
+BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc")
+BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc")
+BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc")
+BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc")
+BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc")
+
+BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc")
+
+BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc")
+
+BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc")
+
+BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc")
+
+BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc")
+
+BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc")
+
+BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc")
+
+BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc")
+
+BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc")
+BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc")
+BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc")
+BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc")
+BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc")
+BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", "nc")
+
+BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc")
+BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc")
+
+BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc")
+BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc")
+BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc")
+BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc")
+BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc")
+
+BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc")
+
+BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc")
+BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc")
+BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc")
+BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc")
+
+BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc")
+BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc")
+BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc")
+BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc")
+BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc")
+BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc")
+BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc")
+
+BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc")
+BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc")
+
+BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc")
+BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc")
+
+BUILTIN(__builtin_lasx_xvfrintrne_s, "V8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfrintrne_d, "V4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfrintrz_s, "V8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfrintrz_d, "V4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfrintrp_s, "V8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfrintrp_d, "V4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvfrintrm_s, "V8fV8f", "nc")
+BUILTIN(__builtin_lasx_xvfrintrm_d, "V4dV4d", "nc")
+
+BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc")
+
+BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc")
+
+BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc")
+BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc")
+BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc")
+BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc")
+
+BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc")
+BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc")
+BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc")
+
+BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc")
+BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc")
+BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc")
+BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc")
+BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc")
+
+BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc")
+BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc")
+BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc")
+BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc")
+BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc")
+BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc")
+
+BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc")
+BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc")
+BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc")
+
+BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc")
+BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc")
+BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc")
+
+BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc")
+BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc")
+BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc")
+
+BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc")
+BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc")
+BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc")
+
+BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc")
+BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc")
+
+BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc")
+BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc")
+BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc")
+BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc")
+
+BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc")
+BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc")
+BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc")
+
+BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc")
+BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc")
+BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc")
+BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc")
+
+BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc")
+BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc")
+BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc")
+BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc")
+ +BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc") +BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc") +BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc") +BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc") +BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc") +BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc") +BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc") +BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc") +BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc") +BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc") +BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc") +BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc") +BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc") +BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") +BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc") + +BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc") +BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc") +BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc") +BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") +BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc") +BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc") +BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") + +BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc") +BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc") +BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc") +BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc") +BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc") +BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc") +BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc") + +BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") +BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc") +BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc") +BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc") +BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc") +BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc") +BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc") +BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc") +BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc") +BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc") +BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc") +BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc") + +BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc") +BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc") 
+BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc") +BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc") + +BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc") +BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc") +BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc") +BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc") + +BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIi", "nc") +BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIi", "nc") +BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIi", "nc") +BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIi", "nc") + +BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc") +BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc") +BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc") +BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") + +BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc") +BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc") +BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc") +BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") + +BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIi", "nc") +BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIi", "nc") +BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIi", "nc") +BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIi", "nc") + +BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIi", "nc") +BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIi", "nc") +BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIi", "nc") +BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIi", "nc") + +BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc") +BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc") +BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc") +BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc") + +BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIi", "nc") +BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIi", "nc") +BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIi", "nc") +BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIi", "nc") + +BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc") +BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc") +BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc") +BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc") + +BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIi", "nc") +BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIi", "nc") +BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIi", "nc") +BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") + +BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIi", "nc") +BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIi", "nc") +BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIi", "nc") +BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIi", "nc") + +BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIi", "nc") +BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIi", "nc") +BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIi", "nc") +BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIi", "nc") + +BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIi", "nc") +BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIi", "nc") +BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIi", "nc") +BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIi", "nc") + +BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIi", "nc") +BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIi", "nc") +BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIi", "nc") 
+BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIi", "nc") + +BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc") + +BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc") + +BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc") +BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc") +BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc") +BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc") + +BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc") +BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc") +BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc") +BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc") + +BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc") +BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc") + + +// LoongArch BASE + +BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc") +BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc") +BUILTIN(__builtin_loongarch_csrrd_d, "ULiIULi", "nc") +BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc") +BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIULi", "nc") +BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc") +BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIULi", "nc") +BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc") +BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc") +BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc") +BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc") +BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc") +BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc") +BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc") +BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc") +BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc") +BUILTIN(__builtin_loongarch_cacop_d, "viULiLi", "nc") +BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc") +BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc") +BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc") +BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc") +BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc") +BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc") +BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc") +BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc") +BUILTIN(__builtin_loongarch_tlbclr, "v", "nc") +BUILTIN(__builtin_loongarch_tlbflush, "v", "nc") +BUILTIN(__builtin_loongarch_tlbfill, "v", "nc") +BUILTIN(__builtin_loongarch_tlbrd, "v", "nc") +BUILTIN(__builtin_loongarch_tlbwr, "v", "nc") +BUILTIN(__builtin_loongarch_tlbsrch, "v", "nc") +BUILTIN(__builtin_loongarch_syscall, "vIULi", "nc") +BUILTIN(__builtin_loongarch_break, "vIULi", "nc") +BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc") +BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc") +BUILTIN(__builtin_loongarch_dbar, "vIULi", "nc") +BUILTIN(__builtin_loongarch_ibar, "vIULi", "nc") +#undef BUILTIN diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index d8ad9858d8c8..5af3d2099e4b 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -150,6 +150,16 @@ namespace clang { }; } // namespace RISCV + /// LoongArch builtins + namespace LoongArch { + enum { + LastTIBuiltin = clang::Builtin::FirstTSBuiltin-1, +#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#include "clang/Basic/BuiltinsLoongArch.def" + LastTSBuiltin + }; + } // namespace LoongArch + /// Flags to identify the types for overloaded Neon builtins. /// /// These must be kept in sync with the flags in utils/TableGen/NeonEmitter.h. 
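As an aside on the mechanics (a sketch, not part of the patch): the #include of
BuiltinsLoongArch.def is an X-macro expansion, so each BUILTIN(...) entry
becomes one BI-prefixed enumerator and the two sentinels bracket the
target-specific ID range:

    // Roughly what the preprocessor produces inside namespace LoongArch:
    enum {
      LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
      // one BI##ID enumerator per BUILTIN(...) line, in .def order, e.g.:
      BI__builtin_lasx_xvssran_bu_h,
      // ...
      BI__builtin_loongarch_ibar, // the final BASE entry above
      LastTSBuiltin               // sentinel; sizes getTargetBuiltins()
    };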
@@ -341,7 +351,8 @@ namespace clang { PPC::LastTSBuiltin, NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, X86::LastTSBuiltin, VE::LastTSBuiltin, RISCV::LastTSBuiltin, Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin, - SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin}); + SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin, + LoongArch::LastTSBuiltin}); } // end namespace clang. diff --git a/clang/include/clang/Basic/TargetCXXABI.def b/clang/include/clang/Basic/TargetCXXABI.def index 9501cca76094..8ea4beceff04 100644 --- a/clang/include/clang/Basic/TargetCXXABI.def +++ b/clang/include/clang/Basic/TargetCXXABI.def @@ -88,6 +88,12 @@ ITANIUM_CXXABI(GenericAArch64, "aarch64") /// - representation of member function pointers adjusted as in ARM. ITANIUM_CXXABI(GenericMIPS, "mips") +/// The generic LoongArch ABI is a modified version of the Itanium ABI. +/// +/// At the moment, only change from the generic ABI in this case is: +/// - representation of member function pointers adjusted as in ARM. +ITANIUM_CXXABI(GenericLoongArch, "loongarch") + /// The WebAssembly ABI is a modified version of the Itanium ABI. /// /// The changes from the Itanium ABI are: diff --git a/clang/include/clang/Basic/TargetCXXABI.h b/clang/include/clang/Basic/TargetCXXABI.h index e727f85edad7..507cf580e5d7 100644 --- a/clang/include/clang/Basic/TargetCXXABI.h +++ b/clang/include/clang/Basic/TargetCXXABI.h @@ -102,6 +102,9 @@ public: case GenericAArch64: return T.isAArch64(); + case GenericLoongArch: + return T.isLoongArch(); + case GenericMIPS: return T.isMIPS(); @@ -166,6 +169,7 @@ public: case Fuchsia: case GenericARM: case GenericAArch64: + case GenericLoongArch: case GenericMIPS: // TODO: ARM-style pointers to member functions put the discriminator in // the this adjustment, so they don't require functions to have any @@ -250,6 +254,7 @@ public: case GenericItanium: case iOS: // old iOS compilers did not follow this rule case Microsoft: + case GenericLoongArch: case GenericMIPS: case XL: return true; @@ -288,6 +293,7 @@ public: case GenericAArch64: case GenericARM: case iOS: + case GenericLoongArch: case GenericMIPS: case XL: return UseTailPaddingUnlessPOD03; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 97433f169d14..ddb47c0166b7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -184,6 +184,8 @@ def m_x86_Features_Group : OptionGroup<"">, Group, Flags<[CoreOption]>, DocName<"X86">; def m_riscv_Features_Group : OptionGroup<"">, Group, DocName<"RISCV">; +def m_loongarch_Features_Group : OptionGroup<"">, + Group, DocName<"LoongArch">; def m_libc_Group : OptionGroup<"">, Group, Flags<[HelpHidden]>; @@ -3505,6 +3507,9 @@ def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group; def mstrict_align : Flag<["-"], "mstrict-align">, Alias, Flags<[CC1Option,HelpHidden]>, HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">; +def mno_strict_align : Flag<["-"], "mno-strict-align">, Group, + Flags<[CC1Option,HelpHidden]>, Alias, + HelpText<"Allow memory accesses to be unaligned (LoongArch only, same as munaligned-access)">; def mno_thumb : Flag<["-"], "mno-thumb">, Group; def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, HelpText<"Disallow generation of complex IT blocks.">; @@ -3832,6 +3837,14 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg="> def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry 
(x86/SystemZ only)">, Flags<[CC1Option]>, Group, MarshallingInfoFlag>; +def mlsx : Flag<["-"], "mlsx">, Group, + HelpText<"Use LARCH Loongson LSX instructions.">; +def mno_lsx : Flag<["-"], "mno-lsx">, Group, + HelpText<"Disable LARCH Loongson LSX instructions.">; +def mlasx : Flag<["-"], "mlasx">, Group, + HelpText<"Enable LARCH Loongson LASX instructions.">; +def mno_lasx : Flag<["-"], "mno-lasx">, Group, + HelpText<"Disable LARCH Loongson LASX instructions.">; def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">, Flags<[CC1Option]>, Group, MarshallingInfoFlag>; @@ -3988,6 +4001,12 @@ def mno_relax_pic_calls : Flag<["-"], "mno-relax-pic-calls">, Group, HelpText<"Do not produce relaxation hints for linkers to try optimizing PIC " "call sequences into direct calls (MIPS only)">, Flags<[HelpHidden]>; +def mfix_loongson3_llsc : Flag<["-"], "mfix-loongson3-llsc">, + Group, + HelpText<"Workaround loongson3 llsc erratum (MIPS only)">; +def mno_fix_loongson3_llsc : Flag<["-"], "mno-fix-loongson3-llsc">, + Group, + HelpText<"Don't Workaround loongson3 llsc erratum (MIPS only)">; def mglibc : Flag<["-"], "mglibc">, Group, Flags<[HelpHidden]>; def muclibc : Flag<["-"], "muclibc">, Group, Flags<[HelpHidden]>; def module_file_info : Flag<["-"], "module-file-info">, Flags<[NoXarchOption,CC1Option]>, Group, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 681a76dfa56a..5249fe601b9e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -13117,6 +13117,9 @@ private: bool CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum); bool CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall); + bool CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + unsigned BuiltinID, + CallExpr *TheCall); bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap index 01bce77718b3..a21e2beebf37 100644 --- a/clang/include/clang/module.modulemap +++ b/clang/include/clang/module.modulemap @@ -42,6 +42,7 @@ module Clang_Basic { textual header "Basic/BuiltinsHexagon.def" textual header "Basic/BuiltinsHexagonDep.def" textual header "Basic/BuiltinsHexagonMapCustomDep.def" + textual header "Basic/BuiltinsLoongArch.def" textual header "Basic/BuiltinsMips.def" textual header "Basic/BuiltinsNEON.def" textual header "Basic/BuiltinsNVPTX.def" diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index cfd7bf604542..c6e1e9eed08f 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -900,6 +900,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { case TargetCXXABI::iOS: case TargetCXXABI::WatchOS: case TargetCXXABI::GenericAArch64: + case TargetCXXABI::GenericLoongArch: case TargetCXXABI::GenericMIPS: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: @@ -11747,6 +11748,7 @@ MangleContext *ASTContext::createMangleContext(const TargetInfo *T) { case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: + case TargetCXXABI::GenericLoongArch: case TargetCXXABI::GenericMIPS: case TargetCXXABI::iOS: case TargetCXXABI::WebAssembly: diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 3e052c0cf995..5d197f59ac4f 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ 
b/clang/lib/Basic/CMakeLists.txt @@ -82,6 +82,7 @@ add_clang_library(clangBasic Targets/Hexagon.cpp Targets/Lanai.cpp Targets/Le64.cpp + Targets/LoongArch.cpp Targets/M68k.cpp Targets/MSP430.cpp Targets/Mips.cpp diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index d2eac5cae7b6..dcca853e2de8 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -24,6 +24,7 @@ #include "Targets/Hexagon.h" #include "Targets/Lanai.h" #include "Targets/Le64.h" +#include "Targets/LoongArch.h" #include "Targets/M68k.h" #include "Targets/MSP430.h" #include "Targets/Mips.h" @@ -344,6 +345,30 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, case llvm::Triple::le64: return new Le64TargetInfo(Triple, Opts); +#if 0 + //TODO: support it in future + case llvm::Triple::loongarch32: + switch (os) { + case llvm::Triple::Linux: + return new LinuxTargetInfo(Triple, Opts); + default: + return new LoongArchTargetInfo(Triple, Opts); + } +#endif + + case llvm::Triple::loongarch64: + switch (os) { + case llvm::Triple::Linux: + switch (Triple.getEnvironment()) { + default: + return new LinuxTargetInfo(Triple, Opts); + case llvm::Triple::OpenHOS: + return new OHOSTargetInfo(Triple, Opts); + } + default: + return new LoongArchTargetInfo(Triple, Opts); + } + case llvm::Triple::ppc: if (Triple.isOSDarwin()) return new DarwinPPC32TargetInfo(Triple, Opts); diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp new file mode 100644 index 000000000000..7f5632327b4d --- /dev/null +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -0,0 +1,184 @@ +//===--- LoongArch.cpp - Implement LoongArch target feature support -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements LoongArch TargetInfo objects. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArch.h"
+#include "Targets.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/MacroBuilder.h"
+#include "clang/Basic/TargetBuiltins.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace clang;
+using namespace clang::targets;
+
+const Builtin::Info LoongArchTargetInfo::BuiltinInfo[] = {
+#define BUILTIN(ID, TYPE, ATTRS)                                               \
+  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
+#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
+  {#ID, TYPE, ATTRS, HEADER, ALL_LANGUAGES, nullptr},
+#include "clang/Basic/BuiltinsLoongArch.def"
+};
+
+bool LoongArchTargetInfo::processorSupportsGPR64() const {
+  return llvm::StringSwitch<bool>(CPU)
+      .Case("la264", true)
+      .Case("la364", true)
+      .Case("la464", true)
+      .Default(false);
+}
+
+static constexpr llvm::StringLiteral ValidCPUNames[] = {
+    {"la264"}, {"la364"}, {"la464"}};
+
+bool LoongArchTargetInfo::isValidCPUName(StringRef Name) const {
+  return llvm::find(ValidCPUNames, Name) != std::end(ValidCPUNames);
+}
+
+void LoongArchTargetInfo::fillValidCPUList(
+    SmallVectorImpl<StringRef> &Values) const {
+  Values.append(std::begin(ValidCPUNames), std::end(ValidCPUNames));
+}
+
+void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
+                                           MacroBuilder &Builder) const {
+  Builder.defineMacro("__loongarch__");
+  unsigned GRLen = getRegisterWidth();
+  Builder.defineMacro("__loongarch_grlen", Twine(GRLen));
+  if (GRLen == 64)
+    Builder.defineMacro("__loongarch64");
+
+  if (ABI == "lp32") {
+    Builder.defineMacro("__loongarch32");
+  } else {
+    Builder.defineMacro("__loongarch_lp64");
+  }
+
+  if (ABI == "lp32") {
+    Builder.defineMacro("_ABILP32", "1");
+  } else if (ABI == "lpx32") {
+    Builder.defineMacro("_ABILPX32", "2");
+  } else if (ABI == "lp64") {
+    Builder.defineMacro("_ABILP64", "3");
+    Builder.defineMacro("_LOONGARCH_SIM", "_ABILP64");
+  } else
+    llvm_unreachable("Invalid ABI.");
+
+  Builder.defineMacro("__REGISTER_PREFIX__", "");
+
+  switch (FloatABI) {
+  case HardFloat:
+    Builder.defineMacro("__loongarch_hard_float", Twine(1));
+    Builder.defineMacro(IsSingleFloat ? "__loongarch_single_float"
+                                      : "__loongarch_double_float",
+                        Twine(1));
+    break;
+  case SoftFloat:
+    Builder.defineMacro("__loongarch_soft_float", Twine(1));
+    break;
+  }
+
+  switch (FPMode) {
+  case FP32:
+    Builder.defineMacro("__loongarch_fpr", Twine(32));
+    Builder.defineMacro("__loongarch_frlen", Twine(32));
+    break;
+  case FP64:
+    Builder.defineMacro("__loongarch_fpr", Twine(64));
+    Builder.defineMacro("__loongarch_frlen", Twine(64));
+    break;
+  }
+
+  if (HasLSX)
+    Builder.defineMacro("__loongarch_sx", Twine(1));
+
+  if (HasLASX)
+    Builder.defineMacro("__loongarch_asx", Twine(1));
+
+  Builder.defineMacro("_LOONGARCH_SZPTR", Twine(getPointerWidth(0)));
+  Builder.defineMacro("_LOONGARCH_SZINT", Twine(getIntWidth()));
+  Builder.defineMacro("_LOONGARCH_SZLONG", Twine(getLongWidth()));
+
+  Builder.defineMacro("_LOONGARCH_ARCH", "\"" + CPU + "\"");
+  Builder.defineMacro("_LOONGARCH_ARCH_" + StringRef(CPU).upper());
+
+  Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
+  Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
+  Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
+
+  // 32-bit LoongArch processors don't have the ll.d/sc.d instructions
+  // found in 64-bit processors. In the case of lp32 on a 64-bit processor,
+  // the instructions exist, but using them violates the ABI since they
+  // require 64-bit GPRs and LP32 only supports 32-bit GPRs.
+  if (ABI == "lpx32" || ABI == "lp64")
+    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
+}
+
+bool LoongArchTargetInfo::hasFeature(StringRef Feature) const {
+  return llvm::StringSwitch<bool>(Feature)
+      .Case("fp64", FPMode == FP64)
+      .Case("lsx", HasLSX)
+      .Case("lasx", HasLASX)
+      .Default(false);
+}
+
+ArrayRef<Builtin::Info> LoongArchTargetInfo::getTargetBuiltins() const {
+  return llvm::makeArrayRef(BuiltinInfo, clang::LoongArch::LastTSBuiltin -
+                                             Builtin::FirstTSBuiltin);
+}
+
+bool LoongArchTargetInfo::validateTarget(DiagnosticsEngine &Diags) const {
+  // FIXME: It's valid to use LP32 on a 64-bit CPU but the backend can't handle
+  // this yet. It's better to fail here than on the backend assertion.
+  if (processorSupportsGPR64() && ABI == "lp32") {
+    Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU;
+    return false;
+  }
+
+  // 64-bit ABIs require a 64-bit CPU.
+  if (!processorSupportsGPR64() && (ABI == "lpx32" || ABI == "lp64")) {
+    Diags.Report(diag::err_target_unsupported_abi) << ABI << CPU;
+    return false;
+  }
+
+  // FIXME: It's valid to use lp32 on a loongarch64 triple but the backend
+  // can't handle this yet. It's better to fail here than on the
+  // backend assertion.
+  if (getTriple().isLoongArch64() && ABI == "lp32") {
+    Diags.Report(diag::err_target_unsupported_abi_for_triple)
+        << ABI << getTriple().str();
+    return false;
+  }
+
+  // FIXME: It's valid to use lpx32/lp64 on a loongarch32 triple but the
+  // backend can't handle this yet. It's better to fail here than on the
+  // backend assertion.
+  if (getTriple().isLoongArch32() && (ABI == "lpx32" || ABI == "lp64")) {
+    Diags.Report(diag::err_target_unsupported_abi_for_triple)
+        << ABI << getTriple().str();
+    return false;
+  }
+
+  // The -mfp32 option is incompatible with the lpx32/lp64 ABIs.
+  if (FPMode != FP64 && !IsSingleFloat &&
+      (ABI == "lpx32" || ABI == "lp64")) {
+    Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << ABI;
+    return false;
+  }
+
+  if (FPMode != FP64 && (CPU == "la264" || CPU == "la364" || CPU == "la464")) {
+    Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfp32" << CPU;
+    return false;
+  }
+
+  return true;
+}
diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h
new file mode 100644
index 000000000000..ef18cc887e87
--- /dev/null
+++ b/clang/lib/Basic/Targets/LoongArch.h
@@ -0,0 +1,402 @@
+//===--- LoongArch.h - Declare LoongArch target feature support --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares LoongArch TargetInfo objects.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H +#define LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H + +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TargetOptions.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Compiler.h" + +namespace clang { +namespace targets { + +class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { + void setDataLayout() { + StringRef Layout; + + if (ABI == "lp32") + Layout = "m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"; + else if (ABI == "lpx32") + Layout = "m:e-p:32:32-i8:8:32-i16:16:32-i64:64-n32:64-S128"; + else if (ABI == "lp64") + Layout = "m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"; + else + llvm_unreachable("Invalid ABI"); + + resetDataLayout(("e-" + Layout).str()); + } + + static const Builtin::Info BuiltinInfo[]; + std::string CPU; + bool IsSingleFloat; + enum LoongArchFloatABI { HardFloat, SoftFloat } FloatABI; + bool HasLSX; + bool HasLASX; + +protected: + enum FPModeEnum { FP32, FP64 } FPMode; + std::string ABI; + +public: + LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) + : TargetInfo(Triple), IsSingleFloat(false), FloatABI(HardFloat), + HasLSX(false), HasLASX(false), FPMode(FP64) { + TheCXXABI.set(TargetCXXABI::GenericLoongArch); + + if (Triple.isLoongArch32()) + setABI("lp32"); + else if (Triple.getEnvironment() == llvm::Triple::GNUABILPX32) + setABI("lpx32"); + else + setABI("lp64"); + + // Currently, CPU only supports 'la464' in LA. + if ( ABI == "lp64") + CPU = "la464"; + } + + bool processorSupportsGPR64() const; + + StringRef getABI() const override { return ABI; } + + bool setABI(const std::string &Name) override { + if (Name == "lp32") { + setLP32ABITypes(); + ABI = Name; + return true; + } + + if (Name == "lpx32") { + //setLPX32ABITypes(); + //ABI = Name; + //return true; + //TODO: implement + return false; + } + if (Name == "lp64") { + setLP64ABITypes(); + ABI = Name; + return true; + } + return false; + } + + void setLP32ABITypes() { + Int64Type = SignedLongLong; + IntMaxType = Int64Type; + LongDoubleFormat = &llvm::APFloat::IEEEdouble(); + LongDoubleWidth = LongDoubleAlign = 64; + LongWidth = LongAlign = 32; + MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32; + PointerWidth = PointerAlign = 32; + PtrDiffType = SignedInt; + SizeType = UnsignedInt; + SuitableAlign = 64; + } + + void setLPX32LP64ABITypes() { + LongDoubleWidth = LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); + if (getTriple().isOSFreeBSD()) { + LongDoubleWidth = LongDoubleAlign = 64; + LongDoubleFormat = &llvm::APFloat::IEEEdouble(); + } + MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; + SuitableAlign = 128; + } + + void setLP64ABITypes() { + setLPX32LP64ABITypes(); + if (getTriple().isOSOpenBSD()) { + Int64Type = SignedLongLong; + } else { + Int64Type = SignedLong; + } + IntMaxType = Int64Type; + LongWidth = LongAlign = 64; + PointerWidth = PointerAlign = 64; + PtrDiffType = SignedLong; + SizeType = UnsignedLong; + } + + void setLPX32ABITypes() { + setLPX32LP64ABITypes(); + Int64Type = SignedLongLong; + IntMaxType = Int64Type; + LongWidth = LongAlign = 32; + PointerWidth = PointerAlign = 32; + PtrDiffType = SignedInt; + SizeType = UnsignedInt; + } + + bool isValidCPUName(StringRef Name) const override; + void fillValidCPUList(SmallVectorImpl &Values) const override; + + bool setCPU(const std::string &Name) override { + CPU = Name; + return isValidCPUName(Name); + } + + const std::string &getCPU() 
const { return CPU; } + bool + initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, + StringRef CPU, + const std::vector &FeaturesVec) const override { +#if 0 + if (CPU.empty()) + CPU = getCPU(); + Features[CPU] = true; +#else +// if (CPU == "la464") +// Features["loongarch64"] = true; + +//FIXME: we need this? +// if (CPU == "la464") +// Features["64bit"] = true; +#endif + return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); + } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override; + + ArrayRef getTargetBuiltins() const override; + + bool hasFeature(StringRef Feature) const override; + + bool hasBitIntType() const override { return true; } + + BuiltinVaListKind getBuiltinVaListKind() const override { + return TargetInfo::VoidPtrBuiltinVaList; + } + + ArrayRef getGCCRegNames() const override { + static const char *const GCCRegNames[] = { + // CPU register names + // Must match second column of GCCRegAliases + "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", + "$r10", "$r11", "$r12", "$r13", "$r14", "$r15", "$r16", "$r17", "$r18", + "$r19", "$r20", "$r21", "$r22", "$r23", "$r24", "$r25", "$r26", "$r27", + "$r28", "$r29", "$r30", "$r31", + // Floating point register names + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", + "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", + "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", + "$f28", "$f29", "$f30", "$f31", + // condition register names + "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7", + // LSX register names + "$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8", + "$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16", + "$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24", + "$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31", + // LASX register names + "$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8", + "$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16", + "$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24", + "$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31" + + }; + return llvm::makeArrayRef(GCCRegNames); + } + + bool validateAsmConstraint(const char *&Name, + TargetInfo::ConstraintInfo &Info) const override { + switch (*Name) { + default: + return false; + case 'r': // CPU registers. + case 'f': // floating-point registers. + Info.setAllowsRegister(); + return true; + case 'l': // Signed 16-bit constant + case 'I': // Signed 12-bit constant + case 'K': // Unsigned 12-bit constant + case 'J': // Integer 0 + case 'G': // Floating-point 0 + return true; + case 'm': // Memory address with 12-bit offset + case 'R': // An address that can be used in a non-macro load or store + Info.setAllowsMemory(); + return true; + case 'Z': + if (Name[1] == 'C' // Memory address with 16-bit and 4 bytes aligned offset + || Name[1] == 'B' ) { // Memory address with 0 offset + Info.setAllowsMemory(); + Name++; // Skip over 'Z'. + return true; + } + return false; + } + } + + std::string convertConstraint(const char *&Constraint) const override { + std::string R; + switch (*Constraint) { + case 'Z': // Two-character constraint; add "^" hint for later parsing. 
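+      // e.g. (illustrative) the "ZC" memory constraint is returned as "^ZC",
+      // so the two-letter constraint survives as a single token downstream.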
+ if (Constraint[1] == 'C' || Constraint[1] == 'B') { + R = std::string("^") + std::string(Constraint, 2); + Constraint++; + return R; + } + break; + } + return TargetInfo::convertConstraint(Constraint); + } + + const char *getClobbers() const override { +#if 0 + // In GCC, $1 is not widely used in generated code (it's used only in a few + // specific situations), so there is no real need for users to add it to + // the clobbers list if they want to use it in their inline assembly code. + // + // In LLVM, $1 is treated as a normal GPR and is always allocatable during + // code generation, so using it in inline assembly without adding it to the + // clobbers list can cause conflicts between the inline assembly code and + // the surrounding generated code. + // + // Another problem is that LLVM is allowed to choose $1 for inline assembly + // operands, which will conflict with the ".set at" assembler option (which + // we use only for inline assembly, in order to maintain compatibility with + // GCC) and will also conflict with the user's usage of $1. + // + // The easiest way to avoid these conflicts and keep $1 as an allocatable + // register for generated code is to automatically clobber $1 for all inline + // assembly code. + // + // FIXME: We should automatically clobber $1 only for inline assembly code + // which actually uses it. This would allow LLVM to use $1 for inline + // assembly operands if the user's assembly code doesn't use it. + return "~{$1}"; +#endif + return ""; + } + + bool handleTargetFeatures(std::vector &Features, + DiagnosticsEngine &Diags) override { + IsSingleFloat = false; + FloatABI = HardFloat; + FPMode = FP64; + + for (const auto &Feature : Features) { + if (Feature == "+single-float") + IsSingleFloat = true; + else if (Feature == "+soft-float") + FloatABI = SoftFloat; + else if (Feature == "+lsx") + HasLSX = true; + else if (Feature == "+lasx") { + HasLASX = true; + HasLSX = true; + } else if (Feature == "+fp64") + FPMode = FP64; + else if (Feature == "-fp64") + FPMode = FP32; + } + + setDataLayout(); + + return true; + } + + int getEHDataRegisterNumber(unsigned RegNo) const override { + if (RegNo == 0) + return 4; + if (RegNo == 1) + return 5; + return -1; + } + + bool isCLZForZeroUndef() const override { return false; } + + ArrayRef getGCCRegAliases() const override { + static const TargetInfo::GCCRegAlias GCCRegAliases[] = { + {{"zero", "$zero", "r0", "$0"}, "$r0"}, + {{"ra", "$ra", "r1", "$1"}, "$r1"}, + {{"tp", "$tp", "r2", "$2"}, "$r2"}, + {{"sp", "$sp", "r3", "$3"}, "$r3"}, + {{"a0", "$a0", "r4", "$4", "v0"}, "$r4"}, + {{"a1", "$a1", "r5", "$5", "v1"}, "$r5"}, + {{"a2", "$a2", "r6", "$6"}, "$r6"}, + {{"a3", "$a3", "r7", "$7"}, "$r7"}, + {{"a4", "$a4", "r8", "$8"}, "$r8"}, + {{"a5", "$a5", "r9", "$9"}, "$r9"}, + {{"a6", "$a6", "r10", "$10"}, "$r10"}, + {{"a7", "$a7", "r11", "$11"}, "$r11"}, + {{"t0", "$t0", "r12", "$12"}, "$r12"}, + {{"t1", "$t1", "r13", "$13"}, "$r13"}, + {{"t2", "$t2", "r14", "$14"}, "$r14"}, + {{"t3", "$t3", "r15", "$15"}, "$r15"}, + {{"t4", "$t4", "r16", "$16"}, "$r16"}, + {{"t5", "$t5", "r17", "$17"}, "$r17"}, + {{"t6", "$t6", "r18", "$18"}, "$r18"}, + {{"t7", "$t7", "r19", "$19"}, "$r19"}, + {{"t8", "$t8", "r20", "$20"}, "$r20"}, + //{{"x", "$x", "r21", "$21"}, "$r21"}, + {{"fp", "$fp", "r22", "$22"}, "$r22"}, + {{"s0", "$s0", "r23", "$23"}, "$r23"}, + {{"s1", "$s1", "r24", "$24"}, "$r24"}, + {{"s2", "$s2", "r25", "$25"}, "$r25"}, + {{"s3", "$s3", "r26", "$26"}, "$r26"}, + {{"s4", "$s4", "r27", "$27"}, "$r27"}, + {{"s5", "$s5", 
"r28", "$28"}, "$r28"}, + {{"s6", "$s6", "r29", "$29"}, "$r29"}, + {{"s7", "$s7", "r30", "$30"}, "$r30"}, + {{"s8", "$s8", "r31", "$31"}, "$r31"}, + {{"fa0", "$fa0", "f0"}, "$f0"}, + {{"fa1", "$fa1", "f1"}, "$f1"}, + {{"fa2", "$fa2", "f2"}, "$f2"}, + {{"fa3", "$fa3", "f3"}, "$f3"}, + {{"fa4", "$fa4", "f4"}, "$f4"}, + {{"fa5", "$fa5", "f5"}, "$f5"}, + {{"fa6", "$fa6", "f6"}, "$f6"}, + {{"fa7", "$fa7", "f7"}, "$f7"}, + {{"ft0", "$ft0", "f8"}, "$f8"}, + {{"ft1", "$ft1", "f9"}, "$f9"}, + {{"ft2", "$ft2", "f10"}, "$f10"}, + {{"ft3", "$ft3", "f11"}, "$f11"}, + {{"ft4", "$ft4", "f12"}, "$f12"}, + {{"ft5", "$ft5", "f13"}, "$f13"}, + {{"ft6", "$ft6", "f14"}, "$f14"}, + {{"ft7", "$ft7", "f15"}, "$f15"}, + {{"ft8", "$ft8", "f16"}, "$f16"}, + {{"ft9", "$ft9", "f17"}, "$f17"}, + {{"ft10", "$ft10", "f18"}, "$f18"}, + {{"ft11", "$ft11", "f19"}, "$f19"}, + {{"ft12", "$ft12", "f20"}, "$f20"}, + {{"ft13", "$ft13", "f21"}, "$f21"}, + {{"ft14", "$ft14", "f22"}, "$f22"}, + {{"ft15", "$ft15", "f23"}, "$f23"}, + {{"fs0", "$fs0", "f24"}, "$f24"}, + {{"fs1", "$fs1", "f25"}, "$f25"}, + {{"fs2", "$fs2", "f26"}, "$f26"}, + {{"fs3", "$fs3", "f27"}, "$f27"}, + {{"fs4", "$fs4", "f28"}, "$f28"}, + {{"fs5", "$fs5", "f29"}, "$f29"}, + {{"fs6", "$fs6", "f30"}, "$f30"}, + {{"fs7", "$fs7", "f31"}, "$f31"}, + }; + return llvm::makeArrayRef(GCCRegAliases); + } + + bool hasInt128Type() const override { + return (ABI == "lpx32" || ABI == "lp64") || getTargetOpts().ForceEnableInt128; + } + + bool validateTarget(DiagnosticsEngine &Diags) const override; +}; +} // namespace targets +} // namespace clang + +#endif // LLVM_CLANG_LIB_BASIC_TARGETS_LOONGARCH_H diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a6144180f9c9..183c9a3c40fc 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -87,6 +87,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: case TargetCXXABI::WatchOS: + case TargetCXXABI::GenericLoongArch: case TargetCXXABI::GenericMIPS: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index fc2ff15a6acd..eebd6fb653f0 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -533,6 +533,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, /*UseARMGuardVarABI=*/true); + case TargetCXXABI::GenericLoongArch: + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); + case TargetCXXABI::GenericMIPS: return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 3e667abc6f1c..77956df9f981 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -539,12 +539,17 @@ TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */ } -static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); +static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays, + bool AsIfNoUniqueAddr = false); /// isEmptyField - Return true iff a the field is "empty", that is it -/// is an unnamed bit-field or an (array of) empty record(s). +/// is an unnamed bit-field or an (array of) empty record(s). 
If
+/// AsIfNoUniqueAddr is true, then C++ record fields are considered empty if
+/// the [[no_unique_address]] attribute would have made them empty.
+/// Note: [[no_unique_address]] is a C++20 feature supported since Clang 9.
+/// Refer to https://clang.llvm.org/cxx_status.html.
 static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
-                         bool AllowArrays) {
+                         bool AllowArrays, bool AsIfNoUniqueAddr = false) {
   if (FD->isUnnamedBitfield())
     return true;

@@ -578,16 +583,21 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
   // not arrays of records, so we must also check whether we stripped off an
   // array type above.
   if (isa<CXXRecordDecl>(RT->getDecl()) &&
-      (WasArray || !FD->hasAttr<NoUniqueAddressAttr>()))
+      (WasArray || (!AsIfNoUniqueAddr && !FD->hasAttr<NoUniqueAddressAttr>())))
     return false;

-  return isEmptyRecord(Context, FT, AllowArrays);
+  return isEmptyRecord(Context, FT, AllowArrays, AsIfNoUniqueAddr);
 }

 /// isEmptyRecord - Return true iff a structure contains only empty
 /// fields. Note that a structure with a flexible array member is not
-/// considered empty.
-static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
+/// considered empty. If AsIfNoUniqueAddr is true, then C++ record fields are
+/// considered empty if the [[no_unique_address]] attribute would have made
+/// them empty.
+/// Note: [[no_unique_address]] is a C++20 feature supported since Clang 9.
+/// Refer to https://clang.llvm.org/cxx_status.html.
+static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays,
+                          bool AsIfNoUniqueAddr) {
   const RecordType *RT = T->getAs<RecordType>();
   if (!RT)
     return false;
@@ -598,11 +608,11 @@ static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
   // If this is a C++ record, check the bases first.
   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
     for (const auto &I : CXXRD->bases())
-      if (!isEmptyRecord(Context, I.getType(), true))
+      if (!isEmptyRecord(Context, I.getType(), true, AsIfNoUniqueAddr))
         return false;

   for (const auto *I : RD->fields())
-    if (!isEmptyField(Context, I, AllowArrays))
+    if (!isEmptyField(Context, I, AllowArrays, AsIfNoUniqueAddr))
       return false;
   return true;
 }
@@ -11327,6 +11337,569 @@ public:
 };
 } // namespace

+//===----------------------------------------------------------------------===//
+// LoongArch ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+class LoongArchABIInfo : public DefaultABIInfo {
+private:
+  // Size of the integer ('r') registers in bits.
+  unsigned GRLen;
+  // Size of the floating-point ('f') registers in bits. Note that the target
+  // ISA might have a wider FRLen than the selected ABI.
+  unsigned FRLen;
+  static const int NumArgGPRs = 8;
+  static const int NumArgFPRs = 8;
+  bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
+                                      llvm::Type *&Field1Ty,
+                                      CharUnits &Field1Off,
+                                      llvm::Type *&Field2Ty,
+                                      CharUnits &Field2Off) const;
+
+public:
+  LoongArchABIInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen, unsigned FRLen)
+      : DefaultABIInfo(CGT), GRLen(GRLen), FRLen(FRLen) {}
+
+  // DefaultABIInfo's classifyReturnType and classifyArgumentType are
+  // non-virtual, but computeInfo is virtual, so we overload it.
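+  // Up to NumArgGPRs integer registers and NumArgFPRs floating-point
+  // registers (eight of each, declared above) take part in argument passing;
+  // computeInfo threads the remaining counts through classifyArgumentType.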
+ void computeInfo(CGFunctionInfo &FI) const override; + + ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, + int &ArgFPRsLeft) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + + uint64_t MinABIStackAlignInBytes = 8; + uint64_t StackAlignInBytes = 16; + llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; + llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; + void CoerceToIntArgs(uint64_t TySize, + SmallVectorImpl &ArgList) const; + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + ABIArgInfo extendType(QualType Ty) const; + + bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, llvm::Type *&Field2Ty, + CharUnits &Field2Off, int &NeededArgGPRs, + int &NeededArgFPRs) const; + ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, + CharUnits Field1Off, + llvm::Type *Field2Ty, + CharUnits Field2Off) const; +}; +} // end anonymous namespace + +void LoongArchABIInfo::computeInfo(CGFunctionInfo &FI) const { + QualType RetTy = FI.getReturnType(); + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(RetTy); + + // IsRetIndirect is true if classifyArgumentType indicated the value should + // be passed indirect or if the type size is greater than 2*grlen. + bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect || + getContext().getTypeSize(RetTy) > (2 * GRLen); + + // We must track the number of GPRs used in order to conform to the LoongArch + // ABI, as integer scalars passed in registers should have signext/zeroext + // when promoted, but are anyext if passed on the stack. As GPR usage is + // different for variadic arguments, we must also track whether we are + // examining a vararg or not. + int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; + int ArgFPRsLeft = FRLen ? NumArgFPRs : 0; + int NumFixedArgs = FI.getNumRequiredArgs(); + + int ArgNum = 0; + for (auto &ArgInfo : FI.arguments()) { + bool IsFixed = ArgNum < NumFixedArgs; + ArgInfo.info = + classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); + ArgNum++; + } +} + +// Returns true if the struct is a potential candidate for the floating point +// calling convention. If this function returns true, the caller is +// responsible for checking that if there is only a single field then that +// field is a float. +bool LoongArchABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const { + bool IsInt = Ty->isIntegralOrEnumerationType(); + bool IsFloat = Ty->isRealFloatingType(); + + if (IsInt || IsFloat) { + uint64_t Size = getContext().getTypeSize(Ty); + if (IsInt && Size > GRLen) + return false; + // Can't be eligible if larger than the FP registers. Half precision isn't + // currently supported on LoongArch and the ABI hasn't been confirmed, so + // default to the integer ABI in that case. + if (IsFloat && (Size > FRLen || Size < 32)) + return false; + // Can't be eligible if an integer type was already found (int+int pairs + // are not eligible). 
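+    // Illustrative consequences: struct { int a; int b; } stays on the
+    // integer calling convention, while struct { int a; float f; } can still
+    // be flattened to one GPR plus one FPR.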
+ if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) + return false; + if (!Field1Ty) { + Field1Ty = CGT.ConvertType(Ty); + Field1Off = CurOff; + return true; + } + if (!Field2Ty) { + Field2Ty = CGT.ConvertType(Ty); + Field2Off = CurOff; + return true; + } + return false; + } + + if (auto CTy = Ty->getAs()) { + if (Field1Ty) + return false; + QualType EltTy = CTy->getElementType(); + if (getContext().getTypeSize(EltTy) > FRLen) + return false; + Field1Ty = CGT.ConvertType(EltTy); + Field1Off = CurOff; + Field2Ty = Field1Ty; + Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); + return true; + } + + if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { + uint64_t ArraySize = ATy->getSize().getZExtValue(); + QualType EltTy = ATy->getElementType(); + // Non-zero-length arrays of empty records make the struct ineligible to be + // passed via FARs in C++. + if (const auto *RTy = EltTy->getAs()) { + if (ArraySize != 0 && isa(RTy->getDecl()) && + isEmptyRecord(getContext(), EltTy, true, true)) + return false; + } + CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); + for (uint64_t i = 0; i < ArraySize; ++i) { + bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, + Field1Off, Field2Ty, Field2Off); + if (!Ret) + return false; + CurOff += EltSize; + } + return true; + } + + if (const auto *RTy = Ty->getAs()) { + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are not eligible for the FP calling convention. + if (getRecordArgABI(Ty, CGT.getCXXABI())) + return false; + const RecordDecl *RD = RTy->getDecl(); + if (isEmptyRecord(getContext(), Ty, true, true) && + (!RD->isUnion() || !isa(RD))) + return true; + // Unions aren't eligible unless they're empty in C (which is caught above). + if (RD->isUnion()) + return false; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + // If this is a C++ record, check the bases first. + if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { + for (const CXXBaseSpecifier &B : CXXRD->bases()) { + const auto *BDecl = + cast(B.getType()->castAs()->getDecl()); + CharUnits BaseOff = Layout.getBaseClassOffset(BDecl); + bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff, + Field1Ty, Field1Off, Field2Ty, + Field2Off); + if (!Ret) + return false; + } + } + int ZeroWidthBitFieldCount = 0; + for (const FieldDecl *FD : RD->fields()) { + uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); + QualType QTy = FD->getType(); + if (FD->isBitField()) { + unsigned BitWidth = FD->getBitWidthValue(getContext()); + // Allow a bitfield with a type greater than GRLen as long as the + // bitwidth is GRLen or less. + if (getContext().getTypeSize(QTy) > GRLen && BitWidth <= GRLen) + QTy = getContext().getIntTypeForBitwidth(GRLen, false); + if (BitWidth == 0) { + ZeroWidthBitFieldCount++; + continue; + } + } + + bool Ret = detectFPCCEligibleStructHelper( + QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), + Field1Ty, Field1Off, Field2Ty, Field2Off); + if (!Ret) + return false; + + // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp + // or int+fp structs, but are ignored for a struct with an fp field and + // any number of zero-width bitfields. 
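+      // For example (illustrative): struct { float f; int : 0; float g; }
+      // falls back to the integer convention, while
+      // struct { float f; int : 0; } remains a single-fp candidate.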
+ if (Field2Ty && ZeroWidthBitFieldCount > 0) + return false; + } + return Field1Ty != nullptr; + } + + return false; +} + +// Determine if a struct is eligible for passing according to the floating +// point calling convention (i.e., when flattened it contains a single fp +// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and +// NeededArgGPRs are incremented appropriately. +bool LoongArchABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off, + int &NeededArgGPRs, + int &NeededArgFPRs) const { + Field1Ty = nullptr; + Field2Ty = nullptr; + NeededArgGPRs = 0; + NeededArgFPRs = 0; + if (!detectFPCCEligibleStructHelper( + Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off)) + return false; + if (!Field1Ty) + return false; + // Not really a candidate if we have a single int but no float. + if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) + return false; + if (Field1Ty && Field1Ty->isFloatingPointTy()) + NeededArgFPRs++; + else if (Field1Ty) + NeededArgGPRs++; + if (Field2Ty && Field2Ty->isFloatingPointTy()) + NeededArgFPRs++; + else if (Field2Ty) + NeededArgGPRs++; + return true; +} + +// Call getCoerceAndExpand for the two-element flattened struct described by +// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an +// appropriate coerceToType and unpaddedCoerceToType. +ABIArgInfo LoongArchABIInfo::coerceAndExpandFPCCEligibleStruct( + llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, + CharUnits Field2Off) const { + SmallVector CoerceElts; + SmallVector UnpaddedCoerceElts; + if (!Field1Off.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); + + CoerceElts.push_back(Field1Ty); + UnpaddedCoerceElts.push_back(Field1Ty); + + if (!Field2Ty) { + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), + UnpaddedCoerceElts[0]); + } + + CharUnits Field2Align = + CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); + CharUnits Field1End = + Field1Off + + CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); + CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align); + + CharUnits Padding = CharUnits::Zero(); + if (Field2Off > Field2OffNoPadNoPack) + Padding = Field2Off - Field2OffNoPadNoPack; + else if (Field2Off != Field2Align && Field2Off > Field1End) + Padding = Field2Off - Field1End; + + bool IsPacked = !Field2Off.isMultipleOf(Field2Align); + + if (!Padding.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); + + CoerceElts.push_back(Field2Ty); + UnpaddedCoerceElts.push_back(Field2Ty); + + auto CoerceToType = + llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); + auto UnpaddedCoerceToType = + llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); + + return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); +} + +void LoongArchABIInfo::CoerceToIntArgs( + uint64_t TySize, SmallVectorImpl &ArgList) const { + llvm::IntegerType *IntTy = + llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); + + // Add (TySize / MinABIStackAlignInBytes) args of IntTy. + for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) + ArgList.push_back(IntTy); + + // If necessary, add one more integer type to ArgList. 
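+  // e.g. (illustrative) a 12-byte aggregate with the default 8-byte
+  // MinABIStackAlignInBytes coerces to { i64, i32 }: one full i64 chunk plus
+  // a 32-bit remainder.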
+  unsigned R = TySize % (MinABIStackAlignInBytes * 8);
+
+  if (R)
+    ArgList.push_back(llvm::IntegerType::get(getVMContext(), R));
+}
+
+llvm::Type *LoongArchABIInfo::HandleAggregates(QualType Ty,
+                                               uint64_t TySize) const {
+  SmallVector<llvm::Type *, 8> ArgList, IntArgList;
+
+  if (Ty->isComplexType())
+    return CGT.ConvertType(Ty);
+
+  const RecordType *RT = Ty->getAs<RecordType>();
+
+  // Unions/vectors are passed in integer registers.
+  if (!RT || !RT->isStructureOrClassType()) {
+    CoerceToIntArgs(TySize, ArgList);
+    return llvm::StructType::get(getVMContext(), ArgList);
+  }
+
+  const RecordDecl *RD = RT->getDecl();
+  const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
+  assert(!(TySize % 8) && "Size of structure must be multiple of 8.");
+
+  uint64_t LastOffset = 0;
+  unsigned idx = 0;
+  llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64);
+
+  // Iterate over fields in the struct/class and check if there are any
+  // aligned double fields.
+  for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
+       i != e; ++i, ++idx) {
+    const QualType Ty = i->getType();
+    const BuiltinType *BT = Ty->getAs<BuiltinType>();
+
+    if (!BT || BT->getKind() != BuiltinType::Double)
+      continue;
+
+    uint64_t Offset = Layout.getFieldOffset(idx);
+    if (Offset % 64) // Ignore doubles that are not aligned.
+      continue;
+
+    // Add ((Offset - LastOffset) / 64) args of type i64.
+    for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j)
+      ArgList.push_back(I64);
+
+    // Add double type.
+    ArgList.push_back(llvm::Type::getDoubleTy(getVMContext()));
+    LastOffset = Offset + 64;
+  }
+
+  CoerceToIntArgs(TySize - LastOffset, IntArgList);
+  ArgList.append(IntArgList.begin(), IntArgList.end());
+
+  return llvm::StructType::get(getVMContext(), ArgList);
+}
+
+llvm::Type *LoongArchABIInfo::getPaddingType(uint64_t OrigOffset,
+                                             uint64_t Offset) const {
+  if (OrigOffset + MinABIStackAlignInBytes > Offset)
+    return nullptr;
+
+  return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8);
+}
+
+ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
+                                                  int &ArgGPRsLeft,
+                                                  int &ArgFPRsLeft) const {
+  assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
+  // Structures with either a non-trivial destructor or a non-trivial
+  // copy constructor are always passed indirectly.
+  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
+    if (ArgGPRsLeft)
+      ArgGPRsLeft -= 1;
+    return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
+                                           CGCXXABI::RAA_DirectInMemory);
+  }
+
+  uint64_t Size = getContext().getTypeSize(Ty);
+  // Ignore empty structs/unions whose size is zero, e.g. `struct { }` in C or
+  // `struct { int a[0]; }` in C++. In C++, `struct { }` is empty, but its
+  // size is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour.
+  if (isEmptyRecord(getContext(), Ty, true) && Size == 0)
+    return ABIArgInfo::getIgnore();
+
+  // Pass floating-point values via FPRs if possible.
+  if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() &&
+      FRLen >= Size && ArgFPRsLeft) {
+    ArgFPRsLeft--;
+    return ABIArgInfo::getDirect();
+  }
+
+  // Complex types for the hard float ABI must be passed direct rather than
+  // using CoerceAndExpand.
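+  // For instance, a fixed _Complex double argument takes the branch below and
+  // consumes two FPRs when at least two remain (illustrative reading).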
+  if (IsFixed && Ty->isComplexType() && FRLen && ArgFPRsLeft >= 2) {
+    QualType EltTy = Ty->getAs<ComplexType>()->getElementType();
+    if (getContext().getTypeSize(EltTy) <= FRLen) {
+      ArgFPRsLeft -= 2;
+      return ABIArgInfo::getDirect();
+    }
+  }
+
+  if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) &&
+                              (getTarget().hasFeature("lsx"))) ||
+                             ((getContext().getTypeSize(Ty) == 256) &&
+                              getTarget().hasFeature("lasx"))))
+    return ABIArgInfo::getDirect();
+
+  if (IsFixed && FRLen && Ty->isStructureOrClassType()) {
+    llvm::Type *Field1Ty = nullptr;
+    llvm::Type *Field2Ty = nullptr;
+    CharUnits Field1Off = CharUnits::Zero();
+    CharUnits Field2Off = CharUnits::Zero();
+    int NeededArgGPRs = 0;
+    int NeededArgFPRs = 0;
+    bool IsCandidate =
+        detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off,
+                                 NeededArgGPRs, NeededArgFPRs);
+    if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft &&
+        NeededArgFPRs <= ArgFPRsLeft) {
+      ArgGPRsLeft -= NeededArgGPRs;
+      ArgFPRsLeft -= NeededArgFPRs;
+      return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty,
+                                               Field2Off);
+    }
+  } else if (Ty->isStructureOrClassType() && Size == 128 &&
+             isAggregateTypeForABI(Ty)) {
+    uint64_t Offset = 8;
+    uint64_t OrigOffset = Offset;
+    uint64_t TySize = getContext().getTypeSize(Ty);
+    uint64_t Align = getContext().getTypeAlign(Ty) / 8;
+
+    Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes),
+                     (uint64_t)StackAlignInBytes);
+    unsigned CurrOffset = llvm::alignTo(Offset, Align);
+    Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8;
+
+    ABIArgInfo ArgInfo =
+        ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0,
+                              getPaddingType(OrigOffset, CurrOffset));
+    ArgInfo.setInReg(true);
+    return ArgInfo;
+  }
+
+  uint64_t NeededAlign = getContext().getTypeAlign(Ty);
+  // Determine the number of GPRs needed to pass the current argument
+  // according to the ABI. 2*GRLen-aligned varargs are passed in "aligned"
+  // register pairs, so may consume 3 registers.
+  int NeededArgGPRs = 1;
+  if (!IsFixed && NeededAlign == 2 * GRLen)
+    NeededArgGPRs = 2 + (ArgGPRsLeft % 2);
+  else if (Size > GRLen && Size <= 2 * GRLen)
+    NeededArgGPRs = 2;
+
+  if (NeededArgGPRs > ArgGPRsLeft) {
+    NeededArgGPRs = ArgGPRsLeft;
+  }
+
+  ArgGPRsLeft -= NeededArgGPRs;
+
+  if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) {
+    // Treat an enum type as its underlying type.
+    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+      Ty = EnumTy->getDecl()->getIntegerType();
+
+    // All integral types are promoted to GRLen width, unless passed on the
+    // stack.
+    if (Size < GRLen && Ty->isIntegralOrEnumerationType()) {
+      return extendType(Ty);
+    }
+
+    return ABIArgInfo::getDirect();
+  }
+
+  // Aggregates which are <= 2*GRLen will be passed in registers if possible,
+  // so coerce to integers.
+  if (Size <= 2 * GRLen) {
+    unsigned Alignment = getContext().getTypeAlign(Ty);
+
+    // Use a single GRLen int if possible, 2*GRLen if 2*GRLen alignment is
+    // required, and a 2-element GRLen array if only GRLen alignment is
+    // required.
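+    // For illustration (assuming GRLen == 64 and a fixed argument with no
+    // FP fields, so the FPCC path above was not taken):
+    //   struct A { int32_t a, b; };          // Size 64            -> i64
+    //   struct B { int64_t a; int32_t b; };  // Size 128, Align 64 -> [2 x i64]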
+    if (Size <= GRLen) {
+      return ABIArgInfo::getDirect(
+          llvm::IntegerType::get(getVMContext(), GRLen));
+    } else if (Alignment == 2 * GRLen) {
+      return ABIArgInfo::getDirect(
+          llvm::IntegerType::get(getVMContext(), 2 * GRLen));
+    } else {
+      return ABIArgInfo::getDirect(llvm::ArrayType::get(
+          llvm::IntegerType::get(getVMContext(), GRLen), 2));
+    }
+  }
+  return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+}
+
+ABIArgInfo LoongArchABIInfo::classifyReturnType(QualType RetTy) const {
+  if (RetTy->isVoidType())
+    return ABIArgInfo::getIgnore();
+
+  int ArgGPRsLeft = 2;
+  int ArgFPRsLeft = FRLen ? 2 : 0;
+
+  // The rules for return and argument types are the same, so defer to
+  // classifyArgumentType.
+  return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft,
+                              ArgFPRsLeft);
+}
+
+Address LoongArchABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+                                    QualType Ty) const {
+  CharUnits SlotSize = CharUnits::fromQuantity(GRLen / 8);
+
+  // Empty records are ignored for parameter passing purposes.
+  if (isEmptyRecord(getContext(), Ty, true)) {
+    Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
+                           getVAListElementType(CGF), SlotSize);
+    Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
+    return Addr;
+  }
+
+  auto TInfo = getContext().getTypeInfoInChars(Ty);
+
+  // Arguments bigger than 2*GRLen bits (i.e., two slots) are passed
+  // indirectly.
+  bool IsIndirect = TInfo.Width > 2 * SlotSize;
+
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo,
+                          SlotSize, /*AllowHigherAlign=*/true);
+}
+
+ABIArgInfo LoongArchABIInfo::extendType(QualType Ty) const {
+  int TySize = getContext().getTypeSize(Ty);
+  // The LP64 ABI requires unsigned 32-bit integers to be sign extended.
+  if (GRLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
+    return ABIArgInfo::getSignExtend(Ty);
+  return ABIArgInfo::getExtend(Ty);
+}
+
+namespace {
+class LoongArchTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  LoongArchTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned GRLen,
+                             unsigned FRLen)
+      : TargetCodeGenInfo(
+            std::make_unique<LoongArchABIInfo>(CGT, GRLen, FRLen)) {}
+
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &CGM) const override {
+    return;
+  }
+};
+} // namespace
+
 //===----------------------------------------------------------------------===//
 // VE ABI Implementation.
// @@ -11564,6 +12137,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::le32: return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); + case llvm::Triple::m68k: return SetCGInfo(new M68kTargetCodeGenInfo(Types)); case llvm::Triple::mips: @@ -11681,6 +12255,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::msp430: return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); + case llvm::Triple::loongarch64: + return SetCGInfo(new LoongArchTargetCodeGenInfo(Types, 64, 64)); + case llvm::Triple::riscv32: case llvm::Triple::riscv64: { StringRef ABIStr = getTarget().getABI(); diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index 7be7f959e896..b86a60faa311 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -28,6 +28,7 @@ add_clang_library(clangDriver ToolChains/Arch/AArch64.cpp ToolChains/Arch/ARM.cpp ToolChains/Arch/CSKY.cpp + ToolChains/Arch/LoongArch.cpp ToolChains/Arch/M68k.cpp ToolChains/Arch/Mips.cpp ToolChains/Arch/PPC.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index cbd2ff494de9..e9d0df53ef3a 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -627,6 +627,29 @@ static llvm::Triple computeTargetTriple(const Driver &D, Target.setVendorName("intel"); } + // If target is LoongArch adjust the target triple + // accordingly to provided ABI name. + A = Args.getLastArg(options::OPT_mabi_EQ); + if (A && Target.isLoongArch()) { + StringRef ABIName = A->getValue(); + if (ABIName == "lp32") { + Target = Target.get32BitArchVariant(); + if (Target.getEnvironment() == llvm::Triple::GNUABI64 || + Target.getEnvironment() == llvm::Triple::GNUABILPX32) + Target.setEnvironment(llvm::Triple::GNU); + } else if (ABIName == "lpx32") { + Target = Target.get64BitArchVariant(); + if (Target.getEnvironment() == llvm::Triple::GNU || + Target.getEnvironment() == llvm::Triple::GNUABI64) + Target.setEnvironment(llvm::Triple::GNUABILPX32); + } else if (ABIName == "lp64") { + Target = Target.get64BitArchVariant(); + if (Target.getEnvironment() == llvm::Triple::GNU || + Target.getEnvironment() == llvm::Triple::GNUABILPX32) + Target.setEnvironment(llvm::Triple::GNUABI64); + } + } + // If target is MIPS adjust the target triple // accordingly to provided ABI name. A = Args.getLastArg(options::OPT_mabi_EQ); diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp new file mode 100644 index 000000000000..2c42db69009e --- /dev/null +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -0,0 +1,211 @@ +//===--- LoongArch.cpp - Tools Implementations -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoongArch.h" +#include "ToolChains/CommonArgs.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Option/ArgList.h" + +using namespace clang::driver; +using namespace clang::driver::tools; +using namespace clang; +using namespace llvm::opt; + +// Get CPU and ABI names. They are not independent +// so we have to calculate them together. 
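+// For example (a sketch of the defaulting below): `-mabi=64` is first
+// normalized to "lp64"; with no -march/-mcpu the CPU is then deduced from the
+// ABI (lp64 -> la464), and a bare loongarch64 triple with no flags yields
+// la464/lp64.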
+void loongarch::getLoongArchCPUAndABI(const ArgList &Args,
+                                      const llvm::Triple &Triple,
+                                      StringRef &CPUName, StringRef &ABIName) {
+  const char *DefLoongArch32CPU = "loongarch32";
+  const char *DefLoongArch64CPU = "la464";
+
+  if (Arg *A = Args.getLastArg(clang::driver::options::OPT_march_EQ,
+                               options::OPT_mcpu_EQ))
+    CPUName = A->getValue();
+
+  if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
+    ABIName = A->getValue();
+    // Convert a GNU-style LoongArch ABI name to the name
+    // accepted by the LLVM LoongArch backend.
+    ABIName = llvm::StringSwitch<StringRef>(ABIName)
+                  .Case("32", "lp32")
+                  .Case("64", "lp64")
+                  .Default(ABIName);
+  }
+
+  // Set up default CPU and ABI names.
+  if (CPUName.empty() && ABIName.empty()) {
+    switch (Triple.getArch()) {
+    default:
+      llvm_unreachable("Unexpected triple arch name");
+    case llvm::Triple::loongarch32:
+      CPUName = DefLoongArch32CPU;
+      break;
+    case llvm::Triple::loongarch64:
+      CPUName = DefLoongArch64CPU;
+      break;
+    }
+  }
+
+  if (ABIName.empty() &&
+      (Triple.getEnvironment() == llvm::Triple::GNUABILPX32))
+    ABIName = "lpx32";
+
+  if (ABIName.empty()) {
+    ABIName = llvm::StringSwitch<StringRef>(CPUName)
+                  .Case("loongarch32", "lp32")
+                  .Cases("la264", "la364", "la464", "lp64")
+                  .Default("");
+  }
+
+  if (ABIName.empty()) {
+    // Deduce ABI name from the target triple.
+    ABIName = Triple.isLoongArch32() ? "lp32" : "lp64";
+  }
+
+  if (CPUName.empty()) {
+    // Deduce CPU name from ABI name.
+    CPUName = llvm::StringSwitch<StringRef>(ABIName)
+                  .Case("lp32", DefLoongArch32CPU)
+                  .Cases("lpx32", "lp64", DefLoongArch64CPU)
+                  .Default("");
+  }
+
+  // FIXME: Warn on inconsistent use of -march and -mabi.
+}
+
+std::string loongarch::getLoongArchABILibSuffix(const ArgList &Args,
+                                                const llvm::Triple &Triple) {
+  StringRef CPUName, ABIName;
+  tools::loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName);
+  return llvm::StringSwitch<std::string>(ABIName)
+      .Case("lp32", "")
+      .Case("lpx32", "32")
+      .Case("lp64", "64");
+}
+
+// Convert an ABI name to the variant acceptable to the GNU tools.
+StringRef loongarch::getGnuCompatibleLoongArchABIName(StringRef ABI) {
+  return llvm::StringSwitch<StringRef>(ABI)
+      .Case("lp32", "32")
+      .Case("lp64", "64")
+      .Default(ABI);
+}
+
+// Select the LoongArch float ABI as determined by -msoft-float, -mhard-float,
+// and -mfloat-abi=.
+loongarch::FloatABI loongarch::getLoongArchFloatABI(const Driver &D,
+                                                    const ArgList &Args) {
+  loongarch::FloatABI ABI = loongarch::FloatABI::Invalid;
+  if (Arg *A =
+          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
+                          options::OPT_mfloat_abi_EQ)) {
+    if (A->getOption().matches(options::OPT_msoft_float))
+      ABI = loongarch::FloatABI::Soft;
+    else if (A->getOption().matches(options::OPT_mhard_float))
+      ABI = loongarch::FloatABI::Hard;
+    else {
+      ABI = llvm::StringSwitch<loongarch::FloatABI>(A->getValue())
+                .Case("soft", loongarch::FloatABI::Soft)
+                .Case("hard", loongarch::FloatABI::Hard)
+                .Default(loongarch::FloatABI::Invalid);
+      if (ABI == loongarch::FloatABI::Invalid &&
+          !StringRef(A->getValue()).empty()) {
+        D.Diag(clang::diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args);
+        ABI = loongarch::FloatABI::Hard;
+      }
+    }
+  }
+
+  // If unspecified, choose the default based on the platform.
+  if (ABI == loongarch::FloatABI::Invalid) {
+    // Assume "hard", because it's the default used by gcc.
+    // Once we start recognizing specific target LoongArch processors,
+    // we will be able to select the default more accurately.
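+    // Note, for example, that `-msoft-float -mhard-float` never reaches this
+    // point: getLastArg above picks the last of the three options, so Hard is
+    // already selected. Only an invocation with no float-ABI option at all
+    // (or an empty `-mfloat-abi=` value) falls through to here.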
+    ABI = loongarch::FloatABI::Hard;
+  }
+
+  assert(ABI != loongarch::FloatABI::Invalid && "must select an ABI");
+  return ABI;
+}
+
+void loongarch::getLoongArchTargetFeatures(const Driver &D,
+                                           const llvm::Triple &Triple,
+                                           const ArgList &Args,
+                                           std::vector<StringRef> &Features) {
+  StringRef CPUName;
+  StringRef ABIName;
+  getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName);
+  ABIName = getGnuCompatibleLoongArchABIName(ABIName);
+
+  // At final link time, LP32 and LPX32 with CPIC will have another section
+  // added to the binary which contains the stub functions to perform
+  // any fixups required for PIC code.
+
+  bool IsLP64 = ABIName == "64";
+  bool NonPIC = false;
+
+  Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
+                                    options::OPT_fpic, options::OPT_fno_pic,
+                                    options::OPT_fPIE, options::OPT_fno_PIE,
+                                    options::OPT_fpie, options::OPT_fno_pie);
+  if (LastPICArg) {
+    Option O = LastPICArg->getOption();
+    NonPIC =
+        (O.matches(options::OPT_fno_PIC) || O.matches(options::OPT_fno_pic) ||
+         O.matches(options::OPT_fno_PIE) || O.matches(options::OPT_fno_pie));
+  }
+
+  if (IsLP64 && NonPIC) {
+    NonPIC = false;
+  }
+
+  loongarch::FloatABI FloatABI = loongarch::getLoongArchFloatABI(D, Args);
+  if (FloatABI == loongarch::FloatABI::Soft) {
+    // FIXME: Note, this is a hack. We need to pass the selected float
+    // mode to LoongArchTargetInfoBase to define the appropriate macros there.
+    // For now it is the only method.
+    Features.push_back("+soft-float");
+  }
+
+  AddTargetFeature(Args, Features, options::OPT_msingle_float,
+                   options::OPT_mdouble_float, "single-float");
+
+  AddTargetFeature(Args, Features, options::OPT_mlsx, options::OPT_mno_lsx,
+                   "lsx");
+  AddTargetFeature(Args, Features, options::OPT_mlasx, options::OPT_mno_lasx,
+                   "lasx");
+
+  AddTargetFeature(Args, Features, options::OPT_munaligned_access,
+                   options::OPT_mno_unaligned_access, "unaligned-access");
+
+  // Add the last -mfp32/-mfp64; if neither is given and fp64 is the default,
+  // pass fp64.
+  if (Arg *A = Args.getLastArg(options::OPT_mfp32,
+                               options::OPT_mfp64)) {
+    if (A->getOption().matches(options::OPT_mfp32))
+      Features.push_back("-fp64");
+    else
+      Features.push_back("+fp64");
+  } else if (loongarch::isFP64Default(Args)) {
+    Features.push_back("+fp64");
+  }
+}
+
+bool loongarch::hasLoongArchAbiArg(const ArgList &Args, const char *Value) {
+  Arg *A = Args.getLastArg(options::OPT_mabi_EQ);
+  return A && (A->getValue() == StringRef(Value));
+}
+
+bool loongarch::isUCLibc(const ArgList &Args) {
+  Arg *A = Args.getLastArg(options::OPT_m_libc_Group);
+  return A && A->getOption().matches(options::OPT_muclibc);
+}
+
+bool loongarch::isFP64Default(const ArgList &Args) {
+  return !Args.getLastArg(options::OPT_msingle_float);
+}
diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.h b/clang/lib/Driver/ToolChains/Arch/LoongArch.h
new file mode 100644
index 000000000000..53664346f8f8
--- /dev/null
+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.h
@@ -0,0 +1,49 @@
+//===--- LoongArch.h - LoongArch-specific Tool Helpers ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H
+#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H
+
+#include "clang/Driver/Driver.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Option/Option.h"
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace driver {
+namespace tools {
+
+namespace loongarch {
+enum class FloatABI {
+  Invalid,
+  Soft,
+  Hard,
+};
+
+void getLoongArchCPUAndABI(const llvm::opt::ArgList &Args,
+                           const llvm::Triple &Triple, StringRef &CPUName,
+                           StringRef &ABIName);
+void getLoongArchTargetFeatures(const Driver &D, const llvm::Triple &Triple,
+                                const llvm::opt::ArgList &Args,
+                                std::vector<StringRef> &Features);
+StringRef getGnuCompatibleLoongArchABIName(StringRef ABI);
+loongarch::FloatABI getLoongArchFloatABI(const Driver &D,
+                                         const llvm::opt::ArgList &Args);
+std::string getLoongArchABILibSuffix(const llvm::opt::ArgList &Args,
+                                     const llvm::Triple &Triple);
+bool hasLoongArchAbiArg(const llvm::opt::ArgList &Args, const char *Value);
+bool isUCLibc(const llvm::opt::ArgList &Args);
+bool isFP64Default(const llvm::opt::ArgList &Args);
+
+} // end namespace loongarch
+} // end namespace tools
+} // end namespace driver
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ARCH_LOONGARCH_H
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index ba06b28d9661..ff5c2805787c 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -11,6 +11,7 @@
 #include "Arch/AArch64.h"
 #include "Arch/ARM.h"
 #include "Arch/CSKY.h"
+#include "Arch/LoongArch.h"
 #include "Arch/M68k.h"
 #include "Arch/Mips.h"
 #include "Arch/PPC.h"
@@ -317,6 +318,11 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
     arm::getARMTargetFeatures(D, Triple, Args, Features, ForAS);
     break;
+  case llvm::Triple::loongarch32:
+  case llvm::Triple::loongarch64:
+    loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features);
+    break;
+
   case llvm::Triple::ppc:
   case llvm::Triple::ppcle:
   case llvm::Triple::ppc64:
@@ -527,6 +533,8 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args,
     // XCore never wants frame pointers, regardless of OS.
     // WebAssembly never wants frame pointers.
return false; + case llvm::Triple::loongarch64: + case llvm::Triple::loongarch32: case llvm::Triple::ppc: case llvm::Triple::ppcle: case llvm::Triple::ppc64: @@ -1794,6 +1802,11 @@ void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple, CmdArgs.push_back("-fallow-half-arguments-and-returns"); break; + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + AddLoongArchTargetArgs(Args, CmdArgs); + break; + case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: @@ -1933,6 +1946,45 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args, AddUnalignedAccessWarning(CmdArgs); } +void Clang::AddLoongArchTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const Driver &D = getToolChain().getDriver(); + StringRef CPUName; + StringRef ABIName; + const llvm::Triple &Triple = getToolChain().getTriple(); + loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); + + CmdArgs.push_back("-target-abi"); + CmdArgs.push_back(ABIName.data()); + + loongarch::FloatABI ABI = loongarch::getLoongArchFloatABI(D, Args); + if (ABI == loongarch::FloatABI::Soft) { + // Floating point operations and argument passing are soft. + CmdArgs.push_back("-msoft-float"); + CmdArgs.push_back("-mfloat-abi"); + CmdArgs.push_back("soft"); + } else { + // Floating point operations and argument passing are hard. + assert(ABI == loongarch::FloatABI::Hard && "Invalid float abi!"); + CmdArgs.push_back("-mfloat-abi"); + CmdArgs.push_back("hard"); + } + + if (Arg *A = Args.getLastArg(options::OPT_mcheck_zero_division, + options::OPT_mno_check_zero_division)) { + if (A->getOption().matches(options::OPT_mno_check_zero_division)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-mnocheck-zero-division"); + } + } + + llvm::Reloc::Model RelocationModel; + unsigned PICLevel; + bool IsPIE; + std::tie(RelocationModel, PICLevel, IsPIE) = + ParsePICArgs(getToolChain(), Args); +} + void Clang::AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { const Driver &D = getToolChain().getDriver(); @@ -2079,6 +2131,15 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args, CmdArgs.push_back("-mips-jalr-reloc=0"); } } + + if (Arg *A = Args.getLastArg(options::OPT_mfix_loongson3_llsc, + options::OPT_mno_fix_loongson3_llsc)) { + CmdArgs.push_back("-mllvm"); + if (A->getOption().matches(options::OPT_mfix_loongson3_llsc)) + CmdArgs.push_back("-mips-fix-loongson3-llsc=1"); + else + CmdArgs.push_back("-mips-fix-loongson3-llsc=0"); + } } void Clang::AddPPCTargetArgs(const ArgList &Args, @@ -7849,6 +7910,17 @@ const char *Clang::getDependencyFileName(const ArgList &Args, // Begin ClangAs +void ClangAs::AddLoongArchTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + StringRef CPUName; + StringRef ABIName; + const llvm::Triple &Triple = getToolChain().getTriple(); + loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); + + CmdArgs.push_back("-target-abi"); + CmdArgs.push_back(ABIName.data()); +} + void ClangAs::AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { StringRef CPUName; @@ -8044,6 +8116,11 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, default: break; + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + AddLoongArchTargetArgs(Args, CmdArgs); + break; + case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index 5209c6687599..e28012af1fda 100644 --- 
a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -57,6 +57,8 @@ private: bool KernelOrKext) const; void AddARM64TargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; void AddPPCTargetArgs(const llvm::opt::ArgList &Args, @@ -123,6 +125,8 @@ class LLVM_LIBRARY_VISIBILITY ClangAs : public Tool { public: ClangAs(const ToolChain &TC) : Tool("clang::as", "clang integrated assembler", TC) {} + void AddLoongArchTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; void AddMIPSTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; void AddX86TargetArgs(const llvm::opt::ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index d20aa1390f0b..de027a571066 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -9,6 +9,7 @@ #include "CommonArgs.h" #include "Arch/AArch64.h" #include "Arch/ARM.h" +#include "Arch/LoongArch.h" #include "Arch/M68k.h" #include "Arch/Mips.h" #include "Arch/PPC.h" @@ -376,6 +377,14 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, return A->getValue(); return ""; + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: { + StringRef CPUName; + StringRef ABIName; + loongarch::getLoongArchCPUAndABI(Args, T, CPUName, ABIName); + return std::string(CPUName); + } + case llvm::Triple::m68k: return m68k::getM68kTargetCPU(Args); @@ -1405,6 +1414,18 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) { if ((ROPI || RWPI) && (PIC || PIE)) ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic); + if (Triple.isLoongArch()) { + StringRef CPUName; + StringRef ABIName; + loongarch::getLoongArchCPUAndABI(Args, Triple, CPUName, ABIName); + // When targeting the LP64 ABI, PIC is the default. + if (ABIName == "lp64") + PIC = true; + // Unlike other architectures, LoongArch, even with -fPIC/-mxgot/multigot, + // does not use PIC level 2 for historical reasons. + IsPICLevelTwo = false; + } + if (Triple.isMIPS()) { StringRef CPUName; StringRef ABIName; diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index f8dfb189eaf6..1cacb397eedf 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -9,6 +9,7 @@ #include "Gnu.h" #include "Arch/ARM.h" #include "Arch/CSKY.h" +#include "Arch/LoongArch.h" #include "Arch/Mips.h" #include "Arch/PPC.h" #include "Arch/RISCV.h" @@ -255,6 +256,10 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { case llvm::Triple::armeb: case llvm::Triple::thumbeb: return isArmBigEndian(T, Args) ? 
"armelfb_linux_eabi" : "armelf_linux_eabi"; + case llvm::Triple::loongarch32: + return "elf32loongarch"; + case llvm::Triple::loongarch64: + return "elf64loongarch"; case llvm::Triple::m68k: return "m68kelf"; case llvm::Triple::ppc: @@ -858,6 +863,63 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, break; } + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: { + StringRef CPUName; + StringRef ABIName; + loongarch::getLoongArchCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); + ABIName = loongarch::getGnuCompatibleLoongArchABIName(ABIName); + + //FIXME: Currently gnu as doesn't support -march + //CmdArgs.push_back("-march=loongarch"); + //CmdArgs.push_back(CPUName.data()); + + //FIXME: modify loongarch::getGnuCompatibleLoongArchABIName() + CmdArgs.push_back("-mabi=lp64"); + //CmdArgs.push_back(ABIName.data()); + + // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, + // or -mshared (not implemented) is in effect. + if (RelocationModel == llvm::Reloc::Static) + CmdArgs.push_back("-mno-shared"); + + // LLVM doesn't support -mplt yet and acts as if it is always given. + // However, -mplt has no effect with the LP64 ABI. + if (ABIName != "64") + CmdArgs.push_back("-call_nonpic"); + + break; + + // Add the last -mfp32/-mfp64. + if (Arg *A = Args.getLastArg(options::OPT_mfp32, + options::OPT_mfp64)) { + A->claim(); + A->render(Args, CmdArgs); + } + + if (Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { + // Do not use AddLastArg because not all versions of LoongArch assembler + // support -mlsx / -mno-lsx options. + if (A->getOption().matches(options::OPT_mlsx)) + CmdArgs.push_back(Args.MakeArgString("-mlsx")); + } + + if (Arg *A = Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { + // Do not use AddLastArg because not all versions of LoongArch assembler + // support -mlasx / -mno-lasx options. 
+ if (A->getOption().matches(options::OPT_mlasx)) + CmdArgs.push_back(Args.MakeArgString("-mlasx")); + } + + Args.AddLastArg(CmdArgs, options::OPT_mhard_float, + options::OPT_msoft_float); + + Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, + options::OPT_msingle_float); + + AddAssemblerKPIC(getToolChain(), Args, CmdArgs); + break; + } case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: @@ -2297,6 +2359,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( "s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", "s390x-suse-linux", "s390x-redhat-linux"}; + static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; + static const char *const LoongArch64Triples[] = { + "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu", + "loongarch64-loongson-linux-gnu", "loongarch64-redhat-linux", "loongarch64-linux-ohos"}; using std::begin; using std::end; @@ -2469,6 +2535,10 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); } break; + case llvm::Triple::loongarch64: + LibDirs.append(begin(LoongArch64LibDirs), end(LoongArch64LibDirs)); + TripleAliases.append(begin(LoongArch64Triples), end(LoongArch64Triples)); + break; case llvm::Triple::m68k: LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs)); TripleAliases.append(begin(M68kTriples), end(M68kTriples)); @@ -2826,6 +2896,7 @@ bool Generic_GCC::isPICDefault() const { switch (getArch()) { case llvm::Triple::x86_64: return getTriple().isOSWindows(); + case llvm::Triple::loongarch64: case llvm::Triple::mips64: case llvm::Triple::mips64el: return true; @@ -2866,6 +2937,8 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const { case llvm::Triple::ppc64le: case llvm::Triple::riscv32: case llvm::Triple::riscv64: + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::sparcv9: diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index ceb1a982c3a4..a3db30be9c1f 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -8,6 +8,7 @@ #include "Linux.h" #include "Arch/ARM.h" +#include "Arch/LoongArch.h" #include "Arch/Mips.h" #include "Arch/PPC.h" #include "Arch/RISCV.h" @@ -85,6 +86,13 @@ std::string Linux::getMultiarchTriple(const Driver &D, case llvm::Triple::aarch64_be: return "aarch64_be-linux-gnu"; + case llvm::Triple::loongarch32: + return "loongarch32-linux-gnu"; + case llvm::Triple::loongarch64: + if (IsAndroid) + return "loongarch64-linux-android"; + return "loongarch64-linux-gnu"; + case llvm::Triple::m68k: return "m68k-linux-gnu"; @@ -473,6 +481,11 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { Loader = HF ? 
"ld-linux-armhf.so.3" : "ld-linux.so.3"; break; } + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + LibDir = "lib" + tools::loongarch::getLoongArchABILibSuffix(Args, Triple); + Loader = "ld.so.1"; + break; case llvm::Triple::m68k: LibDir = "lib"; Loader = "ld.so.1"; @@ -741,6 +754,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64; const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; + const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; SanitizerMask Res = ToolChain::getSupportedSanitizers(); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; @@ -751,19 +765,20 @@ SanitizerMask Linux::getSupportedSanitizers() const { Res |= SanitizerKind::Memory; Res |= SanitizerKind::Vptr; Res |= SanitizerKind::SafeStack; - if (IsX86_64 || IsMIPS64 || IsAArch64) + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsLoongArch64) Res |= SanitizerKind::DataFlow; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 || - IsRISCV64 || IsSystemZ || IsHexagon) + IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64) Res |= SanitizerKind::Leak; - if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ) + if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || + IsLoongArch64) Res |= SanitizerKind::Thread; if (IsX86_64) Res |= SanitizerKind::KernelMemory; if (IsX86 || IsX86_64) Res |= SanitizerKind::Function; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || - IsPowerPC64 || IsHexagon) + IsPowerPC64 || IsHexagon || IsLoongArch64) Res |= SanitizerKind::Scudo; if (IsX86_64 || IsAArch64) { Res |= SanitizerKind::HWAddress; diff --git a/clang/lib/Driver/ToolChains/OHOS.cpp b/clang/lib/Driver/ToolChains/OHOS.cpp index 1bbe09030f3c..1773a899e899 100644 --- a/clang/lib/Driver/ToolChains/OHOS.cpp +++ b/clang/lib/Driver/ToolChains/OHOS.cpp @@ -56,6 +56,9 @@ static bool findOHOSMuslMultilibs(const Multilib::flags_list &Flags, Multilibs.push_back(Multilib("nanlegacy", {}, {}, 1) .flag("+mnan=legacy")); + Multilibs.push_back(Multilib("la264", {}, {}, 1) + .flag("+march=la264")); + if (Multilibs.select(Flags, Result.SelectedMultilib)) { Result.Multilibs = Multilibs; return true; @@ -93,6 +96,11 @@ static bool findOHOSMultilibs(const Driver &D, IsLegacy = A->getValue() != StringRef("2008"); addMultilibFlag(IsLegacy, "mnan=legacy", Flags); + bool IsLA264 = false; + if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) + IsLA264 = A->getValue() == StringRef("la264"); + addMultilibFlag(IsLA264, "march=la264", Flags); + return findOHOSMuslMultilibs(Flags, Result); } @@ -122,6 +130,8 @@ std::string OHOS::getMultiarchTriple(const llvm::Triple &T) const { return "x86_64-linux-ohos"; case llvm::Triple::aarch64: return "aarch64-linux-ohos"; + case llvm::Triple::loongarch64: + return "loongarch64-linux-ohos"; } return T.str(); } @@ -389,7 +399,12 @@ void OHOS::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { CmdArgs.push_back("-z"); CmdArgs.push_back("relro"); CmdArgs.push_back("-z"); - CmdArgs.push_back("max-page-size=4096"); + //LoongArch need page size 16K + if (getArch() == llvm::Triple::loongarch64) { + CmdArgs.push_back("max-page-size=16384"); + } else { + CmdArgs.push_back("max-page-size=4096"); + } // .gnu.hash section is not compatible with the MIPS target if (getArch() != llvm::Triple::mipsel) { CmdArgs.push_back("--hash-style=gnu"); 
@@ -405,6 +420,7 @@ void OHOS::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { SanitizerMask OHOS::getSupportedSanitizers() const { const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; const bool IsAArch64 = getTriple().getArch() == llvm::Triple::aarch64; + const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; SanitizerMask Res = ToolChain::getSupportedSanitizers(); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; @@ -420,7 +436,7 @@ SanitizerMask OHOS::getSupportedSanitizers() const { // OHOS_LOCAL Res |= SanitizerKind::HWAddress; // TODO: Support TSAN and HWASAN and update mask. - if (IsAArch64 || IsX86_64) + if (IsAArch64 || IsX86_64 || IsLoongArch64) Res |= SanitizerKind::Thread; return Res; } diff --git a/clang/lib/Driver/XRayArgs.cpp b/clang/lib/Driver/XRayArgs.cpp index 63b575178bd1..4e3ae3f250a6 100644 --- a/clang/lib/Driver/XRayArgs.cpp +++ b/clang/lib/Driver/XRayArgs.cpp @@ -42,6 +42,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) { case llvm::Triple::aarch64: case llvm::Triple::hexagon: case llvm::Triple::ppc64le: + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 6e2060991b92..2ec170cc2cb4 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -68,6 +68,12 @@ set(hlsl_files hlsl/hlsl_intrinsics.h ) +set(loongarch_files + lasxintrin.h + larchintrin.h + lsxintrin.h + ) + set(mips_msa_files msa.h ) @@ -220,6 +226,7 @@ set(files ${hexagon_files} ${hip_files} ${hlsl_files} + ${loongarch_files} ${mips_msa_files} ${opencl_files} ${ppc_files} @@ -381,6 +388,7 @@ add_dependencies("clang-resource-headers" "hexagon-resource-headers" "hip-resource-headers" "hlsl-resource-headers" + "loongarch-resource-headers" "mips-resource-headers" "ppc-resource-headers" "ppc-htm-resource-headers" @@ -404,6 +412,7 @@ add_header_target("aarch64-resource-headers" "${aarch64_only_files};${aarch64_on add_header_target("cuda-resource-headers" "${cuda_files};${cuda_wrapper_files}") add_header_target("hexagon-resource-headers" "${hexagon_files}") add_header_target("hip-resource-headers" "${hip_files}") +add_header_target("loongarch-resource-headers" "${loongarch_files}") add_header_target("mips-resource-headers" "${mips_msa_files}") add_header_target("ppc-resource-headers" "${ppc_files};${ppc_wrapper_files}") add_header_target("ppc-htm-resource-headers" "${ppc_htm_files}") @@ -494,6 +503,12 @@ install( EXCLUDE_FROM_ALL COMPONENT hip-resource-headers) +install( + FILES ${loongarch_files} + DESTINATION ${header_install_dir} + EXCLUDE_FROM_ALL + COMPONENT loongarch-resource-headers) + install( FILES ${mips_msa_files} DESTINATION ${header_install_dir} diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h new file mode 100644 index 000000000000..b5acf218bafb --- /dev/null +++ b/clang/lib/Headers/larchintrin.h @@ -0,0 +1,338 @@ +//===----------- larchintrin.h - LoongArch BASE intrinsics ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the LoongArch Base intrinsics +// +//===----------------------------------------------------------------------===// +#ifndef __LOONGARCH_BASE_H +#define __LOONGARCH_BASE_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct drdtime{ + unsigned long dvalue; + unsigned long dtimeid; +} __drdtime_t; + +typedef struct rdtime{ + unsigned int value; + unsigned int timeid; +} __rdtime_t; + +/* Assembly instruction format: rd, csr_num */ +/* Data types in instruction templates: unsigned int, uimm14_32 */ +#define __csrrd_w(/*uimm14_32*/ _1) \ + ((unsigned int)__builtin_loongarch_csrrd_w(_1)) + +/* Assembly instruction format: rd, csr_num */ +/* Data types in instruction templates: unsigned int, uimm14_32 */ +#define __csrwr_w(/*unsigned int*/ _1, /*uimm14_32*/ _2) \ + ((unsigned int)__builtin_loongarch_csrwr_w((unsigned int)(_1), (_2))) + +/* Assembly instruction format: rd, rj, csr_num */ +/* Data types in instruction templates: unsigned int, unsigned int, uimm14_32 */ +#define __csrxchg_w(/*unsigned int*/ _1, /*unsigned int*/ _2, \ + /*uimm14_32*/ _3) \ + ((unsigned int)__builtin_loongarch_csrxchg_w((unsigned int)(_1), \ + (unsigned int)(_2), (_3))) + +/* Assembly instruction format: rd, csr_num */ +/* Data types in instruction templates: unsigned long int, uimm14 */ +#define __csrrd_d(/*uimm14*/ _1) \ + ((unsigned long int)__builtin_loongarch_csrrd_d(_1)) + +/* Assembly instruction format: rd, csr_num */ +/* Data types in instruction templates: unsigned long int, uimm14 */ +#define __csrwr_d(/*unsigned long int*/ _1, /*uimm14*/ _2) \ + ((unsigned long int)__builtin_loongarch_csrwr_d((unsigned long int)(_1), \ + (_2))) + +/* Assembly instruction format: rd, rj, csr_num */ +/* Data types in instruction templates: unsigned long int, unsigned long int, uimm14 */ +#define __csrxchg_d(/*unsigned long int*/ _1, /*unsigned long int*/ _2, \ + /*uimm14*/ _3) \ + ((unsigned long int)__builtin_loongarch_csrxchg_d( \ + (unsigned long int)(_1), (unsigned long int)(_2), (_3))) + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: unsigned char, unsigned int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +unsigned char __iocsrrd_b(unsigned int _1) +{ + return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: unsigned short, unsigned int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +unsigned short __iocsrrd_h(unsigned int _1) +{ + return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: unsigned int, unsigned int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +unsigned int __iocsrrd_w(unsigned int _1) +{ + return (unsigned int)__builtin_loongarch_iocsrrd_w((unsigned int)_1); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: unsigned long int, unsigned int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +unsigned long int __iocsrrd_d(unsigned int _1) +{ + return (unsigned long int)__builtin_loongarch_iocsrrd_d((unsigned int)_1); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in 
instruction templates: unsigned char, unsigned int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __iocsrwr_b(unsigned char _1, unsigned int _2) +{ + return (void)__builtin_loongarch_iocsrwr_b((unsigned char)_1, (unsigned int)_2); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: unsigned short, unsigned int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __iocsrwr_h(unsigned short _1, unsigned int _2) +{ + return (void)__builtin_loongarch_iocsrwr_h((unsigned short)_1, (unsigned int)_2); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: unsigned int, unsigned int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __iocsrwr_w(unsigned int _1, unsigned int _2) +{ + return (void)__builtin_loongarch_iocsrwr_w((unsigned int)_1, (unsigned int)_2); +} + +extern __inline unsigned int + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __cpucfg(unsigned int _1) { + return (unsigned int)__builtin_loongarch_cpucfg((unsigned int)_1); +} + +/* Assembly instruction format: rd, rj */ +/* Data types in instruction templates: unsigned long int, unsigned int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __iocsrwr_d(unsigned long int _1, unsigned int _2) +{ + return (void)__builtin_loongarch_iocsrwr_d((unsigned long int)_1, (unsigned int)_2); +} + +/* Assembly instruction format: op, rj, si12 */ +/* Data types in instruction templates: uimm5, unsigned int, simm12 */ +#define __cacop_w(/*uimm5*/ _1, /*unsigned int*/ _2, /*simm12*/ _3) \ + ((void)__builtin_loongarch_cacop_w((_1), (unsigned int)(_2), (_3))) + +/* Assembly instruction format: op, rj, si12 */ +/* Data types in instruction templates: uimm5, unsigned long int, simm12 */ +#define __cacop_d(/*uimm5*/ _1, /*unsigned long int*/ _2, /*simm12*/ _3) \ + ((void)__builtin_loongarch_cacop_d((_1), (unsigned long int)(_2), (_3))) + +#define __rdtime_d __builtin_loongarch_rdtime_d +#define __rdtimel_w __builtin_loongarch_rdtimel_w +#define __rdtimeh_w __builtin_loongarch_rdtimeh_w + +extern __inline __drdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_rdtime_d (void) +{ + __drdtime_t drdtime; + __asm__ volatile ( + "rdtime.d\t%[val],%[tid]\n\t" + : [val]"=&r"(drdtime.dvalue),[tid]"=&r"(drdtime.dtimeid) + : + ); + return drdtime; +} + +extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_rdtimeh_w (void) +{ + __rdtime_t rdtime; + __asm__ volatile ( + "rdtimeh.w\t%[val],%[tid]\n\t" + : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) + : + ); + return rdtime; +} + +extern __inline __rdtime_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__builtin_loongarch_rdtimel_w (void) +{ + __rdtime_t rdtime; + __asm__ volatile ( + "rdtimel.w\t%[val],%[tid]\n\t" + : [val]"=&r"(rdtime.value),[tid]"=&r"(rdtime.timeid) + : + ); + return rdtime; +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: int, char, int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crc_w_b_w(char _1, int _2) +{ + return (int)__builtin_loongarch_crc_w_b_w((char)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: int, short, int */ +extern __inline 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crc_w_h_w(short _1, int _2) +{ + return (int)__builtin_loongarch_crc_w_h_w((short)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: int, int, int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crc_w_w_w(int _1, int _2) +{ + return (int)__builtin_loongarch_crc_w_w_w((int)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: int, long int, int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crc_w_d_w(long int _1, int _2) +{ + return (int)__builtin_loongarch_crc_w_d_w((long int)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: int, char, int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crcc_w_b_w(char _1, int _2) +{ + return (int)__builtin_loongarch_crcc_w_b_w((char)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: int, short, int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crcc_w_h_w(short _1, int _2) +{ + return (int)__builtin_loongarch_crcc_w_h_w((short)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: int, int, int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crcc_w_w_w(int _1, int _2) +{ + return (int)__builtin_loongarch_crcc_w_w_w((int)_1, (int)_2); +} + +/* Assembly instruction format: rd, rj, rk */ +/* Data types in instruction templates: int, long int, int */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +int __crcc_w_d_w(long int _1, int _2) +{ + return (int)__builtin_loongarch_crcc_w_d_w((long int)_1, (int)_2); +} + +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __tlbclr() +{ + return (void)__builtin_loongarch_tlbclr(); +} + +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __tlbflush() +{ + return (void)__builtin_loongarch_tlbflush(); +} + +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __tlbfill() +{ + return (void)__builtin_loongarch_tlbfill(); +} + +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __tlbrd() +{ + return (void)__builtin_loongarch_tlbrd(); +} + +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __tlbwr() +{ + return (void)__builtin_loongarch_tlbwr(); +} + +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +void __tlbsrch() +{ + return (void)__builtin_loongarch_tlbsrch(); +} + +/* Assembly instruction format: code */ +/* Data types in instruction templates: uimm15 */ +#define __syscall(/*uimm15*/ _1) ((void)__builtin_loongarch_syscall(_1)) + +/* Assembly instruction format: code */ +/* Data types in instruction templates: uimm15 */ +#define __break(/*uimm15*/ _1) ((void)__builtin_loongarch_break(_1)) + +/* Assembly instruction format: hint */ +/* Data types in instruction templates: uimm15 */ +#define __dbar(/*uimm15*/ _1) ((void)__builtin_loongarch_dbar(_1)) + +/* Assembly instruction format: hint */ +/* Data types in instruction templates: uimm15 */ +#define __ibar(/*uimm15*/ _1) 
((void)__builtin_loongarch_ibar(_1))
+
+/* Assembly instruction format: rj, rk */
+/* Data types in instruction templates: long int, long int */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+void __asrtle_d(long int _1, long int _2)
+{
+  return (void)__builtin_loongarch_asrtle_d((long int)_1, (long int)_2);
+}
+
+/* Assembly instruction format: rj, rk */
+/* Data types in instruction templates: long int, long int */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+void __asrtgt_d(long int _1, long int _2)
+{
+  return (void)__builtin_loongarch_asrtgt_d((long int)_1, (long int)_2);
+}
+
+#define __movfcsr2gr(uimm5) \
+({ \
+  unsigned int rd; \
+  __asm__ volatile ( \
+    "movfcsr2gr %0, $fcsr" #uimm5 \
+    : "=&r"(rd) \
+    : \
+  ); rd; \
+})
+
+#define __movgr2fcsr(uimm5, rj) \
+{ \
+  __asm__ volatile ( \
+    "movgr2fcsr $fcsr" #uimm5 ", %0" \
+    : \
+    : "r" (rj) \
+  ); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __LOONGARCH_BASE_H */
diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h
new file mode 100644
index 000000000000..c454b0c9e677
--- /dev/null
+++ b/clang/lib/Headers/lasxintrin.h
@@ -0,0 +1,5337 @@
+//===----------- lasxintrin.h - LoongArch LASX intrinsics ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch LASX intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _GCC_LOONGSON_ASXINTRIN_H
+#define _GCC_LOONGSON_ASXINTRIN_H 1
+
+#if defined(__loongarch_asx)
+
+typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
+typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
+typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1)));
+typedef short v16i16 __attribute__((vector_size(32), aligned(32)));
+typedef short v16i16_h __attribute__((vector_size(32), aligned(2)));
+typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2)));
+typedef int v8i32 __attribute__((vector_size(32), aligned(32)));
+typedef int v8i32_w __attribute__((vector_size(32), aligned(4)));
+typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4)));
+typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));
+typedef long long v4i64_d __attribute__((vector_size(32), aligned(8)));
+typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8)));
+typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
+typedef float v8f32_w __attribute__((vector_size(32), aligned(4)));
+typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
+typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
+
+typedef float __m256 __attribute__((__vector_size__(32), __may_alias__));
+typedef long long __m256i __attribute__((__vector_size__(32),
__may_alias__)); +typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsll_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsra_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrar_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. 
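
/* Sketch, not part of the header: xvsrl/xvsrli zero-fill from the left,
   unlike the sign-extending xvsra family above, which makes them the
   natural choice for bit-field extraction. Helper name is illustrative. */
#include <lasxintrin.h>

static __m256i high_nibbles(__m256i bytes) {
  return __lasx_xvsrli_b(bytes, 4); /* every byte ends up in 0x00..0x0F */
}
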
*/ +#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. 
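
/* Illustrative sketch of the bit-manipulation family: the immediate (or
   the low bits of the register operand) is a bit index within each lane,
   so index 7 addresses the sign bit of a byte. Helper names invented. */
#include <lasxintrin.h>

static __m256i toggle_byte_sign(__m256i bytes) {
  return __lasx_xvbitrevi_b(bytes, 7); /* flip bit 7 in every byte */
}
static __m256i force_even(__m256i words) {
  return __lasx_xvbitclri_w(words, 0); /* clear bit 0 in every word */
}
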
*/ +#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. 
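
/* Sketch only: whatever the lane width, the xvaddi_*u macros take the
   same unsigned ui5 immediate (0..31); the "u" refers to the immediate,
   and lanes wrap modulo the lane width on overflow -- my reading of the
   builtin semantics rather than something stated by this patch. */
#include <lasxintrin.h>

static __m256i bias_words(__m256i v) {
  return __lasx_xvaddi_wu(v, 16); /* add 16 to each 32-bit lane */
}
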
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V32QI, V32QI, QI. */ +#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. */ +#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. 
*/ +#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V32QI, V32QI, QI. */ +#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. */ +#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. */ +#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. 
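
/* Illustrative sketch: chaining the signed max/min immediate forms gives
   a per-lane clamp; note the si5 immediates are limited to -16..15. */
#include <lasxintrin.h>

static __m256i clamp_bytes(__m256i bytes) {
  /* each signed byte becomes min(max(b, -8), 8) */
  return __lasx_xvmini_b(__lasx_xvmaxi_b(bytes, -8), 8);
}
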
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V32QI, V32QI, QI. */ +#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. */ +#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. */ +#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V32QI, V32QI, QI. 
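
/* Sketch (not part of the patch): the compare intrinsics produce
   all-ones (-1) in lanes where the relation holds and zero elsewhere,
   i.e. a lane mask usable with the bitwise operations. */
#include <lasxintrin.h>

static __m256i zero_mask(__m256i words) {
  return __lasx_xvseqi_w(words, 0); /* 0xFFFFFFFF where lane == 0 */
}
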
*/ +#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. */ +#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. */ +#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, UV32QI, UQI. */ +#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, UV16HI, UQI. */ +#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, UV8SI, UQI. */ +#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, UV4DI, UQI. */ +#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V32QI, V32QI, QI. */ +#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V16HI, V16HI, QI. */ +#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V8SI, V8SI, QI. */ +#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, si5. */ +/* Data types in instruction templates: V4DI, V4DI, QI. */ +#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, UV32QI, UQI. */ +#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, UV16HI, UQI. */ +#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. 
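
/* Illustrative only: the _bu/_hu/_wu/_du forms compare lanes as
   unsigned, so 0xFF orders as 255 here but as -1 under the signed
   xvsle_b above. */
#include <lasxintrin.h>

static __m256i le_unsigned(__m256i a, __m256i b) {
  return __lasx_xvsle_bu(a, b); /* mask of lanes where a <= b unsigned */
}
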
*/ +/* Data types in instruction templates: V8SI, UV8SI, UQI. */ +#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, UV4DI, UQI. */ +#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV16HI, UV16HI, UQI. */ +#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV8SI, UV8SI, UQI. */ +#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV4DI, UV4DI, UQI. */ +#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
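
/* A sketch of the saturation helpers, assuming xvsat_w clamps each
   signed 32-bit lane into the (imm+1)-bit signed range -- so imm == 7
   yields [-128, 127]. The xvadda_* wrappers just above compute |a| + |b|
   per lane. */
#include <lasxintrin.h>

static __m256i to_s8_range(__m256i words) {
  return __lasx_xvsat_w(words, 7); /* lanes clamped to [-128, 127] */
}
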
*/ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
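
/* Illustrative sketch contrasting the two averaging flavours; my reading
   is that xvavg truncates (a+b)>>1 while xvavgr adds 1 before shifting. */
#include <lasxintrin.h>

static __m256i avg_floor(__m256i a, __m256i b) {
  return __lasx_xvavg_bu(a, b);  /* (a + b) >> 1 per unsigned byte */
}
static __m256i avg_round(__m256i a, __m256i b) {
  return __lasx_xvavgr_bu(a, b); /* (a + b + 1) >> 1 per unsigned byte */
}
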
*/ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
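
/* Sketch only: unsigned saturating subtraction clamps at zero instead of
   wrapping, the usual building block for "dim"-style operations. */
#include <lasxintrin.h>

static __m256i dim_bytes(__m256i a, __m256i b) {
  return __lasx_xvssub_bu(a, b); /* max(a - b, 0) per unsigned byte */
}
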
*/ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. 
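
/* Illustrative sketch: in the three-operand multiply-accumulate forms
   the first argument is the accumulator, so xvmadd_w computes
   acc + a*b (low 32 bits of each product) and xvmsub_w computes
   acc - a*b -- my reading of the builtin signatures above. */
#include <lasxintrin.h>

static __m256i mac_words(__m256i acc, __m256i a, __m256i b) {
  return __lasx_xvmadd_w(acc, a, b);
}
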
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
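
/* Sketch, with the caveat that the operand roles are my reading of the
   vhaddw encoding: the odd-indexed (widened) elements of the first
   source are added to the even-indexed elements of the second, so
   passing the same vector twice performs a pairwise widening add. */
#include <lasxintrin.h>

static __m256i pairwise_widen_add(__m256i bytes) {
  return __lasx_xvhaddw_h_b(bytes, bytes); /* 16-bit sums of byte pairs */
}
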
*/ +/* Data types in instruction templates: UV4DI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. 
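+
+   [Editor's note: sketch of the assumed pairing; verify against the
+   LASX ISA manual.]  The widening horizontal ops combine adjacent
+   elements drawn from both sources, yielding half as many results of
+   twice the width, roughly:
+     r = __lasx_xvhaddw_h_b(a, b);    r[i] = (short)a[2i+1] + (short)b[2i]
+     r = __lasx_xvhsubw_h_b(a, b);    r[i] = (short)a[2i+1] - (short)b[2i]
+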
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui2. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui1. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. 
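+
+   [Editor's note: assumed semantics, hypothetical names.]  xvmod
+   returns the remainder with C's % convention for the signed forms,
+   and xvrepl128vei broadcasts one element within each 128-bit half:
+     __m256i r = __lasx_xvmod_w(a, b);           r[i] = a[i] % b[i]
+     __m256i s = __lasx_xvrepl128vei_w(a, 0);    splat lane 0 per half
+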
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. 
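+
+   [Editor's note: sketch; the exact operand-to-half placement should
+   be checked against the ISA manual.]  xvpickev/xvpickod gather the
+   even- or odd-indexed elements of the two sources into one vector,
+   while xvilvl/xvilvh interleave the low or high elements of the
+   sources, e.g. (ordering assumed):
+     __m256i even = __lasx_xvpickev_b(hi_src, lo_src);
+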
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. 
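+
+   [Editor's note: sketch; exact lane placement should be checked
+   against the ISA manual.]  xvpackev/xvpackod interleave the even- or
+   odd-indexed elements of the two sources, and xvshuf selects elements
+   through a per-element index vector, the fully general shuffle.
+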
*/ +/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvand_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvnor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvxor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: UV32QI, UV32QI, UQI. */ +#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, UV32QI. 
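+
+   [Editor's note: assumed semantics, hypothetical names.]  The logic
+   ops are plain bitwise operations on the full 256 bits; xvnor_v with
+   a repeated operand gives bitwise NOT, and the immediate "i_b" forms
+   take an 8-bit constant replicated to every byte:
+     __m256i not_a   = __lasx_xvnor_v(a, a);
+     __m256i lowbits = __lasx_xvandi_b(a, 0x0f);
+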
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI, USI. */ +#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V32QI, V32QI, USI. */ +#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V16HI, V16HI, USI. */ +#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V8SI, V8SI, USI. */ +#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, rj. */ +/* Data types in instruction templates: V32QI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_b(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); +} + +/* Assembly instruction format: xd, rj. */ +/* Data types in instruction templates: V16HI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_h(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); +} + +/* Assembly instruction format: xd, rj. */ +/* Data types in instruction templates: V8SI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_w(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); +} + +/* Assembly instruction format: xd, rj. */ +/* Data types in instruction templates: V4DI, DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_d(long int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. 
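+
+   [Editor's note: assumed semantics, hypothetical names.]
+   xvreplgr2vr broadcasts a general-purpose register to every lane and
+   xvpcnt is a per-element population count:
+     __m256i zeros = __lasx_xvreplgr2vr_w(0);    splat 0 to all lanes
+     __m256i bits  = __lasx_xvpcnt_w(mask);      popcount per lane
+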
*/ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfadd_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfadd_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsub_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsub_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
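+
+   [Editor's note: assumed semantics.]  xvclz/xvclo count leading
+   zeros or leading ones per element, a common first step for
+   normalisation or integer log2:
+     __m256i n = __lasx_xvclz_w(v);    n[i] = count of leading 0 bits
+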
*/ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmul_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmul_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfdiv_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfdiv_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { + return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmin_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmin_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmina_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmina_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmax_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. 
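+
+   [Editor's note: assumed semantics.]  xvfmin/xvfmax are believed to
+   follow IEEE-754 minNum/maxNum (a quiet-NaN operand yields the other,
+   numeric operand), while xvfmina/xvfmaxa compare magnitudes:
+     __m256 m = __lasx_xvfmaxa_s(a, b);    per lane, larger of |a|, |b|
+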
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmax_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrecip_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecip_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrint_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrint_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. 
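+
+   [Editor's note: assumed semantics.]  xvfrint rounds to an
+   integer-valued float (the result stays in floating-point format),
+   while xvfrecip and xvfrsqrt compute 1/x and 1/sqrt(x) per element;
+   consult the ISA manual for their accuracy guarantees.
+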
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvflogb_s(__m256 _1) { + return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvflogb_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvth_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvth_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvtl_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvtl_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV8SI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. 
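+
+   [Editor's note: assumed semantics.]  Plain xvftint converts float
+   to int using the current FCSR rounding mode, while the rz, rp, rm,
+   and rne variants force round-toward-zero, toward +inf, toward -inf,
+   and to nearest-even respectively; rz matches a C cast:
+     __m256i i = __lasx_xvftintrz_w_s(f);    i[k] = (int)f[k]
+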
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV8SI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_w(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_l(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_wu(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_lu(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); +} + +/* Assembly instruction format: xd, xj, rk. */ +/* Data types in instruction templates: V32QI, V32QI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_b(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); +} + +/* Assembly instruction format: xd, xj, rk. */ +/* Data types in instruction templates: V16HI, V16HI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_h(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); +} + +/* Assembly instruction format: xd, xj, rk. */ +/* Data types in instruction templates: V8SI, V8SI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_w(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); +} + +/* Assembly instruction format: xd, xj, rk. */ +/* Data types in instruction templates: V4DI, V4DI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_d(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, xk. 
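+
+   [Editor's note: assumed semantics, hypothetical names.]  xvffint is
+   the int-to-float direction; together with xvftintrz it gives the
+   usual cast round trip:
+     __m256  f = __lasx_xvffint_s_w(i);       f[k] = (float)i[k]
+     __m256i j = __lasx_xvftintrz_w_s(f);     j[k] = (int)f[k]
+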
*/ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvandn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV8SI. 
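+
+   [Editor's note: assumed semantics, hypothetical names.]  xvandn_v
+   is understood as (NOT first) AND second, and xvmuh returns the high
+   half of the widened product, e.g. for 32-bit lanes:
+     __m256i hi = __lasx_xvmuh_w(a, b);
+     which is roughly hi[i] = (int)(((long)a[i] * b[i]) >> 32)
+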
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V16HI, V32QI, UQI. */ +#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V8SI, V16HI, UQI. */ +#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V4DI, V8SI, UQI. */ +#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: UV16HI, UV32QI, UQI. */ +#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV8SI, UV16HI, UQI. */ +#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV4DI, UV8SI, UQI. */ +#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. 
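+
+   [Editor's note: assumed naming scheme.]  xvsllwil widens each
+   element to the next size while shifting left by the immediate; the
+   sran/srln families shift right and narrow, with a leading extra "s"
+   adding saturation and an "r" before the final "n" adding rounding
+   (so xvssrarn is the saturating, rounding, arithmetic shift right
+   with narrowing).
+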
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV32QI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, UQI. */ +#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, UQI. */ +#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui8. 
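+
+   [Editor's note.]  Intrinsics whose last operand is an immediate
+   (ui1..ui8) are defined as macros rather than inline functions: the
+   underlying builtins require an integer constant expression, so
+   passing a runtime variable for the immediate will not compile.
+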
*/ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk, xa. 
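+
+   [Editor's note: assumed semantics.]  xvmskltz collapses each
+   element's sign bit into a mask (a movemask-style primitive), and
+   xvsigncov copies, negates, or zeroes each element of the second
+   source according to whether the corresponding element of the first
+   is positive, negative, or zero.
+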
*/ +/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V8SF, V8SF, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +/* Assembly instruction format: xd, xj, xk, xa. */ +/* Data types in instruction templates: V4DF, V4DF, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. 
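*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx): the fused
+   forms above evaluate a * b +/- c in a single instruction per lane.  */
+static __inline __m256 demo_fmadd_s(__m256 a, __m256 b, __m256 c) {
+  return __lasx_xvfmadd_s(a, b, c); /* eight f32 lanes of a * b + c */
+}
+
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V8SI, V8SF. 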
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SF, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_l(__m256i _1, __m256i _2) { + return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftinth_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffinth_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffintl_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrph_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrpl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrmh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrml_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrneh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrnel_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrne_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. 
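*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx): the ftint
+   suffixes select the rounding mode -- rne (nearest even), rz (toward
+   zero), rp (toward +inf), rm (toward -inf); a truncating conversion of
+   eight f32 lanes:  */
+static __inline __m256i demo_f32_to_i32_trunc(__m256 v) {
+  return __lasx_xvftintrz_w_s(v);
+}
+
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V4DF, V4DF. 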
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrne_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrz_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrz_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrp_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrp_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrm_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrm_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); +} + +/* Assembly instruction format: xd, rj, si12. */ +/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ +#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) + +/* Assembly instruction format: xd, rj, si12. */ +/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI. */ +#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) + +/* Assembly instruction format: xd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V32QI, CVPOINTER, SI, UQI. */ +#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: xd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V16HI, CVPOINTER, SI, UQI. */ +#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: xd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V8SI, CVPOINTER, SI, UQI. */ +#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: xd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V4DI, CVPOINTER, SI, UQI. 
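*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx and valid,
+   32-byte-readable source memory): xvld takes a constant si12 byte
+   offset, and xvstelm_* stores one selected lane (constant index).  */
+static __inline void demo_copy_lane0(const int *src, int *dst) {
+  __m256i v = __lasx_xvld(src, 0); /* 32-byte load from src + 0 */
+  __lasx_xvstelm_w(v, dst, 0, 0);  /* store lane 0 to dst + 0 */
+}
+
+/* Assembly instruction format: xd, rj, si8, idx. */
+/* Data types in instruction templates: VOID, V4DI, CVPOINTER, SI, UQI. 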
*/ +#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, UQI. */ +#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui2. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, UQI. */ +#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui2. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvorn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, i13. */ +/* Data types in instruction templates: V4DI, HI. 
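*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx): xvorn_v is
+   a bitwise OR with the complement of the second operand.  */
+static __inline __m256i demo_orn(__m256i a, __m256i b) {
+  return __lasx_xvorn_v(a, b); /* a | ~b */
+}
+
+/* Assembly instruction format: xd, i13. */
+/* Data types in instruction templates: V4DI, HI. 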
*/ +#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) + +/* Assembly instruction format: xd, rj, rk. */ +/* Data types in instruction templates: V32QI, CVPOINTER, DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvldx(void const *_1, long int _2) { + return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); +} + +/* Assembly instruction format: xd, rj, rk. */ +/* Data types in instruction templates: VOID, V32QI, CVPOINTER, DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lasx_xvstx(__m256i _1, void *_2, long int _3) { + return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); +} + +/* Assembly instruction format: xd, rj, ui3. */ +/* Data types in instruction templates: V8SI, V8SI, SI, UQI. */ +#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) + +/* Assembly instruction format: xd, rj, ui2. */ +/* Data types in instruction templates: V4DI, V4DI, DI, UQI. */ +#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_q(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V16HI. 
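*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx): insert a
+   scalar into lane 0 (constant index), then replicate lane 0 across the
+   register; __lasx_xvreplgr2vr_w(x) would do the same in one step.  */
+static __inline __m256i demo_broadcast_w(int x) {
+  __m256i v = __lasx_xvinsgr2vr_w(__lasx_xvldi(0), x, 0);
+  return __lasx_xvreplve0_w(v);
+}
+
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V8SI, V16HI. 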
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); +} + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui8. */ +/* Data types in instruction templates: V4DI, V4DI, USI. 
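*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx, and that
+   vext2xv extends from the lowest source lanes, per its naming): widen
+   the low eight signed bytes to eight 32-bit lanes.  */
+static __inline __m256i demo_widen_b_to_w(__m256i v) {
+  return __lasx_vext2xv_w_b(v);
+}
+
+/* Assembly instruction format: xd, xj, ui8. */
+/* Data types in instruction templates: V4DI, V4DI, USI. 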
*/ +#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvperm_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, rj, si12. */ +/* Data types in instruction templates: V32QI, CVPOINTER, SI. */ +#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) + +/* Assembly instruction format: xd, rj, si11. */ +/* Data types in instruction templates: V16HI, CVPOINTER, SI. */ +#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) + +/* Assembly instruction format: xd, rj, si10. */ +/* Data types in instruction templates: V8SI, CVPOINTER, SI. */ +#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) + +/* Assembly instruction format: xd, rj, si9. */ +/* Data types in instruction templates: V4DI, CVPOINTER, SI. */ +#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) + +/* Assembly instruction format: rd, xj, ui3. */ +/* Data types in instruction templates: SI, V8SI, UQI. */ +#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: rd, xj, ui3. */ +/* Data types in instruction templates: USI, V8SI, UQI. */ +#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) + +/* Assembly instruction format: rd, xj, ui2. */ +/* Data types in instruction templates: DI, V4DI, UQI. */ +#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: rd, xj, ui2. */ +/* Data types in instruction templates: UDI, V4DI, UQI. */ +#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ + ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. 
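*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx): lane reads
+   into general registers need constant indices (ui2 for 64-bit lanes,
+   ui3 for 32-bit lanes).  */
+static __inline long demo_sum_two_d_lanes(__m256i v) {
+  return __lasx_xvpickve2gr_d(v, 0) + __lasx_xvpickve2gr_d(v, 1);
+}
+
+/* Assembly instruction format: xd, xj, xk. */
+/* Data types in instruction templates: V16HI, V32QI, V32QI. 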
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V32QI, V32QI. 
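*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx): the wev/wod
+   pairs widen even- and odd-indexed lanes respectively, so the eight
+   full 64-bit products of two i32 vectors come back in two halves.  */
+static __inline void demo_full_products_w(__m256i a, __m256i b,
+                                          __m256i *even, __m256i *odd) {
+  *even = __lasx_xvmulwev_d_w(a, b); /* products of lanes 0, 2, 4, 6 */
+  *odd = __lasx_xvmulwod_d_w(a, b);  /* products of lanes 1, 3, 5, 7 */
+}
+
+/* Assembly instruction format: xd, xj, xk. */
+/* Data types in instruction templates: V16HI, V32QI, V32QI. 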
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, V32QI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, UV16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, UV32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 
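*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx): xvhaddw_q_d
+   adds each odd 64-bit lane of the first operand to the even lane of
+   the second, widening to 128 bits, so passing the same vector twice
+   sums the two halves of each 128-bit group.  */
+static __inline __m256i demo_pairwise_sum_d(__m256i v) {
+  return __lasx_xvhaddw_q_d(v, v);
+}
+
+/* Assembly instruction format: xd, xj, xk. */
+/* Data types in instruction templates: UV4DI, UV4DI, UV4DI. 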
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, + (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, + (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, + (v4u64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, + (v8u32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, + (v16u16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, + (v32u8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V4DI, V4DI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, + (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, + (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, + (v4u64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV4DI, UV4DI, UV8SI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, + (v8u32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV8SI, UV8SI, UV16HI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, + (v16u16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: UV16HI, UV16HI, UV32QI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, + (v32u8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, + (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, + (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. 
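*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx): a 16-bit
+   dot product folded into 32-bit accumulators, even lanes first, then
+   odd lanes.  */
+static __inline __m256i demo_dot_h(__m256i acc, __m256i a, __m256i b) {
+  acc = __lasx_xvmaddwev_w_h(acc, a, b);  /* acc += a.even * b.even */
+  return __lasx_xvmaddwod_w_h(acc, a, b); /* acc += a.odd * b.odd */
+}
+
+/* Assembly instruction format: xd, xj, xk. */
+/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. 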
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, + (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, + (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, UV4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, + (v4i64)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, UV8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, + (v8i32)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, UV16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, + (v16i16)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, UV32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, + (v32i8)_3); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. 
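*/
+
+/* Illustrative sketch (hypothetical helper; assumes -mlasx and that
+   rotate counts are taken modulo the lane width, as is usual): a left
+   rotate expressed with the right-rotate instruction.  */
+static __inline __m256i demo_rotl_w(__m256i v, __m256i counts) {
+  return __lasx_xvrotr_w(v, __lasx_xvsub_w(__lasx_xvreplgr2vr_w(32),
+                                           counts));
+}
+
+/* Assembly instruction format: xd, xj, xk. */
+/* Data types in instruction templates: V4DI, V4DI, V4DI. 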
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, UV4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskgez_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V32QI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsknz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V16HI, V32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SI, V16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. 
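
The xvexth_* family widens the elements in the high half of each 128-bit lane; pairing it with a low-half widen converts a full register. A hedged sketch, assuming the usual per-lane split and that __lasx_xvsllwil_d_w (declared elsewhere in this header) with a zero shift widens the low words:

    // Split eight signed 32-bit elements into two vectors of four
    // signed 64-bit elements (low and high half of each 128-bit lane).
    static inline void widen_s32(__m256i x, __m256i *lo, __m256i *hi) {
      *lo = __lasx_xvsllwil_d_w(x, 0);
      *hi = __lasx_xvexth_d_w(x);
    }
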
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV16HI, UV32QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV8SI, UV16HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, UV8SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: UV4DI, UV4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); +} + +/* Assembly instruction format: xd, xj, ui3. */ +/* Data types in instruction templates: V32QI, V32QI, UQI. */ +#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V16HI, V16HI, UQI. */ +#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V8SI, V8SI, UQI. */ +#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V4DI, V4DI, UQI. */ +#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DI, V4DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. 
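
The immediate forms are macros rather than inline functions because the rotate amount is encoded in the instruction: the second argument must be an integer constant expression, and clang diagnoses non-constant or out-of-range values. A variable amount needs the register form instead, e.g. broadcast with __lasx_xvreplgr2vr_w (declared elsewhere in this header):

    static inline __m256i rot7_then_n(__m256i v, int n) {
      __m256i r = __lasx_xvrotri_w(v, 7);                    // constant amount
      return __lasx_xvrotr_w(r, __lasx_xvreplgr2vr_w(n));    // variable amount
    }
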
*/ +#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ +#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ +#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ +#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ +#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. 
*/ +#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ +#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ +#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ +#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ +#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. 
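
The *ni forms shift each element of both source operands right by the immediate and pack the narrowed results into a single vector of the next narrower type; the ss* variants saturate to the destination range instead of truncating. For example, scaling 16-bit data down to saturated unsigned bytes:

    static inline __m256i pack_u8(__m256i a, __m256i b) {
      return __lasx_xvssrlni_bu_h(a, b, 7);   // >>7, clamp to 0..255, pack
    }
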
*/ +#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. */ +#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ +#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ +#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ +#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ +#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: V32QI, V32QI, V32QI, USI. */ +#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: V16HI, V16HI, V16HI, USI. */ +#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: V8SI, V8SI, V8SI, USI. 
*/ +#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: V4DI, V4DI, V4DI, USI. */ +#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui4. */ +/* Data types in instruction templates: UV32QI, UV32QI, V32QI, USI. */ +#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui5. */ +/* Data types in instruction templates: UV16HI, UV16HI, V16HI, USI. */ +#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui6. */ +/* Data types in instruction templates: UV8SI, UV8SI, V8SI, USI. */ +#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +/* Assembly instruction format: xd, xj, ui7. */ +/* Data types in instruction templates: UV4DI, UV4DI, V4DI, USI. */ +#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV32QI. */ +#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV4DI. */ +#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV16HI. */ +#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV32QI. */ +#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV8SI. */ +#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV32QI. */ +#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV4DI. */ +#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV16HI. */ +#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV32QI. */ +#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) + +/* Assembly instruction format: cd, xj. */ +/* Data types in instruction templates: SI, UV8SI. */ +#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. 
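
The xbz/xbnz macros collapse a vector test into a scalar branch condition: the _v forms test the register as a whole (xbz_v: every bit clear, xbnz_v: some bit set), while the element forms test per element (xbz_b: some byte element is zero, xbnz_b: all byte elements nonzero). Typical use is an early exit:

    if (__lasx_xbz_v(mask))   // whole 256-bit mask is zero
      return;
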
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. 
*/ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V4DI, V4DF, V4DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); +} + +/* Assembly instruction format: xd, xj, xk. */ +/* Data types in instruction templates: V8SI, V8SF, V8SF. 
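
Each xvfcmp_* comparison sets the destination elements to all-ones where the predicate holds and all-zeros elsewhere (the c* predicates are quiet, the s* ones also signal on quiet NaN operands), so the result can feed a bitwise select directly. A sketch assuming __lasx_xvbitsel_v (declared elsewhere in this header), which takes bits from its second operand where the third is set:

    // Element-wise minimum of doubles: pick a where a < b, else b.
    static inline __m256d dmin(__m256d a, __m256d b) {
      __m256i lt = __lasx_xvfcmp_clt_d(a, b);
      return (__m256d)__lasx_xvbitsel_v((__m256i)b, (__m256i)a, lt);
    }
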
*/
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2);
+}
+
+/* Assembly instruction format: xd, xj, xk. */
+/* Data types in instruction templates: V8SI, V8SF, V8SF. */
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) {
+  return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2);
+}
+
+/* Assembly instruction format: xd, xj, ui2. */
+/* Data types in instruction templates: V4DF, V4DF, UQI. */
+#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \
+  ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2)))
+
+/* Assembly instruction format: xd, xj, ui3. */
+/* Data types in instruction templates: V8SF, V8SF, UQI. */
+#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \
+  ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2)))
+
+/* Assembly instruction format: xd, si10. */
+/* Data types in instruction templates: V32QI, HI. */
+#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1)))
+
+/* Assembly instruction format: xd, si10. */
+/* Data types in instruction templates: V4DI, HI. */
+#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1)))
+
+/* Assembly instruction format: xd, si10. */
+/* Data types in instruction templates: V16HI, HI. */
+#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1)))
+
+/* Assembly instruction format: xd, si10. */
+/* Data types in instruction templates: V8SI, HI. */
+#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
+
+#endif /* defined(__loongarch_asx). */
+#endif /* _GCC_LOONGSON_ASXINTRIN_H. */
diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h
new file mode 100644
index 000000000000..48344c20933f
--- /dev/null
+++ b/clang/lib/Headers/lsxintrin.h
@@ -0,0 +1,5162 @@
+//===----------- lsxintrin.h - LoongArch LSX intrinsics ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the LoongArch LSX intrinsics.
+// +//===----------------------------------------------------------------------===// + +#ifndef _GCC_LOONGSON_SXINTRIN_H +#define _GCC_LOONGSON_SXINTRIN_H 1 + +#if defined(__loongarch_sx) +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); +typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); +typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. 
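
The register forms shift each element by the amount held in the corresponding element of the second operand (taken modulo the element width), while the vslli_* macros encode one constant amount for all elements:

    static inline __m128i shl_each(__m128i v, __m128i amt) {
      return __lsx_vsll_w(v, amt);   // per-element variable shift
    }
    static inline __m128i shl_by_4(__m128i v) {
      return __lsx_vslli_w(v, 4);    // constant shift of every element
    }
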
*/ +#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. 
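
vsra* propagates the sign bit while vsrl* fills with zeros, and the *r* variants round using the last bit shifted out. A quick contrast, assuming __lsx_vrepli_w (defined later in this header) to broadcast a constant:

    __m128i x = __lsx_vrepli_w(-8);
    __m128i a = __lsx_vsrai_w(x, 2);   // arithmetic: every element -2
    __m128i l = __lsx_vsrli_w(x, 2);   // logical: every element 0x3ffffffe
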
*/ +#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. 
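
vbitclr/vbitset/vbitrev clear, set, or flip one bit per element, the bit index coming from the matching element of the second operand (modulo the element width); the *i forms above take the index as an immediate:

    static inline __m128i toggle_lsb_set_msb(__m128i v) {
      v = __lsx_vbitrevi_b(v, 0);      // flip bit 0 of every byte
      return __lsx_vbitseti_b(v, 7);   // then force bit 7 on
    }
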
*/ +#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. 
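+
+   Illustrative usage sketch:
+     __m128i r = __lsx_vmax_h(a, b);  // per-lane signed maximum over eight int16 lanes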
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. */ +#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. */ +#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. 
*/ +#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. */ +#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. */ +#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
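+
+   Illustrative usage sketch:
+     __m128i r = __lsx_vmin_du(a, b);  // per-lane minimum over two uint64 lanes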
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vseq_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. */ +#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. */ +#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. */ +#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. 
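+
+   Illustrative note: the vector comparisons produce a per-lane mask, all
+   ones where the predicate holds and zero otherwise, e.g.
+     __m128i m = __lsx_vslt_b(a, b);  // m[i] = (a[i] < b[i]) ? 0xff : 0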
*/ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. */ +#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vslt_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, UV16QI, UQI. */ +#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, UV8HI, UQI. 
*/ +#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, UV4SI, UQI. */ +#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, UV2DI, UQI. */ +#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V16QI, V16QI, QI. */ +#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V8HI, V8HI, QI. */ +#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V4SI, V4SI, QI. */ +#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, si5. */ +/* Data types in instruction templates: V2DI, V2DI, QI. */ +#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV4SI, UV4SI. 
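+
+   Illustrative usage sketch (unsigned word compare):
+     __m128i m = __lsx_vsle_wu(a, b);  // m[i] = ((uint32_t)a[i] <= (uint32_t)b[i]) ? -1 : 0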
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsle_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, UV16QI, UQI. */ +#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, UV8HI, UQI. */ +#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, UV4SI, UQI. */ +#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, UV2DI, UQI. */ +#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV8HI, UV8HI, UQI. */ +#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV4SI, UV4SI, UQI. */ +#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV2DI, UV2DI, UQI. */ +#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
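+
+   Illustrative note: vadda adds absolute values, e.g.
+     __m128i r = __lsx_vadda_h(a, b);  // r[i] = |a[i]| + |b[i]| per int16 lane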
*/ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadda_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsadd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. 
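+
+   Illustrative note: vavg is a truncating average; the vavgr family below
+   rounds before shifting, e.g.
+     __m128i r = __lsx_vavg_b(a, b);  // r[i] = (a[i] + b[i]) >> 1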
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavg_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vavgr_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. 
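+
+   Illustrative note: the unsigned saturating subtract clamps at zero, e.g.
+     __m128i r = __lsx_vssub_wu(a, b);  // r[i] = a[i] > b[i] ? a[i] - b[i] : 0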
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssub_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vabsd_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. 
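+
+   Illustrative note: vmul keeps the low half of each lane product, e.g.
+     __m128i r = __lsx_vmul_h(a, b);  // r[i] = (int16_t)(a[i] * b[i])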
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmul_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. 
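+
+   Illustrative note: for vmadd/vmsub the first operand acts as the
+   accumulator, e.g.
+     acc = __lsx_vmsub_d(acc, a, b);  // acc[i] -= a[i] * b[i]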
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vdiv_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. 
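+
+   Illustrative note (semantics per the LoongArch LSX reference): vhaddw
+   widens and adds odd-indexed lanes of the first source to even-indexed
+   lanes of the second, e.g.
+     __m128i r = __lsx_vhaddw_d_w(a, b);  // r[i] = (int64_t)a[2i+1] + (int64_t)b[2i]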
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
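+
+   Illustrative note: vmod yields the remainder of the corresponding
+   per-lane division, e.g.
+     __m128i r = __lsx_vmod_h(a, b);  // r[i] = a[i] % b[i] per int16 lane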
*/ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmod_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, rk. */ +/* Data types in instruction templates: V16QI, V16QI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_b(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); +} + +/* Assembly instruction format: vd, vj, rk. */ +/* Data types in instruction templates: V8HI, V8HI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_h(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); +} + +/* Assembly instruction format: vd, vj, rk. */ +/* Data types in instruction templates: V4SI, V4SI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_w(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); +} + +/* Assembly instruction format: vd, vj, rk. */ +/* Data types in instruction templates: V2DI, V2DI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplve_d(__m128i _1, int _2) { + return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); +} + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. 
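+
+   Illustrative usage sketch: the lane index is a ui4 constant (0..15 for
+   byte lanes), e.g.
+     __m128i r = __lsx_vreplvei_b(a, 3);  // broadcast byte lane 3 of a to all 16 lanes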
*/ +#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui2. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui1. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ + ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpickod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. 
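+
+   Illustrative note: vilvh interleaves the upper halves of the two sources
+   lane by lane; the vilvl family below does the same for the lower halves,
+   e.g.
+     __m128i r = __lsx_vilvh_b(a, b);  // alternate the upper 8 bytes of the two sources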
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvh_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vilvl_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. 
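+
+   Illustrative note: vpackev/vpackod gather the even-/odd-indexed lanes of
+   both sources; for the _d form this pairs lane 0 (or lane 1) of each, e.g.
+     __m128i r = __lsx_vpackev_d(a, b);  // combine lane 0 of a with lane 0 of b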
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpackod_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vand_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vor_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. 
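+
+   The interleave (vilv*) and pack (vpack*) intrinsics above are the usual
+   zip/unzip building blocks.  A hedged sketch (operand order per the LSX
+   vilvl.b definition, where the second source supplies the even lanes;
+   the helper name is hypothetical):
+
+     static inline __m128i zip_lo_bytes(__m128i a, __m128i b) {
+       return __lsx_vilvl_b(a, b);  // interleave the low 8 bytes of each
+     }
+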
*/ +#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vnor_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vxor_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UQI. */ +#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI, USI. */ +#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V16QI, V16QI, USI. */ +#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V8HI, V8HI, USI. */ +#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V4SI, V4SI, USI. */ +#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ + ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, rj. */ +/* Data types in instruction templates: V16QI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_b(int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); +} + +/* Assembly instruction format: vd, rj. */ +/* Data types in instruction templates: V8HI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_h(int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); +} + +/* Assembly instruction format: vd, rj. */ +/* Data types in instruction templates: V4SI, SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_w(int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); +} + +/* Assembly instruction format: vd, rj. */ +/* Data types in instruction templates: V2DI, DI. 
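+
+   A sketch combining the broadcast and bit-select intrinsics above
+   (vbitsel.v takes bits from the second source where the mask bit is 1,
+   else from the first; names below are hypothetical):
+
+     static inline __m128i select_demo(__m128i a, __m128i b, int m) {
+       __m128i mask = __lsx_vreplgr2vr_w(m);  // broadcast scalar mask
+       return __lsx_vbitsel_v(a, b, mask);    // per-bit select between a/b
+     }
+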
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vreplgr2vr_d(long int _1) { + return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_b(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_h(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_w(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vpcnt_d(__m128i _1) { + return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_b(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_h(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_w(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclo_d(__m128i _1) { + return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_h(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_w(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. 
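+
+   The per-element bit-count intrinsics above lend themselves to short
+   helpers such as (hypothetical names):
+
+     static inline void bit_counts(__m128i v, __m128i *pc, __m128i *lz) {
+       *pc = __lsx_vpcnt_b(v);  // population count of each byte
+       *lz = __lsx_vclz_w(v);   // leading zeros of each 32-bit word
+     }
+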
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vclz_d(__m128i _1) { + return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); +} + +/* Assembly instruction format: rd, vj, ui4. */ +/* Data types in instruction templates: SI, V16QI, UQI. */ +#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui3. */ +/* Data types in instruction templates: SI, V8HI, UQI. */ +#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui2. */ +/* Data types in instruction templates: SI, V4SI, UQI. */ +#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ + ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui1. */ +/* Data types in instruction templates: DI, V2DI, UQI. */ +#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ + ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui4. */ +/* Data types in instruction templates: USI, V16QI, UQI. */ +#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui3. */ +/* Data types in instruction templates: USI, V8HI, UQI. */ +#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui2. */ +/* Data types in instruction templates: USI, V4SI, UQI. */ +#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ + ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) + +/* Assembly instruction format: rd, vj, ui1. */ +/* Data types in instruction templates: UDI, V2DI, UQI. */ +#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ + ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, rj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, SI, UQI. */ +#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) + +/* Assembly instruction format: vd, rj, ui3. */ +/* Data types in instruction templates: V8HI, V8HI, SI, UQI. */ +#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) + +/* Assembly instruction format: vd, rj, ui2. */ +/* Data types in instruction templates: V4SI, V4SI, SI, UQI. */ +#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) + +/* Assembly instruction format: vd, rj, ui1. */ +/* Data types in instruction templates: V2DI, V2DI, DI, UQI. */ +#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ + ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfadd_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. 
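+
+   Lane moves between vectors and general registers, as defined above,
+   require constant lane indices.  A sketch (hypothetical helper):
+
+     static inline __m128i lane_demo(__m128i v, int x) {
+       int lane3 = __lsx_vpickve2gr_w(v, 3);       // sign-extended lane 3
+       return __lsx_vinsgr2vr_w(v, x + lane3, 0);  // write lane 0
+     }
+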
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfadd_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfsub_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfsub_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmul_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmul_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfdiv_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfdiv_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { + return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmin_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmin_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. 
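+
+   The fused vfmadd.s form appears further below; with only the arithmetic
+   intrinsics above, a*b + c takes two roundings (sketch, hypothetical
+   name):
+
+     static inline __m128 mul_add(__m128 a, __m128 b, __m128 c) {
+       return __lsx_vfadd_s(__lsx_vfmul_s(a, b), c);  // two roundings
+     }
+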
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmina_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmina_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmax_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmax_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmaxa_s(__m128 _1, __m128 _2) { + return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmaxa_d(__m128d _1, __m128d _2) { + return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfclass_s(__m128 _1) { + return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfclass_d(__m128d _1) { + return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfsqrt_s(__m128 _1) { + return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfsqrt_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrecip_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrecip_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. 
*/ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrint_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrint_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrsqrt_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrsqrt_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vflogb_s(__m128 _1) { + return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vflogb_d(__m128d _1) { + return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvth_s_h(__m128i _1) { + return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfcvth_d_s(__m128 _1) { + return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfcvtl_s_h(__m128i _1) { + return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfcvtl_d_s(__m128 _1) { + return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV4SI, V4SF. 
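+
+   One identity worth noting for the reciprocal/square-root intrinsics
+   above: x * (1/sqrt(x)) = sqrt(x), so vfrsqrt plus a multiply can stand
+   in for vfsqrt, modulo rounding and the x = 0 corner case (sketch,
+   hypothetical name):
+
+     static inline __m128 sqrt_via_rsqrt(__m128 x) {
+       return __lsx_vfmul_s(x, __lsx_vfrsqrt_s(x));
+     }
+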
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_wu_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_lu_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV4SI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_wu_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_lu_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_w(__m128i _1) { + return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffint_d_l(__m128i _1) { + return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_wu(__m128i _1) { + return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffint_d_lu(__m128i _1) { + return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vandn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_b(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. 
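+
+   Round-trip conversion with the intrinsics above, rounding toward zero
+   on the way to integers (sketch, hypothetical name):
+
+     static inline __m128 trunc_demo(__m128 x) {
+       __m128i i = __lsx_vftintrz_w_s(x);  // C-style truncation per lane
+       return __lsx_vffint_s_w(i);         // back to single precision
+     }
+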
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_h(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_w(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vneg_d(__m128i _1) { + return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmuh_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V8HI, V16QI, UQI. */ +#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. 
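+
+   vmuh keeps the high half of the widened product, which is the core of
+   fixed-point multiplies; a true Q15 multiply would need one extra
+   doubling shift on top of this (sketch, hypothetical name):
+
+     static inline __m128i mul_hi16(__m128i a, __m128i b) {
+       return __lsx_vmuh_h(a, b);  // high 16 bits of each 16x16 product
+     }
+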
*/ +/* Data types in instruction templates: V4SI, V8HI, UQI. */ +#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V2DI, V4SI, UQI. */ +#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: UV8HI, UV16QI, UQI. */ +#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV4SI, UV8HI, UQI. */ +#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV2DI, UV4SI, UQI. */ +#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsran_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssran_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
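+
+   The narrowing shifts above pair with the widening vsllwil forms: shift,
+   saturate, and the narrowed result lands in the low 64 bits of the
+   destination (sketch, hypothetical name; sh holds per-element shift
+   amounts):
+
+     static inline __m128i narrow_sat(__m128i v, __m128i sh) {
+       return __lsx_vssran_b_h(v, sh);  // halfwords -> bytes, signed saturate
+     }
+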
*/ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV16QI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: UV4SI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, UQI. */ +#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, UQI. */ +#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. 
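+
+   vextrins.w splices a single word lane; as a reading of the ISA encoding
+   (not a guarantee), the ui8 immediate packs the destination index in its
+   high nibble and the source index in its low nibble (sketch, hypothetical
+   name):
+
+     static inline __m128i splice_demo(__m128i dst, __m128i src) {
+       return __lsx_vextrins_w(dst, src, 0x30);  // src lane 0 -> dst lane 3
+     }
+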
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_h(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_w(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_d(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. 
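+
+   The four-operand forms above are single-rounding fused multiply-adds,
+   computing _1 * _2 + _3 per lane (sketch, hypothetical name):
+
+     static inline __m128 fma_demo(__m128 a, __m128 b, __m128 c) {
+       return __lsx_vfmadd_s(a, b, c);  // one rounding, unlike mul then add
+     }
+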
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V4SF, V4SF, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V2DF, V2DF, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. 
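+
+   Sign convention worth flagging: following the scalar FNMADD/FNMSUB
+   definitions, vfnmadd negates the whole fused result, i.e. -(a*b + c),
+   not (-a)*b + c (sketch, hypothetical name):
+
+     static inline __m128 neg_fma_demo(__m128 a, __m128 b, __m128 c) {
+       return __lsx_vfnmadd_s(a, b, c);  // -(a*b + c) in one rounding
+     }
+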
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SF, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_l(__m128i _1, __m128i _2) { + return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftinth_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffinth_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffintl_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrpl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrph_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrml_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrmh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrnel_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrneh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrne_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrne_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrz_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrz_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrp_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrp_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrm_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrm_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); +} + +/* Assembly instruction format: vd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI, UQI. */ +#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: vd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V8HI, CVPOINTER, SI, UQI. */ +#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: vd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V4SI, CVPOINTER, SI, UQI. */ +#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: vd, rj, si8, idx. */ +/* Data types in instruction templates: VOID, V2DI, CVPOINTER, SI, UQI. */ +#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. 
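+   Editorial note (not part of the upstream header): in the widening
+   vaddw/vsubw/vmulw families, ev takes the even-indexed source lanes and
+   od the odd-indexed ones, widening each result to the next lane size.
+   Sketch with hypothetical __m128i a, b holding eight int16 lanes each:
+     __m128i odd = __lsx_vaddwod_w_h(a, b);  // four int32 sums of odd lanes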
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. 
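+   Editorial note (not part of the upstream header): a trailing u marks
+   unsigned sources, and mixed forms such as _d_wu_w pair an unsigned
+   first operand with a signed second one. Sketch with hypothetical
+   __m128i a, b:
+     __m128i d = __lsx_vsubwev_w_hu(a, b);  // differences of even uint16
+                                            // lanes, widened to int32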
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, UV8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V8HI, UV16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, UV2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV4SI, UV4SI. 
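+   Editorial note (not part of the upstream header): in the vmaddw*
+   family the first argument is the accumulator (vd); the widened lane
+   products of the other two operands are added into it. Sketch with
+   hypothetical __m128i acc, a, b:
+     acc = __lsx_vmaddwev_d_w(acc, a, b);  // acc += products of even lanes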
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV4SI, UV4SI, UV8HI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV8HI, UV8HI, UV16QI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, + (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, + (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, + (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, UV4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, + (v4i32)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, UV8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, + (v8i16)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, UV16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, + (v16i8)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V2DI, V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: UV2DI, UV2DI, UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, + (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, UV2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, + (v2i64)_3); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI. 
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, rj, si12. */ +/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ +#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) + +/* Assembly instruction format: vd, rj, si11. */ +/* Data types in instruction templates: V8HI, CVPOINTER, SI. */ +#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) + +/* Assembly instruction format: vd, rj, si10. */ +/* Data types in instruction templates: V4SI, CVPOINTER, SI. */ +#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) + +/* Assembly instruction format: vd, rj, si9. */ +/* Data types in instruction templates: V2DI, CVPOINTER, SI. */ +#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskgez_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsknz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V8HI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_h_b(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_w_h(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_d_w(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV8HI, UV16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_hu_bu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV4SI, UV8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_wu_hu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, UV4SI. 
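+   Editorial note (not part of the upstream header): vexth_* widens the
+   high half of the source lanes (vextl_* the low half); plain forms
+   sign-extend, u forms zero-extend. Sketch with a hypothetical __m128i v
+   of sixteen int8 lanes:
+     __m128i hi = __lsx_vexth_h_b(v);  // lanes 8-15 sign-extended to int16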
*/ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_du_wu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); +} + +/* Assembly instruction format: vd, vj, ui3. */ +/* Data types in instruction templates: V16QI, V16QI, UQI. */ +#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V8HI, V8HI, UQI. */ +#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V4SI, V4SI, UQI. */ +#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V2DI, V2DI, UQI. */ +#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); +} + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. 
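+   Editorial note (not part of the upstream header): the *ni forms shift
+   the lanes of both vector operands right by the immediate and narrow the
+   two results into one vector; the shift amount is an instruction
+   immediate, which is why these are exposed as macros and the last
+   argument must be an integer constant expression. Sketch with
+   hypothetical __m128i a, b:
+     __m128i r = __lsx_vsrlni_w_d(a, b, 7);  // 7 must be a constant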
*/ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ +#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ +#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ +#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ +#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. 
*/ +#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ +#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ +#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ +#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. 
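+   Editorial note (not part of the upstream header): in these narrowing
+   shifts l/a select a logical or arithmetic shift, a following r adds
+   rounding, and the leading ss saturates the narrowed result (the *u
+   forms to the unsigned range). Sketch with hypothetical __m128i a, b:
+     __m128i r = __lsx_vssrani_h_w(a, b, 5);  // saturating narrow to int16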
*/ +#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ +#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ +#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. */ +#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ +#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, USI. */ +#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: V8HI, V8HI, V8HI, USI. */ +#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: V2DI, V2DI, V2DI, USI. */ +#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui4. */ +/* Data types in instruction templates: UV16QI, UV16QI, V16QI, USI. */ +#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui5. */ +/* Data types in instruction templates: UV8HI, UV8HI, V8HI, USI. */ +#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui6. */ +/* Data types in instruction templates: UV4SI, UV4SI, V4SI, USI. 
*/ +#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui7. */ +/* Data types in instruction templates: UV2DI, UV2DI, V2DI, USI. */ +#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +/* Assembly instruction format: vd, vj, ui8. */ +/* Data types in instruction templates: V4SI, V4SI, V4SI, USI. */ +#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) + +/* Assembly instruction format: vd, rj, si12. */ +/* Data types in instruction templates: V16QI, CVPOINTER, SI. */ +#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) + +/* Assembly instruction format: vd, rj, si12. */ +/* Data types in instruction templates: VOID, V16QI, CVPOINTER, SI. */ +#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V8HI, V8HI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V8HI, V4SI, V4SI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V2DI, V2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vorn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); +} + +/* Assembly instruction format: vd, i13. */ +/* Data types in instruction templates: V2DI, HI. 
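+   Editorial note (not part of the upstream header; the immediate
+   encoding is stated as an assumption): vldi materializes a constant
+   vector from its 13-bit immediate, which selects both the replication
+   granularity and the value and must be a constant expression, e.g.:
+     __m128i z = __lsx_vldi(0);  // replicated zeros, an all-zero vector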
*/ +#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) + +/* Assembly instruction format: vd, vj, vk, va. */ +/* Data types in instruction templates: V16QI, V16QI, V16QI, V16QI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +/* Assembly instruction format: vd, rj, rk. */ +/* Data types in instruction templates: V16QI, CVPOINTER, DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vldx(void const *_1, long int _2) { + return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); +} + +/* Assembly instruction format: vd, rj, rk. */ +/* Data types in instruction templates: VOID, V16QI, CVPOINTER, DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lsx_vstx(__m128i _1, void *_2, long int _3) { + return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: UV2DI, UV2DI. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); +} + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV16QI. */ +#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV2DI. */ +#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV8HI. */ +#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV16QI. */ +#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV4SI. */ +#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV16QI. */ +#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV2DI. */ +#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV8HI. */ +#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV16QI. */ +#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) + +/* Assembly instruction format: cd, vj. */ +/* Data types in instruction templates: SI, UV4SI. */ +#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
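+   Editorial note (not part of the upstream header): each vfcmp form
+   yields a per-lane mask, all ones where the predicate holds and all
+   zeros elsewhere; c* predicates are quiet while s* signal on NaN
+   operands, a u in the predicate also matches unordered inputs, and caf
+   is "always false". Sketch with hypothetical __m128 x, y:
+     __m128i lt = __lsx_vfcmp_clt_s(x, y);  // lane mask for x < y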
*/ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. 
*/ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V2DI, V2DF, V2DF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, vj, vk. */ +/* Data types in instruction templates: V4SI, V4SF, V4SF. */ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: vd, si10. */ +/* Data types in instruction templates: V16QI, HI. */ +#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) + +/* Assembly instruction format: vd, si10. */ +/* Data types in instruction templates: V2DI, HI. 
*/ +#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) + +/* Assembly instruction format: vd, si10. */ +/* Data types in instruction templates: V8HI, HI. */ +#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) + +/* Assembly instruction format: vd, si10. */ +/* Data types in instruction templates: V4SI, HI. */ +#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) + +#endif /* defined(__loongarch_sx) */ +#endif /* _GCC_LOONGSON_SXINTRIN_H */ diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index dae51d0690e6..5504f99374d3 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1981,6 +1981,9 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CheckRISCVBuiltinFunctionCall(TI, BuiltinID, TheCall); + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + return CheckLoongArchBuiltinFunctionCall(TI, BuiltinID, TheCall); } } @@ -4445,6 +4448,559 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, return false; } +// CheckLoongArchBuiltinFunctionCall - Checks the constant value passed to the +// intrinsic is correct. +// +// FIXME: The size tests here should instead be tablegen'd along with the +// definitions from include/clang/Basic/BuiltinsLoongArch.def. +// FIXME: GCC is strict on signedness for some of these intrinsics, we should +// be too. +bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + unsigned BuiltinID, + CallExpr *TheCall) { + unsigned i = 0, l = 0, u = 0, m = 0; + switch (BuiltinID) { + default: return false; + // LSX/LASX intrinsics. + // These intrinsics take an unsigned 3 bit immediate. + case LoongArch::BI__builtin_lsx_vbitclri_b: + case LoongArch::BI__builtin_lasx_xvbitclri_b: + case LoongArch::BI__builtin_lsx_vbitrevi_b: + case LoongArch::BI__builtin_lasx_xvbitrevi_b: + case LoongArch::BI__builtin_lsx_vbitseti_b: + case LoongArch::BI__builtin_lasx_xvbitseti_b: + case LoongArch::BI__builtin_lsx_vsat_b: + case LoongArch::BI__builtin_lsx_vsat_bu: + case LoongArch::BI__builtin_lasx_xvsat_b: + case LoongArch::BI__builtin_lasx_xvsat_bu: + case LoongArch::BI__builtin_lsx_vslli_b: + case LoongArch::BI__builtin_lasx_xvslli_b: + case LoongArch::BI__builtin_lsx_vsrai_b: + case LoongArch::BI__builtin_lasx_xvsrai_b: + case LoongArch::BI__builtin_lsx_vsrari_b: + case LoongArch::BI__builtin_lasx_xvsrari_b: + case LoongArch::BI__builtin_lsx_vsrli_b: + case LoongArch::BI__builtin_lasx_xvsrli_b: + case LoongArch::BI__builtin_lsx_vsllwil_h_b: + case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: + case LoongArch::BI__builtin_lasx_xvsllwil_h_b: + case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: + case LoongArch::BI__builtin_lsx_vrotri_b: + case LoongArch::BI__builtin_lasx_xvrotri_b: + case LoongArch::BI__builtin_lasx_xvsrlri_b: + case LoongArch::BI__builtin_lsx_vsrlri_b: + i = 1; + l = 0; + u = 7; + break; + // These intrinsics take an unsigned 4 bit immediate. 
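+  // (For instance, the .h variants below operate on 16-bit elements, so a
+  // shift amount or bit index must fit in [0, 15]: __builtin_lsx_vslli_h
+  // accepts an immediate of 15 but rejects 16 with a constant-range
+  // diagnostic.)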
+  case LoongArch::BI__builtin_lsx_vbitclri_h:
+  case LoongArch::BI__builtin_lasx_xvbitclri_h:
+  case LoongArch::BI__builtin_lsx_vbitrevi_h:
+  case LoongArch::BI__builtin_lasx_xvbitrevi_h:
+  case LoongArch::BI__builtin_lsx_vbitseti_h:
+  case LoongArch::BI__builtin_lasx_xvbitseti_h:
+  case LoongArch::BI__builtin_lsx_vsat_h:
+  case LoongArch::BI__builtin_lsx_vsat_hu:
+  case LoongArch::BI__builtin_lasx_xvsat_h:
+  case LoongArch::BI__builtin_lasx_xvsat_hu:
+  case LoongArch::BI__builtin_lsx_vslli_h:
+  case LoongArch::BI__builtin_lasx_xvslli_h:
+  case LoongArch::BI__builtin_lsx_vsrai_h:
+  case LoongArch::BI__builtin_lasx_xvsrai_h:
+  case LoongArch::BI__builtin_lsx_vsrari_h:
+  case LoongArch::BI__builtin_lasx_xvsrari_h:
+  case LoongArch::BI__builtin_lsx_vsrli_h:
+  case LoongArch::BI__builtin_lasx_xvsrli_h:
+  case LoongArch::BI__builtin_lsx_vsllwil_w_h:
+  case LoongArch::BI__builtin_lsx_vsllwil_wu_hu:
+  case LoongArch::BI__builtin_lasx_xvsllwil_w_h:
+  case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu:
+  case LoongArch::BI__builtin_lsx_vrotri_h:
+  case LoongArch::BI__builtin_lasx_xvrotri_h:
+  case LoongArch::BI__builtin_lasx_xvsrlri_h:
+  case LoongArch::BI__builtin_lsx_vsrlri_h:
+    i = 1;
+    l = 0;
+    u = 15;
+    break;
+  case LoongArch::BI__builtin_lsx_vssrarni_b_h:
+  case LoongArch::BI__builtin_lsx_vssrarni_bu_h:
+  case LoongArch::BI__builtin_lasx_xvssrarni_b_h:
+  case LoongArch::BI__builtin_lasx_xvssrarni_bu_h:
+  case LoongArch::BI__builtin_lsx_vssrani_b_h:
+  case LoongArch::BI__builtin_lsx_vssrani_bu_h:
+  case LoongArch::BI__builtin_lasx_xvssrani_b_h:
+  case LoongArch::BI__builtin_lasx_xvssrani_bu_h:
+  case LoongArch::BI__builtin_lsx_vsrarni_b_h:
+  case LoongArch::BI__builtin_lasx_xvsrarni_b_h:
+  case LoongArch::BI__builtin_lsx_vsrlni_b_h:
+  case LoongArch::BI__builtin_lasx_xvsrlni_b_h:
+  case LoongArch::BI__builtin_lasx_xvsrlrni_b_h:
+  case LoongArch::BI__builtin_lsx_vssrlni_b_h:
+  case LoongArch::BI__builtin_lsx_vssrlni_bu_h:
+  case LoongArch::BI__builtin_lasx_xvssrlni_b_h:
+  case LoongArch::BI__builtin_lasx_xvssrlni_bu_h:
+  case LoongArch::BI__builtin_lsx_vssrlrni_b_h:
+  case LoongArch::BI__builtin_lsx_vssrlrni_bu_h:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_b_h:
+  case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h:
+  case LoongArch::BI__builtin_lsx_vsrani_b_h:
+  case LoongArch::BI__builtin_lasx_xvsrani_b_h:
+    i = 2;
+    l = 0;
+    u = 15;
+    break;
+  // These intrinsics take an unsigned 5 bit immediate.
+  // The first block of intrinsics actually has an unsigned 5 bit field,
+  // not a df/n field.
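+  // (Here the immediate is a plain 5-bit value rather than a lane index;
+  // e.g. __builtin_lsx_vslli_w shifts 32-bit elements and accepts [0, 31].)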
+ case LoongArch::BI__builtin_lsx_vslei_bu: + case LoongArch::BI__builtin_lsx_vslei_hu: + case LoongArch::BI__builtin_lsx_vslei_wu: + case LoongArch::BI__builtin_lsx_vslei_du: + case LoongArch::BI__builtin_lasx_xvslei_bu: + case LoongArch::BI__builtin_lasx_xvslei_hu: + case LoongArch::BI__builtin_lasx_xvslei_wu: + case LoongArch::BI__builtin_lasx_xvslei_du: + case LoongArch::BI__builtin_lsx_vslti_bu: + case LoongArch::BI__builtin_lsx_vslti_hu: + case LoongArch::BI__builtin_lsx_vslti_wu: + case LoongArch::BI__builtin_lsx_vslti_du: + case LoongArch::BI__builtin_lasx_xvslti_bu: + case LoongArch::BI__builtin_lasx_xvslti_hu: + case LoongArch::BI__builtin_lasx_xvslti_wu: + case LoongArch::BI__builtin_lasx_xvslti_du: + case LoongArch::BI__builtin_lsx_vmaxi_bu: + case LoongArch::BI__builtin_lsx_vmaxi_hu: + case LoongArch::BI__builtin_lsx_vmaxi_wu: + case LoongArch::BI__builtin_lsx_vmaxi_du: + case LoongArch::BI__builtin_lasx_xvmaxi_bu: + case LoongArch::BI__builtin_lasx_xvmaxi_hu: + case LoongArch::BI__builtin_lasx_xvmaxi_wu: + case LoongArch::BI__builtin_lasx_xvmaxi_du: + case LoongArch::BI__builtin_lsx_vmini_bu: + case LoongArch::BI__builtin_lsx_vmini_hu: + case LoongArch::BI__builtin_lsx_vmini_wu: + case LoongArch::BI__builtin_lsx_vmini_du: + case LoongArch::BI__builtin_lasx_xvmini_bu: + case LoongArch::BI__builtin_lasx_xvmini_hu: + case LoongArch::BI__builtin_lasx_xvmini_wu: + case LoongArch::BI__builtin_lasx_xvmini_du: + case LoongArch::BI__builtin_lsx_vaddi_bu: + case LoongArch::BI__builtin_lsx_vaddi_hu: + case LoongArch::BI__builtin_lsx_vaddi_wu: + case LoongArch::BI__builtin_lsx_vaddi_du: + case LoongArch::BI__builtin_lasx_xvaddi_bu: + case LoongArch::BI__builtin_lasx_xvaddi_hu: + case LoongArch::BI__builtin_lasx_xvaddi_wu: + case LoongArch::BI__builtin_lasx_xvaddi_du: + case LoongArch::BI__builtin_lsx_vbitclri_w: + case LoongArch::BI__builtin_lasx_xvbitclri_w: + case LoongArch::BI__builtin_lsx_vbitrevi_w: + case LoongArch::BI__builtin_lasx_xvbitrevi_w: + case LoongArch::BI__builtin_lsx_vbitseti_w: + case LoongArch::BI__builtin_lasx_xvbitseti_w: + case LoongArch::BI__builtin_lsx_vsat_w: + case LoongArch::BI__builtin_lsx_vsat_wu: + case LoongArch::BI__builtin_lasx_xvsat_w: + case LoongArch::BI__builtin_lasx_xvsat_wu: + case LoongArch::BI__builtin_lsx_vslli_w: + case LoongArch::BI__builtin_lasx_xvslli_w: + case LoongArch::BI__builtin_lsx_vsrai_w: + case LoongArch::BI__builtin_lasx_xvsrai_w: + case LoongArch::BI__builtin_lsx_vsrari_w: + case LoongArch::BI__builtin_lasx_xvsrari_w: + case LoongArch::BI__builtin_lsx_vsrli_w: + case LoongArch::BI__builtin_lasx_xvsrli_w: + case LoongArch::BI__builtin_lsx_vsllwil_d_w: + case LoongArch::BI__builtin_lsx_vsllwil_du_wu: + case LoongArch::BI__builtin_lasx_xvsllwil_d_w: + case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: + case LoongArch::BI__builtin_lsx_vsrlri_w: + case LoongArch::BI__builtin_lasx_xvsrlri_w: + case LoongArch::BI__builtin_lsx_vrotri_w: + case LoongArch::BI__builtin_lasx_xvrotri_w: + case LoongArch::BI__builtin_lsx_vsubi_bu: + case LoongArch::BI__builtin_lsx_vsubi_hu: + case LoongArch::BI__builtin_lasx_xvsubi_bu: + case LoongArch::BI__builtin_lasx_xvsubi_hu: + case LoongArch::BI__builtin_lasx_xvsubi_wu: + case LoongArch::BI__builtin_lasx_xvsubi_du: + case LoongArch::BI__builtin_lsx_vbsrl_v: + case LoongArch::BI__builtin_lsx_vbsll_v: + case LoongArch::BI__builtin_lasx_xvbsrl_v: + case LoongArch::BI__builtin_lasx_xvbsll_v: + case LoongArch::BI__builtin_lsx_vsubi_wu: + case LoongArch::BI__builtin_lsx_vsubi_du: + i = 1; + l = 0; + u = 31; + 
break; + case LoongArch::BI__builtin_lsx_vssrarni_h_w: + case LoongArch::BI__builtin_lsx_vssrarni_hu_w: + case LoongArch::BI__builtin_lasx_xvssrarni_h_w: + case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: + case LoongArch::BI__builtin_lsx_vssrani_h_w: + case LoongArch::BI__builtin_lsx_vssrani_hu_w: + case LoongArch::BI__builtin_lasx_xvssrani_h_w: + case LoongArch::BI__builtin_lasx_xvssrani_hu_w: + case LoongArch::BI__builtin_lsx_vsrarni_h_w: + case LoongArch::BI__builtin_lasx_xvsrarni_h_w: + case LoongArch::BI__builtin_lsx_vsrani_h_w: + case LoongArch::BI__builtin_lasx_xvsrani_h_w: + case LoongArch::BI__builtin_lsx_vfrstpi_b: + case LoongArch::BI__builtin_lsx_vfrstpi_h: + case LoongArch::BI__builtin_lasx_xvfrstpi_b: + case LoongArch::BI__builtin_lasx_xvfrstpi_h: + case LoongArch::BI__builtin_lsx_vsrlni_h_w: + case LoongArch::BI__builtin_lasx_xvsrlni_h_w: + case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: + case LoongArch::BI__builtin_lsx_vssrlni_h_w: + case LoongArch::BI__builtin_lsx_vssrlni_hu_w: + case LoongArch::BI__builtin_lasx_xvssrlni_h_w: + case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: + case LoongArch::BI__builtin_lsx_vssrlrni_h_w: + case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: + case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: + case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: + i = 2; + l = 0; + u = 31; + break; + case LoongArch::BI__builtin_lasx_xvstelm_b: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); + // These intrinsics take an unsigned 6 bit immediate. + case LoongArch::BI__builtin_lsx_vbitclri_d: + case LoongArch::BI__builtin_lasx_xvbitclri_d: + case LoongArch::BI__builtin_lsx_vbitrevi_d: + case LoongArch::BI__builtin_lasx_xvbitrevi_d: + case LoongArch::BI__builtin_lsx_vbitseti_d: + case LoongArch::BI__builtin_lasx_xvbitseti_d: + case LoongArch::BI__builtin_lsx_vsat_d: + case LoongArch::BI__builtin_lsx_vsat_du: + case LoongArch::BI__builtin_lasx_xvsat_d: + case LoongArch::BI__builtin_lasx_xvsat_du: + case LoongArch::BI__builtin_lsx_vslli_d: + case LoongArch::BI__builtin_lasx_xvslli_d: + case LoongArch::BI__builtin_lsx_vsrai_d: + case LoongArch::BI__builtin_lasx_xvsrai_d: + case LoongArch::BI__builtin_lsx_vsrli_d: + case LoongArch::BI__builtin_lasx_xvsrli_d: + case LoongArch::BI__builtin_lsx_vsrari_d: + case LoongArch::BI__builtin_lasx_xvsrari_d: + case LoongArch::BI__builtin_lsx_vrotri_d: + case LoongArch::BI__builtin_lasx_xvrotri_d: + case LoongArch::BI__builtin_lasx_xvsrlri_d: + case LoongArch::BI__builtin_lsx_vsrlri_d: + i = 1; + l = 0; + u = 63; + break; + case LoongArch::BI__builtin_lsx_vssrarni_w_d: + case LoongArch::BI__builtin_lsx_vssrarni_wu_d: + case LoongArch::BI__builtin_lasx_xvssrarni_w_d: + case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: + case LoongArch::BI__builtin_lsx_vssrani_w_d: + case LoongArch::BI__builtin_lsx_vssrani_wu_d: + case LoongArch::BI__builtin_lasx_xvssrani_w_d: + case LoongArch::BI__builtin_lasx_xvssrani_wu_d: + case LoongArch::BI__builtin_lsx_vsrarni_w_d: + case LoongArch::BI__builtin_lasx_xvsrarni_w_d: + case LoongArch::BI__builtin_lsx_vsrlni_w_d: + case LoongArch::BI__builtin_lasx_xvsrlni_w_d: + case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: + case LoongArch::BI__builtin_lsx_vssrlni_w_d: + case LoongArch::BI__builtin_lsx_vssrlni_wu_d: + case LoongArch::BI__builtin_lasx_xvssrlni_w_d: + case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: + case LoongArch::BI__builtin_lsx_vssrlrni_w_d: + case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: + case 
LoongArch::BI__builtin_lasx_xvssrlrni_w_d: + case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: + case LoongArch::BI__builtin_lsx_vsrani_w_d: + case LoongArch::BI__builtin_lasx_xvsrani_w_d: + i = 2; + l = 0; + u = 63; + break; + // These intrinsics take an unsigned 7 bit immediate. + case LoongArch::BI__builtin_lsx_vssrarni_d_q: + case LoongArch::BI__builtin_lsx_vssrarni_du_q: + case LoongArch::BI__builtin_lasx_xvssrarni_d_q: + case LoongArch::BI__builtin_lasx_xvssrarni_du_q: + case LoongArch::BI__builtin_lsx_vssrani_d_q: + case LoongArch::BI__builtin_lsx_vssrani_du_q: + case LoongArch::BI__builtin_lasx_xvssrani_d_q: + case LoongArch::BI__builtin_lasx_xvssrani_du_q: + case LoongArch::BI__builtin_lsx_vsrarni_d_q: + case LoongArch::BI__builtin_lasx_xvsrarni_d_q: + case LoongArch::BI__builtin_lsx_vssrlni_d_q: + case LoongArch::BI__builtin_lsx_vssrlni_du_q: + case LoongArch::BI__builtin_lasx_xvssrlni_d_q: + case LoongArch::BI__builtin_lasx_xvssrlni_du_q: + case LoongArch::BI__builtin_lsx_vssrlrni_d_q: + case LoongArch::BI__builtin_lsx_vssrlrni_du_q: + case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: + case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: + case LoongArch::BI__builtin_lsx_vsrani_d_q: + case LoongArch::BI__builtin_lasx_xvsrani_d_q: + case LoongArch::BI__builtin_lasx_xvsrlni_d_q: + case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: + case LoongArch::BI__builtin_lsx_vsrlni_d_q: + i = 2; + l = 0; + u = 127; + break; + // These intrinsics take a signed 5 bit immediate. + case LoongArch::BI__builtin_lsx_vseqi_b: + case LoongArch::BI__builtin_lsx_vseqi_h: + case LoongArch::BI__builtin_lsx_vseqi_w: + case LoongArch::BI__builtin_lsx_vseqi_d: + case LoongArch::BI__builtin_lasx_xvseqi_b: + case LoongArch::BI__builtin_lasx_xvseqi_h: + case LoongArch::BI__builtin_lasx_xvseqi_w: + case LoongArch::BI__builtin_lasx_xvseqi_d: + case LoongArch::BI__builtin_lsx_vslti_b: + case LoongArch::BI__builtin_lsx_vslti_h: + case LoongArch::BI__builtin_lsx_vslti_w: + case LoongArch::BI__builtin_lsx_vslti_d: + case LoongArch::BI__builtin_lasx_xvslti_b: + case LoongArch::BI__builtin_lasx_xvslti_h: + case LoongArch::BI__builtin_lasx_xvslti_w: + case LoongArch::BI__builtin_lasx_xvslti_d: + case LoongArch::BI__builtin_lsx_vslei_b: + case LoongArch::BI__builtin_lsx_vslei_h: + case LoongArch::BI__builtin_lsx_vslei_w: + case LoongArch::BI__builtin_lsx_vslei_d: + case LoongArch::BI__builtin_lasx_xvslei_b: + case LoongArch::BI__builtin_lasx_xvslei_h: + case LoongArch::BI__builtin_lasx_xvslei_w: + case LoongArch::BI__builtin_lasx_xvslei_d: + case LoongArch::BI__builtin_lsx_vmaxi_b: + case LoongArch::BI__builtin_lsx_vmaxi_h: + case LoongArch::BI__builtin_lsx_vmaxi_w: + case LoongArch::BI__builtin_lsx_vmaxi_d: + case LoongArch::BI__builtin_lasx_xvmaxi_b: + case LoongArch::BI__builtin_lasx_xvmaxi_h: + case LoongArch::BI__builtin_lasx_xvmaxi_w: + case LoongArch::BI__builtin_lasx_xvmaxi_d: + case LoongArch::BI__builtin_lsx_vmini_b: + case LoongArch::BI__builtin_lsx_vmini_h: + case LoongArch::BI__builtin_lsx_vmini_w: + case LoongArch::BI__builtin_lasx_xvmini_b: + case LoongArch::BI__builtin_lasx_xvmini_h: + case LoongArch::BI__builtin_lasx_xvmini_w: + case LoongArch::BI__builtin_lasx_xvmini_d: + case LoongArch::BI__builtin_lsx_vmini_d: + i = 1; + l = -16; + u = 15; + break; + // These intrinsics take a signed 9 bit immediate. + case LoongArch::BI__builtin_lasx_xvldrepl_d: + case LoongArch::BI__builtin_lsx_vldrepl_d: + i = 1; + l = -256; + u = 255; + break; + // These intrinsics take an unsigned 8 bit immediate. 
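+  // (e.g. __builtin_lsx_vandi_b ANDs each byte with the immediate, so any
+  // mask value in [0, 255] is accepted.)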
+ case LoongArch::BI__builtin_lsx_vandi_b: + case LoongArch::BI__builtin_lasx_xvandi_b: + case LoongArch::BI__builtin_lsx_vnori_b: + case LoongArch::BI__builtin_lasx_xvnori_b: + case LoongArch::BI__builtin_lsx_vori_b: + case LoongArch::BI__builtin_lasx_xvori_b: + case LoongArch::BI__builtin_lsx_vshuf4i_b: + case LoongArch::BI__builtin_lsx_vshuf4i_h: + case LoongArch::BI__builtin_lsx_vshuf4i_w: + case LoongArch::BI__builtin_lasx_xvshuf4i_b: + case LoongArch::BI__builtin_lasx_xvshuf4i_h: + case LoongArch::BI__builtin_lasx_xvshuf4i_w: + case LoongArch::BI__builtin_lasx_xvxori_b: + case LoongArch::BI__builtin_lasx_xvpermi_d: + case LoongArch::BI__builtin_lsx_vxori_b: + i = 1; + l = 0; + u = 255; + break; + case LoongArch::BI__builtin_lsx_vbitseli_b: + case LoongArch::BI__builtin_lasx_xvbitseli_b: + case LoongArch::BI__builtin_lsx_vshuf4i_d: + case LoongArch::BI__builtin_lasx_xvshuf4i_d: + case LoongArch::BI__builtin_lsx_vextrins_b: + case LoongArch::BI__builtin_lsx_vextrins_h: + case LoongArch::BI__builtin_lsx_vextrins_w: + case LoongArch::BI__builtin_lsx_vextrins_d: + case LoongArch::BI__builtin_lasx_xvextrins_b: + case LoongArch::BI__builtin_lasx_xvextrins_h: + case LoongArch::BI__builtin_lasx_xvextrins_w: + case LoongArch::BI__builtin_lasx_xvextrins_d: + case LoongArch::BI__builtin_lasx_xvpermi_q: + case LoongArch::BI__builtin_lsx_vpermi_w: + case LoongArch::BI__builtin_lasx_xvpermi_w: + i = 2; + l = 0; + u = 255; + break; + // df/n format + // These intrinsics take an unsigned 4 bit immediate. + case LoongArch::BI__builtin_lsx_vpickve2gr_b: + case LoongArch::BI__builtin_lsx_vpickve2gr_bu: + case LoongArch::BI__builtin_lasx_xvrepl128vei_b: + case LoongArch::BI__builtin_lsx_vreplvei_b: + i = 1; + l = 0; + u = 15; + break; + case LoongArch::BI__builtin_lsx_vinsgr2vr_b: + i = 2; + l = 0; + u = 15; + break; + case LoongArch::BI__builtin_lasx_xvstelm_h: + case LoongArch::BI__builtin_lsx_vstelm_b: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); + // These intrinsics take an unsigned 3 bit immediate. + case LoongArch::BI__builtin_lsx_vpickve2gr_h: + case LoongArch::BI__builtin_lsx_vpickve2gr_hu: + case LoongArch::BI__builtin_lasx_xvrepl128vei_h: + case LoongArch::BI__builtin_lasx_xvpickve2gr_w: + case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: + case LoongArch::BI__builtin_lasx_xvpickve_w: + case LoongArch::BI__builtin_lsx_vreplvei_h: + i = 1; + l = 0; + u = 7; + break; + case LoongArch::BI__builtin_lsx_vinsgr2vr_h: + case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: + case LoongArch::BI__builtin_lasx_xvinsve0_w: + i = 2; + l = 0; + u = 7; + break; + case LoongArch::BI__builtin_lasx_xvstelm_w: + case LoongArch::BI__builtin_lsx_vstelm_h: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); + // These intrinsics take an unsigned 2 bit immediate. 
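+  // (These select one of four lanes, 32-bit lanes of an LSX vector or 64-bit
+  // lanes of an LASX vector, so the index fits in 2 bits; e.g.
+  // __builtin_lsx_vpickve2gr_w(v, 3) reads the last 32-bit lane.)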
+ case LoongArch::BI__builtin_lsx_vpickve2gr_w: + case LoongArch::BI__builtin_lsx_vpickve2gr_wu: + case LoongArch::BI__builtin_lasx_xvrepl128vei_w: + case LoongArch::BI__builtin_lasx_xvpickve2gr_d: + case LoongArch::BI__builtin_lasx_xvpickve2gr_du: + case LoongArch::BI__builtin_lasx_xvpickve_d: + case LoongArch::BI__builtin_lsx_vreplvei_w: + i = 1; + l = 0; + u = 3; + break; + case LoongArch::BI__builtin_lsx_vinsgr2vr_w: + case LoongArch::BI__builtin_lasx_xvinsve0_d: + case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: + i = 2; + l = 0; + u = 3; + break; + case LoongArch::BI__builtin_lasx_xvstelm_d: + case LoongArch::BI__builtin_lsx_vstelm_w: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); + // These intrinsics take an unsigned 1 bit immediate. + case LoongArch::BI__builtin_lsx_vpickve2gr_d: + case LoongArch::BI__builtin_lsx_vpickve2gr_du: + case LoongArch::BI__builtin_lasx_xvrepl128vei_d: + case LoongArch::BI__builtin_lsx_vreplvei_d: + i = 1; + l = 0; + u = 1; + break; + case LoongArch::BI__builtin_lsx_vinsgr2vr_d: + i = 2; + l = 0; + u = 1; + break; + case LoongArch::BI__builtin_lsx_vstelm_d: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); + // Memory offsets and immediate loads. + // These intrinsics take a signed 10 bit immediate. + case LoongArch::BI__builtin_lasx_xvldrepl_w: + case LoongArch::BI__builtin_lsx_vldrepl_w: + i = 1; + l = -512; + u = 511; + break; + case LoongArch::BI__builtin_lasx_xvldrepl_h: + case LoongArch::BI__builtin_lsx_vldrepl_h: + i = 1; + l = -1024; + u = 1023; + break; + case LoongArch::BI__builtin_lasx_xvldrepl_b: + case LoongArch::BI__builtin_lsx_vldrepl_b: + i = 1; + l = -2048; + u = 2047; + break; + case LoongArch::BI__builtin_lasx_xvld: + case LoongArch::BI__builtin_lsx_vld: + i = 1; + l = -2048; + u = 2047; + break; + case LoongArch::BI__builtin_lsx_vst: + case LoongArch::BI__builtin_lasx_xvst: + i = 2; + l = -2048; + u = 2047; + break; + case LoongArch::BI__builtin_lasx_xvldi: + case LoongArch::BI__builtin_lsx_vldi: + i = 0; + l = -4096; + u = 4095; + break; + // These intrinsics take an unsigned 5 bit immediate and a signed 12 bit immediate. + case LoongArch::BI__builtin_loongarch_cacop_w: + case LoongArch::BI__builtin_loongarch_cacop_d: + return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || + SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); + // These intrinsics take an unsigned 14 bit immediate. + case LoongArch::BI__builtin_loongarch_csrrd_w: + case LoongArch::BI__builtin_loongarch_csrrd_d: + i = 0; + l = 0; + u = 16383; + break; + case LoongArch::BI__builtin_loongarch_csrwr_w: + case LoongArch::BI__builtin_loongarch_csrwr_d: + i = 1; + l = 0; + u = 16383; + break; + case LoongArch::BI__builtin_loongarch_csrxchg_w: + case LoongArch::BI__builtin_loongarch_csrxchg_d: + i = 2; + l = 0; + u = 16383; + break; + // These intrinsics take an unsigned 15 bit immediate. 
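+  // (The code/hint operand of these instructions is a 15-bit field; e.g.
+  // __builtin_loongarch_dbar(0), the usual full-barrier hint, is accepted,
+  // while values above 32767 are rejected.)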
+  case LoongArch::BI__builtin_loongarch_dbar:
+  case LoongArch::BI__builtin_loongarch_ibar:
+  case LoongArch::BI__builtin_loongarch_syscall:
+  case LoongArch::BI__builtin_loongarch_break:
+    i = 0;
+    l = 0;
+    u = 32767;
+    break;
+  }
+
+  if (!m)
+    return SemaBuiltinConstantArgRange(TheCall, i, l, u);
+
+  return SemaBuiltinConstantArgRange(TheCall, i, l, u) ||
+         SemaBuiltinConstantArgMultiple(TheCall, i, m);
+}
+
 bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
                                            CallExpr *TheCall) {
   if (BuiltinID == SystemZ::BI__builtin_tabort) {
diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c
new file mode 100644
index 000000000000..2affc5a3d470
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c
@@ -0,0 +1,144 @@
+// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - | \
+// RUN: FileCheck --check-prefix=CHECK-C %s
+// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - -x c++ | \
+// RUN: FileCheck --check-prefix=CHECK-CXX %s
+
+// Fields containing empty structs are ignored when flattening
+// structs to examine whether the structs can be passed via FARs, even in C++.
+// There is one exception: a non-zero-length array of empty structs is not
+// ignored in C++. These rules are not documented in the psABI, but they match
+// GCC's behaviour.
+
+#include <stdint.h>
+
+struct empty { struct { struct { } e; }; };
+struct s1 { struct empty e; float f; };
+
+// CHECK-C: define{{.*}} float @test_s1(float{{.*}})
+// CHECK-CXX: define{{.*}} float @_Z7test_s12s1(float{{.*}})
+struct s1 test_s1(struct s1 a) {
+  return a;
+}
+
+struct s2 { struct empty e; int32_t i; float f; };
+
+// CHECK-C: define{{.*}} { i32, float } @test_s2(i32{{.*}}, float{{.*}})
+// CHECK-CXX: define{{.*}} { i32, float } @_Z7test_s22s2(i32{{.*}}, float{{.*}})
+struct s2 test_s2(struct s2 a) {
+  return a;
+}
+
+struct s3 { struct empty e; float f; float g; };
+
+// CHECK-C: define{{.*}} { float, float } @test_s3(float{{.*}}, float{{.*}})
+// CHECK-CXX: define{{.*}} { float, float } @_Z7test_s32s3(float{{.*}}, float{{.*}})
+struct s3 test_s3(struct s3 a) {
+  return a;
+}
+
+struct s4 { struct empty e; float __complex__ c; };
+
+// CHECK-C: define{{.*}} { float, float } @test_s4(float{{.*}}, float{{.*}})
+// CHECK-CXX: define{{.*}} { float, float } @_Z7test_s42s4(float{{.*}}, float{{.*}})
+struct s4 test_s4(struct s4 a) {
+  return a;
+}
+
+// An array of empty fields isn't ignored in C++ (this isn't explicit in the
+// psABI, but matches observed g++ behaviour).
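+// A plausible rationale (an inference, not psABI text): in C++ an empty
+// struct has size 1, so a non-zero-length array of empty structs occupies
+// real storage and cannot simply be flattened away.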
+ +struct s5 { struct empty e[1]; float f; }; + +// CHECK-C: define{{.*}} float @test_s5(float{{.*}}) +// CHECK-CXX: define{{.*}} i64 @_Z7test_s52s5(i64{{.*}}) +struct s5 test_s5(struct s5 a) { + return a; +} + +struct empty_arr { struct { struct { } e[1]; }; }; +struct s6 { struct empty_arr e; float f; }; + +// CHECK-C: define{{.*}} float @test_s6(float{{.*}}) +// CHECK-CXX: define{{.*}} i64 @_Z7test_s62s6(i64{{.*}}) +struct s6 test_s6(struct s6 a) { + return a; +} + +struct s7 { struct empty e[0]; float f; }; + +// CHECK-C: define{{.*}} float @test_s7(float{{.*}}) +// CHECK-CXX: define{{.*}} float @_Z7test_s72s7(float{{.*}}) +struct s7 test_s7(struct s7 a) { + return a; +} + +struct empty_arr0 { struct { struct { } e[0]; }; }; +struct s8 { struct empty_arr0 e; float f; }; + +// CHECK-C: define{{.*}} float @test_s8(float{{.*}}) +// CHECK-CXX: define{{.*}} float @_Z7test_s82s8(float{{.*}}) +struct s8 test_s8(struct s8 a) { + return a; +} + +/// Note: Below tests check how empty structs are passed while above tests check +/// empty structs as fields of container struct are ignored when flattening +/// structs to examine whether the container structs can be passed via FARs. + +// CHECK-C: define{{.*}} void @test_s9() +// CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}}) +struct s9 { + struct empty e; +}; +struct s9 test_s9(struct s9 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s10() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s103s10(i64 {{.*}}) +struct s10 { }; +struct s10 test_s10(struct s10 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s11() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s113s11(i64 {{.*}}) +struct s11 { struct { } s; }; +struct s11 test_s11(struct s11 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s12() +// CHECK-CXX: define{{.*}} void @_Z8test_s123s12() +struct s12 { int i[0]; }; +struct s12 test_s12(struct s12 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s13() +// CHECK-CXX: define{{.*}} void @_Z8test_s133s13() +struct s13 { struct { } s[0]; }; +struct s13 test_s13(struct s13 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s14() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s143s14(i64 {{.*}}) +struct s14 { struct { } s[1]; }; +struct s14 test_s14(struct s14 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s15() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s153s15(i64 {{.*}}) +struct s15 { int : 0; }; +struct s15 test_s15(struct s15 a) { + return a; +} + +// CHECK-C: define{{.*}} i64 @test_s16(i64 {{.*}}) +// CHECK-CXX: define{{.*}} i64 @_Z8test_s163s16(i64 {{.*}}) +struct s16 { int : 1; }; +struct s16 test_s16(struct s16 a) { + return a; +} diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c new file mode 100644 index 000000000000..771b1837f322 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - | \ +// RUN: FileCheck --check-prefix=CHECK-C %s +// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - -x c++ | \ +// RUN: FileCheck --check-prefix=CHECK-CXX %s + +#include + +// CHECK-C: define{{.*}} void @test1() +// CHECK-CXX: define{{.*}} i64 @_Z5test12u1(i64{{[^,]*}}) +union u1 {}; +union u1 test1(union u1 a) { + return a; +} + +struct s1 { + union u1 u; + int i; + float f; +}; + +// CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}}) +// CHECK-CXX: define{{.*}} [2 x i64] 
@_Z5test22s1([2 x i64]{{[^,]*}}) +struct s1 test2(struct s1 a) { + return a; +} diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d.c b/clang/test/CodeGen/LoongArch/abi-lp64d.c new file mode 100644 index 000000000000..51174cd3165b --- /dev/null +++ b/clang/test/CodeGen/LoongArch/abi-lp64d.c @@ -0,0 +1,501 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ + // RUN: -emit-llvm %s -o - | FileCheck %s + +/// This test checks the calling convention of the lp64d ABI. + +#include +#include + +/// Part 0: C Data Types and Alignment. + +/// `char` datatype is signed by default. +/// In most cases, the unsigned integer data types are zero-extended when stored +/// in general-purpose register, and the signed integer data types are +/// sign-extended. However, in the LP64D ABI, unsigned 32-bit types, such as +/// unsigned int, are stored in general-purpose registers as proper sign +/// extensions of their 32-bit values. + +// CHECK-LABEL: define{{.*}} zeroext i1 @check_bool() +_Bool check_bool() { return 0; } + +// CHECK-LABEL: define{{.*}} signext i8 @check_char() +char check_char() { return 0; } + +// CHECK-LABEL: define{{.*}} signext i16 @check_short() +short check_short() { return 0; } + +// CHECK-LABEL: define{{.*}} signext i32 @check_int() +int check_int() { return 0; } + +// CHECK-LABEL: define{{.*}} i64 @check_long() +long check_long() { return 0; } + +// CHECK-LABEL: define{{.*}} i64 @check_longlong() +long long check_longlong() { return 0; } + +// CHECK-LABEL: define{{.*}} zeroext i8 @check_uchar() +unsigned char check_uchar() { return 0; } + +// CHECK-LABEL: define{{.*}} zeroext i16 @check_ushort() +unsigned short check_ushort() { return 0; } + +// CHECK-LABEL: define{{.*}} signext i32 @check_uint() +unsigned int check_uint() { return 0; } + +// CHECK-LABEL: define{{.*}} i64 @check_ulong() +unsigned long check_ulong() { return 0; } + +// CHECK-LABEL: define{{.*}} i64 @check_ulonglong() +unsigned long long check_ulonglong() { return 0; } + +// CHECK-LABEL: define{{.*}} float @check_float() +float check_float() { return 0; } + +// CHECK-LABEL: define{{.*}} double @check_double() +double check_double() { return 0; } + +// CHECK-LABEL: define{{.*}} fp128 @check_longdouble() +long double check_longdouble() { return 0; } + +/// Part 1: Scalar arguments and return value. + +/// The lp64d abi says: +/// 1. 1 < WOA <= GRLEN +/// a. Argument is passed in a single argument register, or on the stack by +/// value if none is available. +/// i. If the argument is floating-point type, the argument is passed in FAR. if +/// no FAR is available, it’s passed in GAR. If no GAR is available, it’s +/// passed on the stack. When passed in registers or on the stack, +/// floating-point types narrower than GRLEN bits are widened to GRLEN bits, +/// with the upper bits undefined. +/// ii. If the argument is integer or pointer type, the argument is passed in +/// GAR. If no GAR is available, it’s passed on the stack. When passed in +/// registers or on the stack, the unsigned integer scalars narrower than GRLEN +/// bits are zero-extended to GRLEN bits, and the signed integer scalars are +/// sign-extended. +/// 2. GRLEN < WOA ≤ 2 × GRLEN +/// a. The argument is passed in a pair of GAR, with the low-order GRLEN bits in +/// the lower-numbered register and the high-order GRLEN bits in the +/// higher-numbered register. If exactly one register is available, the +/// low-order GRLEN bits are passed in the register and the high-order GRLEN +/// bits are passed on the stack. 
If no GAR is available, it’s passed on the +/// stack. + +/// Note that most of these conventions are handled at the llvm side, so here we +/// only check the correctness of argument (or return value)'s sign/zero +/// extension attribute. + +// CHECK-LABEL: define{{.*}} signext i32 @f_scalar(i1{{.*}} zeroext %a, i8{{.*}} signext %b, i8{{.*}} zeroext %c, i16{{.*}} signext %d, i16{{.*}} zeroext %e, i32{{.*}} signext %f, i32{{.*}} signext %g, i64{{.*}} %h, i1{{.*}} zeroext %i, i8{{.*}} signext %j, i8{{.*}} zeroext %k, i16{{.*}} signext %l, i16{{.*}} zeroext %m, i32{{.*}} signext %n, i32{{.*}} signext %o, i64{{.*}} %p) +int f_scalar(_Bool a, int8_t b, uint8_t c, int16_t d, uint16_t e, int32_t f, + uint32_t g, int64_t h, /* begin of stack passing -> */ _Bool i, + int8_t j, uint8_t k, int16_t l, uint16_t m, int32_t n, + uint32_t o, int64_t p) { + return 0; +} + +/// Part 2: Structure arguments and return value. + +/// The lp64d abi says: +/// Empty structures are ignored by C compilers which support them as a +/// non-standard extension(same as union arguments and return values). Bits +/// unused due to padding, and bits past the end of a structure whose size in +/// bits is not divisible by GRLEN, are undefined. And the layout of the +/// structure on the stack is consistent with that in memory. + +/// Check empty structs are ignored. + +struct empty_s {}; + +// CHECK-LABEL: define{{.*}} void @f_empty_s() +struct empty_s f_empty_s(struct empty_s x) { + return x; +} + +/// 1. 0 < WOA ≤ GRLEN +/// a. The structure has only fixed-point members. If there is an available GAR, +/// the structure is passed through the GAR by value passing; If no GAR is +/// available, it’s passed on the stack. + +struct i16x4_s { + int16_t a, b, c, d; +}; + +// CHECK-LABEL: define{{.*}} i64 @f_i16x4_s(i64 %x.coerce) +struct i16x4_s f_i16x4_s(struct i16x4_s x) { + return x; +} + +/// b. The structure has only floating-point members: +/// i. One floating-point member. The argument is passed in a FAR; If no FAR is +/// available, the value is passed in a GAR; if no GAR is available, the value +/// is passed on the stack. + +struct f32x1_s { + float a; +}; + +struct f64x1_s { + double a; +}; + +// CHECK-LABEL: define{{.*}} float @f_f32x1_s(float %0) +struct f32x1_s f_f32x1_s(struct f32x1_s x) { + return x; +} + +// CHECK-LABEL: define{{.*}} double @f_f64x1_s(double %0) +struct f64x1_s f_f64x1_s(struct f64x1_s x) { + return x; +} + +/// ii. Two floating-point members. The argument is passed in a pair of +/// available FAR, with the low-order float member bits in the lower-numbered +/// FAR and the high-order float member bits in the higher-numbered FAR. If the +/// number of available FAR is less than 2, it’s passed in a GAR, and passed on +/// the stack if no GAR is available. + +struct f32x2_s { + float a, b; +}; + +// CHECK-LABEL: define{{.*}} { float, float } @f_f32x2_s(float %0, float %1) +struct f32x2_s f_f32x2_s(struct f32x2_s x) { + return x; +} + +/// c. The structure has both fixed-point and floating-point members, i.e. the +/// structure has one float member and... +/// i. Multiple fixed-point members. If there are available GAR, the structure +/// is passed in a GAR, and passed on the stack if no GAR is available. + +struct f32x1_i16x2_s { + float a; + int16_t b, c; +}; + +// CHECK-LABEL: define{{.*}} i64 @f_f32x1_i16x2_s(i64 %x.coerce) +struct f32x1_i16x2_s f_f32x1_i16x2_s(struct f32x1_i16x2_s x) { + return x; +} + +/// ii. Only one fixed-point member. 
If one FAR and one GAR are available, the
+/// floating-point member of the structure is passed in the FAR, and the integer
+/// member of the structure is passed in the GAR; If no floating-point register
+/// but one GAR is available, it’s passed in a GAR; If no GAR is available, it’s
+/// passed on the stack.
+
+struct f32x1_i32x1_s {
+  float a;
+  int32_t b;
+};
+
+// CHECK-LABEL: define{{.*}} { float, i32 } @f_f32x1_i32x1_s(float %0, i32 %1)
+struct f32x1_i32x1_s f_f32x1_i32x1_s(struct f32x1_i32x1_s x) {
+  return x;
+}
+
+/// 2. GRLEN < WOA ≤ 2 × GRLEN
+/// a. Only fixed-point members.
+/// i. The argument is passed in a pair of available GAR, with the low-order
+/// bits in the lower-numbered GAR and the high-order bits in the
+/// higher-numbered GAR. If only one GAR is available, the low-order bits are in
+/// the GAR and the high-order bits are on the stack, and passed on the stack if
+/// no GAR is available.
+
+struct i64x2_s {
+  int64_t a, b;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_i64x2_s([2 x i64] %x.coerce)
+struct i64x2_s f_i64x2_s(struct i64x2_s x) {
+  return x;
+}
+
+/// b. Only floating-point members.
+/// i. The structure has one long double member or one double member and two
+/// adjacent float members or 3-4 float members. The argument is passed in a
+/// pair of available GAR, with the low-order bits in the lower-numbered GAR and
+/// the high-order bits in the higher-numbered GAR. If only one GAR is
+/// available, the low-order bits are in the GAR and the high-order bits are on
+/// the stack, and passed on the stack if no GAR is available.
+
+struct f128x1_s {
+  long double a;
+};
+
+// CHECK-LABEL: define{{.*}} i128 @f_f128x1_s(i128 %x.coerce)
+struct f128x1_s f_f128x1_s(struct f128x1_s x) {
+  return x;
+}
+
+struct f64x1_f32x2_s {
+  double a;
+  float b, c;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_f32x2_s([2 x i64] %x.coerce)
+struct f64x1_f32x2_s f_f64x1_f32x2_s(struct f64x1_f32x2_s x) {
+  return x;
+}
+
+struct f32x3_s {
+  float a, b, c;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x3_s([2 x i64] %x.coerce)
+struct f32x3_s f_f32x3_s(struct f32x3_s x) {
+  return x;
+}
+
+struct f32x4_s {
+  float a, b, c, d;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x4_s([2 x i64] %x.coerce)
+struct f32x4_s f_f32x4_s(struct f32x4_s x) {
+  return x;
+}
+
+/// ii. The structure with two double members is passed in a pair of available
+/// FARs. If a pair of FARs is not available, it’s passed in GARs. A structure
+/// with one double member and one float member is handled the same way.
+
+struct f64x2_s {
+  double a, b;
+};
+
+// CHECK-LABEL: define{{.*}} { double, double } @f_f64x2_s(double %0, double %1)
+struct f64x2_s f_f64x2_s(struct f64x2_s x) {
+  return x;
+}
+
+/// c. Both fixed-point and floating-point members.
+/// i. The structure has one double member and only one fixed-point member.
+/// A. If one FAR and one GAR are available, the floating-point member of the
+/// structure is passed in the FAR, and the integer member of the structure is
+/// passed in the GAR; If no floating-point registers but two GARs are
+/// available, it’s passed in the two GARs; If only one GAR is available, the
+/// low-order bits are in the GAR and the high-order bits are on the stack; And
+/// it’s passed on the stack if no GAR is available.
+
+struct f64x1_i64x1_s {
+  double a;
+  int64_t b;
+};
+
+// CHECK-LABEL: define{{.*}} { double, i64 } @f_f64x1_i64x1_s(double %0, i64 %1)
+struct f64x1_i64x1_s f_f64x1_i64x1_s(struct f64x1_i64x1_s x) {
+  return x;
+}
+
+/// ii. Others
+/// A. The argument is passed in a pair of available GAR, with the low-order
+/// bits in the lower-numbered GAR and the high-order bits in the
+/// higher-numbered GAR. If only one GAR is available, the low-order bits are in
+/// the GAR and the high-order bits are on the stack, and passed on the stack if
+/// no GAR is available.
+
+struct f64x1_i32x2_s {
+  double a;
+  int32_t b, c;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f64x1_i32x2_s([2 x i64] %x.coerce)
+struct f64x1_i32x2_s f_f64x1_i32x2_s(struct f64x1_i32x2_s x) {
+  return x;
+}
+
+struct f32x2_i32x2_s {
+  float a, b;
+  int32_t c, d;
+};
+
+// CHECK-LABEL: define{{.*}} [2 x i64] @f_f32x2_i32x2_s([2 x i64] %x.coerce)
+struct f32x2_i32x2_s f_f32x2_i32x2_s(struct f32x2_i32x2_s x) {
+  return x;
+}
+
+/// 3. WOA > 2 × GRLEN
+/// a. It’s passed by reference and is replaced in the argument list with the
+/// address. If there is an available GAR, the reference is passed in the GAR,
+/// and passed on the stack if no GAR is available.
+
+struct i64x4_s {
+  int64_t a, b, c, d;
+};
+
+// CHECK-LABEL: define{{.*}} void @f_i64x4_s(ptr{{.*}} sret(%struct.i64x4_s){{.*}} %agg.result, ptr{{.*}} %x)
+struct i64x4_s f_i64x4_s(struct i64x4_s x) {
+  return x;
+}
+
+struct f64x4_s {
+  double a, b, c, d;
+};
+
+// CHECK-LABEL: define{{.*}} void @f_f64x4_s(ptr{{.*}} sret(%struct.f64x4_s){{.*}} %agg.result, ptr{{.*}} %x)
+struct f64x4_s f_f64x4_s(struct f64x4_s x) {
+  return x;
+}
+
+/// Part 3: Union arguments and return value.
+
+/// Check empty unions are ignored.
+
+union empty_u {};
+
+// CHECK-LABEL: define{{.*}} void @f_empty_u()
+union empty_u f_empty_u(union empty_u x) {
+  return x;
+}
+
+/// Union is passed in GAR or stack.
+/// 1. 0 < WOA ≤ GRLEN
+/// a. The argument is passed in a GAR, or on the stack by value if no GAR is
+/// available.
+
+union i32_f32_u {
+  int32_t a;
+  float b;
+};
+
+// CHECK-LABEL: define{{.*}} i64 @f_i32_f32_u(i64 %x.coerce)
+union i32_f32_u f_i32_f32_u(union i32_f32_u x) {
+  return x;
+}
+
+union i64_f64_u {
+  int64_t a;
+  double b;
+};
+
+// CHECK-LABEL: define{{.*}} i64 @f_i64_f64_u(i64 %x.coerce)
+union i64_f64_u f_i64_f64_u(union i64_f64_u x) {
+  return x;
+}
+
+/// 2. GRLEN < WOA ≤ 2 × GRLEN
+/// a. The argument is passed in a pair of available GAR, with the low-order
+/// bits in the lower-numbered GAR and the high-order bits in the
+/// higher-numbered GAR. If only one GAR is available, the low-order bits are in
+/// the GAR and the high-order bits are on the stack. The arguments are passed
+/// on the stack when no GAR is available.
+
+union i128_f128_u {
+  __int128_t a;
+  long double b;
+};
+
+// CHECK-LABEL: define{{.*}} i128 @f_i128_f128_u(i128 %x.coerce)
+union i128_f128_u f_i128_f128_u(union i128_f128_u x) {
+  return x;
+}
+
+/// 3. WOA > 2 × GRLEN
+/// a. It’s passed by reference and is replaced in the argument list with the
+/// address. If there is an available GAR, the reference is passed in the GAR,
+/// and passed on the stack if no GAR is available.
+
+union i64_arr3_u {
+  int64_t a[3];
+};
+
+// CHECK-LABEL: define{{.*}} void @f_i64_arr3_u(ptr{{.*}} sret(%union.i64_arr3_u){{.*}} %agg.result, ptr{{.*}} %x)
+union i64_arr3_u f_i64_arr3_u(union i64_arr3_u x) {
+  return x;
+}
+
+/// Part 4: Complex number arguments and return value.
+
+/// A complex floating-point number, or a structure containing just one complex
+/// floating-point number, is passed as though it were a structure containing
+/// two floating-point reals.
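+/// (In effect this mirrors the rule for structs with two floating-point
+/// members above: the real and imaginary parts are treated as two separate
+/// floating-point members.)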
+
+// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex(float{{.*}} %x.coerce0, float{{.*}} %x.coerce1)
+float __complex__ f_floatcomplex(float __complex__ x) { return x; }
+
+// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex(double{{.*}} %x.coerce0, double{{.*}} %x.coerce1)
+double __complex__ f_doublecomplex(double __complex__ x) { return x; }
+
+struct floatcomplex_s {
+  float __complex__ c;
+};
+// CHECK-LABEL: define{{.*}} { float, float } @f_floatcomplex_s(float %0, float %1)
+struct floatcomplex_s f_floatcomplex_s(struct floatcomplex_s x) {
+  return x;
+}
+
+struct doublecomplex_s {
+  double __complex__ c;
+};
+// CHECK-LABEL: define{{.*}} { double, double } @f_doublecomplex_s(double %0, double %1)
+struct doublecomplex_s f_doublecomplex_s(struct doublecomplex_s x) {
+  return x;
+}
+
+/// Complex floating-point values or structs containing a single complex
+/// floating-point value should be passed in GPRs if two FPRs are not
+/// available.
+
+// CHECK: define{{.*}} void @f_floatcomplex_insufficient_fprs1(float{{.*}} %a.coerce0, float{{.*}} %a.coerce1, float{{.*}} %b.coerce0, float{{.*}} %b.coerce1, float{{.*}} %c.coerce0, float{{.*}} %c.coerce1, float{{.*}} %d.coerce0, float{{.*}} %d.coerce1, i64{{.*}} %e.coerce)
+void f_floatcomplex_insufficient_fprs1(float __complex__ a, float __complex__ b,
+                                       float __complex__ c, float __complex__ d,
+                                       float __complex__ e) {}
+
+// CHECK: define{{.*}} void @f_floatcomplex_s_arg_insufficient_fprs1(float{{.*}} %0, float{{.*}} %1, float{{.*}} %2, float{{.*}} %3, float{{.*}} %4, float{{.*}} %5, float{{.*}} %6, float{{.*}} %7, i64 %e.coerce)
+void f_floatcomplex_s_arg_insufficient_fprs1(struct floatcomplex_s a,
+                                             struct floatcomplex_s b,
+                                             struct floatcomplex_s c,
+                                             struct floatcomplex_s d,
+                                             struct floatcomplex_s e) {}
+
+// CHECK: define{{.*}} void @f_floatcomplex_insufficient_fprs2(float{{.*}} %a, float{{.*}} %b.coerce0, float{{.*}} %b.coerce1, float{{.*}} %c.coerce0, float{{.*}} %c.coerce1, float{{.*}} %d.coerce0, float{{.*}} %d.coerce1, i64{{.*}} %e.coerce)
+void f_floatcomplex_insufficient_fprs2(float a,
+                                       float __complex__ b, float __complex__ c,
+                                       float __complex__ d, float __complex__ e) {}
+
+// CHECK: define{{.*}} void @f_floatcomplex_s_arg_insufficient_fprs2(float{{.*}} %a, float{{.*}} %0, float{{.*}} %1, float{{.*}} %2, float{{.*}} %3, float{{.*}} %4, float{{.*}} %5, i64{{.*}} %e.coerce)
+void f_floatcomplex_s_arg_insufficient_fprs2(float a,
+                                             struct floatcomplex_s b,
+                                             struct floatcomplex_s c,
+                                             struct floatcomplex_s d,
+                                             struct floatcomplex_s e) {}
+
+/// Part 5: Variadic arguments.
+
+/// Variadic arguments are passed in GARs in the same manner as named arguments.
+
+int f_va_callee(int, ...);
+
+// CHECK-LABEL: define{{.*}} void @f_va_caller()
+// CHECK: call signext i32 (i32, ...) @f_va_callee(i32{{.*}} signext 1, i32{{.*}} signext 2, i64{{.*}} 3, double{{.*}} 4.000000e+00, double{{.*}} 5.000000e+00, i64 {{.*}}, i64 {{.*}}, i64 {{.*}})
+void f_va_caller(void) {
+  f_va_callee(1, 2, 3LL, 4.0f, 5.0, (struct i16x4_s){6, 7, 8, 9},
+              (struct i64x2_s){10, 11});
+}
+
+// CHECK-LABEL: define{{.*}} signext i32 @f_va_int(ptr{{.*}} %fmt, ...)
+// CHECK: entry: +// CHECK: %fmt.addr = alloca ptr, align 8 +// CHECK: %va = alloca ptr, align 8 +// CHECK: %v = alloca i32, align 4 +// CHECK: store ptr %fmt, ptr %fmt.addr, align 8 +// CHECK: call void @llvm.va_start(ptr %va) +// CHECK: %argp.cur = load ptr, ptr %va, align 8 +// CHECK: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8 +// CHECK: store ptr %argp.next, ptr %va, align 8 +// CHECK: %0 = load i32, ptr %argp.cur, align 8 +// CHECK: store i32 %0, ptr %v, align 4 +// CHECK: call void @llvm.va_end(ptr %va) +// CHECK: %1 = load i32, ptr %v, align 4 +// CHECK: ret i32 %1 +// CHECK: } +int f_va_int(char *fmt, ...) { + __builtin_va_list va; + __builtin_va_start(va, fmt); + int v = __builtin_va_arg(va, int); + __builtin_va_end(va); + return v; +} diff --git a/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c b/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c new file mode 100644 index 000000000000..e4a03d782765 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/inlineasm-float-double-in-gpr.c @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -triple loongarch64 -O2 -emit-llvm %s -o - \ +// RUN: | FileCheck %s + +float f; +double d; + +// CHECK-LABEL: @reg_float( +// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f +// CHECK: call void asm sideeffect "", "r"(float [[FLT_ARG]]) +// CHECK: ret void +void reg_float() { + float a = f; + asm volatile("" + : + : "r"(a)); +} + +// CHECK-LABEL: @r4_float( +// CHECK: [[FLT_ARG:%.*]] = load float, ptr @f +// CHECK: call void asm sideeffect "", "{$r4}"(float [[FLT_ARG]]) +// CHECK: ret void +void r4_float() { + register float a asm("$r4") = f; + asm volatile("" + : + : "r"(a)); +} + +// CHECK-LABEL: @reg_double( +// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d +// CHECK: call void asm sideeffect "", "r"(double [[DBL_ARG]]) +// CHECK: ret void +void reg_double() { + double a = d; + asm volatile("" + : + : "r"(a)); +} + +// CHECK-LABEL: @r4_double( +// CHECK: [[DBL_ARG:%.*]] = load double, ptr @d +// CHECK: call void asm sideeffect "", "{$r4}"(double [[DBL_ARG]]) +// CHECK: ret void +void r4_double() { + register double a asm("$r4") = d; + asm volatile("" + : + : "r"(a)); +} diff --git a/clang/test/CodeGen/builtins-loongarch-base.c b/clang/test/CodeGen/builtins-loongarch-base.c new file mode 100644 index 000000000000..cdff582fa3f2 --- /dev/null +++ b/clang/test/CodeGen/builtins-loongarch-base.c @@ -0,0 +1,409 @@ +// REQUIRES: loongarch-registered-target +// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm %s -o - | FileCheck %s + +#include <larchintrin.h> + +typedef char i8; +typedef unsigned char u8; +typedef short i16; +typedef unsigned short u16; +typedef int i32; +typedef unsigned int u32; + +#if __LONG_MAX__ == __LONG_LONG_MAX__ +typedef long int i64; +typedef unsigned long int u64; +#else +typedef long long i64; +typedef unsigned long long u64; +#endif + +__drdtime_t drdtime; +__rdtime_t rdtime; + +void cpucfg(){ + + u32 u32_r, u32_a; + // __cpucfg + // rd, rj + // unsigned int, unsigned int + u32_r= __builtin_loongarch_cpucfg(u32_a); // CHECK: call i32 @llvm.loongarch.cpucfg + +} + +void csrrd_w() { + + u32 u32_r; + // __csrrd_w + // rd, csr_num + // unsigned int, uimm14_32 + u32_r = __builtin_loongarch_csrrd_w(1); // CHECK: call i32 @llvm.loongarch.csrrd.w +} + +void csrrd_d() { + + u64 u64_r; + // __csrrd_d + // rd, csr_num + // unsigned long int, uimm14 + u64_r = __builtin_loongarch_csrrd_d(1); // CHECK: call i64 @llvm.loongarch.csrrd.d +} + +void csrwr_w() { + + u32 u32_r, u32_a; + // __csrwr_w + // rd, csr_num + // unsigned int,
uimm14_32 + u32_r = __builtin_loongarch_csrwr_w(u32_a, 1); // CHECK: call i32 @llvm.loongarch.csrwr.w +} + +void csrwr_d() { + + u64 u64_r, u64_a; + // __csrwr_d + // rd, csr_num + // unsigned long int, uimm14 + u64_r = __builtin_loongarch_csrwr_d(u64_a, 1); // CHECK: call i64 @llvm.loongarch.csrwr.d +} + +void csrxchg_w() { + + u32 u32_r, u32_a, u32_b; + // __csrxchg_w + // rd, rj, csr_num + // unsigned int, unsigned int, uimm14_32 + u32_r = __builtin_loongarch_csrxchg_w(u32_a, u32_b, 1); // CHECK: call i32 @llvm.loongarch.csrxchg.w +} + +void csrxchg_d() { + + u64 u64_r, u64_a, u64_b; + // __csrxchg_d + // rd, rj, csr_num + // unsigned long int, unsigned long int, uimm14 + u64_r = __builtin_loongarch_csrxchg_d(u64_a, u64_b, 1); // CHECK: call i64 @llvm.loongarch.csrxchg.d +} + +void iocsrrd_b(){ + + u32 u32_a; + u8 u8_r; + // __iocsrrd_b + // rd, rj + // unsigned char, unsigned int + u8_r=__builtin_loongarch_iocsrrd_b(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.b + +} + +void iocsrrd_h(){ + + u32 u32_a; + u16 u16_r; + // __iocsrrd_h + // rd, rj + // unsigned short, unsigned int + u16_r=__builtin_loongarch_iocsrrd_h(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.h + +} + +void iocsrrd_w(){ + + u32 u32_r, u32_a; + // __iocsrrd_w + // rd, rj + // unsigned int, unsigned int + u32_r=__builtin_loongarch_iocsrrd_w(u32_a); // CHECK: call i32 @llvm.loongarch.iocsrrd.w + +} + +void iocsrrd_d(){ + + u32 u32_a; + u64 u64_r; + // __iocsrrd_d + // rd, rj + // unsigned long int, unsigned int + u64_r=__builtin_loongarch_iocsrrd_d(u32_a); // CHECK: call i64 @llvm.loongarch.iocsrrd.d + +} + +void iocsrwr_b(){ + + u32 u32_a; + u8 u8_a; + // __iocsrwr_b + // rd, rj + // unsigned char, unsigned int + __builtin_loongarch_iocsrwr_b(u8_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.b + +} + +void iocsrwr_h(){ + + u32 u32_a; + u16 u16_a; + // __iocsrwr_h + // rd, rj + // unsigned short, unsigned int + __builtin_loongarch_iocsrwr_h(u16_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.h + +} + +void iocsrwr_w(){ + + u32 u32_a, u32_b; + // __iocsrwr_w + // rd, rj + // unsigned int, unsigned int + __builtin_loongarch_iocsrwr_w(u32_a, u32_b); // CHECK: void @llvm.loongarch.iocsrwr.w + +} + +void iocsrwr_d(){ + + u32 u32_a; + u64 u64_a; + // __iocsrwr_d + // rd, rj + // unsigned long int, unsigned int + __builtin_loongarch_iocsrwr_d(u64_a, u32_a); // CHECK: void @llvm.loongarch.iocsrwr.d + +} + +void cacop_w() { + + i32 i32_a; + // __cacop_w + // op, rj, si12 + // uimm5, unsigned int, simm12 + __builtin_loongarch_cacop_w(1, i32_a, 2); // CHECK: void @llvm.loongarch.cacop.w +} + +void cacop_d() { + + i64 i64_a; + // __cacop_d + // op, rj, si12 + // uimm5, unsigned long int, simm12 + __builtin_loongarch_cacop_d(1, i64_a, 2); // CHECK: void @llvm.loongarch.cacop.d +} + +void rdtime_d(){ + + drdtime= __builtin_loongarch_rdtime_d(); // CHECK: call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() + +} + +void rdtimeh_w(){ + + rdtime= __builtin_loongarch_rdtimeh_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() + +} + +void rdtimel_w(){ + + rdtime= __builtin_loongarch_rdtimel_w(); // CHECK: call { i32, i32 } asm sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() + +} + +void crc_w_b_w(){ + + i32 i32_r, i32_a; + i8 i8_a; + // __crc_w_b_w + // rd, rj, rk + // int, char, int + i32_r=__builtin_loongarch_crc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.b.w + +} + +void crc_w_h_w(){ + + i32 i32_r, i32_a; + i16 i16_a; + // __crc_w_h_w + // rd, rj, 
rk + // int, short, int + i32_r=__builtin_loongarch_crc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.h.w + +} + +void crc_w_w_w(){ + + i32 i32_r, i32_a, i32_b; + // __crc_w_w_w + // rd, rj, rk + // int, int, int + i32_r=__builtin_loongarch_crc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crc.w.w.w + +} + +void crc_w_d_w(){ + + i32 i32_r, i32_a; + i64 i64_a; + // __crc_w_d_w + // rd, rj, rk + // int, long int, int + i32_r=__builtin_loongarch_crc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crc.w.d.w + +} + +void crcc_w_b_w(){ + + i32 i32_r, i32_a; + i8 i8_a; + // __crcc_w_b_w + // rd, rj, rk + // int, char, int + i32_r=__builtin_loongarch_crcc_w_b_w(i8_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.b.w + +} + +void crcc_w_h_w(){ + + i32 i32_r, i32_a; + i16 i16_a; + // __crcc_w_h_w + // rd, rj, rk + // int, short, int + i32_r=__builtin_loongarch_crcc_w_h_w(i16_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.h.w + +} + +void crcc_w_w_w(){ + + i32 i32_r, i32_a, i32_b; + // __crcc_w_w_w + // rd, rj, rk + // int, int, int + i32_r=__builtin_loongarch_crcc_w_w_w(i32_a, i32_b); // CHECK: call i32 @llvm.loongarch.crcc.w.w.w + +} + +void crcc_w_d_w(){ + + i32 i32_r, i32_a; + i64 i64_a; + // __crcc_w_d_w + // rd, rj, rk + // int, long int, int + i32_r=__builtin_loongarch_crcc_w_d_w(i64_a, i32_a); // CHECK: call i32 @llvm.loongarch.crcc.w.d.w + +} + +void tlbclr(){ + + // __tlbclr + __builtin_loongarch_tlbclr(); // CHECK: call void @llvm.loongarch.tlbclr + +} + +void tlbflush(){ + + // __tlbflush + __builtin_loongarch_tlbflush(); // CHECK: call void @llvm.loongarch.tlbflush + +} + +void tlbfill(){ + + // __tlbfill + __builtin_loongarch_tlbfill(); // CHECK: call void @llvm.loongarch.tlbfill + +} + +void tlbrd(){ + + // __tlbrd + __builtin_loongarch_tlbrd(); // CHECK: call void @llvm.loongarch.tlbrd + +} + +void tlbwr(){ + + // __tlbwr + __builtin_loongarch_tlbwr(); // CHECK: call void @llvm.loongarch.tlbwr + +} + +void tlbsrch(){ + + // __tlbsrch + __builtin_loongarch_tlbsrch(); // CHECK: call void @llvm.loongarch.tlbsrch + +} + +void syscall(){ + + // __syscall + // Code + // uimm15 + __builtin_loongarch_syscall(1); // CHECK: call void @llvm.loongarch.syscall + +} + +void break_builtin(){ + + // __break + // Code + // uimm15 + __builtin_loongarch_break(1); // CHECK: call void @llvm.loongarch.break + +} + +void asrtle_d(){ + + i64 i64_a, i64_b; + // __asrtle_d + // rj, rk + // long int, long int + __builtin_loongarch_asrtle_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtle.d + +} + +void asrtgt_d(){ + + i64 i64_a, i64_b; + // __asrtgt_d + // rj, rk + // long int, long int + __builtin_loongarch_asrtgt_d(i64_a, i64_b); // CHECK: call void @llvm.loongarch.asrtgt.d + +} + +void dbar(){ + + // __dbar + // hint + // uimm15 + __builtin_loongarch_dbar(0); // CHECK: call void @llvm.loongarch.dbar + +} + +void ibar(){ + + // __ibar + // hint + // uimm15 + __builtin_loongarch_ibar(0); // CHECK: call void @llvm.loongarch.ibar + +} + +void movfcsr2gr(){ + + u32 u32_r; + // __movfcsr2gr + u32_r=__movfcsr2gr(0); // CHECK: call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"() + +} + + +void movgr2fcsr() { + + u32 u32_a; + // __movgr2fcsr + __movgr2fcsr(0, u32_a); // CHECK: call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0) + +} diff --git a/clang/test/CodeGen/builtins-loongarch-lasx-error.c b/clang/test/CodeGen/builtins-loongarch-lasx-error.c new file mode 100644 index 000000000000..99f2687e48fd --- /dev/null +++ 
b/clang/test/CodeGen/builtins-loongarch-lasx-error.c @@ -0,0 +1,266 @@ +// REQUIRES: loongarch-registered-target +// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -fsyntax-only %s \ +// RUN: -target-feature +lasx \ +// RUN: -verify -o - 2>&1 + +#include <lasxintrin.h> + +void test() { + v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; + v32i8 v32i8_r; + + v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + v16i16 v16i16_r; + + v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; + v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; + v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; + v8i32 v8i32_r; + + v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; + v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; + v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; + v4i64 v4i64_r; + + v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + v32u8 v32u8_c = (v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; + v32u8 v32u8_r; + + v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + v16u16 v16u16_r; + + v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; + v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; + v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; + v8u32 v8u32_r; + + v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; + v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; + v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; + v4u64 v4u64_r; + + v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; + v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; + v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; + v8f32 v8f32_r; + v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; + v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; + v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; + v4f64 v4f64_r; + + int i32_r; + int i32_a = 1; + int i32_b = 2; + unsigned int u32_r; + unsigned int u32_a = 1; + unsigned int u32_b = 2; + long long i64_r; + long long i64_a = 1; + long long i64_b = 2; + long long i64_c = 3; + unsigned long long u64_r; + unsigned long long u64_a = 1; + unsigned long long u64_b = 2; + unsigned long long u64_c = 3; + + v32i8_r = __lasx_xvslli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16i16_r = __lasx_xvslli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8i32_r = __lasx_xvslli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvslli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32i8_r = __lasx_xvsrai_b(v32i8_a, 8); // expected-error
{{argument value 8 is outside the valid range [0, 7]}} + v16i16_r = __lasx_xvsrai_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8i32_r = __lasx_xvsrai_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvsrai_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32i8_r = __lasx_xvsrari_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16i16_r = __lasx_xvsrari_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8i32_r = __lasx_xvsrari_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvsrari_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32i8_r = __lasx_xvsrli_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16i16_r = __lasx_xvsrli_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8i32_r = __lasx_xvsrli_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvsrli_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32i8_r = __lasx_xvsrlri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16i16_r = __lasx_xvsrlri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8i32_r = __lasx_xvsrlri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvsrlri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32u8_r = __lasx_xvbitclri_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16u16_r = __lasx_xvbitclri_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8u32_r = __lasx_xvbitclri_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4u64_r = __lasx_xvbitclri_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32u8_r = __lasx_xvbitseti_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16u16_r = __lasx_xvbitseti_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8u32_r = __lasx_xvbitseti_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4u64_r = __lasx_xvbitseti_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32u8_r = __lasx_xvbitrevi_b(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16u16_r = __lasx_xvbitrevi_h(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8u32_r = __lasx_xvbitrevi_w(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4u64_r = __lasx_xvbitrevi_d(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32i8_r = __lasx_xvaddi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i16_r = __lasx_xvaddi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvaddi_wu(v8i32_a, 32); // expected-error 
{{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvaddi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v32i8_r = __lasx_xvsubi_bu(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i16_r = __lasx_xvsubi_hu(v16i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvsubi_wu(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvsubi_du(v4i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v32i8_r = __lasx_xvmaxi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16i16_r = __lasx_xvmaxi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i32_r = __lasx_xvmaxi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i64_r = __lasx_xvmaxi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v32u8_r = __lasx_xvmaxi_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16u16_r = __lasx_xvmaxi_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8u32_r = __lasx_xvmaxi_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4u64_r = __lasx_xvmaxi_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v32i8_r = __lasx_xvmini_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16i16_r = __lasx_xvmini_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i32_r = __lasx_xvmini_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i64_r = __lasx_xvmini_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v32u8_r = __lasx_xvmini_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16u16_r = __lasx_xvmini_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8u32_r = __lasx_xvmini_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4u64_r = __lasx_xvmini_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v32i8_r = __lasx_xvseqi_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16i16_r = __lasx_xvseqi_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i32_r = __lasx_xvseqi_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i64_r = __lasx_xvseqi_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v32i8_r = __lasx_xvslti_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16i16_r = __lasx_xvslti_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i32_r = __lasx_xvslti_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i64_r = __lasx_xvslti_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v32i8_r = 
__lasx_xvslti_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i16_r = __lasx_xvslti_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvslti_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvslti_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v32i8_r = __lasx_xvslei_b(v32i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16i16_r = __lasx_xvslei_h(v16i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i32_r = __lasx_xvslei_w(v8i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i64_r = __lasx_xvslei_d(v4i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v32i8_r = __lasx_xvslei_bu(v32u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i16_r = __lasx_xvslei_hu(v16u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvslei_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvslei_du(v4u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v32i8_r = __lasx_xvsat_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16i16_r = __lasx_xvsat_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8i32_r = __lasx_xvsat_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvsat_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32u8_r = __lasx_xvsat_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16u16_r = __lasx_xvsat_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8u32_r = __lasx_xvsat_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4u64_r = __lasx_xvsat_du(v4u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + v32u8_r = __lasx_xvandi_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v32u8_r = __lasx_xvori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v32u8_r = __lasx_xvnori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v32u8_r = __lasx_xvxori_b(v32u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v32i8_r = __lasx_xvshuf4i_b(v32i8_a, 256); // expected-error {{argument value 256 is outside the valid 
range [0, 255]}} + v16i16_r = __lasx_xvshuf4i_h(v16i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v8i32_r = __lasx_xvshuf4i_w(v8i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v32i8_r = __lasx_xvbsrl_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v32i8_r = __lasx_xvbsll_v(v32i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v32i8_r = __lasx_xvld(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __lasx_xvst(v32i8_a, &v32i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + v8i32_r = __lasx_xvpickve_w(v8i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v4i64_r = __lasx_xvpickve_d(v4i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + v4i64_r = __lasx_xvldi(-4097); // expected-error {{argument 
value -4097 is outside the valid range [-4096, 4095]}} + v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v4i64_r = __lasx_xvpermi_d(v4i64_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v32i8_r = __lasx_xvldrepl_b(&v32i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + v16i16_r = __lasx_xvldrepl_h(&v16i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}} + v8i32_r = __lasx_xvldrepl_w(&v8i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + v4i64_r = __lasx_xvldrepl_d(&v4i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}} + i32_r = __lasx_xvpickve2gr_w(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + u32_r = __lasx_xvpickve2gr_wu(v8i32_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + i64_r = __lasx_xvpickve2gr_d(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + u64_r = __lasx_xvpickve2gr_du(v4i64_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + v32i8_r = __lasx_xvrotri_b(v32i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v16i16_r = __lasx_xvrotri_h(v16i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8i32_r = __lasx_xvrotri_w(v8i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i64_r = __lasx_xvrotri_d(v4i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 
is outside the valid range [0, 127]}} + v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, 64); // expected-error 
{{argument value 64 is outside the valid range [0, 63]}} + v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} +} diff --git a/clang/test/CodeGen/builtins-loongarch-lasx.c b/clang/test/CodeGen/builtins-loongarch-lasx.c new file mode 100644 index 000000000000..0d6a54cb083d --- /dev/null +++ b/clang/test/CodeGen/builtins-loongarch-lasx.c @@ -0,0 +1,3772 @@ +// REQUIRES: loongarch-registered-target +// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \ +// RUN: -target-feature +lasx \ +// RUN: -o - | FileCheck %s + +#include <lasxintrin.h> + +#define ui1_b 1 +#define ui2 1 +#define ui2_b ui2 +#define ui3 4 +#define ui3_b ui3 +#define ui4 7 +#define ui4_b ui4 +#define ui5 25 +#define ui5_b ui5 +#define ui6 44 +#define ui6_b ui6 +#define ui7 100 +#define ui7_b ui7 +#define ui8 127 //200 +#define ui8_b ui8 +#define si5_b -4 +#define si8 -100 +#define si9 0 +#define si10 0 +#define si11 0 +#define si12 0 +#define i10 500 +#define i13 4000 +#define mode 0 +#define idx1 1 +#define idx2 2 +#define idx3 4 +#define idx4 8 + +void test(void) { + v32i8 v32i8_a = (v32i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + v32i8 v32i8_b = (v32i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + v32i8 v32i8_c = (v32i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; + v32i8 v32i8_r; + + v16i16 v16i16_a = (v16i16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + v16i16 v16i16_b = (v16i16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + v16i16 v16i16_c = (v16i16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + v16i16 v16i16_r; + + v8i32 v8i32_a = (v8i32){0, 1, 2, 3, 4, 5, 6, 7}; + v8i32 v8i32_b = (v8i32){1, 2, 3, 4, 5, 6, 7, 8}; + v8i32 v8i32_c = (v8i32){2, 3, 4, 5, 6, 7, 8, 9}; + v8i32 v8i32_r; + + v4i64 v4i64_a = (v4i64){0, 1, 2, 3}; + v4i64 v4i64_b = (v4i64){1, 2, 3, 4}; + v4i64 v4i64_c = (v4i64){2, 3, 4, 5}; + v4i64 v4i64_r; + + v32u8 v32u8_a = (v32u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + v32u8 v32u8_b = (v32u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + v32u8 v32u8_c =
(v32u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33}; + v32u8 v32u8_r; + + v16u16 v16u16_a = (v16u16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + v16u16 v16u16_b = (v16u16){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + v16u16 v16u16_c = (v16u16){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + v16u16 v16u16_r; + + v8u32 v8u32_a = (v8u32){0, 1, 2, 3, 4, 5, 6, 7}; + v8u32 v8u32_b = (v8u32){1, 2, 3, 4, 5, 6, 7, 8}; + v8u32 v8u32_c = (v8u32){2, 3, 4, 5, 6, 7, 8, 9}; + v8u32 v8u32_r; + + v4u64 v4u64_a = (v4u64){0, 1, 2, 3}; + v4u64 v4u64_b = (v4u64){1, 2, 3, 4}; + v4u64 v4u64_c = (v4u64){2, 3, 4, 5}; + v4u64 v4u64_r; + + v8f32 v8f32_a = (v8f32){0.5, 1, 2, 3, 4, 5, 6, 7}; + v8f32 v8f32_b = (v8f32){1.5, 2, 3, 4, 5, 6, 7, 8}; + v8f32 v8f32_c = (v8f32){2.5, 3, 4, 5, 6, 7, 8, 9}; + v8f32 v8f32_r; + v4f64 v4f64_a = (v4f64){0.5, 1, 2, 3}; + v4f64 v4f64_b = (v4f64){1.5, 2, 3, 4}; + v4f64 v4f64_c = (v4f64){2.5, 3, 4, 5}; + v4f64 v4f64_r; + + int i32_r; + int i32_a = 1; + int i32_b = 2; + unsigned int u32_r; + unsigned int u32_a = 1; + unsigned int u32_b = 2; + long long i64_r; + long long i64_a = 1; + long long i64_b = 2; + long long i64_c = 3; + long int i64_d = 0; + unsigned long long u64_r; + unsigned long long u64_a = 1; + unsigned long long u64_b = 2; + unsigned long long u64_c = 3; + + // __lasx_xvsll_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvsll_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsll.b( + + // __lasx_xvsll_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvsll_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsll.h( + + // __lasx_xvsll_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvsll_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsll.w( + + // __lasx_xvsll_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvsll_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsll.d( + + // __lasx_xvslli_b + // xd, xj, ui3 + // V32QI, V32QI, UQI + v32i8_r = __lasx_xvslli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslli.b( + + // __lasx_xvslli_h + // xd, xj, ui4 + // V16HI, V16HI, UQI + v16i16_r = __lasx_xvslli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslli.h( + + // __lasx_xvslli_w + // xd, xj, ui5 + // V8SI, V8SI, UQI + v8i32_r = __lasx_xvslli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslli.w( + + // __lasx_xvslli_d + // xd, xj, ui6 + // V4DI, V4DI, UQI + v4i64_r = __lasx_xvslli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslli.d( + + // __lasx_xvsra_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvsra_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsra.b( + + // __lasx_xvsra_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvsra_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsra.h( + + // __lasx_xvsra_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvsra_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsra.w( + + // __lasx_xvsra_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvsra_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsra.d( + + // __lasx_xvsrai_b + // xd, xj, ui3 + // V32QI, V32QI, UQI + v32i8_r = __lasx_xvsrai_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrai.b( + + // 
__lasx_xvsrai_h + // xd, xj, ui4 + // V16HI, V16HI, UQI + v16i16_r = __lasx_xvsrai_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrai.h( + + // __lasx_xvsrai_w + // xd, xj, ui5 + // V8SI, V8SI, UQI + v8i32_r = __lasx_xvsrai_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrai.w( + + // __lasx_xvsrai_d + // xd, xj, ui6 + // V4DI, V4DI, UQI + v4i64_r = __lasx_xvsrai_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrai.d( + + // __lasx_xvsrar_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvsrar_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrar.b( + + // __lasx_xvsrar_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvsrar_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrar.h( + + // __lasx_xvsrar_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvsrar_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrar.w( + + // __lasx_xvsrar_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvsrar_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrar.d( + + // __lasx_xvsrari_b + // xd, xj, ui3 + // V32QI, V32QI, UQI + v32i8_r = __lasx_xvsrari_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrari.b( + + // __lasx_xvsrari_h + // xd, xj, ui4 + // V16HI, V16HI, UQI + v16i16_r = __lasx_xvsrari_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrari.h( + + // __lasx_xvsrari_w + // xd, xj, ui5 + // V8SI, V8SI, UQI + v8i32_r = __lasx_xvsrari_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrari.w( + + // __lasx_xvsrari_d + // xd, xj, ui6 + // V4DI, V4DI, UQI + v4i64_r = __lasx_xvsrari_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrari.d( + + // __lasx_xvsrl_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvsrl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrl.b( + + // __lasx_xvsrl_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvsrl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrl.h( + + // __lasx_xvsrl_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvsrl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrl.w( + + // __lasx_xvsrl_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvsrl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrl.d( + + // __lasx_xvsrli_b + // xd, xj, ui3 + // V32QI, V32QI, UQI + v32i8_r = __lasx_xvsrli_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrli.b( + + // __lasx_xvsrli_h + // xd, xj, ui4 + // V16HI, V16HI, UQI + v16i16_r = __lasx_xvsrli_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrli.h( + + // __lasx_xvsrli_w + // xd, xj, ui5 + // V8SI, V8SI, UQI + v8i32_r = __lasx_xvsrli_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrli.w( + + // __lasx_xvsrli_d + // xd, xj, ui6 + // V4DI, V4DI, UQI + v4i64_r = __lasx_xvsrli_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrli.d( + + // __lasx_xvsrlr_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvsrlr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b( + + // __lasx_xvsrlr_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvsrlr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h( + + // __lasx_xvsrlr_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = 
__lasx_xvsrlr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w( + + // __lasx_xvsrlr_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvsrlr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d( + + // __lasx_xvsrlri_b + // xd, xj, ui3 + // V32QI, V32QI, UQI + v32i8_r = __lasx_xvsrlri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b( + + // __lasx_xvsrlri_h + // xd, xj, ui4 + // V16HI, V16HI, UQI + v16i16_r = __lasx_xvsrlri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h( + + // __lasx_xvsrlri_w + // xd, xj, ui5 + // V8SI, V8SI, UQI + v8i32_r = __lasx_xvsrlri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w( + + // __lasx_xvsrlri_d + // xd, xj, ui6 + // V4DI, V4DI, UQI + v4i64_r = __lasx_xvsrlri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d( + + // __lasx_xvbitclr_b + // xd, xj, xk + // UV32QI, UV32QI, UV32QI + v32u8_r = __lasx_xvbitclr_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b( + + // __lasx_xvbitclr_h + // xd, xj, xk + // UV16HI, UV16HI, UV16HI + v16u16_r = __lasx_xvbitclr_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h( + + // __lasx_xvbitclr_w + // xd, xj, xk + // UV8SI, UV8SI, UV8SI + v8u32_r = __lasx_xvbitclr_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w( + + // __lasx_xvbitclr_d + // xd, xj, xk + // UV4DI, UV4DI, UV4DI + v4u64_r = __lasx_xvbitclr_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d( + + // __lasx_xvbitclri_b + // xd, xj, ui3 + // UV32QI, UV32QI, UQI + v32u8_r = __lasx_xvbitclri_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b( + + // __lasx_xvbitclri_h + // xd, xj, ui4 + // UV16HI, UV16HI, UQI + v16u16_r = __lasx_xvbitclri_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h( + + // __lasx_xvbitclri_w + // xd, xj, ui5 + // UV8SI, UV8SI, UQI + v8u32_r = __lasx_xvbitclri_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w( + + // __lasx_xvbitclri_d + // xd, xj, ui6 + // UV4DI, UV4DI, UQI + v4u64_r = __lasx_xvbitclri_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d( + + // __lasx_xvbitset_b + // xd, xj, xk + // UV32QI, UV32QI, UV32QI + v32u8_r = __lasx_xvbitset_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitset.b( + + // __lasx_xvbitset_h + // xd, xj, xk + // UV16HI, UV16HI, UV16HI + v16u16_r = __lasx_xvbitset_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitset.h( + + // __lasx_xvbitset_w + // xd, xj, xk + // UV8SI, UV8SI, UV8SI + v8u32_r = __lasx_xvbitset_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitset.w( + + // __lasx_xvbitset_d + // xd, xj, xk + // UV4DI, UV4DI, UV4DI + v4u64_r = __lasx_xvbitset_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitset.d( + + // __lasx_xvbitseti_b + // xd, xj, ui3 + // UV32QI, UV32QI, UQI + v32u8_r = __lasx_xvbitseti_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b( + + // __lasx_xvbitseti_h + // xd, xj, ui4 + // UV16HI, UV16HI, UQI + v16u16_r = __lasx_xvbitseti_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h( + + // __lasx_xvbitseti_w + // xd, xj, ui5 + // UV8SI, UV8SI, UQI + v8u32_r = __lasx_xvbitseti_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w( + + 
// __lasx_xvbitseti_d + // xd, xj, ui6 + // UV4DI, UV4DI, UQI + v4u64_r = __lasx_xvbitseti_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d( + + // __lasx_xvbitrev_b + // xd, xj, xk + // UV32QI, UV32QI, UV32QI + v32u8_r = __lasx_xvbitrev_b(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b( + + // __lasx_xvbitrev_h + // xd, xj, xk + // UV16HI, UV16HI, UV16HI + v16u16_r = __lasx_xvbitrev_h(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h( + + // __lasx_xvbitrev_w + // xd, xj, xk + // UV8SI, UV8SI, UV8SI + v8u32_r = __lasx_xvbitrev_w(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w( + + // __lasx_xvbitrev_d + // xd, xj, xk + // UV4DI, UV4DI, UV4DI + v4u64_r = __lasx_xvbitrev_d(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d( + + // __lasx_xvbitrevi_b + // xd, xj, ui3 + // UV32QI, UV32QI, UQI + v32u8_r = __lasx_xvbitrevi_b(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b( + + // __lasx_xvbitrevi_h + // xd, xj, ui4 + // UV16HI, UV16HI, UQI + v16u16_r = __lasx_xvbitrevi_h(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h( + + // __lasx_xvbitrevi_w + // xd, xj, ui5 + // UV8SI, UV8SI, UQI + v8u32_r = __lasx_xvbitrevi_w(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w( + + // __lasx_xvbitrevi_d + // xd, xj, ui6 + // UV4DI, UV4DI, UQI + v4u64_r = __lasx_xvbitrevi_d(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d( + + // __lasx_xvadd_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadd.b( + + // __lasx_xvadd_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadd.h( + + // __lasx_xvadd_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadd.w( + + // __lasx_xvadd_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.d( + + // __lasx_xvaddi_bu + // xd, xj, ui5 + // V32QI, V32QI, UQI + v32i8_r = __lasx_xvaddi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu( + + // __lasx_xvaddi_hu + // xd, xj, ui5 + // V16HI, V16HI, UQI + v16i16_r = __lasx_xvaddi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu( + + // __lasx_xvaddi_wu + // xd, xj, ui5 + // V8SI, V8SI, UQI + v8i32_r = __lasx_xvaddi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu( + + // __lasx_xvaddi_du + // xd, xj, ui5 + // V4DI, V4DI, UQI + v4i64_r = __lasx_xvaddi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddi.du( + + // __lasx_xvsub_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvsub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsub.b( + + // __lasx_xvsub_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvsub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsub.h( + + // __lasx_xvsub_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvsub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsub.w( + + // __lasx_xvsub_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvsub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.d( + + // 
__lasx_xvsubi_bu + // xd, xj, ui5 + // V32QI, V32QI, UQI + v32i8_r = __lasx_xvsubi_bu(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu( + + // __lasx_xvsubi_hu + // xd, xj, ui5 + // V16HI, V16HI, UQI + v16i16_r = __lasx_xvsubi_hu(v16i16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu( + + // __lasx_xvsubi_wu + // xd, xj, ui5 + // V8SI, V8SI, UQI + v8i32_r = __lasx_xvsubi_wu(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu( + + // __lasx_xvsubi_du + // xd, xj, ui5 + // V4DI, V4DI, UQI + v4i64_r = __lasx_xvsubi_du(v4i64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubi.du( + + // __lasx_xvmax_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvmax_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.b( + + // __lasx_xvmax_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvmax_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.h( + + // __lasx_xvmax_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvmax_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.w( + + // __lasx_xvmax_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvmax_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.d( + + // __lasx_xvmaxi_b + // xd, xj, si5 + // V32QI, V32QI, QI + v32i8_r = __lasx_xvmaxi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b( + + // __lasx_xvmaxi_h + // xd, xj, si5 + // V16HI, V16HI, QI + v16i16_r = __lasx_xvmaxi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h( + + // __lasx_xvmaxi_w + // xd, xj, si5 + // V8SI, V8SI, QI + v8i32_r = __lasx_xvmaxi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w( + + // __lasx_xvmaxi_d + // xd, xj, si5 + // V4DI, V4DI, QI + v4i64_r = __lasx_xvmaxi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d( + + // __lasx_xvmax_bu + // xd, xj, xk + // UV32QI, UV32QI, UV32QI + v32u8_r = __lasx_xvmax_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmax.bu( + + // __lasx_xvmax_hu + // xd, xj, xk + // UV16HI, UV16HI, UV16HI + v16u16_r = __lasx_xvmax_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmax.hu( + + // __lasx_xvmax_wu + // xd, xj, xk + // UV8SI, UV8SI, UV8SI + v8u32_r = __lasx_xvmax_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmax.wu( + + // __lasx_xvmax_du + // xd, xj, xk + // UV4DI, UV4DI, UV4DI + v4u64_r = __lasx_xvmax_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmax.du( + + // __lasx_xvmaxi_bu + // xd, xj, ui5 + // UV32QI, UV32QI, UQI + v32u8_r = __lasx_xvmaxi_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu( + + // __lasx_xvmaxi_hu + // xd, xj, ui5 + // UV16HI, UV16HI, UQI + v16u16_r = __lasx_xvmaxi_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu( + + // __lasx_xvmaxi_wu + // xd, xj, ui5 + // UV8SI, UV8SI, UQI + v8u32_r = __lasx_xvmaxi_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu( + + // __lasx_xvmaxi_du + // xd, xj, ui5 + // UV4DI, UV4DI, UQI + v4u64_r = __lasx_xvmaxi_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du( + + // __lasx_xvmin_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvmin_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.b( + + // __lasx_xvmin_h + // xd, xj, xk + // V16HI, V16HI, V16HI + 
v16i16_r = __lasx_xvmin_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.h( + + // __lasx_xvmin_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvmin_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.w( + + // __lasx_xvmin_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvmin_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.d( + + // __lasx_xvmini_b + // xd, xj, si5 + // V32QI, V32QI, QI + v32i8_r = __lasx_xvmini_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.b( + + // __lasx_xvmini_h + // xd, xj, si5 + // V16HI, V16HI, QI + v16i16_r = __lasx_xvmini_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.h( + + // __lasx_xvmini_w + // xd, xj, si5 + // V8SI, V8SI, QI + v8i32_r = __lasx_xvmini_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.w( + + // __lasx_xvmini_d + // xd, xj, si5 + // V4DI, V4DI, QI + v4i64_r = __lasx_xvmini_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.d( + + // __lasx_xvmin_bu + // xd, xj, xk + // UV32QI, UV32QI, UV32QI + v32u8_r = __lasx_xvmin_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmin.bu( + + // __lasx_xvmin_hu + // xd, xj, xk + // UV16HI, UV16HI, UV16HI + v16u16_r = __lasx_xvmin_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmin.hu( + + // __lasx_xvmin_wu + // xd, xj, xk + // UV8SI, UV8SI, UV8SI + v8u32_r = __lasx_xvmin_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmin.wu( + + // __lasx_xvmin_du + // xd, xj, xk + // UV4DI, UV4DI, UV4DI + v4u64_r = __lasx_xvmin_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmin.du( + + // __lasx_xvmini_bu + // xd, xj, ui5 + // UV32QI, UV32QI, UQI + v32u8_r = __lasx_xvmini_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmini.bu( + + // __lasx_xvmini_hu + // xd, xj, ui5 + // UV16HI, UV16HI, UQI + v16u16_r = __lasx_xvmini_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmini.hu( + + // __lasx_xvmini_wu + // xd, xj, ui5 + // UV8SI, UV8SI, UQI + v8u32_r = __lasx_xvmini_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmini.wu( + + // __lasx_xvmini_du + // xd, xj, ui5 + // UV4DI, UV4DI, UQI + v4u64_r = __lasx_xvmini_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmini.du( + + // __lasx_xvseq_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvseq_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseq.b( + + // __lasx_xvseq_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvseq_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseq.h( + + // __lasx_xvseq_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvseq_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvseq.w( + + // __lasx_xvseq_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvseq_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseq.d( + + // __lasx_xvseqi_b + // xd, xj, si5 + // V32QI, V32QI, QI + v32i8_r = __lasx_xvseqi_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvseqi.b( + + // __lasx_xvseqi_h + // xd, xj, si5 + // V16HI, V16HI, QI + v16i16_r = __lasx_xvseqi_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvseqi.h( + + // __lasx_xvseqi_w + // xd, xj, si5 + // V8SI, V8SI, QI + v8i32_r = __lasx_xvseqi_w(v8i32_a, si5_b); // CHECK: call <8 x i32> 
+
+  // __lasx_xvseqi_d
+  // xd, xj, si5
+  // V4DI, V4DI, QI
+  v4i64_r = __lasx_xvseqi_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(
+
+  // __lasx_xvslt_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvslt_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.b(
+
+  // __lasx_xvslt_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvslt_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.h(
+
+  // __lasx_xvslt_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvslt_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.w(
+
+  // __lasx_xvslt_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvslt_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.d(
+
+  // __lasx_xvslti_b
+  // xd, xj, si5
+  // V32QI, V32QI, QI
+  v32i8_r = __lasx_xvslti_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.b(
+
+  // __lasx_xvslti_h
+  // xd, xj, si5
+  // V16HI, V16HI, QI
+  v16i16_r = __lasx_xvslti_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.h(
+
+  // __lasx_xvslti_w
+  // xd, xj, si5
+  // V8SI, V8SI, QI
+  v8i32_r = __lasx_xvslti_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.w(
+
+  // __lasx_xvslti_d
+  // xd, xj, si5
+  // V4DI, V4DI, QI
+  v4i64_r = __lasx_xvslti_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.d(
+
+  // __lasx_xvslt_bu
+  // xd, xj, xk
+  // V32QI, UV32QI, UV32QI
+  v32i8_r = __lasx_xvslt_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(
+
+  // __lasx_xvslt_hu
+  // xd, xj, xk
+  // V16HI, UV16HI, UV16HI
+  v16i16_r = __lasx_xvslt_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(
+
+  // __lasx_xvslt_wu
+  // xd, xj, xk
+  // V8SI, UV8SI, UV8SI
+  v8i32_r = __lasx_xvslt_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(
+
+  // __lasx_xvslt_du
+  // xd, xj, xk
+  // V4DI, UV4DI, UV4DI
+  v4i64_r = __lasx_xvslt_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslt.du(
+
+  // __lasx_xvslti_bu
+  // xd, xj, ui5
+  // V32QI, UV32QI, UQI
+  v32i8_r = __lasx_xvslti_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(
+
+  // __lasx_xvslti_hu
+  // xd, xj, ui5
+  // V16HI, UV16HI, UQI
+  v16i16_r = __lasx_xvslti_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(
+
+  // __lasx_xvslti_wu
+  // xd, xj, ui5
+  // V8SI, UV8SI, UQI
+  v8i32_r = __lasx_xvslti_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(
+
+  // __lasx_xvslti_du
+  // xd, xj, ui5
+  // V4DI, UV4DI, UQI
+  v4i64_r = __lasx_xvslti_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslti.du(
+
+  // __lasx_xvsle_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvsle_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.b(
+
+  // __lasx_xvsle_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvsle_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.h(
+
+  // __lasx_xvsle_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvsle_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.w(
+
+  // __lasx_xvsle_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvsle_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.d(
+
+  // __lasx_xvslei_b
+  // xd, xj, si5
+  // V32QI, V32QI, QI
+  v32i8_r = __lasx_xvslei_b(v32i8_a, si5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.b(
+
+  // __lasx_xvslei_h
+  // xd, xj, si5
+  // V16HI, V16HI, QI
+  v16i16_r = __lasx_xvslei_h(v16i16_a, si5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.h(
+
+  // __lasx_xvslei_w
+  // xd, xj, si5
+  // V8SI, V8SI, QI
+  v8i32_r = __lasx_xvslei_w(v8i32_a, si5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.w(
+
+  // __lasx_xvslei_d
+  // xd, xj, si5
+  // V4DI, V4DI, QI
+  v4i64_r = __lasx_xvslei_d(v4i64_a, si5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.d(
+
+  // __lasx_xvsle_bu
+  // xd, xj, xk
+  // V32QI, UV32QI, UV32QI
+  v32i8_r = __lasx_xvsle_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(
+
+  // __lasx_xvsle_hu
+  // xd, xj, xk
+  // V16HI, UV16HI, UV16HI
+  v16i16_r = __lasx_xvsle_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(
+
+  // __lasx_xvsle_wu
+  // xd, xj, xk
+  // V8SI, UV8SI, UV8SI
+  v8i32_r = __lasx_xvsle_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(
+
+  // __lasx_xvsle_du
+  // xd, xj, xk
+  // V4DI, UV4DI, UV4DI
+  v4i64_r = __lasx_xvsle_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsle.du(
+
+  // __lasx_xvslei_bu
+  // xd, xj, ui5
+  // V32QI, UV32QI, UQI
+  v32i8_r = __lasx_xvslei_bu(v32u8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(
+
+  // __lasx_xvslei_hu
+  // xd, xj, ui5
+  // V16HI, UV16HI, UQI
+  v16i16_r = __lasx_xvslei_hu(v16u16_a, ui5_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(
+
+  // __lasx_xvslei_wu
+  // xd, xj, ui5
+  // V8SI, UV8SI, UQI
+  v8i32_r = __lasx_xvslei_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(
+
+  // __lasx_xvslei_du
+  // xd, xj, ui5
+  // V4DI, UV4DI, UQI
+  v4i64_r = __lasx_xvslei_du(v4u64_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvslei.du(
+
+  // __lasx_xvsat_b
+  // xd, xj, ui3
+  // V32QI, V32QI, UQI
+  v32i8_r = __lasx_xvsat_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.b(
+
+  // __lasx_xvsat_h
+  // xd, xj, ui4
+  // V16HI, V16HI, UQI
+  v16i16_r = __lasx_xvsat_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.h(
+
+  // __lasx_xvsat_w
+  // xd, xj, ui5
+  // V8SI, V8SI, UQI
+  v8i32_r = __lasx_xvsat_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.w(
+
+  // __lasx_xvsat_d
+  // xd, xj, ui6
+  // V4DI, V4DI, UQI
+  v4i64_r = __lasx_xvsat_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.d(
+
+  // __lasx_xvsat_bu
+  // xd, xj, ui3
+  // UV32QI, UV32QI, UQI
+  v32u8_r = __lasx_xvsat_bu(v32u8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(
+
+  // __lasx_xvsat_hu
+  // xd, xj, ui4
+  // UV16HI, UV16HI, UQI
+  v16u16_r = __lasx_xvsat_hu(v16u16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(
+
+  // __lasx_xvsat_wu
+  // xd, xj, ui5
+  // UV8SI, UV8SI, UQI
+  v8u32_r = __lasx_xvsat_wu(v8u32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(
+
+  // __lasx_xvsat_du
+  // xd, xj, ui6
+  // UV4DI, UV4DI, UQI
+  v4u64_r = __lasx_xvsat_du(v4u64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsat.du(
+
+  // __lasx_xvadda_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvadda_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvadda.b(
+
+  // __lasx_xvadda_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvadda_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvadda.h(
+
+  // __lasx_xvadda_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvadda_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvadda.w(
+
+  // __lasx_xvadda_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvadda_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadda.d(
+
+  // __lasx_xvsadd_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvsadd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(
+
+  // __lasx_xvsadd_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvsadd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(
+
+  // __lasx_xvsadd_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvsadd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(
+
+  // __lasx_xvsadd_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvsadd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(
+
+  // __lasx_xvsadd_bu
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvsadd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(
+
+  // __lasx_xvsadd_hu
+  // xd, xj, xk
+  // UV16HI, UV16HI, UV16HI
+  v16u16_r = __lasx_xvsadd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(
+
+  // __lasx_xvsadd_wu
+  // xd, xj, xk
+  // UV8SI, UV8SI, UV8SI
+  v8u32_r = __lasx_xvsadd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(
+
+  // __lasx_xvsadd_du
+  // xd, xj, xk
+  // UV4DI, UV4DI, UV4DI
+  v4u64_r = __lasx_xvsadd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(
+
+  // __lasx_xvavg_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvavg_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.b(
+
+  // __lasx_xvavg_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvavg_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.h(
+
+  // __lasx_xvavg_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvavg_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.w(
+
+  // __lasx_xvavg_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvavg_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.d(
+
+  // __lasx_xvavg_bu
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvavg_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(
+
+  // __lasx_xvavg_hu
+  // xd, xj, xk
+  // UV16HI, UV16HI, UV16HI
+  v16u16_r = __lasx_xvavg_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(
+
+  // __lasx_xvavg_wu
+  // xd, xj, xk
+  // UV8SI, UV8SI, UV8SI
+  v8u32_r = __lasx_xvavg_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(
+
+  // __lasx_xvavg_du
+  // xd, xj, xk
+  // UV4DI, UV4DI, UV4DI
+  v4u64_r = __lasx_xvavg_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavg.du(
+
+  // __lasx_xvavgr_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvavgr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(
+
+  // __lasx_xvavgr_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvavgr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(
+
+  // __lasx_xvavgr_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvavgr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(
+
+  // __lasx_xvavgr_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvavgr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(
+
+  // __lasx_xvavgr_bu
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvavgr_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(
+
+  // __lasx_xvavgr_hu
+  // xd, xj, xk
+  // UV16HI, UV16HI, UV16HI
+  v16u16_r = __lasx_xvavgr_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(
+
+  // __lasx_xvavgr_wu
+  // xd, xj, xk
+  // UV8SI, UV8SI, UV8SI
+  v8u32_r = __lasx_xvavgr_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(
+
+  // __lasx_xvavgr_du
+  // xd, xj, xk
+  // UV4DI, UV4DI, UV4DI
+  v4u64_r = __lasx_xvavgr_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(
+
+  // __lasx_xvssub_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvssub_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.b(
+
+  // __lasx_xvssub_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvssub_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.h(
+
+  // __lasx_xvssub_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvssub_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.w(
+
+  // __lasx_xvssub_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvssub_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.d(
+
+  // __lasx_xvssub_bu
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvssub_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(
+
+  // __lasx_xvssub_hu
+  // xd, xj, xk
+  // UV16HI, UV16HI, UV16HI
+  v16u16_r = __lasx_xvssub_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(
+
+  // __lasx_xvssub_wu
+  // xd, xj, xk
+  // UV8SI, UV8SI, UV8SI
+  v8u32_r = __lasx_xvssub_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(
+
+  // __lasx_xvssub_du
+  // xd, xj, xk
+  // UV4DI, UV4DI, UV4DI
+  v4u64_r = __lasx_xvssub_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssub.du(
+
+  // __lasx_xvabsd_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvabsd_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(
+
+  // __lasx_xvabsd_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvabsd_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(
+
+  // __lasx_xvabsd_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvabsd_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(
+
+  // __lasx_xvabsd_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvabsd_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(
+
+  // __lasx_xvabsd_bu
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvabsd_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(
+
+  // __lasx_xvabsd_hu
+  // xd, xj, xk
+  // UV16HI, UV16HI, UV16HI
+  v16u16_r = __lasx_xvabsd_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(
+
+  // __lasx_xvabsd_wu
+  // xd, xj, xk
+  // UV8SI, UV8SI, UV8SI
+  v8u32_r = __lasx_xvabsd_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(
+
+  // __lasx_xvabsd_du
+  // xd, xj, xk
+  // UV4DI, UV4DI, UV4DI
+  v4u64_r = __lasx_xvabsd_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(
+
+  // __lasx_xvmul_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvmul_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmul.b(
+
+  // __lasx_xvmul_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvmul_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmul.h(
+
+  // __lasx_xvmul_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvmul_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmul.w(
+
+  // __lasx_xvmul_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvmul_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmul.d(
+
+  // __lasx_xvmadd_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvmadd_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(
+
+  // __lasx_xvmadd_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvmadd_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(
+
+  // __lasx_xvmadd_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvmadd_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(
+
+  // __lasx_xvmadd_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvmadd_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(
+
+  // __lasx_xvmsub_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvmsub_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(
+
+  // __lasx_xvmsub_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvmsub_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(
+
+  // __lasx_xvmsub_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvmsub_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(
+
+  // __lasx_xvmsub_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvmsub_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(
+
+  // __lasx_xvdiv_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvdiv_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(
+
+  // __lasx_xvdiv_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvdiv_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(
+
+  // __lasx_xvdiv_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvdiv_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(
+
+  // __lasx_xvdiv_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvdiv_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(
+
+  // __lasx_xvdiv_bu
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvdiv_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(
+
+  // __lasx_xvdiv_hu
+  // xd, xj, xk
+  // UV16HI, UV16HI, UV16HI
+  v16u16_r = __lasx_xvdiv_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(
+
+  // __lasx_xvdiv_wu
+  // xd, xj, xk
+  // UV8SI, UV8SI, UV8SI
+  v8u32_r = __lasx_xvdiv_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(
+
+  // __lasx_xvdiv_du
+  // xd, xj, xk
+  // UV4DI, UV4DI, UV4DI
+  v4u64_r = __lasx_xvdiv_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(
+
+  // __lasx_xvhaddw_h_b
+  // xd, xj, xk
+  // V16HI, V32QI, V32QI
+  v16i16_r = __lasx_xvhaddw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(
+
+  // __lasx_xvhaddw_w_h
+  // xd, xj, xk
+  // V8SI, V16HI, V16HI
+  v8i32_r = __lasx_xvhaddw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(
+
+  // __lasx_xvhaddw_d_w
+  // xd, xj, xk
+  // V4DI, V8SI, V8SI
+  v4i64_r = __lasx_xvhaddw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(
+
+  // __lasx_xvhaddw_hu_bu
+  // xd, xj, xk
+  // UV16HI, UV32QI, UV32QI
+  v16u16_r = __lasx_xvhaddw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(
+
+  // __lasx_xvhaddw_wu_hu
+  // xd, xj, xk
+  // UV8SI, UV16HI, UV16HI
+  v8u32_r = __lasx_xvhaddw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(
+
+  // __lasx_xvhaddw_du_wu
+  // xd, xj, xk
+  // UV4DI, UV8SI, UV8SI
+  v4u64_r = __lasx_xvhaddw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(
+
+  // __lasx_xvhsubw_h_b
+  // xd, xj, xk
+  // V16HI, V32QI, V32QI
+  v16i16_r = __lasx_xvhsubw_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(
+
+  // __lasx_xvhsubw_w_h
+  // xd, xj, xk
+  // V8SI, V16HI, V16HI
+  v8i32_r = __lasx_xvhsubw_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(
+
+  // __lasx_xvhsubw_d_w
+  // xd, xj, xk
+  // V4DI, V8SI, V8SI
+  v4i64_r = __lasx_xvhsubw_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(
+
+  // __lasx_xvhsubw_hu_bu
+  // xd, xj, xk
+  // V16HI, UV32QI, UV32QI
+  v16i16_r = __lasx_xvhsubw_hu_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(
+
+  // __lasx_xvhsubw_wu_hu
+  // xd, xj, xk
+  // V8SI, UV16HI, UV16HI
+  v8i32_r = __lasx_xvhsubw_wu_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(
+
+  // __lasx_xvhsubw_du_wu
+  // xd, xj, xk
+  // V4DI, UV8SI, UV8SI
+  v4i64_r = __lasx_xvhsubw_du_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(
+
+  // __lasx_xvmod_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvmod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.b(
+
+  // __lasx_xvmod_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvmod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.h(
+
+  // __lasx_xvmod_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvmod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.w(
+
+  // __lasx_xvmod_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvmod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.d(
+
+  // __lasx_xvmod_bu
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvmod_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(
+
+  // __lasx_xvmod_hu
+  // xd, xj, xk
+  // UV16HI, UV16HI, UV16HI
+  v16u16_r = __lasx_xvmod_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(
+
+  // __lasx_xvmod_wu
+  // xd, xj, xk
+  // UV8SI, UV8SI, UV8SI
+  v8u32_r = __lasx_xvmod_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(
+
+  // __lasx_xvmod_du
+  // xd, xj, xk
+  // UV4DI, UV4DI, UV4DI
+  v4u64_r = __lasx_xvmod_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmod.du(
+
+  // __lasx_xvrepl128vei_b
+  // xd, xj, ui4
+  // V32QI, V32QI, UQI
+  v32i8_r = __lasx_xvrepl128vei_b(v32i8_a, ui4_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(
+
+  // __lasx_xvrepl128vei_h
+  // xd, xj, ui3
+  // V16HI, V16HI, UQI
+  v16i16_r = __lasx_xvrepl128vei_h(v16i16_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(
+
+  // __lasx_xvrepl128vei_w
+  // xd, xj, ui2
+  // V8SI, V8SI, UQI
+  v8i32_r = __lasx_xvrepl128vei_w(v8i32_a, ui2_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(
+
+  // __lasx_xvrepl128vei_d
+  // xd, xj, ui1
+  // V4DI, V4DI, UQI
+  v4i64_r = __lasx_xvrepl128vei_d(v4i64_a, ui1_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(
+
+  // __lasx_xvpickev_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvpickev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(
+
+  // __lasx_xvpickev_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvpickev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(
+
+  // __lasx_xvpickev_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvpickev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(
+
+  // __lasx_xvpickev_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvpickev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(
+
+  // __lasx_xvpickod_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvpickod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(
+
+  // __lasx_xvpickod_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvpickod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(
+
+  // __lasx_xvpickod_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvpickod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(
+
+  // __lasx_xvpickod_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvpickod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(
+
+  // __lasx_xvilvh_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvilvh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(
+
+  // __lasx_xvilvh_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvilvh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(
+
+  // __lasx_xvilvh_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvilvh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(
+
+  // __lasx_xvilvh_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvilvh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(
+
+  // __lasx_xvilvl_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvilvl_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(
+
+  // __lasx_xvilvl_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvilvl_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(
+
+  // __lasx_xvilvl_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvilvl_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(
+
+  // __lasx_xvilvl_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvilvl_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(
+
+  // __lasx_xvpackev_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvpackev_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(
+
+  // __lasx_xvpackev_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvpackev_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(
+
+  // __lasx_xvpackev_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvpackev_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(
+
+  // __lasx_xvpackev_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvpackev_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(
+
+  // __lasx_xvpackod_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvpackod_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(
+
+  // __lasx_xvpackod_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvpackod_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(
+
+  // __lasx_xvpackod_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvpackod_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(
+
+  // __lasx_xvpackod_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvpackod_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(
+
+  // __lasx_xvshuf_b
+  // xd, xj, xk, xa
+  // V32QI, V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvshuf_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(
+
+  // __lasx_xvshuf_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvshuf_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(
+
+  // __lasx_xvshuf_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvshuf_w(v8i32_a, v8i32_b, v8i32_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(
+
+  // __lasx_xvshuf_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvshuf_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(
+
+  // __lasx_xvand_v
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvand_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvand.v(
+
+  // __lasx_xvandi_b
+  // xd, xj, ui8
+  // UV32QI, UV32QI, UQI
+  v32u8_r = __lasx_xvandi_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandi.b(
+
+  // __lasx_xvor_v
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvor.v(
+
+  // __lasx_xvori_b
+  // xd, xj, ui8
+  // UV32QI, UV32QI, UQI
+  v32u8_r = __lasx_xvori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvori.b(
+
+  // __lasx_xvnor_v
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvnor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnor.v(
+
+  // __lasx_xvnori_b
+  // xd, xj, ui8
+  // UV32QI, UV32QI, UQI
+  v32u8_r = __lasx_xvnori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvnori.b(
+
+  // __lasx_xvxor_v
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvxor_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxor.v(
+
+  // __lasx_xvxori_b
+  // xd, xj, ui8
+  // UV32QI, UV32QI, UQI
+  v32u8_r = __lasx_xvxori_b(v32u8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvxori.b(
+
+  // __lasx_xvbitsel_v
+  // xd, xj, xk, xa
+  // UV32QI, UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvbitsel_v(v32u8_a, v32u8_b, v32u8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(
+
+  // __lasx_xvbitseli_b
+  // xd, xj, ui8
+  // UV32QI, UV32QI, UV32QI, UQI
+  v32u8_r = __lasx_xvbitseli_b(v32u8_a, v32u8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(
+
+  // __lasx_xvshuf4i_b
+  // xd, xj, ui8
+  // V32QI, V32QI, USI
+  v32i8_r = __lasx_xvshuf4i_b(v32i8_a, ui8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(
+
+  // __lasx_xvshuf4i_h
+  // xd, xj, ui8
+  // V16HI, V16HI, USI
+  v16i16_r = __lasx_xvshuf4i_h(v16i16_a, ui8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(
+
+  // __lasx_xvshuf4i_w
+  // xd, xj, ui8
+  // V8SI, V8SI, USI
+  v8i32_r = __lasx_xvshuf4i_w(v8i32_a, ui8_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(
+
+  // __lasx_xvreplgr2vr_b
+  // xd, rj
+  // V32QI, SI
+  v32i8_r = __lasx_xvreplgr2vr_b(i32_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(
+
+  // __lasx_xvreplgr2vr_h
+  // xd, rj
+  // V16HI, SI
+  v16i16_r = __lasx_xvreplgr2vr_h(i32_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(
+
+  // __lasx_xvreplgr2vr_w
+  // xd, rj
+  // V8SI, SI
+  v8i32_r = __lasx_xvreplgr2vr_w(i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(
+
+  // __lasx_xvreplgr2vr_d
+  // xd, rj
+  // V4DI, DI
+  v4i64_r = __lasx_xvreplgr2vr_d(i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(
+
+  // __lasx_xvpcnt_b
+  // xd, xj
+  // V32QI, V32QI
+  v32i8_r = __lasx_xvpcnt_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(
+
+  // __lasx_xvpcnt_h
+  // xd, xj
+  // V16HI, V16HI
+  v16i16_r = __lasx_xvpcnt_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(
+
+  // __lasx_xvpcnt_w
+  // xd, xj
+  // V8SI, V8SI
+  v8i32_r = __lasx_xvpcnt_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(
+
+  // __lasx_xvpcnt_d
+  // xd, xj
+  // V4DI, V4DI
+  v4i64_r = __lasx_xvpcnt_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(
+
+  // __lasx_xvclo_b
+  // xd, xj
+  // V32QI, V32QI
+  v32i8_r = __lasx_xvclo_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclo.b(
+
+  // __lasx_xvclo_h
+  // xd, xj
+  // V16HI, V16HI
+  v16i16_r = __lasx_xvclo_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclo.h(
+
+  // __lasx_xvclo_w
+  // xd, xj
+  // V8SI, V8SI
+  v8i32_r = __lasx_xvclo_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclo.w(
+
+  // __lasx_xvclo_d
+  // xd, xj
+  // V4DI, V4DI
+  v4i64_r = __lasx_xvclo_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclo.d(
+
+  // __lasx_xvclz_b
+  // xd, xj
+  // V32QI, V32QI
+  v32i8_r = __lasx_xvclz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvclz.b(
+
+  // __lasx_xvclz_h
+  // xd, xj
+  // V16HI, V16HI
+  v16i16_r = __lasx_xvclz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvclz.h(
+
+  // __lasx_xvclz_w
+  // xd, xj
+  // V8SI, V8SI
+  v8i32_r = __lasx_xvclz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvclz.w(
+
+  // __lasx_xvclz_d
+  // xd, xj
+  // V4DI, V4DI
+  v4i64_r = __lasx_xvclz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvclz.d(
+
+  // __lasx_xvfcmp_caf_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_caf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(
+
+  // __lasx_xvfcmp_caf_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_caf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(
+
+  // __lasx_xvfcmp_cor_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_cor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(
+
+  // __lasx_xvfcmp_cor_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_cor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(
+
+  // __lasx_xvfcmp_cun_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_cun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(
+
+  // __lasx_xvfcmp_cun_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_cun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(
+
+  // __lasx_xvfcmp_cune_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_cune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(
+
+  // __lasx_xvfcmp_cune_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_cune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(
+
+  // __lasx_xvfcmp_cueq_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_cueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(
+
+  // __lasx_xvfcmp_cueq_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_cueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(
+
+  // __lasx_xvfcmp_ceq_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_ceq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(
+
+  // __lasx_xvfcmp_ceq_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_ceq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(
+
+  // __lasx_xvfcmp_cne_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_cne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(
+
+  // __lasx_xvfcmp_cne_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_cne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(
+
+  // __lasx_xvfcmp_clt_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_clt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(
+
+  // __lasx_xvfcmp_clt_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_clt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(
+
+  // __lasx_xvfcmp_cult_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_cult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(
+
+  // __lasx_xvfcmp_cult_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_cult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(
+
+  // __lasx_xvfcmp_cle_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_cle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(
+
+  // __lasx_xvfcmp_cle_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_cle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(
+
+  // __lasx_xvfcmp_cule_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_cule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(
+
+  // __lasx_xvfcmp_cule_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_cule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(
+
+  // __lasx_xvfcmp_saf_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_saf_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(
+
+  // __lasx_xvfcmp_saf_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_saf_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(
+
+  // __lasx_xvfcmp_sor_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_sor_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(
+
+  // __lasx_xvfcmp_sor_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_sor_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(
+
+  // __lasx_xvfcmp_sun_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_sun_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(
+
+  // __lasx_xvfcmp_sun_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_sun_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(
+
+  // __lasx_xvfcmp_sune_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_sune_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(
+
+  // __lasx_xvfcmp_sune_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_sune_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(
+
+  // __lasx_xvfcmp_sueq_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_sueq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(
+
+  // __lasx_xvfcmp_sueq_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_sueq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(
+
+  // __lasx_xvfcmp_seq_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_seq_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(
+
+  // __lasx_xvfcmp_seq_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_seq_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(
+
+  // __lasx_xvfcmp_sne_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_sne_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(
+
+  // __lasx_xvfcmp_sne_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_sne_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(
+
+  // __lasx_xvfcmp_slt_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_slt_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(
+
+  // __lasx_xvfcmp_slt_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_slt_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(
+
+  // __lasx_xvfcmp_sult_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_sult_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(
+
+  // __lasx_xvfcmp_sult_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_sult_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(
+
+  // __lasx_xvfcmp_sle_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_sle_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(
+
+  // __lasx_xvfcmp_sle_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_sle_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(
+
+  // __lasx_xvfcmp_sule_s
+  // xd, xj, xk
+  // V8SI, V8SF, V8SF
+  v8i32_r = __lasx_xvfcmp_sule_s(v8f32_a, v8f32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(
+
+  // __lasx_xvfcmp_sule_d
+  // xd, xj, xk
+  // V4DI, V4DF, V4DF
+  v4i64_r = __lasx_xvfcmp_sule_d(v4f64_a, v4f64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(
+
+  // __lasx_xvfadd_s
+  // xd, xj, xk
+  // V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfadd_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfadd.s(
+
+  // __lasx_xvfadd_d
+  // xd, xj, xk
+  // V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfadd_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfadd.d(
+
+  // __lasx_xvfsub_s
+  // xd, xj, xk
+  // V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfsub_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsub.s(
+
+  // __lasx_xvfsub_d
+  // xd, xj, xk
+  // V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfsub_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsub.d(
+
+  // __lasx_xvfmul_s
+  // xd, xj, xk
+  // V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfmul_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmul.s(
+
+  // __lasx_xvfmul_d
+  // xd, xj, xk
+  // V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfmul_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmul.d(
+
+  // __lasx_xvfdiv_s
+  // xd, xj, xk
+  // V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfdiv_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(
+
+  // __lasx_xvfdiv_d
+  // xd, xj, xk
+  // V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfdiv_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(
+
+  // __lasx_xvfcvt_h_s
+  // xd, xj, xk
+  // V16HI, V8SF, V8SF
+  v16i16_r = __lasx_xvfcvt_h_s(v8f32_a, v8f32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(
+
+  // __lasx_xvfcvt_s_d
+  // xd, xj, xk
+  // V8SF, V4DF, V4DF
+  v8f32_r = __lasx_xvfcvt_s_d(v4f64_a, v4f64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(
+
+  // __lasx_xvfmin_s
+  // xd, xj, xk
+  // V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfmin_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmin.s(
+
+  // __lasx_xvfmin_d
+  // xd, xj, xk
+  // V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfmin_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmin.d(
+
+  // __lasx_xvfmina_s
+  // xd, xj, xk
+  // V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfmina_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmina.s(
+
+  // __lasx_xvfmina_d
+  // xd, xj, xk
+  // V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfmina_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmina.d(
+
+  // __lasx_xvfmax_s
+  // xd, xj, xk
+  // V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfmax_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmax.s(
+
+  // __lasx_xvfmax_d
+  // xd, xj, xk
+  // V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfmax_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmax.d(
+
+  // __lasx_xvfmaxa_s
+  // xd, xj, xk
+  // V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfmaxa_s(v8f32_a, v8f32_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(
+
+  // __lasx_xvfmaxa_d
+  // xd, xj, xk
+  // V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfmaxa_d(v4f64_a, v4f64_b); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(
+
+  // __lasx_xvfclass_s
+  // xd, xj
+  // V8SI, V8SF
+  v8i32_r = __lasx_xvfclass_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(
+
+  // __lasx_xvfclass_d
+  // xd, xj
+  // V4DI, V4DF
+  v4i64_r = __lasx_xvfclass_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(
+
+  // __lasx_xvfsqrt_s
+  // xd, xj
+  // V8SF, V8SF
+  v8f32_r = __lasx_xvfsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(
+
+  // __lasx_xvfsqrt_d
+  // xd, xj
+  // V4DF, V4DF
+  v4f64_r = __lasx_xvfsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(
+
+  // __lasx_xvfrecip_s
+  // xd, xj
+  // V8SF, V8SF
+  v8f32_r = __lasx_xvfrecip_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(
+
+  // __lasx_xvfrecip_d
+  // xd, xj
+  // V4DF, V4DF
+  v4f64_r = __lasx_xvfrecip_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(
+
+  // __lasx_xvfrint_s
+  // xd, xj
+  // V8SF, V8SF
+  v8f32_r = __lasx_xvfrint_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrint.s(
+
+  // __lasx_xvfrint_d
+  // xd, xj
+  // V4DF, V4DF
+  v4f64_r = __lasx_xvfrint_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrint.d(
+
+  // __lasx_xvfrsqrt_s
+  // xd, xj
+  // V8SF, V8SF
+  v8f32_r = __lasx_xvfrsqrt_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(
+
+  // __lasx_xvfrsqrt_d
+  // xd, xj
+  // V4DF, V4DF
+  v4f64_r = __lasx_xvfrsqrt_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(
+
+  // __lasx_xvflogb_s
+  // xd, xj
+  // V8SF, V8SF
+  v8f32_r = __lasx_xvflogb_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvflogb.s(
+
+  // __lasx_xvflogb_d
+  // xd, xj
+  // V4DF, V4DF
+  v4f64_r = __lasx_xvflogb_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvflogb.d(
+
+  // __lasx_xvfcvth_s_h
+  // xd, xj
+  // V8SF, V16HI
+  v8f32_r = __lasx_xvfcvth_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(
+
+  // __lasx_xvfcvth_d_s
+  // xd, xj
+  // V4DF, V8SF
+  v4f64_r = __lasx_xvfcvth_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(
+
+  // __lasx_xvfcvtl_s_h
+  // xd, xj
+  // V8SF, V16HI
+  v8f32_r = __lasx_xvfcvtl_s_h(v16i16_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(
+
+  // __lasx_xvfcvtl_d_s
+  // xd, xj
+  // V4DF, V8SF
+  v4f64_r = __lasx_xvfcvtl_d_s(v8f32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(
+
+  // __lasx_xvftint_w_s
+  // xd, xj
+  // V8SI, V8SF
+  v8i32_r = __lasx_xvftint_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(
+
+  // __lasx_xvftint_l_d
+  // xd, xj
+  // V4DI, V4DF
+  v4i64_r = __lasx_xvftint_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(
+
+  // __lasx_xvftint_wu_s
+  // xd, xj
+  // UV8SI, V8SF
+  v8u32_r = __lasx_xvftint_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(
+
+  // __lasx_xvftint_lu_d
+  // xd, xj
+  // UV4DI, V4DF
+  v4u64_r = __lasx_xvftint_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(
+
+  // __lasx_xvftintrz_w_s
+  // xd, xj
+  // V8SI, V8SF
+  v8i32_r = __lasx_xvftintrz_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(
+
+  // __lasx_xvftintrz_l_d
+  // xd, xj
+  // V4DI, V4DF
+  v4i64_r = __lasx_xvftintrz_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(
+
+  // __lasx_xvftintrz_wu_s
+  // xd, xj
+  // UV8SI, V8SF
+  v8u32_r = __lasx_xvftintrz_wu_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(
+
+  // __lasx_xvftintrz_lu_d
+  // xd, xj
+  // UV4DI, V4DF
+  v4u64_r = __lasx_xvftintrz_lu_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(
+
+  // __lasx_xvffint_s_w
+  // xd, xj
+  // V8SF, V8SI
+  v8f32_r = __lasx_xvffint_s_w(v8i32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(
+
+  // __lasx_xvffint_d_l
+  // xd, xj
+  // V4DF, V4DI
+  v4f64_r = __lasx_xvffint_d_l(v4i64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(
+
+  // __lasx_xvffint_s_wu
+  // xd, xj
+  // V8SF, UV8SI
+  v8f32_r = __lasx_xvffint_s_wu(v8u32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(
+
+  // __lasx_xvffint_d_lu
+  // xd, xj
+  // V4DF, UV4DI
+  v4f64_r = __lasx_xvffint_d_lu(v4u64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(
+
+  // __lasx_xvreplve_b
+  // xd, xj, rk
+  // V32QI, V32QI, SI
+  v32i8_r = __lasx_xvreplve_b(v32i8_a, i32_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(
+
+  // __lasx_xvreplve_h
+  // xd, xj, rk
+  // V16HI, V16HI, SI
+  v16i16_r = __lasx_xvreplve_h(v16i16_a, i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(
+
+  // __lasx_xvreplve_w
+  // xd, xj, rk
+  // V8SI, V8SI, SI
+  v8i32_r = __lasx_xvreplve_w(v8i32_a, i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(
+
+  // __lasx_xvreplve_d
+  // xd, xj, rk
+  // V4DI, V4DI, SI
+  v4i64_r = __lasx_xvreplve_d(v4i64_a, i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(
+
+  // __lasx_xvpermi_w
+  // xd, xj, ui8
+  // V8SI, V8SI, V8SI, USI
+  v8i32_r = __lasx_xvpermi_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(
+
+  // __lasx_xvandn_v
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvandn_v(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvandn.v(
+
+  // __lasx_xvneg_b
+  // xd, xj
+  // V32QI, V32QI
+  v32i8_r = __lasx_xvneg_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvneg.b(
+
+  // __lasx_xvneg_h
+  // xd, xj
+  // V16HI, V16HI
+  v16i16_r = __lasx_xvneg_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvneg.h(
+
+  // __lasx_xvneg_w
+  // xd, xj
+  // V8SI, V8SI
+  v8i32_r = __lasx_xvneg_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvneg.w(
+
+  // __lasx_xvneg_d
+  // xd, xj
+  // V4DI, V4DI
+  v4i64_r = __lasx_xvneg_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvneg.d(
+
+  // __lasx_xvmuh_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvmuh_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(
+
+  // __lasx_xvmuh_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvmuh_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(
+
+  // __lasx_xvmuh_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvmuh_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(
+
+  // __lasx_xvmuh_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvmuh_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(
+
+  // __lasx_xvmuh_bu
+  // xd, xj, xk
+  // UV32QI, UV32QI, UV32QI
+  v32u8_r = __lasx_xvmuh_bu(v32u8_a, v32u8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(
+
+  // __lasx_xvmuh_hu
+  // xd, xj, xk
+  // UV16HI, UV16HI, UV16HI
+  v16u16_r = __lasx_xvmuh_hu(v16u16_a, v16u16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(
+
+  // __lasx_xvmuh_wu
+  // xd, xj, xk
+  // UV8SI, UV8SI, UV8SI
+  v8u32_r = __lasx_xvmuh_wu(v8u32_a, v8u32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(
+
+  // __lasx_xvmuh_du
+  // xd, xj, xk
+  // UV4DI, UV4DI, UV4DI
+  v4u64_r = __lasx_xvmuh_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(
+
+  // __lasx_xvsllwil_h_b
+  // xd, xj, ui3
+  // V16HI, V32QI, UQI
+  v16i16_r = __lasx_xvsllwil_h_b(v32i8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(
+
+  // __lasx_xvsllwil_w_h
+  // xd, xj, ui4
+  // V8SI, V16HI, UQI
+  v8i32_r = __lasx_xvsllwil_w_h(v16i16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(
+
+  // __lasx_xvsllwil_d_w
+  // xd, xj, ui5
+  // V4DI, V8SI, UQI
+  v4i64_r = __lasx_xvsllwil_d_w(v8i32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(
+
+  // __lasx_xvsllwil_hu_bu
+  // xd, xj, ui3
+  // UV16HI, UV32QI, UQI
+  v16u16_r = __lasx_xvsllwil_hu_bu(v32u8_a, ui3_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(
+
+  // __lasx_xvsllwil_wu_hu
+  // xd, xj, ui4
+  // UV8SI, UV16HI, UQI
+  v8u32_r = __lasx_xvsllwil_wu_hu(v16u16_a, ui4_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(
+
+  // __lasx_xvsllwil_du_wu
+  // xd, xj, ui5
+  // UV4DI, UV8SI, UQI
+  v4u64_r = __lasx_xvsllwil_du_wu(v8u32_a, ui5_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(
+
+  // __lasx_xvsran_b_h
+  // xd, xj, xk
+  // V32QI, V16HI, V16HI
+  v32i8_r = __lasx_xvsran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(
+
+  // __lasx_xvsran_h_w
+  // xd, xj, xk
+  // V16HI, V8SI, V8SI
+  v16i16_r = __lasx_xvsran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(
+
+  // __lasx_xvsran_w_d
+  // xd, xj, xk
+  // V8SI, V4DI, V4DI
+  v8i32_r = __lasx_xvsran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(
+
+  // __lasx_xvssran_b_h
+  // xd, xj, xk
+  // V32QI, V16HI, V16HI
+  v32i8_r = __lasx_xvssran_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(
+
+  // __lasx_xvssran_h_w
+  // xd, xj, xk
+  // V16HI, V8SI, V8SI
+  v16i16_r = __lasx_xvssran_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(
+
+  // __lasx_xvssran_w_d
+  // xd, xj, xk
+  // V8SI, V4DI, V4DI
+  v8i32_r = __lasx_xvssran_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(
+
+  // __lasx_xvssran_bu_h
+  // xd, xj, xk
+  // UV32QI, UV16HI, UV16HI
+  v32u8_r = __lasx_xvssran_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(
+
+  // __lasx_xvssran_hu_w
+  // xd, xj, xk
+  // UV16HI, UV8SI, UV8SI
+  v16u16_r = __lasx_xvssran_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(
+
+  // __lasx_xvssran_wu_d
+  // xd, xj, xk
+  // UV8SI, UV4DI, UV4DI
+  v8u32_r = __lasx_xvssran_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(
+
+  // __lasx_xvsrarn_b_h
+  // xd, xj, xk
+  // V32QI, V16HI, V16HI
+  v32i8_r = __lasx_xvsrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(
+
+  // __lasx_xvsrarn_h_w
+  // xd, xj, xk
+  // V16HI, V8SI, V8SI
+  v16i16_r = __lasx_xvsrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(
+
+  // __lasx_xvsrarn_w_d
+  // xd, xj, xk
+  // V8SI, V4DI, V4DI
+  v8i32_r = __lasx_xvsrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(
+
+  // __lasx_xvssrarn_b_h
+  // xd, xj, xk
+  // V32QI, V16HI, V16HI
+  v32i8_r = __lasx_xvssrarn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(
+
+  // __lasx_xvssrarn_h_w
+  // xd, xj, xk
+  // V16HI, V8SI, V8SI
+  v16i16_r = __lasx_xvssrarn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(
+
+  // __lasx_xvssrarn_w_d
+  // xd, xj, xk
+  // V8SI, V4DI, V4DI
+  v8i32_r = __lasx_xvssrarn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(
+
+  // __lasx_xvssrarn_bu_h
+  // xd, xj, xk
+  // UV32QI, UV16HI, UV16HI
+  v32u8_r = __lasx_xvssrarn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(
+
+  // __lasx_xvssrarn_hu_w
+  // xd, xj, xk
+  // UV16HI, UV8SI, UV8SI
+  v16u16_r = __lasx_xvssrarn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(
+
+  // __lasx_xvssrarn_wu_d
+  // xd, xj, xk
+  // UV8SI, UV4DI, UV4DI
+  v8u32_r = __lasx_xvssrarn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(
+
+  // __lasx_xvsrln_b_h
+  // xd, xj, xk
+  // V32QI, V16HI, V16HI
+  v32i8_r = __lasx_xvsrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(
+
+  // __lasx_xvsrln_h_w
+  // xd, xj, xk
+  // V16HI, V8SI, V8SI
+  v16i16_r = __lasx_xvsrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(
+
+  // __lasx_xvsrln_w_d
+  // xd, xj, xk
+  // V8SI, V4DI, V4DI
+  v8i32_r = __lasx_xvsrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(
+
+  // __lasx_xvssrln_bu_h
+  // xd, xj, xk
+  // UV32QI, UV16HI, UV16HI
+  v32u8_r = __lasx_xvssrln_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(
+
+  // __lasx_xvssrln_hu_w
+  // xd, xj, xk
+  // UV16HI, UV8SI, UV8SI
+  v16u16_r = __lasx_xvssrln_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(
+
+  // __lasx_xvssrln_wu_d
+  // xd, xj, xk
+  // UV8SI, UV4DI, UV4DI
+  v8u32_r = __lasx_xvssrln_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(
+
+  // __lasx_xvsrlrn_b_h
+  // xd, xj, xk
+  // V32QI, V16HI, V16HI
+  v32i8_r = __lasx_xvsrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(
+
+  // __lasx_xvsrlrn_h_w
+  // xd, xj, xk
+  // V16HI, V8SI, V8SI
+  v16i16_r = __lasx_xvsrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(
+
+  // __lasx_xvsrlrn_w_d
+  // xd, xj, xk
+  // V8SI, V4DI, V4DI
+  v8i32_r = __lasx_xvsrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(
+
+  // __lasx_xvssrlrn_bu_h
+  // xd, xj, xk
+  // UV32QI, UV16HI, UV16HI
+  v32u8_r = __lasx_xvssrlrn_bu_h(v16u16_a, v16u16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(
+
+  // __lasx_xvssrlrn_hu_w
+  // xd, xj, xk
+  // UV16HI, UV8SI, UV8SI
+  v16u16_r = __lasx_xvssrlrn_hu_w(v8u32_a, v8u32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(
+
+  // __lasx_xvssrlrn_wu_d
+  // xd, xj, xk
+  // UV8SI, UV4DI, UV4DI
+  v8u32_r = __lasx_xvssrlrn_wu_d(v4u64_a, v4u64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(
+
+  // __lasx_xvfrstpi_b
+  // xd, xj, ui5
+  // V32QI, V32QI, V32QI, UQI
+  v32i8_r = __lasx_xvfrstpi_b(v32i8_a, v32i8_b, ui5); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(
+
+  // __lasx_xvfrstpi_h
+  // xd, xj, ui5
+  // V16HI, V16HI, V16HI, UQI
+  v16i16_r = __lasx_xvfrstpi_h(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(
+
+  // __lasx_xvfrstp_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvfrstp_b(v32i8_a, v32i8_b, v32i8_c); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(
+
+  // __lasx_xvfrstp_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvfrstp_h(v16i16_a, v16i16_b, v16i16_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(
+
+  // __lasx_xvshuf4i_d
+  // xd, xj, ui8
+  // V4DI, V4DI, V4DI, USI
+  v4i64_r = __lasx_xvshuf4i_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(
+
+  // __lasx_xvbsrl_v
+  // xd, xj, ui5
+  // V32QI, V32QI, UQI
+  v32i8_r = __lasx_xvbsrl_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(
+
+  // __lasx_xvbsll_v
+  // xd, xj, ui5
+  // V32QI, V32QI, UQI
+  v32i8_r = __lasx_xvbsll_v(v32i8_a, ui5_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(
+
+  // __lasx_xvextrins_b
+  // xd, xj, ui8
+  // V32QI, V32QI, V32QI, UQI
+  v32i8_r = __lasx_xvextrins_b(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(
+
+  // __lasx_xvextrins_h
+  // xd, xj, ui8
+  // V16HI, V16HI, V16HI, UQI
+  v16i16_r = __lasx_xvextrins_h(v16i16_a, v16i16_b, ui8); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(
+
+  // __lasx_xvextrins_w
+  // xd, xj, ui8
+  // V8SI, V8SI, V8SI, UQI
+  v8i32_r = __lasx_xvextrins_w(v8i32_a, v8i32_b, ui8); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(
+
+  // __lasx_xvextrins_d
+  // xd, xj, ui8
+  // V4DI, V4DI, V4DI, UQI
+  v4i64_r = __lasx_xvextrins_d(v4i64_a, v4i64_b, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(
+
+  // __lasx_xvmskltz_b
+  // xd, xj
+  // V32QI, V32QI
+  v32i8_r = __lasx_xvmskltz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(
+
+  // __lasx_xvmskltz_h
+  // xd, xj
+  // V16HI, V16HI
+  v16i16_r = __lasx_xvmskltz_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(
+
+  // __lasx_xvmskltz_w
+  // xd, xj
+  // V8SI, V8SI
+  v8i32_r = __lasx_xvmskltz_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(
+
+  // __lasx_xvmskltz_d
+  // xd, xj
+  // V4DI, V4DI
+  v4i64_r = __lasx_xvmskltz_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(
+
+  // __lasx_xvsigncov_b
+  // xd, xj, xk
+  // V32QI, V32QI, V32QI
+  v32i8_r = __lasx_xvsigncov_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(
+
+  // __lasx_xvsigncov_h
+  // xd, xj, xk
+  // V16HI, V16HI, V16HI
+  v16i16_r = __lasx_xvsigncov_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(
+
+  // __lasx_xvsigncov_w
+  // xd, xj, xk
+  // V8SI, V8SI, V8SI
+  v8i32_r = __lasx_xvsigncov_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(
+
+  // __lasx_xvsigncov_d
+  // xd, xj, xk
+  // V4DI, V4DI, V4DI
+  v4i64_r = __lasx_xvsigncov_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(
+
+  // __lasx_xvfmadd_s
+  // xd, xj, xk, xa
+  // V8SF, V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(
+
+  // __lasx_xvfmadd_d
+  // xd, xj, xk, xa
+  // V4DF, V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(
+
+  // __lasx_xvfmsub_s
+  // xd, xj, xk, xa
+  // V8SF, V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(
+
+  // __lasx_xvfmsub_d
+  // xd, xj, xk, xa
+  // V4DF, V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(
+
+  // __lasx_xvfnmadd_s
+  // xd, xj, xk, xa
+  // V8SF, V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfnmadd_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(
+
+  // __lasx_xvfnmadd_d
+  // xd, xj, xk, xa
+  // V4DF, V4DF, V4DF, V4DF
+  v4f64_r = __lasx_xvfnmadd_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(
+
+  // __lasx_xvfnmsub_s
+  // xd, xj, xk, xa
+  // V8SF, V8SF, V8SF, V8SF
+  v8f32_r = __lasx_xvfnmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(
__lasx_xvfnmsub_s(v8f32_a, v8f32_b, v8f32_c); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s( + + // __lasx_xvfnmsub_d + // xd, xj, xk, xa + // V4DF, V4DF, V4DF, V4DF + v4f64_r = __lasx_xvfnmsub_d(v4f64_a, v4f64_b, v4f64_c); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d( + + // __lasx_xvftintrne_w_s + // xd, xj + // V8SI, V8SF + v8i32_r = __lasx_xvftintrne_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s( + + // __lasx_xvftintrne_l_d + // xd, xj + // V4DI, V4DF + v4i64_r = __lasx_xvftintrne_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d( + + // __lasx_xvftintrp_w_s + // xd, xj + // V8SI, V8SF + v8i32_r = __lasx_xvftintrp_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s( + + // __lasx_xvftintrp_l_d + // xd, xj + // V4DI, V4DF + v4i64_r = __lasx_xvftintrp_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d( + + // __lasx_xvftintrm_w_s + // xd, xj + // V8SI, V8SF + v8i32_r = __lasx_xvftintrm_w_s(v8f32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s( + + // __lasx_xvftintrm_l_d + // xd, xj + // V4DI, V4DF + v4i64_r = __lasx_xvftintrm_l_d(v4f64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d( + + // __lasx_xvftint_w_d + // xd, xj, xk + // V8SI, V4DF, V4DF + v8i32_r = __lasx_xvftint_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d( + + // __lasx_xvffint_s_l + // xd, xj, xk + // V8SF, V4DI, V4DI + v8f32_r = __lasx_xvffint_s_l(v4i64_a, v4i64_b); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvffint.s.l( + + // __lasx_xvftintrz_w_d + // xd, xj, xk + // V8SI, V4DF, V4DF + v8i32_r = __lasx_xvftintrz_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d( + + // __lasx_xvftintrp_w_d + // xd, xj, xk + // V8SI, V4DF, V4DF + v8i32_r = __lasx_xvftintrp_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d( + + // __lasx_xvftintrm_w_d + // xd, xj, xk + // V8SI, V4DF, V4DF + v8i32_r = __lasx_xvftintrm_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d( + + // __lasx_xvftintrne_w_d + // xd, xj, xk + // V8SI, V4DF, V4DF + v8i32_r = __lasx_xvftintrne_w_d(v4f64_a, v4f64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d( + + // __lasx_xvftinth_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftinth_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s( + + // __lasx_xvftintl_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftintl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s( + + // __lasx_xvffinth_d_w + // xd, xj + // V4DF, V8SI + v4f64_r = __lasx_xvffinth_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w( + + // __lasx_xvffintl_d_w + // xd, xj + // V4DF, V8SI + v4f64_r = __lasx_xvffintl_d_w(v8i32_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w( + + // __lasx_xvftintrzh_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftintrzh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s( + + // __lasx_xvftintrzl_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftintrzl_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s( + + // __lasx_xvftintrph_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftintrph_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s( + + // __lasx_xvftintrpl_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftintrpl_l_s(v8f32_a); 
// CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s( + + // __lasx_xvftintrmh_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftintrmh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s( + + // __lasx_xvftintrml_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftintrml_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s( + + // __lasx_xvftintrneh_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftintrneh_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s( + + // __lasx_xvftintrnel_l_s + // xd, xj + // V4DI, V8SF + v4i64_r = __lasx_xvftintrnel_l_s(v8f32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s( + + // __lasx_xvfrintrne_s + // xd, xj + // V8SF, V8SF + v8f32_r = __lasx_xvfrintrne_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s( + + // __lasx_xvfrintrne_d + // xd, xj + // V4DF, V4DF + v4f64_r = __lasx_xvfrintrne_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d( + + // __lasx_xvfrintrz_s + // xd, xj + // V8SF, V8SF + v8f32_r = __lasx_xvfrintrz_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s( + + // __lasx_xvfrintrz_d + // xd, xj + // V4DF, V4DF + v4f64_r = __lasx_xvfrintrz_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d( + + // __lasx_xvfrintrp_s + // xd, xj + // V8SF, V8SF + v8f32_r = __lasx_xvfrintrp_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s( + + // __lasx_xvfrintrp_d + // xd, xj + // V4DF, V4DF + v4f64_r = __lasx_xvfrintrp_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d( + + // __lasx_xvfrintrm_s + // xd, xj + // V8SF, V8SF + v8f32_r = __lasx_xvfrintrm_s(v8f32_a); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s( + + // __lasx_xvfrintrm_d + // xd, xj + // V4DF, V4DF + v4f64_r = __lasx_xvfrintrm_d(v4f64_a); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d( + + // __lasx_xvld + // xd, rj, si12 + // V32QI, CVPOINTER, SI + v32i8_r = __lasx_xvld(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvld( + + // __lasx_xvst + // xd, rj, si12 + // VOID, V32QI, CVPOINTER, SI + __lasx_xvst(v32i8_a, &v32i8_b, si12); // CHECK: call void @llvm.loongarch.lasx.xvst( + + // __lasx_xvstelm_b + // xd, rj, si8, idx + // VOID, V32QI, CVPOINTER, SI, UQI + __lasx_xvstelm_b(v32i8_a, &v32i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lasx.xvstelm.b( + + // __lasx_xvstelm_h + // xd, rj, si8, idx + // VOID, V16HI, CVPOINTER, SI, UQI + __lasx_xvstelm_h(v16i16_a, &v16i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lasx.xvstelm.h( + + // __lasx_xvstelm_w + // xd, rj, si8, idx + // VOID, V8SI, CVPOINTER, SI, UQI + __lasx_xvstelm_w(v8i32_a, &v8i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lasx.xvstelm.w( + + // __lasx_xvstelm_d + // xd, rj, si8, idx + // VOID, V4DI, CVPOINTER, SI, UQI + __lasx_xvstelm_d(v4i64_a, &v4i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lasx.xvstelm.d( + + // __lasx_xvinsve0_w + // xd, xj, ui3 + // V8SI, V8SI, V8SI, UQI + v8i32_r = __lasx_xvinsve0_w(v8i32_a, v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w( + + // __lasx_xvinsve0_d + // xd, xj, ui2 + // V4DI, V4DI, V4DI, UQI + v4i64_r = __lasx_xvinsve0_d(v4i64_a, v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d( + + // __lasx_xvpickve_w + // xd, xj, ui3 + // V8SI, V8SI, UQI + v8i32_r = __lasx_xvpickve_w(v8i32_b, 2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvpickve.w( + + // 
__lasx_xvpickve_d + // xd, xj, ui2 + // V4DI, V4DI, UQI + v4i64_r = __lasx_xvpickve_d(v4i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpickve.d( + + // __lasx_xvssrlrn_b_h + // xd, xj, xk + // V32QI, V16HI, V16HI + v32i8_r = __lasx_xvssrlrn_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h( + + // __lasx_xvssrlrn_h_w + // xd, xj, xk + // V16HI, V8SI, V8SI + v16i16_r = __lasx_xvssrlrn_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w( + + // __lasx_xvssrlrn_w_d + // xd, xj, xk + // V8SI, V4DI, V4DI + v8i32_r = __lasx_xvssrlrn_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d( + + // __lasx_xvssrln_b_h + // xd, xj, xk + // V32QI, V16HI, V16HI + v32i8_r = __lasx_xvssrln_b_h(v16i16_a, v16i16_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h( + + // __lasx_xvssrln_h_w + // xd, xj, xk + // V16HI, V8SI, V8SI + v16i16_r = __lasx_xvssrln_h_w(v8i32_a, v8i32_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w( + + // __lasx_xvssrln_w_d + // xd, xj, xk + // V8SI, V4DI, V4DI + v8i32_r = __lasx_xvssrln_w_d(v4i64_a, v4i64_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d( + + // __lasx_xvorn_v + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvorn_v(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvorn.v( + + // __lasx_xvldi + // xd, i13 + // V4DI, HI + v4i64_r = __lasx_xvldi(i13); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldi( + + // __lasx_xvldx + // xd, rj, rk + // V32QI, CVPOINTER, DI + v32i8_r = __lasx_xvldx(&v32i8_a, i64_d); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldx( + + // __lasx_xvstx + // xd, rj, rk + // VOID, V32QI, CVPOINTER, DI + __lasx_xvstx(v32i8_a, &v32i8_b, i64_d); // CHECK: call void @llvm.loongarch.lasx.xvstx( + + // __lasx_xvinsgr2vr_w + // xd, rj, ui3 + // V8SI, V8SI, SI, UQI + v8i32_r = __lasx_xvinsgr2vr_w(v8i32_a, i32_b, ui3); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w( + + // __lasx_xvinsgr2vr_d + // xd, rj, ui2 + // V4DI, V4DI, DI, UQI + v4i64_r = __lasx_xvinsgr2vr_d(v4i64_a, i64_b, ui2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d( + + // __lasx_xvreplve0_b + // xd, xj + // V32QI, V32QI + v32i8_r = __lasx_xvreplve0_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b( + + // __lasx_xvreplve0_h + // xd, xj + // V16HI, V16HI + v16i16_r = __lasx_xvreplve0_h(v16i16_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h( + + // __lasx_xvreplve0_w + // xd, xj + // V8SI, V8SI + v8i32_r = __lasx_xvreplve0_w(v8i32_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w( + + // __lasx_xvreplve0_d + // xd, xj + // V4DI, V4DI + v4i64_r = __lasx_xvreplve0_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d( + + // __lasx_xvreplve0_q + // xd, xj + // V32QI, V32QI + v32i8_r = __lasx_xvreplve0_q(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q( + + // __lasx_vext2xv_h_b + // xd, xj + // V16HI, V32QI + v16i16_r = __lasx_vext2xv_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b( + + // __lasx_vext2xv_w_h + // xd, xj + // V8SI, V16HI + v8i32_r = __lasx_vext2xv_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h( + + // __lasx_vext2xv_d_w + // xd, xj + // V4DI, V8SI + v4i64_r = __lasx_vext2xv_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w( + + // __lasx_vext2xv_w_b + // xd, xj + // V8SI, V32QI + v8i32_r = __lasx_vext2xv_w_b(v32i8_a); // CHECK: 
call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b( + + // gcc build fails for this case + + // __lasx_vext2xv_d_h + // xd, xj + // V4DI, V16HI + v4i64_r = __lasx_vext2xv_d_h(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h( + + // __lasx_vext2xv_d_b + // xd, xj + // V4DI, V32QI + v4i64_r = __lasx_vext2xv_d_b(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b( + + // __lasx_vext2xv_hu_bu + // xd, xj + // V16HI, V32QI + v16i16_r = __lasx_vext2xv_hu_bu(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu( + + // __lasx_vext2xv_wu_hu + // xd, xj + // V8SI, V16HI + v8i32_r = __lasx_vext2xv_wu_hu(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu( + + // __lasx_vext2xv_du_wu + // xd, xj + // V4DI, V8SI + v4i64_r = __lasx_vext2xv_du_wu(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu( + + // __lasx_vext2xv_wu_bu + // xd, xj + // V8SI, V32QI + v8i32_r = __lasx_vext2xv_wu_bu(v32i8_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu( + + // gcc build fails for this case + + // __lasx_vext2xv_du_hu + // xd, xj + // V4DI, V16HI + v4i64_r = __lasx_vext2xv_du_hu(v16i16_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu( + + // __lasx_vext2xv_du_bu + // xd, xj + // V4DI, V32QI + v4i64_r = __lasx_vext2xv_du_bu(v32i8_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu( + + // __lasx_xvpermi_q + // xd, xj, ui8 + // V32QI, V32QI, V32QI, USI + v32i8_r = __lasx_xvpermi_q(v32i8_a, v32i8_b, ui8); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvpermi.q( + + // __lasx_xvpermi_d + // xd, xj, ui8 + // V4DI, V4DI, USI + v4i64_r = __lasx_xvpermi_d(v4i64_a, ui8); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvpermi.d( + + // __lasx_xvperm_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvperm_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvperm.w( + + // __lasx_xvldrepl_b + // xd, rj, si12 + // V32QI, CVPOINTER, SI + v32i8_r = __lasx_xvldrepl_b(&v32i8_a, si12); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b( + + // __lasx_xvldrepl_h + // xd, rj, si11 + // V16HI, CVPOINTER, SI + v16i16_r = __lasx_xvldrepl_h(&v16i16_a, si11); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h( + + // __lasx_xvldrepl_w + // xd, rj, si10 + // V8SI, CVPOINTER, SI + v8i32_r = __lasx_xvldrepl_w(&v8i32_a, si10); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w( + + // __lasx_xvldrepl_d + // xd, rj, si9 + // V4DI, CVPOINTER, SI + v4i64_r = __lasx_xvldrepl_d(&v4i64_a, si9); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d( + + // __lasx_xvpickve2gr_w + // rd, xj, ui3 + // SI, V8SI, UQI + i32_r = __lasx_xvpickve2gr_w(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.w( + + // __lasx_xvpickve2gr_wu + // rd, xj, ui3 + // USI, V8SI, UQI + u32_r = __lasx_xvpickve2gr_wu(v8i32_a, ui3_b); // CHECK: call i32 @llvm.loongarch.lasx.xvpickve2gr.wu( + + // __lasx_xvpickve2gr_d + // rd, xj, ui2 + // DI, V4DI, UQI + i64_r = __lasx_xvpickve2gr_d(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.d( + + // __lasx_xvpickve2gr_du + // rd, xj, ui2 + // UDI, V4DI, UQI + u64_r = __lasx_xvpickve2gr_du(v4i64_a, ui2_b); // CHECK: call i64 @llvm.loongarch.lasx.xvpickve2gr.du( + + // __lasx_xvaddwev_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvaddwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d( + + // __lasx_xvaddwev_d_w + // xd, xj, xk + // V4DI, V8SI, V8SI + v4i64_r = __lasx_xvaddwev_d_w(v8i32_a, v8i32_b); // CHECK: call 
<4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w( + + // __lasx_xvaddwev_w_h + // xd, xj, xk + // V8SI, V16HI, V16HI + v8i32_r = __lasx_xvaddwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h( + + // __lasx_xvaddwev_h_b + // xd, xj, xk + // V16HI, V32QI, V32QI + v16i16_r = __lasx_xvaddwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b( + + // __lasx_xvaddwev_q_du + // xd, xj, xk + // V4DI, UV4DI, UV4DI + v4i64_r = __lasx_xvaddwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du( + + // __lasx_xvaddwev_d_wu + // xd, xj, xk + // V4DI, UV8SI, UV8SI + v4i64_r = __lasx_xvaddwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu( + + // __lasx_xvaddwev_w_hu + // xd, xj, xk + // V8SI, UV16HI, UV16HI + v8i32_r = __lasx_xvaddwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu( + + // __lasx_xvaddwev_h_bu + // xd, xj, xk + // V16HI, UV32QI, UV32QI + v16i16_r = __lasx_xvaddwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu( + + // __lasx_xvsubwev_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvsubwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d( + + // __lasx_xvsubwev_d_w + // xd, xj, xk + // V4DI, V8SI, V8SI + v4i64_r = __lasx_xvsubwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w( + + // __lasx_xvsubwev_w_h + // xd, xj, xk + // V8SI, V16HI, V16HI + v8i32_r = __lasx_xvsubwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h( + + // __lasx_xvsubwev_h_b + // xd, xj, xk + // V16HI, V32QI, V32QI + v16i16_r = __lasx_xvsubwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b( + + // __lasx_xvsubwev_q_du + // xd, xj, xk + // V4DI, UV4DI, UV4DI + v4i64_r = __lasx_xvsubwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du( + + // __lasx_xvsubwev_d_wu + // xd, xj, xk + // V4DI, UV8SI, UV8SI + v4i64_r = __lasx_xvsubwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu( + + // __lasx_xvsubwev_w_hu + // xd, xj, xk + // V8SI, UV16HI, UV16HI + v8i32_r = __lasx_xvsubwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu( + + // __lasx_xvsubwev_h_bu + // xd, xj, xk + // V16HI, UV32QI, UV32QI + v16i16_r = __lasx_xvsubwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu( + + // __lasx_xvmulwev_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvmulwev_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d( + + // __lasx_xvmulwev_d_w + // xd, xj, xk + // V4DI, V8SI, V8SI + v4i64_r = __lasx_xvmulwev_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w( + + // __lasx_xvmulwev_w_h + // xd, xj, xk + // V8SI, V16HI, V16HI + v8i32_r = __lasx_xvmulwev_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h( + + // __lasx_xvmulwev_h_b + // xd, xj, xk + // V16HI, V32QI, V32QI + v16i16_r = __lasx_xvmulwev_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b( + + // __lasx_xvmulwev_q_du + // xd, xj, xk + // V4DI, UV4DI, UV4DI + v4i64_r = __lasx_xvmulwev_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du( + + // __lasx_xvmulwev_d_wu + // xd, xj, xk + // V4DI, UV8SI, UV8SI + 
v4i64_r = __lasx_xvmulwev_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu( + + // __lasx_xvmulwev_w_hu + // xd, xj, xk + // V8SI, UV16HI, UV16HI + v8i32_r = __lasx_xvmulwev_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu( + + // __lasx_xvmulwev_h_bu + // xd, xj, xk + // V16HI, UV32QI, UV32QI + v16i16_r = __lasx_xvmulwev_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu( + + // __lasx_xvaddwod_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvaddwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d( + + // __lasx_xvaddwod_d_w + // xd, xj, xk + // V4DI, V8SI, V8SI + v4i64_r = __lasx_xvaddwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w( + + // __lasx_xvaddwod_w_h + // xd, xj, xk + // V8SI, V16HI, V16HI + v8i32_r = __lasx_xvaddwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h( + + // __lasx_xvaddwod_h_b + // xd, xj, xk + // V16HI, V32QI, V32QI + v16i16_r = __lasx_xvaddwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b( + + // __lasx_xvaddwod_q_du + // xd, xj, xk + // V4DI, UV4DI, UV4DI + v4i64_r = __lasx_xvaddwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du( + + // __lasx_xvaddwod_d_wu + // xd, xj, xk + // V4DI, UV8SI, UV8SI + v4i64_r = __lasx_xvaddwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu( + + // __lasx_xvaddwod_w_hu + // xd, xj, xk + // V8SI, UV16HI, UV16HI + v8i32_r = __lasx_xvaddwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu( + + // __lasx_xvaddwod_h_bu + // xd, xj, xk + // V16HI, UV32QI, UV32QI + v16i16_r = __lasx_xvaddwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu( + + // __lasx_xvsubwod_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvsubwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d( + + // __lasx_xvsubwod_d_w + // xd, xj, xk + // V4DI, V8SI, V8SI + v4i64_r = __lasx_xvsubwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w( + + // __lasx_xvsubwod_w_h + // xd, xj, xk + // V8SI, V16HI, V16HI + v8i32_r = __lasx_xvsubwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h( + + // __lasx_xvsubwod_h_b + // xd, xj, xk + // V16HI, V32QI, V32QI + v16i16_r = __lasx_xvsubwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b( + + // __lasx_xvsubwod_q_du + // xd, xj, xk + // V4DI, UV4DI, UV4DI + v4i64_r = __lasx_xvsubwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du( + + // __lasx_xvsubwod_d_wu + // xd, xj, xk + // V4DI, UV8SI, UV8SI + v4i64_r = __lasx_xvsubwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu( + + // __lasx_xvsubwod_w_hu + // xd, xj, xk + // V8SI, UV16HI, UV16HI + v8i32_r = __lasx_xvsubwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu( + + // __lasx_xvsubwod_h_bu + // xd, xj, xk + // V16HI, UV32QI, UV32QI + v16i16_r = __lasx_xvsubwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu( + + // __lasx_xvmulwod_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvmulwod_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d( + + 
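// The wev/wod suffixes mark the "widening even"/"widening odd" forms: they + // operate on the even- or odd-indexed source elements and widen them to the + // next element size, matching the result/operand modes noted above each call. + + 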
// __lasx_xvmulwod_d_w + // xd, xj, xk + // V4DI, V8SI, V8SI + v4i64_r = __lasx_xvmulwod_d_w(v8i32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w( + + // __lasx_xvmulwod_w_h + // xd, xj, xk + // V8SI, V16HI, V16HI + v8i32_r = __lasx_xvmulwod_w_h(v16i16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h( + + // __lasx_xvmulwod_h_b + // xd, xj, xk + // V16HI, V32QI, V32QI + v16i16_r = __lasx_xvmulwod_h_b(v32i8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b( + + // __lasx_xvmulwod_q_du + // xd, xj, xk + // V4DI, UV4DI, UV4DI + v4i64_r = __lasx_xvmulwod_q_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du( + + // __lasx_xvmulwod_d_wu + // xd, xj, xk + // V4DI, UV8SI, UV8SI + v4i64_r = __lasx_xvmulwod_d_wu(v8u32_a, v8u32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu( + + // __lasx_xvmulwod_w_hu + // xd, xj, xk + // V8SI, UV16HI, UV16HI + v8i32_r = __lasx_xvmulwod_w_hu(v16u16_a, v16u16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu( + + // __lasx_xvmulwod_h_bu + // xd, xj, xk + // V16HI, UV32QI, UV32QI + v16i16_r = __lasx_xvmulwod_h_bu(v32u8_a, v32u8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu( + + // __lasx_xvaddwev_d_wu_w + // xd, xj, xk + // V4DI, UV8SI, V8SI + v4i64_r = __lasx_xvaddwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w( + + // __lasx_xvaddwev_w_hu_h + // xd, xj, xk + // V8SI, UV16HI, V16HI + v8i32_r = __lasx_xvaddwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h( + + // __lasx_xvaddwev_h_bu_b + // xd, xj, xk + // V16HI, UV32QI, V32QI + v16i16_r = __lasx_xvaddwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b( + + // __lasx_xvmulwev_d_wu_w + // xd, xj, xk + // V4DI, UV8SI, V8SI + v4i64_r = __lasx_xvmulwev_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w( + + // __lasx_xvmulwev_w_hu_h + // xd, xj, xk + // V8SI, UV16HI, V16HI + v8i32_r = __lasx_xvmulwev_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h( + + // __lasx_xvmulwev_h_bu_b + // xd, xj, xk + // V16HI, UV32QI, V32QI + v16i16_r = __lasx_xvmulwev_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b( + + // __lasx_xvaddwod_d_wu_w + // xd, xj, xk + // V4DI, UV8SI, V8SI + v4i64_r = __lasx_xvaddwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w( + + // __lasx_xvaddwod_w_hu_h + // xd, xj, xk + // V8SI, UV16HI, V16HI + v8i32_r = __lasx_xvaddwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h( + + // __lasx_xvaddwod_h_bu_b + // xd, xj, xk + // V16HI, UV32QI, V32QI + v16i16_r = __lasx_xvaddwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b( + + // __lasx_xvmulwod_d_wu_w + // xd, xj, xk + // V4DI, UV8SI, V8SI + v4i64_r = __lasx_xvmulwod_d_wu_w(v8u32_a, v8i32_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w( + + // __lasx_xvmulwod_w_hu_h + // xd, xj, xk + // V8SI, UV16HI, V16HI + v8i32_r = __lasx_xvmulwod_w_hu_h(v16u16_a, v16i16_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h( + + // __lasx_xvmulwod_h_bu_b + // xd, xj, xk + // V16HI, UV32QI, V32QI + v16i16_r = __lasx_xvmulwod_h_bu_b(v32u8_a, v32i8_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b( + + // 
__lasx_xvhaddw_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvhaddw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d( + + // __lasx_xvhaddw_qu_du + // xd, xj, xk + // UV4DI, UV4DI, UV4DI + v4u64_r = __lasx_xvhaddw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du( + + // __lasx_xvhsubw_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvhsubw_q_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d( + + // __lasx_xvhsubw_qu_du + // xd, xj, xk + // UV4DI, UV4DI, UV4DI + v4u64_r = __lasx_xvhsubw_qu_du(v4u64_a, v4u64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du( + + // __lasx_xvmaddwev_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI, V4DI + v4i64_r = __lasx_xvmaddwev_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d( + + // __lasx_xvmaddwev_d_w + // xd, xj, xk + // V4DI, V4DI, V8SI, V8SI + v4i64_r = __lasx_xvmaddwev_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w( + + // __lasx_xvmaddwev_w_h + // xd, xj, xk + // V8SI, V8SI, V16HI, V16HI + v8i32_r = __lasx_xvmaddwev_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h( + + // __lasx_xvmaddwev_h_b + // xd, xj, xk + // V16HI, V16HI, V32QI, V32QI + v16i16_r = __lasx_xvmaddwev_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b( + + // __lasx_xvmaddwev_q_du + // xd, xj, xk + // UV4DI, UV4DI, UV4DI, UV4DI + v4u64_r = __lasx_xvmaddwev_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du( + + // __lasx_xvmaddwev_d_wu + // xd, xj, xk + // UV4DI, UV4DI, UV8SI, UV8SI + v4u64_r = __lasx_xvmaddwev_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu( + + // __lasx_xvmaddwev_w_hu + // xd, xj, xk + // UV8SI, UV8SI, UV16HI, UV16HI + v8u32_r = __lasx_xvmaddwev_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu( + + // __lasx_xvmaddwev_h_bu + // xd, xj, xk + // UV16HI, UV16HI, UV32QI, UV32QI + v16u16_r = __lasx_xvmaddwev_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu( + + // __lasx_xvmaddwod_q_d + // xd, xj, xk + // V4DI, V4DI, V4DI, V4DI + v4i64_r = __lasx_xvmaddwod_q_d(v4i64_a, v4i64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d( + + // __lasx_xvmaddwod_d_w + // xd, xj, xk + // V4DI, V4DI, V8SI, V8SI + v4i64_r = __lasx_xvmaddwod_d_w(v4i64_a, v8i32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w( + + // __lasx_xvmaddwod_w_h + // xd, xj, xk + // V8SI, V8SI, V16HI, V16HI + v8i32_r = __lasx_xvmaddwod_w_h(v8i32_a, v16i16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h( + + // __lasx_xvmaddwod_h_b + // xd, xj, xk + // V16HI, V16HI, V32QI, V32QI + v16i16_r = __lasx_xvmaddwod_h_b(v16i16_a, v32i8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b( + + // __lasx_xvmaddwod_q_du + // xd, xj, xk + // UV4DI, UV4DI, UV4DI, UV4DI + v4u64_r = __lasx_xvmaddwod_q_du(v4u64_a, v4u64_b, v4u64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du( + + // __lasx_xvmaddwod_d_wu + // xd, xj, xk + // UV4DI, UV4DI, UV8SI, UV8SI + v4u64_r = __lasx_xvmaddwod_d_wu(v4u64_a, v8u32_b, v8u32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu( + + // __lasx_xvmaddwod_w_hu + // xd, xj, xk + // UV8SI, 
UV8SI, UV16HI, UV16HI + v8u32_r = __lasx_xvmaddwod_w_hu(v8u32_a, v16u16_b, v16u16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu( + + // __lasx_xvmaddwod_h_bu + // xd, xj, xk + // UV16HI, UV16HI, UV32QI, UV32QI + v16u16_r = __lasx_xvmaddwod_h_bu(v16u16_a, v32u8_b, v32u8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu( + + // __lasx_xvmaddwev_q_du_d + // xd, xj, xk + // V4DI, V4DI, UV4DI, V4DI + v4i64_r = __lasx_xvmaddwev_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d( + + // __lasx_xvmaddwev_d_wu_w + // xd, xj, xk + // V4DI, V4DI, UV8SI, V8SI + v4i64_r = __lasx_xvmaddwev_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w( + + // __lasx_xvmaddwev_w_hu_h + // xd, xj, xk + // V8SI, V8SI, UV16HI, V16HI + v8i32_r = __lasx_xvmaddwev_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h( + + // __lasx_xvmaddwev_h_bu_b + // xd, xj, xk + // V16HI, V16HI, UV32QI, V32QI + v16i16_r = __lasx_xvmaddwev_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b( + + // __lasx_xvmaddwod_q_du_d + // xd, xj, xk + // V4DI, V4DI, UV4DI, V4DI + v4i64_r = __lasx_xvmaddwod_q_du_d(v4i64_a, v4u64_b, v4i64_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d( + + // __lasx_xvmaddwod_d_wu_w + // xd, xj, xk + // V4DI, V4DI, UV8SI, V8SI + v4i64_r = __lasx_xvmaddwod_d_wu_w(v4i64_a, v8u32_b, v8i32_c); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w( + + // __lasx_xvmaddwod_w_hu_h + // xd, xj, xk + // V8SI, V8SI, UV16HI, V16HI + v8i32_r = __lasx_xvmaddwod_w_hu_h(v8i32_a, v16u16_b, v16i16_c); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h( + + // __lasx_xvmaddwod_h_bu_b + // xd, xj, xk + // V16HI, V16HI, UV32QI, V32QI + v16i16_r = __lasx_xvmaddwod_h_bu_b(v16i16_a, v32u8_b, v32i8_c); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b( + + // __lasx_xvrotr_b + // xd, xj, xk + // V32QI, V32QI, V32QI + v32i8_r = __lasx_xvrotr_b(v32i8_a, v32i8_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotr.b( + + // __lasx_xvrotr_h + // xd, xj, xk + // V16HI, V16HI, V16HI + v16i16_r = __lasx_xvrotr_h(v16i16_a, v16i16_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotr.h( + + // __lasx_xvrotr_w + // xd, xj, xk + // V8SI, V8SI, V8SI + v8i32_r = __lasx_xvrotr_w(v8i32_a, v8i32_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotr.w( + + // __lasx_xvrotr_d + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvrotr_d(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotr.d( + + // __lasx_xvadd_q + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvadd_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvadd.q( + + // __lasx_xvsub_q + // xd, xj, xk + // V4DI, V4DI, V4DI + v4i64_r = __lasx_xvsub_q(v4i64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsub.q( + + // __lasx_xvaddwev_q_du_d + // xd, xj, xk + // V4DI, UV4DI, V4DI + v4i64_r = __lasx_xvaddwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d( + + // __lasx_xvaddwod_q_du_d + // xd, xj, xk + // V4DI, UV4DI, V4DI + v4i64_r = __lasx_xvaddwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d( + + // __lasx_xvmulwev_q_du_d + // xd, xj, xk + // V4DI, UV4DI, V4DI + v4i64_r = __lasx_xvmulwev_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> 
@llvm.loongarch.lasx.xvmulwev.q.du.d( + + // __lasx_xvmulwod_q_du_d + // xd, xj, xk + // V4DI, UV4DI, V4DI + v4i64_r = __lasx_xvmulwod_q_du_d(v4u64_a, v4i64_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d( + + // __lasx_xvmskgez_b + // xd, xj + // V32QI, V32QI + v32i8_r = __lasx_xvmskgez_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b( + + // __lasx_xvmsknz_b + // xd, xj + // V32QI, V32QI + v32i8_r = __lasx_xvmsknz_b(v32i8_a); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b( + + // __lasx_xvexth_h_b + // xd, xj + // V16HI, V32QI + v16i16_r = __lasx_xvexth_h_b(v32i8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b( + + // __lasx_xvexth_w_h + // xd, xj + // V8SI, V16HI + v8i32_r = __lasx_xvexth_w_h(v16i16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h( + + // __lasx_xvexth_d_w + // xd, xj + // V4DI, V8SI + v4i64_r = __lasx_xvexth_d_w(v8i32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w( + + // __lasx_xvexth_q_d + // xd, xj + // V4DI, V4DI + v4i64_r = __lasx_xvexth_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d( + + // __lasx_xvexth_hu_bu + // xd, xj + // UV16HI, UV32QI + v16u16_r = __lasx_xvexth_hu_bu(v32u8_a); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu( + + // __lasx_xvexth_wu_hu + // xd, xj + // UV8SI, UV16HI + v8u32_r = __lasx_xvexth_wu_hu(v16u16_a); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu( + + // __lasx_xvexth_du_wu + // xd, xj + // UV4DI, UV8SI + v4u64_r = __lasx_xvexth_du_wu(v8u32_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu( + + // __lasx_xvexth_qu_du + // xd, xj + // UV4DI, UV4DI + v4u64_r = __lasx_xvexth_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du( + + // __lasx_xvextl_q_d + // xd, xj + // V4DI, V4DI + v4i64_r = __lasx_xvextl_q_d(v4i64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d( + + // __lasx_xvextl_qu_du + // xd, xj + // UV4DI, UV4DI + v4u64_r = __lasx_xvextl_qu_du(v4u64_a); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du( + + // __lasx_xvrotri_b + // xd, xj, ui3 + // V32QI, V32QI, UQI + v32i8_r = __lasx_xvrotri_b(v32i8_a, ui3_b); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrotri.b( + + // __lasx_xvrotri_h + // xd, xj, ui4 + // V16HI, V16HI, UQI + v16i16_r = __lasx_xvrotri_h(v16i16_a, ui4_b); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrotri.h( + + // __lasx_xvrotri_w + // xd, xj, ui5 + // V8SI, V8SI, UQI + v8i32_r = __lasx_xvrotri_w(v8i32_a, ui5_b); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrotri.w( + + // __lasx_xvrotri_d + // xd, xj, ui6 + // V4DI, V4DI, UQI + v4i64_r = __lasx_xvrotri_d(v4i64_a, ui6_b); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrotri.d( + + // __lasx_xvsrlni_b_h + // xd, xj, ui4 + // V32QI, V32QI, V32QI, USI + v32i8_r = __lasx_xvsrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h( + + // __lasx_xvsrlni_h_w + // xd, xj, ui5 + // V16HI, V16HI, V16HI, USI + v16i16_r = __lasx_xvsrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w( + + // __lasx_xvsrlni_w_d + // xd, xj, ui6 + // V8SI, V8SI, V8SI, USI + v8i32_r = __lasx_xvsrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d( + + // __lasx_xvsrlni_d_q + // xd, xj, ui7 + // V4DI, V4DI, V4DI, USI + v4i64_r = __lasx_xvsrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q( + + // __lasx_xvsrlrni_b_h + // xd, xj, ui4 + // 
V32QI, V32QI, V32QI, USI + v32i8_r = __lasx_xvsrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h( + + // __lasx_xvsrlrni_h_w + // xd, xj, ui5 + // V16HI, V16HI, V16HI, USI + v16i16_r = __lasx_xvsrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w( + + // __lasx_xvsrlrni_w_d + // xd, xj, ui6 + // V8SI, V8SI, V8SI, USI + v8i32_r = __lasx_xvsrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d( + + // __lasx_xvsrlrni_d_q + // xd, xj, ui7 + // V4DI, V4DI, V4DI, USI + v4i64_r = __lasx_xvsrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q( + + // __lasx_xvssrlni_b_h + // xd, xj, ui4 + // V32QI, V32QI, V32QI, USI + v32i8_r = __lasx_xvssrlni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h( + + // __lasx_xvssrlni_h_w + // xd, xj, ui5 + // V16HI, V16HI, V16HI, USI + v16i16_r = __lasx_xvssrlni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w( + + // __lasx_xvssrlni_w_d + // xd, xj, ui6 + // V8SI, V8SI, V8SI, USI + v8i32_r = __lasx_xvssrlni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d( + + // __lasx_xvssrlni_d_q + // xd, xj, ui7 + // V4DI, V4DI, V4DI, USI + v4i64_r = __lasx_xvssrlni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q( + + // __lasx_xvssrlni_bu_h + // xd, xj, ui4 + // UV32QI, UV32QI, V32QI, USI + v32u8_r = __lasx_xvssrlni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h( + + // __lasx_xvssrlni_hu_w + // xd, xj, ui5 + // UV16HI, UV16HI, V16HI, USI + v16u16_r = __lasx_xvssrlni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w( + + // __lasx_xvssrlni_wu_d + // xd, xj, ui6 + // UV8SI, UV8SI, V8SI, USI + v8u32_r = __lasx_xvssrlni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d( + + // __lasx_xvssrlni_du_q + // xd, xj, ui7 + // UV4DI, UV4DI, V4DI, USI + v4u64_r = __lasx_xvssrlni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q( + + // __lasx_xvssrlrni_b_h + // xd, xj, ui4 + // V32QI, V32QI, V32QI, USI + v32i8_r = __lasx_xvssrlrni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h( + + // __lasx_xvssrlrni_h_w + // xd, xj, ui5 + // V16HI, V16HI, V16HI, USI + v16i16_r = __lasx_xvssrlrni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w( + + // __lasx_xvssrlrni_w_d + // xd, xj, ui6 + // V8SI, V8SI, V8SI, USI + v8i32_r = __lasx_xvssrlrni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d( + + // __lasx_xvssrlrni_d_q + // xd, xj, ui7 + // V4DI, V4DI, V4DI, USI + v4i64_r = __lasx_xvssrlrni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q( + + // __lasx_xvssrlrni_bu_h + // xd, xj, ui4 + // UV32QI, UV32QI, V32QI, USI + v32u8_r = __lasx_xvssrlrni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h( + + // __lasx_xvssrlrni_hu_w + // xd, xj, ui5 + // UV16HI, UV16HI, V16HI, USI + v16u16_r = __lasx_xvssrlrni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w( + + // __lasx_xvssrlrni_wu_d + // xd, xj, ui6 + // UV8SI, UV8SI, V8SI, USI + v8u32_r = __lasx_xvssrlrni_wu_d(v8u32_a, v8i32_b, ui6); // 
CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d( + + // __lasx_xvssrlrni_du_q + // xd, xj, ui7 + // UV4DI, UV4DI, V4DI, USI + v4u64_r = __lasx_xvssrlrni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q( + + // __lasx_xvsrani_b_h + // xd, xj, ui4 + // V32QI, V32QI, V32QI, USI + v32i8_r = __lasx_xvsrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h( + + // __lasx_xvsrani_h_w + // xd, xj, ui5 + // V16HI, V16HI, V16HI, USI + v16i16_r = __lasx_xvsrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w( + + // __lasx_xvsrani_w_d + // xd, xj, ui6 + // V8SI, V8SI, V8SI, USI + v8i32_r = __lasx_xvsrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d( + + // __lasx_xvsrani_d_q + // xd, xj, ui7 + // V4DI, V4DI, V4DI, USI + v4i64_r = __lasx_xvsrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q( + + // __lasx_xvsrarni_b_h + // xd, xj, ui4 + // V32QI, V32QI, V32QI, USI + v32i8_r = __lasx_xvsrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h( + + // __lasx_xvsrarni_h_w + // xd, xj, ui5 + // V16HI, V16HI, V16HI, USI + v16i16_r = __lasx_xvsrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w( + + // __lasx_xvsrarni_w_d + // xd, xj, ui6 + // V8SI, V8SI, V8SI, USI + v8i32_r = __lasx_xvsrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d( + + // __lasx_xvsrarni_d_q + // xd, xj, ui7 + // V4DI, V4DI, V4DI, USI + v4i64_r = __lasx_xvsrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q( + + // __lasx_xvssrani_b_h + // xd, xj, ui4 + // V32QI, V32QI, V32QI, USI + v32i8_r = __lasx_xvssrani_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h( + + // __lasx_xvssrani_h_w + // xd, xj, ui5 + // V16HI, V16HI, V16HI, USI + v16i16_r = __lasx_xvssrani_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w( + + // __lasx_xvssrani_w_d + // xd, xj, ui6 + // V8SI, V8SI, V8SI, USI + v8i32_r = __lasx_xvssrani_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d( + + // __lasx_xvssrani_d_q + // xd, xj, ui7 + // V4DI, V4DI, V4DI, USI + v4i64_r = __lasx_xvssrani_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q( + + // __lasx_xvssrani_bu_h + // xd, xj, ui4 + // UV32QI, UV32QI, V32QI, USI + v32u8_r = __lasx_xvssrani_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h( + + // __lasx_xvssrani_hu_w + // xd, xj, ui5 + // UV16HI, UV16HI, V16HI, USI + v16u16_r = __lasx_xvssrani_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w( + + // __lasx_xvssrani_wu_d + // xd, xj, ui6 + // UV8SI, UV8SI, V8SI, USI + v8u32_r = __lasx_xvssrani_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d( + + // __lasx_xvssrani_du_q + // xd, xj, ui7 + // UV4DI, UV4DI, V4DI, USI + v4u64_r = __lasx_xvssrani_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q( + + // __lasx_xvssrarni_b_h + // xd, xj, ui4 + // V32QI, V32QI, V32QI, USI + v32i8_r = __lasx_xvssrarni_b_h(v32i8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h( + + // __lasx_xvssrarni_h_w + // xd, xj, ui5 + // V16HI, V16HI, 
V16HI, USI + v16i16_r = __lasx_xvssrarni_h_w(v16i16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w( + + // __lasx_xvssrarni_w_d + // xd, xj, ui6 + // V8SI, V8SI, V8SI, USI + v8i32_r = __lasx_xvssrarni_w_d(v8i32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d( + + // __lasx_xvssrarni_d_q + // xd, xj, ui7 + // V4DI, V4DI, V4DI, USI + v4i64_r = __lasx_xvssrarni_d_q(v4i64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q( + + // __lasx_xvssrarni_bu_h + // xd, xj, ui4 + // UV32QI, UV32QI, V32QI, USI + v32u8_r = __lasx_xvssrarni_bu_h(v32u8_a, v32i8_b, ui4); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h( + + // __lasx_xvssrarni_hu_w + // xd, xj, ui5 + // UV16HI, UV16HI, V16HI, USI + v16u16_r = __lasx_xvssrarni_hu_w(v16u16_a, v16i16_b, ui5); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w( + + // __lasx_xvssrarni_wu_d + // xd, xj, ui6 + // UV8SI, UV8SI, V8SI, USI + v8u32_r = __lasx_xvssrarni_wu_d(v8u32_a, v8i32_b, ui6); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d( + + // __lasx_xvssrarni_du_q + // xd, xj, ui7 + // UV4DI, UV4DI, V4DI, USI + v4u64_r = __lasx_xvssrarni_du_q(v4u64_a, v4i64_b, ui7); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q( + + // __lasx_xbnz_v + // rd, xj + // SI, UV32QI + i32_r = __lasx_xbnz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.v( + + // __lasx_xbz_v + // rd, xj + // SI, UV32QI + i32_r = __lasx_xbz_v(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.v( + + // __lasx_xbnz_b + // rd, xj + // SI, UV32QI + i32_r = __lasx_xbnz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.b( + + // __lasx_xbnz_h + // rd, xj + // SI, UV16HI + i32_r = __lasx_xbnz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.h( + + // __lasx_xbnz_w + // rd, xj + // SI, UV8SI + i32_r = __lasx_xbnz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.w( + + // __lasx_xbnz_d + // rd, xj + // SI, UV4DI + i32_r = __lasx_xbnz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbnz.d( + + // __lasx_xbz_b + // rd, xj + // SI, UV32QI + i32_r = __lasx_xbz_b(v32u8_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.b( + + // __lasx_xbz_h + // rd, xj + // SI, UV16HI + i32_r = __lasx_xbz_h(v16u16_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.h( + + // __lasx_xbz_w + // rd, xj + // SI, UV8SI + i32_r = __lasx_xbz_w(v8u32_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.w( + + // __lasx_xbz_d + // rd, xj + // SI, UV4DI + i32_r = __lasx_xbz_d(v4u64_a); // CHECK: call i32 @llvm.loongarch.lasx.xbz.d( + + v32i8_r = __lasx_xvrepli_b(2); // CHECK: call <32 x i8> @llvm.loongarch.lasx.xvrepli.b( + + v16i16_r = __lasx_xvrepli_h(2); // CHECK: call <16 x i16> @llvm.loongarch.lasx.xvrepli.h( + + v8i32_r = __lasx_xvrepli_w(2); // CHECK: call <8 x i32> @llvm.loongarch.lasx.xvrepli.w( + + v4i64_r = __lasx_xvrepli_d(2); // CHECK: call <4 x i64> @llvm.loongarch.lasx.xvrepli.d( + + v4f64_r = __lasx_xvpickve_d_f(v4f64_a, 2); // CHECK: call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f( + + v8f32_r = __lasx_xvpickve_w_f(v8f32_a, 2); // CHECK: call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f( +} diff --git a/clang/test/CodeGen/builtins-loongarch-lsx-error.c b/clang/test/CodeGen/builtins-loongarch-lsx-error.c new file mode 100644 index 000000000000..f566a736223c --- /dev/null +++ b/clang/test/CodeGen/builtins-loongarch-lsx-error.c @@ -0,0 +1,250 @@ +// REQUIRES: loongarch-registered-target +// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu 
-fsyntax-only %s \ +// RUN: -target-feature +lsx \ +// RUN: -verify -o - 2>&1 + +#include <lsxintrin.h> + +void test() { + v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + v16i8 v16i8_r; + v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7}; + v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8}; + v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9}; + v8i16 v8i16_r; + v4i32 v4i32_a = (v4i32){0, 1, 2, 3}; + v4i32 v4i32_b = (v4i32){1, 2, 3, 4}; + v4i32 v4i32_c = (v4i32){2, 3, 4, 5}; + v4i32 v4i32_r; + v2i64 v2i64_a = (v2i64){0, 1}; + v2i64 v2i64_b = (v2i64){1, 2}; + v2i64 v2i64_c = (v2i64){2, 3}; + v2i64 v2i64_r; + + v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; + v16u8 v16u8_r; + v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7}; + v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8}; + v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9}; + v8u16 v8u16_r; + v4u32 v4u32_a = (v4u32){0, 1, 2, 3}; + v4u32 v4u32_b = (v4u32){1, 2, 3, 4}; + v4u32 v4u32_c = (v4u32){2, 3, 4, 5}; + v4u32 v4u32_r; + v2u64 v2u64_a = (v2u64){0, 1}; + v2u64 v2u64_b = (v2u64){1, 2}; + v2u64 v2u64_c = (v2u64){2, 3}; + v2u64 v2u64_r; + + v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3}; + v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4}; + v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5}; + v4f32 v4f32_r; + v2f64 v2f64_a = (v2f64){0.5, 1}; + v2f64 v2f64_b = (v2f64){1.5, 2}; + v2f64 v2f64_c = (v2f64){2.5, 3}; + v2f64 v2f64_r; + + int i32_r; + int i32_a = 1; + int i32_b = 2; + unsigned int u32_r; + unsigned int u32_a = 1; + unsigned int u32_b = 2; + long long i64_r; + long long i64_a = 1; + long long i64_b = 2; + long long i64_c = 3; + unsigned long long u64_r; + unsigned long long u64_a = 1; + unsigned long long u64_b = 2; + unsigned long long u64_c = 3; + + v16i8_r = __lsx_vslli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8i16_r = __lsx_vslli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4i32_r = __lsx_vslli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vslli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16i8_r = __lsx_vsrai_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8i16_r = __lsx_vsrai_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4i32_r = __lsx_vsrai_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vsrai_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16i8_r = __lsx_vsrari_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8i16_r = __lsx_vsrari_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4i32_r = __lsx_vsrari_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vsrari_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16i8_r = __lsx_vsrli_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range 
[0, 7]}} + v8i16_r = __lsx_vsrli_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4i32_r = __lsx_vsrli_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vsrli_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16i8_r = __lsx_vsrlri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8i16_r = __lsx_vsrlri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4i32_r = __lsx_vsrlri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vsrlri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16u8_r = __lsx_vbitclri_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8u16_r = __lsx_vbitclri_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4u32_r = __lsx_vbitclri_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2u64_r = __lsx_vbitclri_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16u8_r = __lsx_vbitseti_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8u16_r = __lsx_vbitseti_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4u32_r = __lsx_vbitseti_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2u64_r = __lsx_vbitseti_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16u8_r = __lsx_vbitrevi_b(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8u16_r = __lsx_vbitrevi_h(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4u32_r = __lsx_vbitrevi_w(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2u64_r = __lsx_vbitrevi_d(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16i8_r = __lsx_vaddi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i16_r = __lsx_vaddi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i32_r = __lsx_vaddi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vaddi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i8_r = __lsx_vsubi_bu(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i16_r = __lsx_vsubi_hu(v8i16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i32_r = __lsx_vsubi_wu(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vsubi_du(v2i64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i8_r = __lsx_vmaxi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i16_r = __lsx_vmaxi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i32_r = __lsx_vmaxi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v2i64_r = __lsx_vmaxi_d(v2i64_a, -17); // 
expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16u8_r = __lsx_vmaxi_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8u16_r = __lsx_vmaxi_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4u32_r = __lsx_vmaxi_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2u64_r = __lsx_vmaxi_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i8_r = __lsx_vmini_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i16_r = __lsx_vmini_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i32_r = __lsx_vmini_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v2i64_r = __lsx_vmini_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16u8_r = __lsx_vmini_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8u16_r = __lsx_vmini_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4u32_r = __lsx_vmini_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2u64_r = __lsx_vmini_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i8_r = __lsx_vseqi_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i16_r = __lsx_vseqi_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i32_r = __lsx_vseqi_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v2i64_r = __lsx_vseqi_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16i8_r = __lsx_vslti_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i16_r = __lsx_vslti_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i32_r = __lsx_vslti_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v2i64_r = __lsx_vslti_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16i8_r = __lsx_vslti_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i16_r = __lsx_vslti_hu(v8u16_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v4i32_r = __lsx_vslti_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vslti_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i8_r = __lsx_vslei_b(v16i8_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v8i16_r = __lsx_vslei_h(v8i16_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v4i32_r = __lsx_vslei_w(v4i32_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v2i64_r = __lsx_vslei_d(v2i64_a, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + v16i8_r = __lsx_vslei_bu(v16u8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v8i16_r = __lsx_vslei_hu(v8u16_a, 32); // expected-error 
{{argument value 32 is outside the valid range [0, 31]}} + v4i32_r = __lsx_vslei_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vslei_du(v2u64_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v16i8_r = __lsx_vsat_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8i16_r = __lsx_vsat_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4i32_r = __lsx_vsat_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2i64_r = __lsx_vsat_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16u8_r = __lsx_vsat_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v8u16_r = __lsx_vsat_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v4u32_r = __lsx_vsat_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + v2u64_r = __lsx_vsat_du(v2u64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + v16i8_r = __lsx_vreplvei_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + v8i16_r = __lsx_vreplvei_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + v4i32_r = __lsx_vreplvei_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + v2i64_r = __lsx_vreplvei_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + v16u8_r = __lsx_vandi_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v16u8_r = __lsx_vori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v16u8_r = __lsx_vnori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v16u8_r = __lsx_vxori_b(v16u8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v16u8_r = __lsx_vbitseli_b(v16u8_a, v16u8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v16i8_r = __lsx_vshuf4i_b(v16i8_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v8i16_r = __lsx_vshuf4i_h(v8i16_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + v4i32_r = __lsx_vshuf4i_w(v4i32_a, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + i32_r = __lsx_vpickve2gr_b(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + i32_r = __lsx_vpickve2gr_h(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + i32_r = __lsx_vpickve2gr_w(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + i64_r = __lsx_vpickve2gr_d(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + u32_r = __lsx_vpickve2gr_bu(v16i8_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + u32_r = __lsx_vpickve2gr_hu(v8i16_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + u32_r = __lsx_vpickve2gr_wu(v4i32_a, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + u64_r = __lsx_vpickve2gr_du(v2i64_a, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + 
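// The lane-index builtins that follow are range-checked against the element + // count in the same way; each constant is one past the last valid lane. + 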
+  v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  v8i16_r = __lsx_vsllwil_h_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  v4i32_r = __lsx_vsllwil_w_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v2i64_r = __lsx_vsllwil_d_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  v16i8_r = __lsx_vbsrl_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v16i8_r = __lsx_vbsll_v(v16i8_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  v16i8_r = __lsx_vldrepl_b(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  v8i16_r = __lsx_vldrepl_h(&v8i16_a, -1025); // expected-error {{argument value -1025 is outside the valid range [-1024, 1023]}}
+  v4i32_r = __lsx_vldrepl_w(&v4i32_a, -513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  v2i64_r = __lsx_vldrepl_d(&v2i64_a, -257); // expected-error {{argument value -257 is outside the valid range [-256, 255]}}
+  v16i8_r = __lsx_vrotri_b(v16i8_a, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  v8i16_r = __lsx_vrotri_h(v8i16_a, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v4i32_r = __lsx_vrotri_w(v4i32_a, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v2i64_r = __lsx_vrotri_d(v2i64_a, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8u16_r = __lsx_vssrlrni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}}
+  v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  v16i8_r = __lsx_vld(&v16i8_a, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  __lsx_vst(v16i8_a, &v16i8_b, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  v2i64_r = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}}
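+  // Note that vld/vst take a plain si12 byte offset, while the vldrepl
+  // offsets are scaled by the element size, so their signed ranges halve as
+  // the elements widen.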
+}
diff --git a/clang/test/CodeGen/builtins-loongarch-lsx.c b/clang/test/CodeGen/builtins-loongarch-lsx.c
new file mode 100644
index 000000000000..2b86c0b2ee43
--- /dev/null
+++ b/clang/test/CodeGen/builtins-loongarch-lsx.c
@@ -0,0 +1,3645 @@
+// REQUIRES: loongarch-registered-target
+// RUN: %clang_cc1 -triple loongarch64-unknown-linux-gnu -emit-llvm %s \
+// RUN: -target-feature +lsx \
+// RUN: -o - | FileCheck %s
+
+#include <lsxintrin.h>
+
+#define ui1 0
+#define ui2 1
+#define ui3 4
+#define ui4 7
+#define ui5 25
+#define ui6 44
+#define ui7 100
+#define ui8 127 //200
+#define si5 -4
+#define si8 -100
+#define si9 0
+#define si10 0
+#define si11 0
+#define si12 0
+#define i10 500
+#define i13 4000
+#define mode 11
+#define idx1 1
+#define idx2 2
+#define idx3 4
+#define idx4 8
+
+void test(void) {
+  v16i8 v16i8_a = (v16i8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  v16i8 v16i8_b = (v16i8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  v16i8 v16i8_c = (v16i8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
+  v16i8 v16i8_r;
+  v8i16 v8i16_a = (v8i16){0, 1, 2, 3, 4, 5, 6, 7};
+  v8i16 v8i16_b = (v8i16){1, 2, 3, 4, 5, 6, 7, 8};
+  v8i16 v8i16_c = (v8i16){2, 3, 4, 5, 6, 7, 8, 9};
+  v8i16 v8i16_r;
+  v4i32 v4i32_a = (v4i32){0, 1, 2, 3};
+  v4i32 v4i32_b = (v4i32){1, 2, 3, 4};
+  v4i32 v4i32_c = (v4i32){2, 3, 4, 5};
+  v4i32 v4i32_r;
+  v2i64 v2i64_a = (v2i64){0, 1};
+  v2i64 v2i64_b = (v2i64){1, 2};
+  v2i64 v2i64_c = (v2i64){2, 3};
+  v2i64 v2i64_r;
+
+  v16u8 v16u8_a = (v16u8){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  v16u8 v16u8_b = (v16u8){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  v16u8 v16u8_c = (v16u8){2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17};
+  v16u8 v16u8_r;
+  v8u16 v8u16_a = (v8u16){0, 1, 2, 3, 4, 5, 6, 7};
+  v8u16 v8u16_b = (v8u16){1, 2, 3, 4, 5, 6, 7, 8};
+  v8u16 v8u16_c = (v8u16){2, 3, 4, 5, 6, 7, 8, 9};
+  v8u16 v8u16_r;
+  v4u32 v4u32_a = (v4u32){0, 1, 2, 3};
+  v4u32 v4u32_b = (v4u32){1, 2, 3, 4};
+  v4u32 v4u32_c = (v4u32){2, 3, 4, 5};
+  v4u32 v4u32_r;
+  v2u64 v2u64_a = (v2u64){0, 1};
+  v2u64 v2u64_b = (v2u64){1, 2};
+  v2u64 v2u64_c = (v2u64){2, 3};
+  v2u64 v2u64_r;
+
+  v4f32 v4f32_a = (v4f32){0.5, 1, 2, 3};
+  v4f32 v4f32_b = (v4f32){1.5, 2, 3, 4};
+  v4f32 v4f32_c = (v4f32){2.5, 3, 4, 5};
+  v4f32 v4f32_r;
+  v2f64 v2f64_a = (v2f64){0.5, 1};
+  v2f64 v2f64_b = (v2f64){1.5, 2};
+  v2f64 v2f64_c = (v2f64){2.5, 3};
+  v2f64 v2f64_r;
+
+  int i32_r;
+  int i32_a = 1;
+  int i32_b = 2;
+  unsigned int u32_r;
+  unsigned int u32_a = 1;
+  unsigned int u32_b = 2;
+  long long i64_r;
+  long long i64_a = 1;
+  long long i64_b = 2;
+  long long i64_c = 3;
+  long int i64_d = 0;
+  unsigned long long u64_r;
+  unsigned long long u64_a = 1;
+  unsigned long long u64_b = 2;
+  unsigned long long u64_c = 3;
+
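+  // Every builtin below is invoked once so that FileCheck can match the LSX
+  // intrinsic it lowers to. The comment triplet above each call records the
+  // builtin name, its assembly operands, and its GCC-style prototype.
+  // First the shifts: vsll/vsra/vsrl shift each element by the amount in vk,
+  // and the *i forms take an unsigned immediate sized to the element width.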
+  // __lsx_vsll_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vsll_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsll.b(
+
+  // __lsx_vsll_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vsll_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsll.h(
+
+  // __lsx_vsll_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vsll_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsll.w(
+
+  // __lsx_vsll_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vsll_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsll.d(
+
+  // __lsx_vslli_b
+  // vd, vj, ui3
+  // V16QI, V16QI, UQI
+  v16i8_r = __lsx_vslli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslli.b(
+
+  // __lsx_vslli_h
+  // vd, vj, ui4
+  // V8HI, V8HI, UQI
+  v8i16_r = __lsx_vslli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslli.h(
+
+  // __lsx_vslli_w
+  // vd, vj, ui5
+  // V4SI, V4SI, UQI
+  v4i32_r = __lsx_vslli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslli.w(
+
+  // __lsx_vslli_d
+  // vd, vj, ui6
+  // V2DI, V2DI, UQI
+  v2i64_r = __lsx_vslli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslli.d(
+
+  // __lsx_vsra_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vsra_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsra.b(
+
+  // __lsx_vsra_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vsra_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsra.h(
+
+  // __lsx_vsra_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vsra_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsra.w(
+
+  // __lsx_vsra_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vsra_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsra.d(
+
+  // __lsx_vsrai_b
+  // vd, vj, ui3
+  // V16QI, V16QI, UQI
+  v16i8_r = __lsx_vsrai_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrai.b(
+
+  // __lsx_vsrai_h
+  // vd, vj, ui4
+  // V8HI, V8HI, UQI
+  v8i16_r = __lsx_vsrai_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrai.h(
+
+  // __lsx_vsrai_w
+  // vd, vj, ui5
+  // V4SI, V4SI, UQI
+  v4i32_r = __lsx_vsrai_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrai.w(
+
+  // __lsx_vsrai_d
+  // vd, vj, ui6
+  // V2DI, V2DI, UQI
+  v2i64_r = __lsx_vsrai_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrai.d(
+
+  // __lsx_vsrar_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vsrar_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrar.b(
+
+  // __lsx_vsrar_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vsrar_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrar.h(
+
+  // __lsx_vsrar_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vsrar_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrar.w(
+
+  // __lsx_vsrar_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vsrar_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrar.d(
+
+  // __lsx_vsrari_b
+  // vd, vj, ui3
+  // V16QI, V16QI, UQI
+  v16i8_r = __lsx_vsrari_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrari.b(
+
+  // __lsx_vsrari_h
+  // vd, vj, ui4
+  // V8HI, V8HI, UQI
+  v8i16_r = __lsx_vsrari_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrari.h(
+
+  // __lsx_vsrari_w
+  // vd, vj, ui5
+  // V4SI, V4SI, UQI
+  v4i32_r = __lsx_vsrari_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrari.w(
+
+  // __lsx_vsrari_d
+  // vd, vj, ui6
+  // V2DI, V2DI, UQI
+  v2i64_r = __lsx_vsrari_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrari.d(
+
+  // __lsx_vsrl_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vsrl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrl.b(
+
+  // __lsx_vsrl_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vsrl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrl.h(
+
+  // __lsx_vsrl_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vsrl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrl.w(
+
+  // __lsx_vsrl_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vsrl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrl.d(
+
+  // __lsx_vsrli_b
+  // vd, vj, ui3
+  // V16QI, V16QI, UQI
+  v16i8_r = __lsx_vsrli_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrli.b(
+
+  // __lsx_vsrli_h
+  // vd, vj, ui4
+  // V8HI, V8HI, UQI
+  v8i16_r = __lsx_vsrli_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrli.h(
+
+  // __lsx_vsrli_w
+  // vd, vj, ui5
+  // V4SI, V4SI, UQI
+  v4i32_r = __lsx_vsrli_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrli.w(
+
+  // __lsx_vsrli_d
+  // vd, vj, ui6
+  // V2DI, V2DI, UQI
+  v2i64_r = __lsx_vsrli_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrli.d(
+
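+  // vsrlr/vsrlri are the rounding variants of the logical right shifts,
+  // analogous to vsrar/vsrari for the arithmetic shifts above.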
+  // __lsx_vsrlr_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vsrlr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(
+
+  // __lsx_vsrlr_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vsrlr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(
+
+  // __lsx_vsrlr_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vsrlr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(
+
+  // __lsx_vsrlr_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vsrlr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(
+
+  // __lsx_vsrlri_b
+  // vd, vj, ui3
+  // V16QI, V16QI, UQI
+  v16i8_r = __lsx_vsrlri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(
+
+  // __lsx_vsrlri_h
+  // vd, vj, ui4
+  // V8HI, V8HI, UQI
+  v8i16_r = __lsx_vsrlri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(
+
+  // __lsx_vsrlri_w
+  // vd, vj, ui5
+  // V4SI, V4SI, UQI
+  v4i32_r = __lsx_vsrlri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(
+
+  // __lsx_vsrlri_d
+  // vd, vj, ui6
+  // V2DI, V2DI, UQI
+  v2i64_r = __lsx_vsrlri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(
+
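+  // Bit manipulation: vbitclr, vbitset, and vbitrev clear, set, or flip the
+  // bit selected by vk (or by the immediate in the *i forms) in each element.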
+  // __lsx_vbitclr_b
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vbitclr_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(
+
+  // __lsx_vbitclr_h
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vbitclr_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(
+
+  // __lsx_vbitclr_w
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vbitclr_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(
+
+  // __lsx_vbitclr_d
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vbitclr_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(
+
+  // __lsx_vbitclri_b
+  // vd, vj, ui3
+  // UV16QI, UV16QI, UQI
+  v16u8_r = __lsx_vbitclri_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(
+
+  // __lsx_vbitclri_h
+  // vd, vj, ui4
+  // UV8HI, UV8HI, UQI
+  v8u16_r = __lsx_vbitclri_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(
+
+  // __lsx_vbitclri_w
+  // vd, vj, ui5
+  // UV4SI, UV4SI, UQI
+  v4u32_r = __lsx_vbitclri_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(
+
+  // __lsx_vbitclri_d
+  // vd, vj, ui6
+  // UV2DI, UV2DI, UQI
+  v2u64_r = __lsx_vbitclri_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(
+
+  // __lsx_vbitset_b
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vbitset_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitset.b(
+
+  // __lsx_vbitset_h
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vbitset_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitset.h(
+
+  // __lsx_vbitset_w
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vbitset_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitset.w(
+
+  // __lsx_vbitset_d
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vbitset_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitset.d(
+
+  // __lsx_vbitseti_b
+  // vd, vj, ui3
+  // UV16QI, UV16QI, UQI
+  v16u8_r = __lsx_vbitseti_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(
+
+  // __lsx_vbitseti_h
+  // vd, vj, ui4
+  // UV8HI, UV8HI, UQI
+  v8u16_r = __lsx_vbitseti_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(
+
+  // __lsx_vbitseti_w
+  // vd, vj, ui5
+  // UV4SI, UV4SI, UQI
+  v4u32_r = __lsx_vbitseti_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(
+
+  // __lsx_vbitseti_d
+  // vd, vj, ui6
+  // UV2DI, UV2DI, UQI
+  v2u64_r = __lsx_vbitseti_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(
+
+  // __lsx_vbitrev_b
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vbitrev_b(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(
+
+  // __lsx_vbitrev_h
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vbitrev_h(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(
+
+  // __lsx_vbitrev_w
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vbitrev_w(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(
+
+  // __lsx_vbitrev_d
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vbitrev_d(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(
+
+  // __lsx_vbitrevi_b
+  // vd, vj, ui3
+  // UV16QI, UV16QI, UQI
+  v16u8_r = __lsx_vbitrevi_b(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(
+
+  // __lsx_vbitrevi_h
+  // vd, vj, ui4
+  // UV8HI, UV8HI, UQI
+  v8u16_r = __lsx_vbitrevi_h(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(
+
+  // __lsx_vbitrevi_w
+  // vd, vj, ui5
+  // UV4SI, UV4SI, UQI
+  v4u32_r = __lsx_vbitrevi_w(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(
+
+  // __lsx_vbitrevi_d
+  // vd, vj, ui6
+  // UV2DI, UV2DI, UQI
+  v2u64_r = __lsx_vbitrevi_d(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(
+
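+  // Integer add/sub. Note that the vaddi/vsubi immediates are unsigned
+  // 5-bit values (ui5) regardless of the element width.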
+  // __lsx_vadd_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadd.b(
+
+  // __lsx_vadd_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadd.h(
+
+  // __lsx_vadd_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadd.w(
+
+  // __lsx_vadd_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.d(
+
+  // __lsx_vaddi_bu
+  // vd, vj, ui5
+  // V16QI, V16QI, UQI
+  v16i8_r = __lsx_vaddi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(
+
+  // __lsx_vaddi_hu
+  // vd, vj, ui5
+  // V8HI, V8HI, UQI
+  v8i16_r = __lsx_vaddi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(
+
+  // __lsx_vaddi_wu
+  // vd, vj, ui5
+  // V4SI, V4SI, UQI
+  v4i32_r = __lsx_vaddi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(
+
+  // __lsx_vaddi_du
+  // vd, vj, ui5
+  // V2DI, V2DI, UQI
+  v2i64_r = __lsx_vaddi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddi.du(
+
+  // __lsx_vsub_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vsub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsub.b(
+
+  // __lsx_vsub_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vsub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsub.h(
+
+  // __lsx_vsub_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vsub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsub.w(
+
+  // __lsx_vsub_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vsub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.d(
+
+  // __lsx_vsubi_bu
+  // vd, vj, ui5
+  // V16QI, V16QI, UQI
+  v16i8_r = __lsx_vsubi_bu(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(
+
+  // __lsx_vsubi_hu
+  // vd, vj, ui5
+  // V8HI, V8HI, UQI
+  v8i16_r = __lsx_vsubi_hu(v8i16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(
+
+  // __lsx_vsubi_wu
+  // vd, vj, ui5
+  // V4SI, V4SI, UQI
+  v4i32_r = __lsx_vsubi_wu(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(
+
+  // __lsx_vsubi_du
+  // vd, vj, ui5
+  // V2DI, V2DI, UQI
+  v2i64_r = __lsx_vsubi_du(v2i64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubi.du(
+
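+  // Signed and unsigned min/max; the immediate forms compare against si5
+  // for the signed variants and ui5 for the unsigned ones.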
+  // __lsx_vmax_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vmax_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.b(
+
+  // __lsx_vmax_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vmax_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.h(
+
+  // __lsx_vmax_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vmax_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmax.w(
+
+  // __lsx_vmax_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vmax_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.d(
+
+  // __lsx_vmaxi_b
+  // vd, vj, si5
+  // V16QI, V16QI, QI
+  v16i8_r = __lsx_vmaxi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(
+
+  // __lsx_vmaxi_h
+  // vd, vj, si5
+  // V8HI, V8HI, QI
+  v8i16_r = __lsx_vmaxi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(
+
+  // __lsx_vmaxi_w
+  // vd, vj, si5
+  // V4SI, V4SI, QI
+  v4i32_r = __lsx_vmaxi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(
+
+  // __lsx_vmaxi_d
+  // vd, vj, si5
+  // V2DI, V2DI, QI
+  v2i64_r = __lsx_vmaxi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(
+
+  // __lsx_vmax_bu
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vmax_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmax.bu(
+
+  // __lsx_vmax_hu
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vmax_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmax.hu(
+
+  // __lsx_vmax_wu
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vmax_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmax.wu(
+
+  // __lsx_vmax_du
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vmax_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmax.du(
+
+  // __lsx_vmaxi_bu
+  // vd, vj, ui5
+  // UV16QI, UV16QI, UQI
+  v16u8_r = __lsx_vmaxi_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(
+
+  // __lsx_vmaxi_hu
+  // vd, vj, ui5
+  // UV8HI, UV8HI, UQI
+  v8u16_r = __lsx_vmaxi_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(
+
+  // __lsx_vmaxi_wu
+  // vd, vj, ui5
+  // UV4SI, UV4SI, UQI
+  v4u32_r = __lsx_vmaxi_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(
+
+  // __lsx_vmaxi_du
+  // vd, vj, ui5
+  // UV2DI, UV2DI, UQI
+  v2u64_r = __lsx_vmaxi_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(
+
+  // __lsx_vmin_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vmin_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.b(
+
+  // __lsx_vmin_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vmin_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.h(
+
+  // __lsx_vmin_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vmin_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.w(
+
+  // __lsx_vmin_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vmin_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.d(
+
+  // __lsx_vmini_b
+  // vd, vj, si5
+  // V16QI, V16QI, QI
+  v16i8_r = __lsx_vmini_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.b(
+
+  // __lsx_vmini_h
+  // vd, vj, si5
+  // V8HI, V8HI, QI
+  v8i16_r = __lsx_vmini_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.h(
+
+  // __lsx_vmini_w
+  // vd, vj, si5
+  // V4SI, V4SI, QI
+  v4i32_r = __lsx_vmini_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.w(
+
+  // __lsx_vmini_d
+  // vd, vj, si5
+  // V2DI, V2DI, QI
+  v2i64_r = __lsx_vmini_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.d(
+
+  // __lsx_vmin_bu
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vmin_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmin.bu(
+
+  // __lsx_vmin_hu
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vmin_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmin.hu(
+
+  // __lsx_vmin_wu
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vmin_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmin.wu(
+
+  // __lsx_vmin_du
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vmin_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmin.du(
+
+  // __lsx_vmini_bu
+  // vd, vj, ui5
+  // UV16QI, UV16QI, UQI
+  v16u8_r = __lsx_vmini_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmini.bu(
+
+  // __lsx_vmini_hu
+  // vd, vj, ui5
+  // UV8HI, UV8HI, UQI
+  v8u16_r = __lsx_vmini_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmini.hu(
+
+  // __lsx_vmini_wu
+  // vd, vj, ui5
+  // UV4SI, UV4SI, UQI
+  v4u32_r = __lsx_vmini_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmini.wu(
+
+  // __lsx_vmini_du
+  // vd, vj, ui5
+  // UV2DI, UV2DI, UQI
+  v2u64_r = __lsx_vmini_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmini.du(
+
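+  // Comparisons (vseq ==, vslt <, vsle <=) set each result element to all
+  // ones when the relation holds and to zero otherwise.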
+  // __lsx_vseq_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vseq_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseq.b(
+
+  // __lsx_vseq_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vseq_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseq.h(
+
+  // __lsx_vseq_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vseq_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseq.w(
+
+  // __lsx_vseq_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vseq_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseq.d(
+
+  // __lsx_vseqi_b
+  // vd, vj, si5
+  // V16QI, V16QI, QI
+  v16i8_r = __lsx_vseqi_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vseqi.b(
+
+  // __lsx_vseqi_h
+  // vd, vj, si5
+  // V8HI, V8HI, QI
+  v8i16_r = __lsx_vseqi_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vseqi.h(
+
+  // __lsx_vseqi_w
+  // vd, vj, si5
+  // V4SI, V4SI, QI
+  v4i32_r = __lsx_vseqi_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vseqi.w(
+
+  // __lsx_vseqi_d
+  // vd, vj, si5
+  // V2DI, V2DI, QI
+  v2i64_r = __lsx_vseqi_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vseqi.d(
+
+  // __lsx_vslti_b
+  // vd, vj, si5
+  // V16QI, V16QI, QI
+  v16i8_r = __lsx_vslti_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.b(
+
+  // __lsx_vslt_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vslt_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.b(
+
+  // __lsx_vslt_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vslt_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.h(
+
+  // __lsx_vslt_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vslt_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.w(
+
+  // __lsx_vslt_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vslt_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.d(
+
+  // __lsx_vslti_h
+  // vd, vj, si5
+  // V8HI, V8HI, QI
+  v8i16_r = __lsx_vslti_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.h(
+
+  // __lsx_vslti_w
+  // vd, vj, si5
+  // V4SI, V4SI, QI
+  v4i32_r = __lsx_vslti_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.w(
+
+  // __lsx_vslti_d
+  // vd, vj, si5
+  // V2DI, V2DI, QI
+  v2i64_r = __lsx_vslti_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.d(
+
+  // __lsx_vslt_bu
+  // vd, vj, vk
+  // V16QI, UV16QI, UV16QI
+  v16i8_r = __lsx_vslt_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslt.bu(
+
+  // __lsx_vslt_hu
+  // vd, vj, vk
+  // V8HI, UV8HI, UV8HI
+  v8i16_r = __lsx_vslt_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslt.hu(
+
+  // __lsx_vslt_wu
+  // vd, vj, vk
+  // V4SI, UV4SI, UV4SI
+  v4i32_r = __lsx_vslt_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslt.wu(
+
+  // __lsx_vslt_du
+  // vd, vj, vk
+  // V2DI, UV2DI, UV2DI
+  v2i64_r = __lsx_vslt_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslt.du(
+
+  // __lsx_vslti_bu
+  // vd, vj, ui5
+  // V16QI, UV16QI, UQI
+  v16i8_r = __lsx_vslti_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslti.bu(
+
+  // __lsx_vslti_hu
+  // vd, vj, ui5
+  // V8HI, UV8HI, UQI
+  v8i16_r = __lsx_vslti_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslti.hu(
+
+  // __lsx_vslti_wu
+  // vd, vj, ui5
+  // V4SI, UV4SI, UQI
+  v4i32_r = __lsx_vslti_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslti.wu(
+
+  // __lsx_vslti_du
+  // vd, vj, ui5
+  // V2DI, UV2DI, UQI
+  v2i64_r = __lsx_vslti_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslti.du(
+
+  // __lsx_vsle_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vsle_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.b(
+
+  // __lsx_vsle_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vsle_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.h(
+
+  // __lsx_vsle_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vsle_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.w(
+
+  // __lsx_vsle_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vsle_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.d(
+
+  // __lsx_vslei_b
+  // vd, vj, si5
+  // V16QI, V16QI, QI
+  v16i8_r = __lsx_vslei_b(v16i8_a, si5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.b(
+
+  // __lsx_vslei_h
+  // vd, vj, si5
+  // V8HI, V8HI, QI
+  v8i16_r = __lsx_vslei_h(v8i16_a, si5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.h(
+
+  // __lsx_vslei_w
+  // vd, vj, si5
+  // V4SI, V4SI, QI
+  v4i32_r = __lsx_vslei_w(v4i32_a, si5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.w(
+
+  // __lsx_vslei_d
+  // vd, vj, si5
+  // V2DI, V2DI, QI
+  v2i64_r = __lsx_vslei_d(v2i64_a, si5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.d(
+
+  // __lsx_vsle_bu
+  // vd, vj, vk
+  // V16QI, UV16QI, UV16QI
+  v16i8_r = __lsx_vsle_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsle.bu(
+
+  // __lsx_vsle_hu
+  // vd, vj, vk
+  // V8HI, UV8HI, UV8HI
+  v8i16_r = __lsx_vsle_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsle.hu(
+
+  // __lsx_vsle_wu
+  // vd, vj, vk
+  // V4SI, UV4SI, UV4SI
+  v4i32_r = __lsx_vsle_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsle.wu(
+
+  // __lsx_vsle_du
+  // vd, vj, vk
+  // V2DI, UV2DI, UV2DI
+  v2i64_r = __lsx_vsle_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsle.du(
+
+  // __lsx_vslei_bu
+  // vd, vj, ui5
+  // V16QI, UV16QI, UQI
+  v16i8_r = __lsx_vslei_bu(v16u8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vslei.bu(
+
+  // __lsx_vslei_hu
+  // vd, vj, ui5
+  // V8HI, UV8HI, UQI
+  v8i16_r = __lsx_vslei_hu(v8u16_a, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vslei.hu(
+
+  // __lsx_vslei_wu
+  // vd, vj, ui5
+  // V4SI, UV4SI, UQI
+  v4i32_r = __lsx_vslei_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vslei.wu(
+
+  // __lsx_vslei_du
+  // vd, vj, ui5
+  // V2DI, UV2DI, UQI
+  v2i64_r = __lsx_vslei_du(v2u64_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vslei.du(
+
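+  // vsat clamps each element to the signed (or unsigned) range implied by
+  // the immediate bit width.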
+  // __lsx_vsat_b
+  // vd, vj, ui3
+  // V16QI, V16QI, UQI
+  v16i8_r = __lsx_vsat_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.b(
+
+  // __lsx_vsat_h
+  // vd, vj, ui4
+  // V8HI, V8HI, UQI
+  v8i16_r = __lsx_vsat_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.h(
+
+  // __lsx_vsat_w
+  // vd, vj, ui5
+  // V4SI, V4SI, UQI
+  v4i32_r = __lsx_vsat_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.w(
+
+  // __lsx_vsat_d
+  // vd, vj, ui6
+  // V2DI, V2DI, UQI
+  v2i64_r = __lsx_vsat_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.d(
+
+  // __lsx_vsat_bu
+  // vd, vj, ui3
+  // UV16QI, UV16QI, UQI
+  v16u8_r = __lsx_vsat_bu(v16u8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsat.bu(
+
+  // __lsx_vsat_hu
+  // vd, vj, ui4
+  // UV8HI, UV8HI, UQI
+  v8u16_r = __lsx_vsat_hu(v8u16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsat.hu(
+
+  // __lsx_vsat_wu
+  // vd, vj, ui5
+  // UV4SI, UV4SI, UQI
+  v4u32_r = __lsx_vsat_wu(v4u32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsat.wu(
+
+  // __lsx_vsat_du
+  // vd, vj, ui6
+  // UV2DI, UV2DI, UQI
+  v2u64_r = __lsx_vsat_du(v2u64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsat.du(
+
+  // __lsx_vadda_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vadda_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vadda.b(
+
+  // __lsx_vadda_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vadda_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vadda.h(
+
+  // __lsx_vadda_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vadda_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vadda.w(
+
+  // __lsx_vadda_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vadda_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadda.d(
+
+  // __lsx_vsadd_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vsadd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.b(
+
+  // __lsx_vsadd_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vsadd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.h(
+
+  // __lsx_vsadd_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vsadd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.w(
+
+  // __lsx_vsadd_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vsadd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.d(
+
+  // __lsx_vsadd_bu
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vsadd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(
+
+  // __lsx_vsadd_hu
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vsadd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(
+
+  // __lsx_vsadd_wu
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vsadd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(
+
+  // __lsx_vsadd_du
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vsadd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsadd.du(
+
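+  // vavg/vavgr compute element-wise averages (the r forms round), vssub
+  // subtracts with saturation, and vabsd takes absolute differences.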
+  // __lsx_vavg_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vavg_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.b(
+
+  // __lsx_vavg_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vavg_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.h(
+
+  // __lsx_vavg_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vavg_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.w(
+
+  // __lsx_vavg_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vavg_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.d(
+
+  // __lsx_vavg_bu
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vavg_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavg.bu(
+
+  // __lsx_vavg_hu
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vavg_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavg.hu(
+
+  // __lsx_vavg_wu
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vavg_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavg.wu(
+
+  // __lsx_vavg_du
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vavg_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavg.du(
+
+  // __lsx_vavgr_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vavgr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.b(
+
+  // __lsx_vavgr_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vavgr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.h(
+
+  // __lsx_vavgr_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vavgr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.w(
+
+  // __lsx_vavgr_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vavgr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.d(
+
+  // __lsx_vavgr_bu
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vavgr_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(
+
+  // __lsx_vavgr_hu
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vavgr_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(
+
+  // __lsx_vavgr_wu
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vavgr_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(
+
+  // __lsx_vavgr_du
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vavgr_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vavgr.du(
+
+  // __lsx_vssub_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vssub_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.b(
+
+  // __lsx_vssub_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vssub_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.h(
+
+  // __lsx_vssub_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vssub_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.w(
+
+  // __lsx_vssub_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vssub_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.d(
+
+  // __lsx_vssub_bu
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vssub_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssub.bu(
+
+  // __lsx_vssub_hu
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vssub_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssub.hu(
+
+  // __lsx_vssub_wu
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vssub_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssub.wu(
+
+  // __lsx_vssub_du
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vssub_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssub.du(
+
+  // __lsx_vabsd_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vabsd_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.b(
+
+  // __lsx_vabsd_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vabsd_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.h(
+
+  // __lsx_vabsd_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vabsd_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.w(
+
+  // __lsx_vabsd_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vabsd_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.d(
+
+  // __lsx_vabsd_bu
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vabsd_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(
+
+  // __lsx_vabsd_hu
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vabsd_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(
+
+  // __lsx_vabsd_wu
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vabsd_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(
+
+  // __lsx_vabsd_du
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vabsd_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vabsd.du(
+
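+  // Multiply/divide family. vmadd and vmsub accumulate into vd, which is
+  // why their prototypes list four vector operands.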
+  // __lsx_vmul_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vmul_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmul.b(
+
+  // __lsx_vmul_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vmul_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmul.h(
+
+  // __lsx_vmul_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vmul_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmul.w(
+
+  // __lsx_vmul_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vmul_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmul.d(
+
+  // __lsx_vmadd_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vmadd_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmadd.b(
+
+  // __lsx_vmadd_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vmadd_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmadd.h(
+
+  // __lsx_vmadd_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vmadd_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmadd.w(
+
+  // __lsx_vmadd_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vmadd_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmadd.d(
+
+  // __lsx_vmsub_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vmsub_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsub.b(
+
+  // __lsx_vmsub_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vmsub_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmsub.h(
+
+  // __lsx_vmsub_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vmsub_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmsub.w(
+
+  // __lsx_vmsub_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vmsub_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmsub.d(
+
+  // __lsx_vdiv_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vdiv_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.b(
+
+  // __lsx_vdiv_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vdiv_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.h(
+
+  // __lsx_vdiv_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vdiv_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.w(
+
+  // __lsx_vdiv_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vdiv_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.d(
+
+  // __lsx_vdiv_bu
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vdiv_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(
+
+  // __lsx_vdiv_hu
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vdiv_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(
+
+  // __lsx_vdiv_wu
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vdiv_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(
+
+  // __lsx_vdiv_du
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vdiv_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vdiv.du(
+
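+  // Horizontal widening ops: vhaddw/vhsubw combine adjacent element pairs
+  // into the next wider element type, e.g. h.b forms halfwords from bytes.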
+  // __lsx_vhaddw_h_b
+  // vd, vj, vk
+  // V8HI, V16QI, V16QI
+  v8i16_r = __lsx_vhaddw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(
+
+  // __lsx_vhaddw_w_h
+  // vd, vj, vk
+  // V4SI, V8HI, V8HI
+  v4i32_r = __lsx_vhaddw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(
+
+  // __lsx_vhaddw_d_w
+  // vd, vj, vk
+  // V2DI, V4SI, V4SI
+  v2i64_r = __lsx_vhaddw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(
+
+  // __lsx_vhaddw_hu_bu
+  // vd, vj, vk
+  // UV8HI, UV16QI, UV16QI
+  v8u16_r = __lsx_vhaddw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(
+
+  // __lsx_vhaddw_wu_hu
+  // vd, vj, vk
+  // UV4SI, UV8HI, UV8HI
+  v4u32_r = __lsx_vhaddw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(
+
+  // __lsx_vhaddw_du_wu
+  // vd, vj, vk
+  // UV2DI, UV4SI, UV4SI
+  v2u64_r = __lsx_vhaddw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(
+
+  // __lsx_vhsubw_h_b
+  // vd, vj, vk
+  // V8HI, V16QI, V16QI
+  v8i16_r = __lsx_vhsubw_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(
+
+  // __lsx_vhsubw_w_h
+  // vd, vj, vk
+  // V4SI, V8HI, V8HI
+  v4i32_r = __lsx_vhsubw_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(
+
+  // __lsx_vhsubw_d_w
+  // vd, vj, vk
+  // V2DI, V4SI, V4SI
+  v2i64_r = __lsx_vhsubw_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(
+
+  // __lsx_vhsubw_hu_bu
+  // vd, vj, vk
+  // V8HI, UV16QI, UV16QI
+  v8i16_r = __lsx_vhsubw_hu_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(
+
+  // __lsx_vhsubw_wu_hu
+  // vd, vj, vk
+  // V4SI, UV8HI, UV8HI
+  v4i32_r = __lsx_vhsubw_wu_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(
+
+  // __lsx_vhsubw_du_wu
+  // vd, vj, vk
+  // V2DI, UV4SI, UV4SI
+  v2i64_r = __lsx_vhsubw_du_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(
+
+  // __lsx_vmod_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vmod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.b(
+
+  // __lsx_vmod_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vmod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.h(
+
+  // __lsx_vmod_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vmod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.w(
+
+  // __lsx_vmod_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vmod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.d(
+
+  // __lsx_vmod_bu
+  // vd, vj, vk
+  // UV16QI, UV16QI, UV16QI
+  v16u8_r = __lsx_vmod_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmod.bu(
+
+  // __lsx_vmod_hu
+  // vd, vj, vk
+  // UV8HI, UV8HI, UV8HI
+  v8u16_r = __lsx_vmod_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmod.hu(
+
+  // __lsx_vmod_wu
+  // vd, vj, vk
+  // UV4SI, UV4SI, UV4SI
+  v4u32_r = __lsx_vmod_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmod.wu(
+
+  // __lsx_vmod_du
+  // vd, vj, vk
+  // UV2DI, UV2DI, UV2DI
+  v2u64_r = __lsx_vmod_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmod.du(
+
+  // __lsx_vreplve_b
+  // vd, vj, rk
+  // V16QI, V16QI, SI
+  v16i8_r = __lsx_vreplve_b(v16i8_a, i32_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplve.b(
+
+  // __lsx_vreplve_h
+  // vd, vj, rk
+  // V8HI, V8HI, SI
+  v8i16_r = __lsx_vreplve_h(v8i16_a, i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplve.h(
+
+  // __lsx_vreplve_w
+  // vd, vj, rk
+  // V4SI, V4SI, SI
+  v4i32_r = __lsx_vreplve_w(v4i32_a, i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplve.w(
+
+  // __lsx_vreplve_d
+  // vd, vj, rk
+  // V2DI, V2DI, SI
+  v2i64_r = __lsx_vreplve_d(v2i64_a, i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplve.d(
+
+  // __lsx_vreplvei_b
+  // vd, vj, ui4
+  // V16QI, V16QI, UQI
+  v16i8_r = __lsx_vreplvei_b(v16i8_a, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(
+
+  // __lsx_vreplvei_h
+  // vd, vj, ui3
+  // V8HI, V8HI, UQI
+  v8i16_r = __lsx_vreplvei_h(v8i16_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(
+
+  // __lsx_vreplvei_w
+  // vd, vj, ui2
+  // V4SI, V4SI, UQI
+  v4i32_r = __lsx_vreplvei_w(v4i32_a, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(
+
+  // __lsx_vreplvei_d
+  // vd, vj, ui1
+  // V2DI, V2DI, UQI
+  v2i64_r = __lsx_vreplvei_d(v2i64_a, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(
+
+  // __lsx_vpickev_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vpickev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickev.b(
+
+  // __lsx_vpickev_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vpickev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickev.h(
+
+  // __lsx_vpickev_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vpickev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickev.w(
+
+  // __lsx_vpickev_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vpickev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickev.d(
+
+  // __lsx_vpickod_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vpickod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpickod.b(
+
+  // __lsx_vpickod_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vpickod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpickod.h(
+
+  // __lsx_vpickod_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vpickod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpickod.w(
+
+  // __lsx_vpickod_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vpickod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpickod.d(
+
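+  // Element rearrangement: vilvh/vilvl interleave the high/low halves of the
+  // two sources, and vpackev/vpackod pair even/odd elements, complementing
+  // the vpickev/vpickod selections above.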
+  // __lsx_vilvh_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vilvh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvh.b(
+
+  // __lsx_vilvh_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vilvh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvh.h(
+
+  // __lsx_vilvh_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vilvh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvh.w(
+
+  // __lsx_vilvh_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vilvh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvh.d(
+
+  // __lsx_vilvl_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vilvl_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vilvl.b(
+
+  // __lsx_vilvl_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vilvl_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vilvl.h(
+
+  // __lsx_vilvl_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vilvl_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vilvl.w(
+
+  // __lsx_vilvl_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vilvl_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vilvl.d(
+
+  // __lsx_vpackev_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vpackev_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackev.b(
+
+  // __lsx_vpackev_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vpackev_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackev.h(
+
+  // __lsx_vpackev_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vpackev_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackev.w(
+
+  // __lsx_vpackev_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vpackev_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackev.d(
+
+  // __lsx_vpackod_b
+  // vd, vj, vk
+  // V16QI, V16QI, V16QI
+  v16i8_r = __lsx_vpackod_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vpackod.b(
+
+  // __lsx_vpackod_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vpackod_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpackod.h(
+
+  // __lsx_vpackod_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vpackod_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpackod.w(
+
+  // __lsx_vpackod_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vpackod_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpackod.d(
+
+  // __lsx_vshuf_h
+  // vd, vj, vk
+  // V8HI, V8HI, V8HI, V8HI
+  v8i16_r = __lsx_vshuf_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vshuf.h(
+
+  // __lsx_vshuf_w
+  // vd, vj, vk
+  // V4SI, V4SI, V4SI, V4SI
+  v4i32_r = __lsx_vshuf_w(v4i32_a, v4i32_b, v4i32_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vshuf.w(
+
+  // __lsx_vshuf_d
+  // vd, vj, vk
+  // V2DI, V2DI, V2DI, V2DI
+  v2i64_r = __lsx_vshuf_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf.d(
+
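+  // Full-register bitwise ops (the _v suffix operates on the whole 128-bit
+  // vector), each paired with a byte-immediate form such as vandi.b.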
+ // __lsx_vpcnt_h
+ // vd, vj
+ // V8HI, V8HI
+ v8i16_r = __lsx_vpcnt_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(
+
+ // __lsx_vpcnt_w
+ // vd, vj
+ // V4SI, V4SI
+ v4i32_r = __lsx_vpcnt_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(
+
+ // __lsx_vpcnt_d
+ // vd, vj
+ // V2DI, V2DI
+ v2i64_r = __lsx_vpcnt_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(
+
+ // __lsx_vclo_b
+ // vd, vj
+ // V16QI, V16QI
+ v16i8_r = __lsx_vclo_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclo.b(
+
+ // __lsx_vclo_h
+ // vd, vj
+ // V8HI, V8HI
+ v8i16_r = __lsx_vclo_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclo.h(
+
+ // __lsx_vclo_w
+ // vd, vj
+ // V4SI, V4SI
+ v4i32_r = __lsx_vclo_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclo.w(
+
+ // __lsx_vclo_d
+ // vd, vj
+ // V2DI, V2DI
+ v2i64_r = __lsx_vclo_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclo.d(
+
+ // __lsx_vclz_b
+ // vd, vj
+ // V16QI, V16QI
+ v16i8_r = __lsx_vclz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vclz.b(
+
+ // __lsx_vclz_h
+ // vd, vj
+ // V8HI, V8HI
+ v8i16_r = __lsx_vclz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vclz.h(
+
+ // __lsx_vclz_w
+ // vd, vj
+ // V4SI, V4SI
+ v4i32_r = __lsx_vclz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vclz.w(
+
+ // __lsx_vclz_d
+ // vd, vj
+ // V2DI, V2DI
+ v2i64_r = __lsx_vclz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vclz.d(
+
+ // __lsx_vpickve2gr_b
+ // rd, vj, ui4
+ // SI, V16QI, UQI
+ i32_r = __lsx_vpickve2gr_b(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.b(
+
+ // __lsx_vpickve2gr_h
+ // rd, vj, ui3
+ // SI, V8HI, UQI
+ i32_r = __lsx_vpickve2gr_h(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.h(
+
+ // __lsx_vpickve2gr_w
+ // rd, vj, ui2
+ // SI, V4SI, UQI
+ i32_r = __lsx_vpickve2gr_w(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.w(
+
+ // __lsx_vpickve2gr_d
+ // rd, vj, ui1
+ // DI, V2DI, UQI
+ i64_r = __lsx_vpickve2gr_d(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.d(
+
+ // __lsx_vpickve2gr_bu
+ // rd, vj, ui4
+ // USI, V16QI, UQI
+ u32_r = __lsx_vpickve2gr_bu(v16i8_a, ui4); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.bu(
+
+ // __lsx_vpickve2gr_hu
+ // rd, vj, ui3
+ // USI, V8HI, UQI
+ u32_r = __lsx_vpickve2gr_hu(v8i16_a, ui3); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.hu(
+
+ // __lsx_vpickve2gr_wu
+ // rd, vj, ui2
+ // USI, V4SI, UQI
+ u32_r = __lsx_vpickve2gr_wu(v4i32_a, ui2); // CHECK: call i32 @llvm.loongarch.lsx.vpickve2gr.wu(
+
+ // __lsx_vpickve2gr_du
+ // rd, vj, ui1
+ // UDI, V2DI, UQI
+ u64_r = __lsx_vpickve2gr_du(v2i64_a, ui1); // CHECK: call i64 @llvm.loongarch.lsx.vpickve2gr.du(
+
+ // __lsx_vinsgr2vr_b
+ // vd, rj, ui4
+ // V16QI, V16QI, SI, UQI
+ v16i8_r = __lsx_vinsgr2vr_b(v16i8_a, i32_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(
+
+ // __lsx_vinsgr2vr_h
+ // vd, rj, ui3
+ // V8HI, V8HI, SI, UQI
+ v8i16_r = __lsx_vinsgr2vr_h(v8i16_a, i32_b, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(
+
+ // __lsx_vinsgr2vr_w
+ // vd, rj, ui2
+ // V4SI, V4SI, SI, UQI
+ v4i32_r = __lsx_vinsgr2vr_w(v4i32_a, i32_b, ui2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(
+
+ // __lsx_vinsgr2vr_d
+ // vd, rj, ui1
+ // V2DI, V2DI, SI, UQI
+ v2i64_r = __lsx_vinsgr2vr_d(v2i64_a, i32_b, ui1); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(
+
+ // __lsx_vfcmp_caf_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_caf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(
+
+ // __lsx_vfcmp_caf_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_caf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(
+
+ // __lsx_vfcmp_cor_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_cor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(
+
+ // __lsx_vfcmp_cor_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_cor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(
+
+ // __lsx_vfcmp_cun_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_cun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(
+
+ // __lsx_vfcmp_cun_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_cun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(
+
+ // __lsx_vfcmp_cune_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_cune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(
+
+ // __lsx_vfcmp_cune_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_cune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(
+
+ // __lsx_vfcmp_cueq_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_cueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(
+
+ // __lsx_vfcmp_cueq_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_cueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(
+
+ // __lsx_vfcmp_ceq_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_ceq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(
+
+ // __lsx_vfcmp_ceq_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_ceq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(
+
+ // __lsx_vfcmp_cne_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_cne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(
+
+ // __lsx_vfcmp_cne_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_cne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(
+
+ // __lsx_vfcmp_clt_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_clt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(
+
+ // __lsx_vfcmp_clt_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_clt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(
+
+ // __lsx_vfcmp_cult_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_cult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(
+
+ // __lsx_vfcmp_cult_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_cult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(
+
+ // __lsx_vfcmp_cle_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_cle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(
+
+ // __lsx_vfcmp_cle_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_cle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(
+
+ // __lsx_vfcmp_cule_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_cule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(
+
+ // __lsx_vfcmp_cule_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_cule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(
+
+ // __lsx_vfcmp_saf_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_saf_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(
+
+ // __lsx_vfcmp_saf_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_saf_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(
+
+ // __lsx_vfcmp_sor_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_sor_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(
+
+ // __lsx_vfcmp_sor_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_sor_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(
+
+ // __lsx_vfcmp_sun_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_sun_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(
+
+ // __lsx_vfcmp_sun_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_sun_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(
+
+ // __lsx_vfcmp_sune_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_sune_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(
+
+ // __lsx_vfcmp_sune_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_sune_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(
+
+ // __lsx_vfcmp_sueq_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_sueq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(
+
+ // __lsx_vfcmp_sueq_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_sueq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(
+
+ // __lsx_vfcmp_seq_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_seq_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(
+
+ // __lsx_vfcmp_seq_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_seq_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(
+
+ // __lsx_vfcmp_sne_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_sne_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(
+
+ // __lsx_vfcmp_sne_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_sne_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(
+
+ // __lsx_vfcmp_slt_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_slt_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(
+
+ // __lsx_vfcmp_slt_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_slt_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(
+
+ // __lsx_vfcmp_sult_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_sult_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(
+
+ // __lsx_vfcmp_sult_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_sult_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(
+
+ // __lsx_vfcmp_sle_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_sle_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(
+
+ // __lsx_vfcmp_sle_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_sle_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(
+
+ // __lsx_vfcmp_sule_s
+ // vd, vj, vk
+ // V4SI, V4SF, V4SF
+ v4i32_r = __lsx_vfcmp_sule_s(v4f32_a, v4f32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(
+
+ // __lsx_vfcmp_sule_d
+ // vd, vj, vk
+ // V2DI, V2DF, V2DF
+ v2i64_r = __lsx_vfcmp_sule_d(v2f64_a, v2f64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(
+
+ // __lsx_vfadd_s
+ // vd, vj, vk
+ // V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfadd_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfadd.s(
+
+ // __lsx_vfadd_d
+ // vd, vj, vk
+ // V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfadd_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfadd.d(
+
+ // __lsx_vfsub_s
+ // vd, vj, vk
+ // V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfsub_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsub.s(
+
+ // __lsx_vfsub_d
+ // vd, vj, vk
+ // V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfsub_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsub.d(
+
+ // __lsx_vfmul_s
+ // vd, vj, vk
+ // V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfmul_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmul.s(
+
+ // __lsx_vfmul_d
+ // vd, vj, vk
+ // V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfmul_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmul.d(
+
+ // __lsx_vfdiv_s
+ // vd, vj, vk
+ // V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfdiv_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfdiv.s(
+
+ // __lsx_vfdiv_d
+ // vd, vj, vk
+ // V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfdiv_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfdiv.d(
+
+ // __lsx_vfcvt_h_s
+ // vd, vj, vk
+ // V8HI, V4SF, V4SF
+ v8i16_r = __lsx_vfcvt_h_s(v4f32_a, v4f32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(
+
+ // __lsx_vfcvt_s_d
+ // vd, vj, vk
+ // V4SF, V2DF, V2DF
+ v4f32_r = __lsx_vfcvt_s_d(v2f64_a, v2f64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(
+
+ // __lsx_vfmin_s
+ // vd, vj, vk
+ // V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfmin_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmin.s(
+
+ // __lsx_vfmin_d
+ // vd, vj, vk
+ // V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfmin_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmin.d(
+
+ // __lsx_vfmina_s
+ // vd, vj, vk
+ // V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfmina_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmina.s(
+
+ // __lsx_vfmina_d
+ // vd, vj, vk
+ // V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfmina_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmina.d(
+
+ // __lsx_vfmax_s
+ // vd, vj, vk
+ // V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfmax_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmax.s(
+
+ // __lsx_vfmax_d
+ // vd, vj, vk
+ // V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfmax_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmax.d(
+
+ // __lsx_vfmaxa_s
+ // vd, vj, vk
+ // V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfmaxa_s(v4f32_a, v4f32_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(
+
+ // __lsx_vfmaxa_d
+ // vd, vj, vk
+ // V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfmaxa_d(v2f64_a, v2f64_b); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(
+
+ // __lsx_vfclass_s
+ // vd, vj
+ // V4SI, V4SF
+ v4i32_r = __lsx_vfclass_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vfclass.s(
+
+ // __lsx_vfclass_d
+ // vd, vj
+ // V2DI, V2DF
+ v2i64_r = __lsx_vfclass_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vfclass.d(
+
+ // __lsx_vfsqrt_s
+ // vd, vj
+ // V4SF, V4SF
+ v4f32_r = __lsx_vfsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(
+
+ // __lsx_vfsqrt_d
+ // vd, vj
+ // V2DF, V2DF
+ v2f64_r = __lsx_vfsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(
+
+ // __lsx_vfrecip_s
+ // vd, vj
+ // V4SF, V4SF
+ v4f32_r = __lsx_vfrecip_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrecip.s(
+
+ // __lsx_vfrecip_d
+ // vd, vj
+ // V2DF, V2DF
+ v2f64_r = __lsx_vfrecip_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrecip.d(
+
+ // __lsx_vfrint_s
+ // vd, vj
+ // V4SF, V4SF
+ v4f32_r = __lsx_vfrint_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrint.s(
+
+ // __lsx_vfrint_d
+ // vd, vj
+ // V2DF, V2DF
+ v2f64_r = __lsx_vfrint_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrint.d(
+
+ // __lsx_vfrsqrt_s
+ // vd, vj
+ // V4SF, V4SF
+ v4f32_r = __lsx_vfrsqrt_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(
+
+ // __lsx_vfrsqrt_d
+ // vd, vj
+ // V2DF, V2DF
+ v2f64_r = __lsx_vfrsqrt_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(
+
+ // __lsx_vflogb_s
+ // vd, vj
+ // V4SF, V4SF
+ v4f32_r = __lsx_vflogb_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vflogb.s(
+
+ // __lsx_vflogb_d
+ // vd, vj
+ // V2DF, V2DF
+ v2f64_r = __lsx_vflogb_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vflogb.d(
+
+ // __lsx_vfcvth_s_h
+ // vd, vj
+ // V4SF, V8HI
+ v4f32_r = __lsx_vfcvth_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(
+
+ // __lsx_vfcvth_d_s
+ // vd, vj
+ // V2DF, V4SF
+ v2f64_r = __lsx_vfcvth_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(
+
+ //gcc build fail
+
+ // __lsx_vfcvtl_s_h
+ // vd, vj
+ // V4SF, V8HI
+ v4f32_r = __lsx_vfcvtl_s_h(v8i16_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(
+
+ // __lsx_vfcvtl_d_s
+ // vd, vj
+ // V2DF, V4SF
+ v2f64_r = __lsx_vfcvtl_d_s(v4f32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(
+
+ // __lsx_vftint_w_s
+ // vd, vj
+ // V4SI, V4SF
+ v4i32_r = __lsx_vftint_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(
+
+ // __lsx_vftint_l_d
+ // vd, vj
+ // V2DI, V2DF
+ v2i64_r = __lsx_vftint_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(
+
+ // __lsx_vftint_wu_s
+ // vd, vj
+ // UV4SI, V4SF
+ v4u32_r = __lsx_vftint_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(
+
+ // __lsx_vftint_lu_d
+ // vd, vj
+ // UV2DI, V2DF
+ v2u64_r = __lsx_vftint_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(
+
+ // __lsx_vftintrz_w_s
+ // vd, vj
+ // V4SI, V4SF
+ v4i32_r = __lsx_vftintrz_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(
+
+ // __lsx_vftintrz_l_d
+ // vd, vj
+ // V2DI, V2DF
+ v2i64_r = __lsx_vftintrz_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(
+
+ // __lsx_vftintrz_wu_s
+ // vd, vj
+ // UV4SI, V4SF
+ v4u32_r = __lsx_vftintrz_wu_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(
+
+ // __lsx_vftintrz_lu_d
+ // vd, vj
+ // UV2DI, V2DF
+ v2u64_r = __lsx_vftintrz_lu_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(
+
+ // __lsx_vffint_s_w
+ // vd, vj
+ // V4SF, V4SI
+ v4f32_r = __lsx_vffint_s_w(v4i32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.w(
+
+ // __lsx_vffint_d_l
+ // vd, vj
+ // V2DF, V2DI
+ v2f64_r = __lsx_vffint_d_l(v2i64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.l(
+
+ // __lsx_vffint_s_wu
+ // vd, vj
+ // V4SF, UV4SI
+ v4f32_r = __lsx_vffint_s_wu(v4u32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(
+
+ // __lsx_vffint_d_lu
+ // vd, vj
+ // V2DF, UV2DI
+ v2f64_r = __lsx_vffint_d_lu(v2u64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(
+
+ // __lsx_vandn_v
+ // vd, vj, vk
+ // UV16QI, UV16QI, UV16QI
+ v16u8_r = __lsx_vandn_v(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vandn.v(
+
+ // __lsx_vneg_b
+ // vd, vj
+ // V16QI, V16QI
+ v16i8_r = __lsx_vneg_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vneg.b(
+
+ // __lsx_vneg_h
+ // vd, vj
+ // V8HI, V8HI
+ v8i16_r = __lsx_vneg_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vneg.h(
+
+ // __lsx_vneg_w
+ // vd, vj
+ // V4SI, V4SI
+ v4i32_r = __lsx_vneg_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vneg.w(
+
+ // __lsx_vneg_d
+ // vd, vj
+ // V2DI, V2DI
+ v2i64_r = __lsx_vneg_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vneg.d(
+
+ // __lsx_vmuh_b
+ // vd, vj, vk
+ // V16QI, V16QI, V16QI
+ v16i8_r = __lsx_vmuh_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.b(
+
+ // __lsx_vmuh_h
+ // vd, vj, vk
+ // V8HI, V8HI, V8HI
+ v8i16_r = __lsx_vmuh_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.h(
+
+ // __lsx_vmuh_w
+ // vd, vj, vk
+ // V4SI, V4SI, V4SI
+ v4i32_r = __lsx_vmuh_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.w(
+
+ // __lsx_vmuh_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vmuh_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.d(
+
+ // __lsx_vmuh_bu
+ // vd, vj, vk
+ // UV16QI, UV16QI, UV16QI
+ v16u8_r = __lsx_vmuh_bu(v16u8_a, v16u8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(
+
+ // __lsx_vmuh_hu
+ // vd, vj, vk
+ // UV8HI, UV8HI, UV8HI
+ v8u16_r = __lsx_vmuh_hu(v8u16_a, v8u16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(
+
+ // __lsx_vmuh_wu
+ // vd, vj, vk
+ // UV4SI, UV4SI, UV4SI
+ v4u32_r = __lsx_vmuh_wu(v4u32_a, v4u32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(
+
+ // __lsx_vmuh_du
+ // vd, vj, vk
+ // UV2DI, UV2DI, UV2DI
+ v2u64_r = __lsx_vmuh_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmuh.du(
+
+ // __lsx_vsllwil_h_b
+ // vd, vj, ui3
+ // V8HI, V16QI, UQI
+ v8i16_r = __lsx_vsllwil_h_b(v16i8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(
+
+ // __lsx_vsllwil_w_h
+ // vd, vj, ui4
+ // V4SI, V8HI, UQI
+ v4i32_r = __lsx_vsllwil_w_h(v8i16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(
+
+ // __lsx_vsllwil_d_w
+ // vd, vj, ui5
+ // V2DI, V4SI, UQI
+ v2i64_r = __lsx_vsllwil_d_w(v4i32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(
+
+ // __lsx_vsllwil_hu_bu
+ // vd, vj, ui3
+ // UV8HI, UV16QI, UQI
+ v8u16_r = __lsx_vsllwil_hu_bu(v16u8_a, ui3); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(
+
+ // __lsx_vsllwil_wu_hu
+ // vd, vj, ui4
+ // UV4SI, UV8HI, UQI
+ v4u32_r = __lsx_vsllwil_wu_hu(v8u16_a, ui4); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(
+
+ // __lsx_vsllwil_du_wu
+ // vd, vj, ui5
+ // UV2DI, UV4SI, UQI
+ v2u64_r = __lsx_vsllwil_du_wu(v4u32_a, ui5); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(
+
+ // __lsx_vsran_b_h
+ // vd, vj, vk
+ // V16QI, V8HI, V8HI
+ v16i8_r = __lsx_vsran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(
+
+ // __lsx_vsran_h_w
+ // vd, vj, vk
+ // V8HI, V4SI, V4SI
+ v8i16_r = __lsx_vsran_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(
+
+ // __lsx_vsran_w_d
+ // vd, vj, vk
+ // V4SI, V2DI, V2DI
+ v4i32_r = __lsx_vsran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(
+
+ // __lsx_vssran_b_h
+ // vd, vj, vk
+ // V16QI, V8HI, V8HI
+ v16i8_r = __lsx_vssran_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(
+
+ // __lsx_vssran_h_w
+ // vd, vj, vk
+ // V8HI, V4SI, V4SI
+ v8i16_r = __lsx_vssran_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(
+
+ // __lsx_vssran_w_d
+ // vd, vj, vk
+ // V4SI, V2DI, V2DI
+ v4i32_r = __lsx_vssran_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(
+
+ // __lsx_vssran_bu_h
+ // vd, vj, vk
+ // UV16QI, UV8HI, UV8HI
+ v16u8_r = __lsx_vssran_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(
+
+ // __lsx_vssran_hu_w
+ // vd, vj, vk
+ // UV8HI, UV4SI, UV4SI
+ v8u16_r = __lsx_vssran_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(
+
+ // __lsx_vssran_wu_d
+ // vd, vj, vk
+ // UV4SI, UV2DI, UV2DI
+ v4u32_r = __lsx_vssran_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(
+
+ // __lsx_vsrarn_b_h
+ // vd, vj, vk
+ // V16QI, V8HI, V8HI
+ v16i8_r = __lsx_vsrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(
+
+ // __lsx_vsrarn_h_w
+ // vd, vj, vk
+ // V8HI, V4SI, V4SI
+ v8i16_r = __lsx_vsrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(
+
+ // __lsx_vsrarn_w_d
+ // vd, vj, vk
+ // V4SI, V2DI, V2DI
+ v4i32_r = __lsx_vsrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(
+
+ // __lsx_vssrarn_b_h
+ // vd, vj, vk
+ // V16QI, V8HI, V8HI
+ v16i8_r = __lsx_vssrarn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(
+
+ // __lsx_vssrarn_h_w
+ // vd, vj, vk
+ // V8HI, V4SI, V4SI
+ v8i16_r = __lsx_vssrarn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(
+
+ // __lsx_vssrarn_w_d
+ // vd, vj, vk
+ // V4SI, V2DI, V2DI
+ v4i32_r = __lsx_vssrarn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(
+
+ // __lsx_vssrarn_bu_h
+ // vd, vj, vk
+ // UV16QI, UV8HI, UV8HI
+ v16u8_r = __lsx_vssrarn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(
+
+ // __lsx_vssrarn_hu_w
+ // vd, vj, vk
+ // UV8HI, UV4SI, UV4SI
+ v8u16_r = __lsx_vssrarn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(
+
+ // __lsx_vssrarn_wu_d
+ // vd, vj, vk
+ // UV4SI, UV2DI, UV2DI
+ v4u32_r = __lsx_vssrarn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(
+
+ // __lsx_vsrln_b_h
+ // vd, vj, vk
+ // V16QI, V8HI, V8HI
+ v16i8_r = __lsx_vsrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(
+
+ // __lsx_vsrln_h_w
+ // vd, vj, vk
+ // V8HI, V4SI, V4SI
+ v8i16_r = __lsx_vsrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(
+
+ // __lsx_vsrln_w_d
+ // vd, vj, vk
+ // V4SI, V2DI, V2DI
+ v4i32_r = __lsx_vsrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(
+
+ // __lsx_vssrln_bu_h
+ // vd, vj, vk
+ // UV16QI, UV8HI, UV8HI
+ v16u8_r = __lsx_vssrln_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(
+
+ // __lsx_vssrln_hu_w
+ // vd, vj, vk
+ // UV8HI, UV4SI, UV4SI
+ v8u16_r = __lsx_vssrln_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(
+
+ // __lsx_vssrln_wu_d
+ // vd, vj, vk
+ // UV4SI, UV2DI, UV2DI
+ v4u32_r = __lsx_vssrln_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(
+
+ // __lsx_vsrlrn_b_h
+ // vd, vj, vk
+ // V16QI, V8HI, V8HI
+ v16i8_r = __lsx_vsrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(
+
+ // __lsx_vsrlrn_h_w
+ // vd, vj, vk
+ // V8HI, V4SI, V4SI
+ v8i16_r = __lsx_vsrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(
+
+ // __lsx_vsrlrn_w_d
+ // vd, vj, vk
+ // V4SI, V2DI, V2DI
+ v4i32_r = __lsx_vsrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(
+
+ // __lsx_vssrlrn_bu_h
+ // vd, vj, vk
+ // UV16QI, UV8HI, UV8HI
+ v16u8_r = __lsx_vssrlrn_bu_h(v8u16_a, v8u16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(
+
+ // __lsx_vssrlrn_hu_w
+ // vd, vj, vk
+ // UV8HI, UV4SI, UV4SI
+ v8u16_r = __lsx_vssrlrn_hu_w(v4u32_a, v4u32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(
+
+ // __lsx_vssrlrn_wu_d
+ // vd, vj, vk
+ // UV4SI, UV2DI, UV2DI
+ v4u32_r = __lsx_vssrlrn_wu_d(v2u64_a, v2u64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(
+
+ // __lsx_vfrstpi_b
+ // vd, vj, ui5
+ // V16QI, V16QI, V16QI, UQI
+ v16i8_r = __lsx_vfrstpi_b(v16i8_a, v16i8_b, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(
+
+ // __lsx_vfrstpi_h
+ // vd, vj, ui5
+ // V8HI, V8HI, V8HI, UQI
+ v8i16_r = __lsx_vfrstpi_h(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(
+
+ // __lsx_vfrstp_b
+ // vd, vj, vk
+ // V16QI, V16QI, V16QI, V16QI
+ v16i8_r = __lsx_vfrstp_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(
+
+ // __lsx_vfrstp_h
+ // vd, vj, vk
+ // V8HI, V8HI, V8HI, V8HI
+ v8i16_r = __lsx_vfrstp_h(v8i16_a, v8i16_b, v8i16_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(
+
+ // __lsx_vshuf4i_d
+ // vd, vj, ui8
+ // V2DI, V2DI, V2DI, USI
+ v2i64_r = __lsx_vshuf4i_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(
+
+ // __lsx_vbsrl_v
+ // vd, vj, ui5
+ // V16QI, V16QI, UQI
+ v16i8_r = __lsx_vbsrl_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(
+
+ // __lsx_vbsll_v
+ // vd, vj, ui5
+ // V16QI, V16QI, UQI
+ v16i8_r = __lsx_vbsll_v(v16i8_a, ui5); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vbsll.v(
+
+ // __lsx_vextrins_b
+ // vd, vj, ui8
+ // V16QI, V16QI, V16QI, UQI
+ v16i8_r = __lsx_vextrins_b(v16i8_a, v16i8_b, ui8); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vextrins.b(
+
+ // __lsx_vextrins_h
+ // vd, vj, ui8
+ // V8HI, V8HI, V8HI, UQI
+ v8i16_r = __lsx_vextrins_h(v8i16_a, v8i16_b, ui8); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vextrins.h(
+
+ // __lsx_vextrins_w
+ // vd, vj, ui8
+ // V4SI, V4SI, V4SI, UQI
+ v4i32_r = __lsx_vextrins_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vextrins.w(
+
+ // __lsx_vextrins_d
+ // vd, vj, ui8
+ // V2DI, V2DI, V2DI, UQI
+ v2i64_r = __lsx_vextrins_d(v2i64_a, v2i64_b, ui8); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextrins.d(
+
+ // __lsx_vmskltz_b
+ // vd, vj
+ // V16QI, V16QI
+ v16i8_r = __lsx_vmskltz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(
+
+ // __lsx_vmskltz_h
+ // vd, vj
+ // V8HI, V8HI
+ v8i16_r = __lsx_vmskltz_h(v8i16_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(
+
+ // __lsx_vmskltz_w
+ // vd, vj
+ // V4SI, V4SI
+ v4i32_r = __lsx_vmskltz_w(v4i32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(
+
+ // __lsx_vmskltz_d
+ // vd, vj
+ // V2DI, V2DI
+ v2i64_r = __lsx_vmskltz_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(
+
+ // __lsx_vsigncov_b
+ // vd, vj, vk
+ // V16QI, V16QI, V16QI
+ v16i8_r = __lsx_vsigncov_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(
+
+ // __lsx_vsigncov_h
+ // vd, vj, vk
+ // V8HI, V8HI, V8HI
+ v8i16_r = __lsx_vsigncov_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(
+
+ // __lsx_vsigncov_w
+ // vd, vj, vk
+ // V4SI, V4SI, V4SI
+ v4i32_r = __lsx_vsigncov_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(
+
+ // __lsx_vsigncov_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vsigncov_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(
+
+ // __lsx_vfmadd_s
+ // vd, vj, vk, va
+ // V4SF, V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmadd.s(
+
+ // __lsx_vfmadd_d
+ // vd, vj, vk, va
+ // V2DF, V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmadd.d(
+
+ // __lsx_vfmsub_s
+ // vd, vj, vk, va
+ // V4SF, V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfmsub.s(
+
+ // __lsx_vfmsub_d
+ // vd, vj, vk, va
+ // V2DF, V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfmsub.d(
+
+ // __lsx_vfnmadd_s
+ // vd, vj, vk, va
+ // V4SF, V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfnmadd_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(
+
+ // __lsx_vfnmadd_d
+ // vd, vj, vk, va
+ // V2DF, V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfnmadd_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(
+
+ // __lsx_vfnmsub_s
+ // vd, vj, vk, va
+ // V4SF, V4SF, V4SF, V4SF
+ v4f32_r = __lsx_vfnmsub_s(v4f32_a, v4f32_b, v4f32_c); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(
+
+ // __lsx_vfnmsub_d
+ // vd, vj, vk, va
+ // V2DF, V2DF, V2DF, V2DF
+ v2f64_r = __lsx_vfnmsub_d(v2f64_a, v2f64_b, v2f64_c); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(
+
+ // __lsx_vftintrne_w_s
+ // vd, vj
+ // V4SI, V4SF
+ v4i32_r = __lsx_vftintrne_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(
+
+ // __lsx_vftintrne_l_d
+ // vd, vj
+ // V2DI, V2DF
+ v2i64_r = __lsx_vftintrne_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(
+
+ // __lsx_vftintrp_w_s
+ // vd, vj
+ // V4SI, V4SF
+ v4i32_r = __lsx_vftintrp_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(
+
+ // __lsx_vftintrp_l_d
+ // vd, vj
+ // V2DI, V2DF
+ v2i64_r = __lsx_vftintrp_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(
+
+ // __lsx_vftintrm_w_s
+ // vd, vj
+ // V4SI, V4SF
+ v4i32_r = __lsx_vftintrm_w_s(v4f32_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(
+
+ // __lsx_vftintrm_l_d
+ // vd, vj
+ // V2DI, V2DF
+ v2i64_r = __lsx_vftintrm_l_d(v2f64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(
+
+ // __lsx_vftint_w_d
+ // vd, vj, vk
+ // V4SI, V2DF, V2DF
+ v4i32_r = __lsx_vftint_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(
+
+ // __lsx_vffint_s_l
+ // vd, vj, vk
+ // V4SF, V2DI, V2DI
+ v4f32_r = __lsx_vffint_s_l(v2i64_a, v2i64_b); // CHECK: call <4 x float> @llvm.loongarch.lsx.vffint.s.l(
+
+ // __lsx_vftintrz_w_d
+ // vd, vj, vk
+ // V4SI, V2DF, V2DF
+ v4i32_r = __lsx_vftintrz_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(
+
+ // __lsx_vftintrp_w_d
+ // vd, vj, vk
+ // V4SI, V2DF, V2DF
+ v4i32_r = __lsx_vftintrp_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(
+
+ // __lsx_vftintrm_w_d
+ // vd, vj, vk
+ // V4SI, V2DF, V2DF
+ v4i32_r = __lsx_vftintrm_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(
+
+ // __lsx_vftintrne_w_d
+ // vd, vj, vk
+ // V4SI, V2DF, V2DF
+ v4i32_r = __lsx_vftintrne_w_d(v2f64_a, v2f64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(
+
+ // __lsx_vftintl_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftintl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(
+
+ // __lsx_vftinth_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftinth_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(
+
+ // __lsx_vffinth_d_w
+ // vd, vj
+ // V2DF, V4SI
+ v2f64_r = __lsx_vffinth_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(
+
+ // __lsx_vffintl_d_w
+ // vd, vj
+ // V2DF, V4SI
+ v2f64_r = __lsx_vffintl_d_w(v4i32_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(
+
+ // __lsx_vftintrzl_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftintrzl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(
+
+ // __lsx_vftintrzh_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftintrzh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(
+
+ // __lsx_vftintrpl_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftintrpl_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(
+
+ // __lsx_vftintrph_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftintrph_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(
+
+ // __lsx_vftintrml_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftintrml_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(
+
+ // __lsx_vftintrmh_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftintrmh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(
+
+ // __lsx_vftintrnel_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftintrnel_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(
+
+ // __lsx_vftintrneh_l_s
+ // vd, vj
+ // V2DI, V4SF
+ v2i64_r = __lsx_vftintrneh_l_s(v4f32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(
+
+ // __lsx_vfrintrne_s
+ // vd, vj
+ // V4SF, V4SF
+ v4f32_r = __lsx_vfrintrne_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(
+
+ // __lsx_vfrintrne_d
+ // vd, vj
+ // V2DF, V2DF
+ v2f64_r = __lsx_vfrintrne_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(
+
+ // __lsx_vfrintrz_s
+ // vd, vj
+ // V4SF, V4SF
+ v4f32_r = __lsx_vfrintrz_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(
+
+ // __lsx_vfrintrz_d
+ // vd, vj
+ // V2DF, V2DF
+ v2f64_r = __lsx_vfrintrz_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(
+
+ // __lsx_vfrintrp_s
+ // vd, vj
+ // V4SF, V4SF
+ v4f32_r = __lsx_vfrintrp_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(
+
+ // __lsx_vfrintrp_d
+ // vd, vj
+ // V2DF, V2DF
+ v2f64_r = __lsx_vfrintrp_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(
+
+ // __lsx_vfrintrm_s
+ // vd, vj
+ // V4SF, V4SF
+ v4f32_r = __lsx_vfrintrm_s(v4f32_a); // CHECK: call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(
+
+ // __lsx_vfrintrm_d
+ // vd, vj
+ // V2DF, V2DF
+ v2f64_r = __lsx_vfrintrm_d(v2f64_a); // CHECK: call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(
+
+ // __lsx_vstelm_b
+ // vd, rj, si8, idx
+ // VOID, V16QI, CVPOINTER, SI, UQI
+ __lsx_vstelm_b(v16i8_a, &v16i8_b, 0, idx4); // CHECK: call void @llvm.loongarch.lsx.vstelm.b(
+
+ // __lsx_vstelm_h
+ // vd, rj, si8, idx
+ // VOID, V8HI, CVPOINTER, SI, UQI
+ __lsx_vstelm_h(v8i16_a, &v8i16_b, 0, idx3); // CHECK: call void @llvm.loongarch.lsx.vstelm.h(
+
+ // __lsx_vstelm_w
+ // vd, rj, si8, idx
+ // VOID, V4SI, CVPOINTER, SI, UQI
+ __lsx_vstelm_w(v4i32_a, &v4i32_b, 0, idx2); // CHECK: call void @llvm.loongarch.lsx.vstelm.w(
+
+ // __lsx_vstelm_d
+ // vd, rj, si8, idx
+ // VOID, V2DI, CVPOINTER, SI, UQI
+ __lsx_vstelm_d(v2i64_a, &v2i64_b, 0, idx1); // CHECK: call void @llvm.loongarch.lsx.vstelm.d(
+
+ // __lsx_vaddwev_d_w
+ // vd, vj, vk
+ // V2DI, V4SI, V4SI
+ v2i64_r = __lsx_vaddwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(
+
+ // __lsx_vaddwev_w_h
+ // vd, vj, vk
+ // V4SI, V8HI, V8HI
+ v4i32_r = __lsx_vaddwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(
+
+ // __lsx_vaddwev_h_b
+ // vd, vj, vk
+ // V8HI, V16QI, V16QI
+ v8i16_r = __lsx_vaddwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(
+
+ // __lsx_vaddwod_d_w
+ // vd, vj, vk
+ // V2DI, V4SI, V4SI
+ v2i64_r = __lsx_vaddwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(
+
+ // __lsx_vaddwod_w_h
+ // vd, vj, vk
+ // V4SI, V8HI, V8HI
+ v4i32_r = __lsx_vaddwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(
+
+ // __lsx_vaddwod_h_b
+ // vd, vj, vk
+ // V8HI, V16QI, V16QI
+ v8i16_r = __lsx_vaddwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(
+
+ // __lsx_vaddwev_d_wu
+ // vd, vj, vk
+ // V2DI, UV4SI, UV4SI
+ v2i64_r = __lsx_vaddwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(
+
+ // __lsx_vaddwev_w_hu
+ // vd, vj, vk
+ // V4SI, UV8HI, UV8HI
+ v4i32_r = __lsx_vaddwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(
+
+ // __lsx_vaddwev_h_bu
+ // vd, vj, vk
+ // V8HI, UV16QI, UV16QI
+ v8i16_r = __lsx_vaddwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(
+
+ // __lsx_vaddwod_d_wu
+ // vd, vj, vk
+ // V2DI, UV4SI, UV4SI
+ v2i64_r = __lsx_vaddwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(
+
+ // __lsx_vaddwod_w_hu
+ // vd, vj, vk
+ // V4SI, UV8HI, UV8HI
+ v4i32_r = __lsx_vaddwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(
+
+ // __lsx_vaddwod_h_bu
+ // vd, vj, vk
+ // V8HI, UV16QI, UV16QI
+ v8i16_r = __lsx_vaddwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(
+
+ // __lsx_vaddwev_d_wu_w
+ // vd, vj, vk
+ // V2DI, UV4SI, V4SI
+ v2i64_r = __lsx_vaddwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(
+
+ // __lsx_vaddwev_w_hu_h
+ // vd, vj, vk
+ // V4SI, UV8HI, V8HI
+ v4i32_r = __lsx_vaddwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(
+
+ // __lsx_vaddwev_h_bu_b
+ // vd, vj, vk
+ // V8HI, UV16QI, V16QI
+ v8i16_r = __lsx_vaddwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(
+
+ // __lsx_vaddwod_d_wu_w
+ // vd, vj, vk
+ // V2DI, UV4SI, V4SI
+ v2i64_r = __lsx_vaddwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(
+
+ // __lsx_vaddwod_w_hu_h
+ // vd, vj, vk
+ // V4SI, UV8HI, V8HI
+ v4i32_r = __lsx_vaddwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(
+
+ // __lsx_vaddwod_h_bu_b
+ // vd, vj, vk
+ // V8HI, UV16QI, V16QI
+ v8i16_r = __lsx_vaddwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(
+
+ // __lsx_vsubwev_d_w
+ // vd, vj, vk
+ // V2DI, V4SI, V4SI
+ v2i64_r = __lsx_vsubwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(
+
+ // __lsx_vsubwev_w_h
+ // vd, vj, vk
+ // V4SI, V8HI, V8HI
+ v4i32_r = __lsx_vsubwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(
+
+ // __lsx_vsubwev_h_b
+ // vd, vj, vk
+ // V8HI, V16QI, V16QI
+ v8i16_r = __lsx_vsubwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(
+
+ // __lsx_vsubwod_d_w
+ // vd, vj, vk
+ // V2DI, V4SI, V4SI
+ v2i64_r = __lsx_vsubwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(
+
+ // __lsx_vsubwod_w_h
+ // vd, vj, vk
+ // V4SI, V8HI, V8HI
+ v4i32_r = __lsx_vsubwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(
+
+ // __lsx_vsubwod_h_b
+ // vd, vj, vk
+ // V8HI, V16QI, V16QI
+ v8i16_r = __lsx_vsubwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(
+
+ // __lsx_vsubwev_d_wu
+ // vd, vj, vk
+ // V2DI, UV4SI, UV4SI
+ v2i64_r = __lsx_vsubwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(
+
+ // __lsx_vsubwev_w_hu
+ // vd, vj, vk
+ // V4SI, UV8HI, UV8HI
+ v4i32_r = __lsx_vsubwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(
+
+ // __lsx_vsubwev_h_bu
+ // vd, vj, vk
+ // V8HI, UV16QI, UV16QI
+ v8i16_r = __lsx_vsubwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(
+
+ // __lsx_vsubwod_d_wu
+ // vd, vj, vk
+ // V2DI, UV4SI, UV4SI
+ v2i64_r = __lsx_vsubwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(
+
+ // __lsx_vsubwod_w_hu
+ // vd, vj, vk
+ // V4SI, UV8HI, UV8HI
+ v4i32_r = __lsx_vsubwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(
+
+ // __lsx_vsubwod_h_bu
+ // vd, vj, vk
+ // V8HI, UV16QI, UV16QI
+ v8i16_r = __lsx_vsubwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(
+
+ // __lsx_vaddwev_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vaddwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(
+
+ // __lsx_vaddwod_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vaddwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(
+
+ // __lsx_vaddwev_q_du
+ // vd, vj, vk
+ // V2DI, UV2DI, UV2DI
+ v2i64_r = __lsx_vaddwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(
+
+ // __lsx_vaddwod_q_du
+ // vd, vj, vk
+ // V2DI, UV2DI, UV2DI
+ v2i64_r = __lsx_vaddwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(
+
+ // __lsx_vsubwev_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vsubwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(
+
+ // __lsx_vsubwod_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vsubwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(
+
+ // __lsx_vsubwev_q_du
+ // vd, vj, vk
+ // V2DI, UV2DI, UV2DI
+ v2i64_r = __lsx_vsubwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(
+
+ // __lsx_vsubwod_q_du
+ // vd, vj, vk
+ // V2DI, UV2DI, UV2DI
+ v2i64_r = __lsx_vsubwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(
+
+ // __lsx_vaddwev_q_du_d
+ // vd, vj, vk
+ // V2DI, UV2DI, V2DI
+ v2i64_r = __lsx_vaddwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(
+
+ // __lsx_vaddwod_q_du_d
+ // vd, vj, vk
+ // V2DI, UV2DI, V2DI
+ v2i64_r = __lsx_vaddwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(
+
+ // __lsx_vmulwev_d_w
+ // vd, vj, vk
+ // V2DI, V4SI, V4SI
+ v2i64_r = __lsx_vmulwev_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(
+
+ // __lsx_vmulwev_w_h
+ // vd, vj, vk
+ // V4SI, V8HI, V8HI
+ v4i32_r = __lsx_vmulwev_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(
+
+ // __lsx_vmulwev_h_b
+ // vd, vj, vk
+ // V8HI, V16QI, V16QI
+ v8i16_r = __lsx_vmulwev_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(
+
+ // __lsx_vmulwod_d_w
+ // vd, vj, vk
+ // V2DI, V4SI, V4SI
+ v2i64_r = __lsx_vmulwod_d_w(v4i32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(
+
+ // __lsx_vmulwod_w_h
+ // vd, vj, vk
+ // V4SI, V8HI, V8HI
+ v4i32_r = __lsx_vmulwod_w_h(v8i16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(
+
+ // __lsx_vmulwod_h_b
+ // vd, vj, vk
+ // V8HI, V16QI, V16QI
+ v8i16_r = __lsx_vmulwod_h_b(v16i8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(
+
+ // __lsx_vmulwev_d_wu
+ // vd, vj, vk
+ // V2DI, UV4SI, UV4SI
+ v2i64_r = __lsx_vmulwev_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(
+
+ // __lsx_vmulwev_w_hu
+ // vd, vj, vk
+ // V4SI, UV8HI, UV8HI
+ v4i32_r = __lsx_vmulwev_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(
+
+ // __lsx_vmulwev_h_bu
+ // vd, vj, vk
+ // V8HI, UV16QI, UV16QI
+ v8i16_r = __lsx_vmulwev_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(
+
+ // __lsx_vmulwod_d_wu
+ // vd, vj, vk
+ // V2DI, UV4SI, UV4SI
+ v2i64_r = __lsx_vmulwod_d_wu(v4u32_a, v4u32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(
+
+ // __lsx_vmulwod_w_hu
+ // vd, vj, vk
+ // V4SI, UV8HI, UV8HI
+ v4i32_r = __lsx_vmulwod_w_hu(v8u16_a, v8u16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(
+
+ // __lsx_vmulwod_h_bu
+ // vd, vj, vk
+ // V8HI, UV16QI, UV16QI
+ v8i16_r = __lsx_vmulwod_h_bu(v16u8_a, v16u8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(
+
+ // __lsx_vmulwev_d_wu_w
+ // vd, vj, vk
+ // V2DI, UV4SI, V4SI
+ v2i64_r = __lsx_vmulwev_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(
+
+ // __lsx_vmulwev_w_hu_h
+ // vd, vj, vk
+ // V4SI, UV8HI, V8HI
+ v4i32_r = __lsx_vmulwev_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(
+
+ // __lsx_vmulwev_h_bu_b
+ // vd, vj, vk
+ // V8HI, UV16QI, V16QI
+ v8i16_r = __lsx_vmulwev_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(
+
+ // __lsx_vmulwod_d_wu_w
+ // vd, vj, vk
+ // V2DI, UV4SI, V4SI
+ v2i64_r = __lsx_vmulwod_d_wu_w(v4u32_a, v4i32_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(
+
+ // __lsx_vmulwod_w_hu_h
+ // vd, vj, vk
+ // V4SI, UV8HI, V8HI
+ v4i32_r = __lsx_vmulwod_w_hu_h(v8u16_a, v8i16_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(
+
+ // __lsx_vmulwod_h_bu_b
+ // vd, vj, vk
+ // V8HI, UV16QI, V16QI
+ v8i16_r = __lsx_vmulwod_h_bu_b(v16u8_a, v16i8_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(
+
+ // __lsx_vmulwev_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vmulwev_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(
+
+ // __lsx_vmulwod_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vmulwod_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(
+
+ // __lsx_vmulwev_q_du
+ // vd, vj, vk
+ // V2DI, UV2DI, UV2DI
+ v2i64_r = __lsx_vmulwev_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(
+
+ // __lsx_vmulwod_q_du
+ // vd, vj, vk
+ // V2DI, UV2DI, UV2DI
+ v2i64_r = __lsx_vmulwod_q_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(
+
+ // __lsx_vmulwev_q_du_d
+ // vd, vj, vk
+ // V2DI, UV2DI, V2DI
+ v2i64_r = __lsx_vmulwev_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(
+
+ // __lsx_vmulwod_q_du_d
+ // vd, vj, vk
+ // V2DI, UV2DI, V2DI
+ v2i64_r = __lsx_vmulwod_q_du_d(v2u64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(
+
+ // __lsx_vhaddw_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vhaddw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(
+
+ // __lsx_vhaddw_qu_du
+ // vd, vj, vk
+ // UV2DI, UV2DI, UV2DI
+ v2u64_r = __lsx_vhaddw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(
+
+ // __lsx_vhsubw_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vhsubw_q_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(
+
+ // __lsx_vhsubw_qu_du
+ // vd, vj, vk
+ // UV2DI, UV2DI, UV2DI
+ v2u64_r = __lsx_vhsubw_qu_du(v2u64_a, v2u64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(
+
+ // __lsx_vmaddwev_d_w
+ // vd, vj, vk
+ // V2DI, V2DI, V4SI, V4SI
+ v2i64_r = __lsx_vmaddwev_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(
+
+ // __lsx_vmaddwev_w_h
+ // vd, vj, vk
+ // V4SI, V4SI, V8HI, V8HI
+ v4i32_r = __lsx_vmaddwev_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(
+
+ // __lsx_vmaddwev_h_b
+ // vd, vj, vk
+ // V8HI, V8HI, V16QI, V16QI
+ v8i16_r = __lsx_vmaddwev_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(
+
+ // __lsx_vmaddwev_d_wu
+ // vd, vj, vk
+ // UV2DI, UV2DI, UV4SI, UV4SI
+ v2u64_r = __lsx_vmaddwev_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(
+
+ // __lsx_vmaddwev_w_hu
+ // vd, vj, vk
+ // UV4SI, UV4SI, UV8HI, UV8HI
+ v4u32_r = __lsx_vmaddwev_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(
+
+ // __lsx_vmaddwev_h_bu
+ // vd, vj, vk
+ // UV8HI, UV8HI, UV16QI, UV16QI
+ v8u16_r = __lsx_vmaddwev_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(
+
+ // __lsx_vmaddwod_d_w
+ // vd, vj, vk
+ // V2DI, V2DI, V4SI, V4SI
+ v2i64_r = __lsx_vmaddwod_d_w(v2i64_a, v4i32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(
+
+ // __lsx_vmaddwod_w_h
+ // vd, vj, vk
+ // V4SI, V4SI, V8HI, V8HI
+ v4i32_r = __lsx_vmaddwod_w_h(v4i32_a, v8i16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(
+
+ // __lsx_vmaddwod_h_b
+ // vd, vj, vk
+ // V8HI, V8HI, V16QI, V16QI
+ v8i16_r = __lsx_vmaddwod_h_b(v8i16_a, v16i8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(
+
+ // __lsx_vmaddwod_d_wu
+ // vd, vj, vk
+ // UV2DI, UV2DI, UV4SI, UV4SI
+ v2u64_r = __lsx_vmaddwod_d_wu(v2u64_a, v4u32_b, v4u32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(
+
+ // __lsx_vmaddwod_w_hu
+ // vd, vj, vk
+ // UV4SI, UV4SI, UV8HI, UV8HI
+ v4u32_r = __lsx_vmaddwod_w_hu(v4u32_a, v8u16_b, v8u16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(
+
+ // __lsx_vmaddwod_h_bu
+ // vd, vj, vk
+ // UV8HI, UV8HI, UV16QI, UV16QI
+ v8u16_r = __lsx_vmaddwod_h_bu(v8u16_a, v16u8_b, v16u8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(
+
+ // __lsx_vmaddwev_d_wu_w
+ // vd, vj, vk
+ // V2DI, V2DI, UV4SI, V4SI
+ v2i64_r = __lsx_vmaddwev_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(
+
+ // __lsx_vmaddwev_w_hu_h
+ // vd, vj, vk
+ // V4SI, V4SI, UV8HI, V8HI
+ v4i32_r = __lsx_vmaddwev_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(
+
+ // __lsx_vmaddwev_h_bu_b
+ // vd, vj, vk
+ // V8HI, V8HI, UV16QI, V16QI
+ v8i16_r = __lsx_vmaddwev_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(
+
+ // __lsx_vmaddwod_d_wu_w
+ // vd, vj, vk
+ // V2DI, V2DI, UV4SI, V4SI
+ v2i64_r = __lsx_vmaddwod_d_wu_w(v2i64_a, v4u32_b, v4i32_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(
+
+ // __lsx_vmaddwod_w_hu_h
+ // vd, vj, vk
+ // V4SI, V4SI, UV8HI, V8HI
+ v4i32_r = __lsx_vmaddwod_w_hu_h(v4i32_a, v8u16_b, v8i16_c); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(
+
+ // __lsx_vmaddwod_h_bu_b
+ // vd, vj, vk
+ // V8HI, V8HI, UV16QI, V16QI
+ v8i16_r = __lsx_vmaddwod_h_bu_b(v8i16_a, v16u8_b, v16i8_c); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(
+
+ // __lsx_vmaddwev_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vmaddwev_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(
+
+ // __lsx_vmaddwod_q_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vmaddwod_q_d(v2i64_a, v2i64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(
+
+ // __lsx_vmaddwev_q_du
+ // vd, vj, vk
+ // UV2DI, UV2DI, UV2DI, UV2DI
+ v2u64_r = __lsx_vmaddwev_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(
+
+ // __lsx_vmaddwod_q_du
+ // vd, vj, vk
+ // UV2DI, UV2DI, UV2DI, UV2DI
+ v2u64_r = __lsx_vmaddwod_q_du(v2u64_a, v2u64_b, v2u64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(
+
+ // __lsx_vmaddwev_q_du_d
+ // vd, vj, vk
+ // V2DI, V2DI, UV2DI, V2DI
+ v2i64_r = __lsx_vmaddwev_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(
+
+ // __lsx_vmaddwod_q_du_d
+ // vd, vj, vk
+ // V2DI, V2DI, UV2DI, V2DI
+ v2i64_r = __lsx_vmaddwod_q_du_d(v2i64_a, v2u64_b, v2i64_c); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(
+
+ // __lsx_vrotr_b
+ // vd, vj, vk
+ // V16QI, V16QI, V16QI
+ v16i8_r = __lsx_vrotr_b(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotr.b(
+
+ // __lsx_vrotr_h
+ // vd, vj, vk
+ // V8HI, V8HI, V8HI
+ v8i16_r = __lsx_vrotr_h(v8i16_a, v8i16_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotr.h(
+
+ // __lsx_vrotr_w
+ // vd, vj, vk
+ // V4SI, V4SI, V4SI
+ v4i32_r = __lsx_vrotr_w(v4i32_a, v4i32_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotr.w(
+
+ // __lsx_vrotr_d
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vrotr_d(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotr.d(
+
+ // __lsx_vadd_q
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vadd_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vadd.q(
+
+ // __lsx_vsub_q
+ // vd, vj, vk
+ // V2DI, V2DI, V2DI
+ v2i64_r = __lsx_vsub_q(v2i64_a, v2i64_b); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsub.q(
+
+ // __lsx_vldrepl_b
+ // vd, rj, si12
+ // V16QI, CVPOINTER, SI
+ v16i8_r = __lsx_vldrepl_b(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(
+
+ // __lsx_vldrepl_h
+ // vd, rj, si11
+ // V8HI, CVPOINTER, SI
+ v8i16_r = __lsx_vldrepl_h(&v8i16_a, si11); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(
+
+ // __lsx_vldrepl_w
+ // vd, rj, si10
+ // V4SI, CVPOINTER, SI
+ v4i32_r = __lsx_vldrepl_w(&v4i32_a, si10); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(
+
+ // __lsx_vldrepl_d
+ // vd, rj, si9
+ // V2DI, CVPOINTER, SI
+ v2i64_r = __lsx_vldrepl_d(&v2i64_a, si9); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(
+
+ // __lsx_vmskgez_b
+ // vd, vj
+ // V16QI, V16QI
+ v16i8_r = __lsx_vmskgez_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(
+
+ // __lsx_vmsknz_b
+ // vd, vj
+ // V16QI, V16QI
+ v16i8_r = __lsx_vmsknz_b(v16i8_a); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(
+
+ // __lsx_vexth_h_b
+ // vd, vj
+ // V8HI, V16QI
+ v8i16_r = __lsx_vexth_h_b(v16i8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(
+
+ // __lsx_vexth_w_h
+ // vd, vj
+ // V4SI, V8HI
+ v4i32_r = __lsx_vexth_w_h(v8i16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(
+
+ // __lsx_vexth_d_w
+ // vd, vj
+ // V2DI, V4SI
+ v2i64_r = __lsx_vexth_d_w(v4i32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(
+
+ // __lsx_vexth_q_d
+ // vd, vj
+ // V2DI, V2DI
+ v2i64_r = __lsx_vexth_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(
+
+ // __lsx_vexth_hu_bu
+ // vd, vj
+ // UV8HI, UV16QI
+ v8u16_r = __lsx_vexth_hu_bu(v16u8_a); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(
+
+ // __lsx_vexth_wu_hu
+ // vd, vj
+ // UV4SI, UV8HI
+ v4u32_r = __lsx_vexth_wu_hu(v8u16_a); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(
+
+ // __lsx_vexth_du_wu
+ // vd, vj
+ // UV2DI, UV4SI
+ v2u64_r = __lsx_vexth_du_wu(v4u32_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(
+
+ // __lsx_vexth_qu_du
+ // vd, vj
+ // UV2DI, UV2DI
+ v2u64_r = __lsx_vexth_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(
+
+ // __lsx_vrotri_b
+ // vd, vj, ui3
+ // V16QI, V16QI, UQI
+ v16i8_r = __lsx_vrotri_b(v16i8_a, ui3); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrotri.b(
+
+ // __lsx_vrotri_h
+ // vd, vj, ui4
+ // V8HI, V8HI, UQI
+ v8i16_r = __lsx_vrotri_h(v8i16_a, ui4); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrotri.h(
+
+ // __lsx_vrotri_w
+ // vd, vj, ui5
+ // V4SI, V4SI, UQI
+ v4i32_r = __lsx_vrotri_w(v4i32_a, ui5); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrotri.w(
+
+ // __lsx_vrotri_d
+ // vd, vj, ui6
+ // V2DI, V2DI, UQI
+ v2i64_r = __lsx_vrotri_d(v2i64_a, ui6); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrotri.d(
+
+ // __lsx_vextl_q_d
+ // vd, vj
+ // V2DI, V2DI
+ v2i64_r = __lsx_vextl_q_d(v2i64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(
+
+ // __lsx_vsrlni_b_h
+ // vd, vj, ui4
+ // V16QI, V16QI, V16QI, USI
+ v16i8_r = __lsx_vsrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(
+
+ // __lsx_vsrlni_h_w
+ // vd, vj, ui5
+ // V8HI, V8HI, V8HI, USI
+ v8i16_r = __lsx_vsrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(
+
+ // __lsx_vsrlni_w_d
+ // vd, vj, ui6
+ // V4SI, V4SI, V4SI, USI
+ v4i32_r = __lsx_vsrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(
+
+ // __lsx_vsrlni_d_q
+ // vd, vj, ui7
+ // V2DI, V2DI, V2DI, USI
+ v2i64_r = __lsx_vsrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(
+
+ // __lsx_vssrlni_b_h
+ // vd, vj, ui4
+ // V16QI, V16QI, V16QI, USI
+ v16i8_r = __lsx_vssrlni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(
+
+ // __lsx_vssrlni_h_w
+ // vd, vj, ui5
+ // V8HI, V8HI, V8HI, USI
+ v8i16_r = __lsx_vssrlni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(
+
+ // __lsx_vssrlni_w_d
+ // vd, vj, ui6
+ // V4SI, V4SI, V4SI, USI
+ v4i32_r = __lsx_vssrlni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(
+
+ // __lsx_vssrlni_d_q
+ // vd, vj, ui7
+ // V2DI, V2DI, V2DI, USI
+ v2i64_r = __lsx_vssrlni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(
+
+ // __lsx_vssrlni_bu_h
+ // vd, vj, ui4
+ // UV16QI, UV16QI, V16QI, USI
+ v16u8_r = __lsx_vssrlni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(
+
+ // __lsx_vssrlni_hu_w
+ // vd, vj, ui5
+ // UV8HI, UV8HI, V8HI, USI
+ v8u16_r = __lsx_vssrlni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(
+
+ // __lsx_vssrlni_wu_d
+ // vd, vj, ui6
+ // UV4SI, UV4SI, V4SI, USI
+ v4u32_r = __lsx_vssrlni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(
+
+ // __lsx_vssrlni_du_q
+ // vd, vj, ui7
+ // UV2DI, UV2DI, V2DI, USI
+ v2u64_r = __lsx_vssrlni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(
+
+ // __lsx_vssrlrni_b_h
+ // vd, vj, ui4
+ // V16QI, V16QI, V16QI, USI
+ v16i8_r = __lsx_vssrlrni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(
+
+ // __lsx_vssrlrni_h_w
+ // vd, vj, ui5
+ // V8HI, V8HI, V8HI, USI
+ v8i16_r = __lsx_vssrlrni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(
+
+ // __lsx_vssrlrni_w_d
+ // vd, vj, ui6
+ // V4SI, V4SI, V4SI, USI
+ v4i32_r = __lsx_vssrlrni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(
+
+ // __lsx_vssrlrni_d_q
+ // vd, vj, ui7
+ // V2DI, V2DI, V2DI, USI
+ v2i64_r = __lsx_vssrlrni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(
+
+ // __lsx_vssrlrni_bu_h
+ // vd, vj, ui4
+ // UV16QI, UV16QI, V16QI, USI
+ v16u8_r = __lsx_vssrlrni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(
+
+ // __lsx_vssrlrni_hu_w
+ // vd, vj, ui5
+ // UV8HI, UV8HI, V8HI, USI
@llvm.loongarch.lsx.vssrlrni.hu.w( + + // __lsx_vssrlrni_wu_d + // vd, vj, ui6 + // UV4SI, UV4SI, V4SI, USI + v4u32_r = __lsx_vssrlrni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d( + + // __lsx_vssrlrni_du_q + // vd, vj, ui7 + // UV2DI, UV2DI, V2DI, USI + v2u64_r = __lsx_vssrlrni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q( + + // __lsx_vsrani_b_h + // vd, vj, ui4 + // V16QI, V16QI, V16QI, USI + v16i8_r = __lsx_vsrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h( + + // __lsx_vsrani_h_w + // vd, vj, ui5 + // V8HI, V8HI, V8HI, USI + v8i16_r = __lsx_vsrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w( + + // __lsx_vsrani_w_d + // vd, vj, ui6 + // V4SI, V4SI, V4SI, USI + v4i32_r = __lsx_vsrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d( + + // __lsx_vsrani_d_q + // vd, vj, ui7 + // V2DI, V2DI, V2DI, USI + v2i64_r = __lsx_vsrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q( + + // __lsx_vsrarni_b_h + // vd, vj, ui4 + // V16QI, V16QI, V16QI, USI + v16i8_r = __lsx_vsrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h( + + // __lsx_vsrarni_h_w + // vd, vj, ui5 + // V8HI, V8HI, V8HI, USI + v8i16_r = __lsx_vsrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w( + + // __lsx_vsrarni_w_d + // vd, vj, ui6 + // V4SI, V4SI, V4SI, USI + v4i32_r = __lsx_vsrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d( + + // __lsx_vsrarni_d_q + // vd, vj, ui7 + // V2DI, V2DI, V2DI, USI + v2i64_r = __lsx_vsrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q( + + // __lsx_vssrani_b_h + // vd, vj, ui4 + // V16QI, V16QI, V16QI, USI + v16i8_r = __lsx_vssrani_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h( + + // __lsx_vssrani_h_w + // vd, vj, ui5 + // V8HI, V8HI, V8HI, USI + v8i16_r = __lsx_vssrani_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w( + + // __lsx_vssrani_w_d + // vd, vj, ui6 + // V4SI, V4SI, V4SI, USI + v4i32_r = __lsx_vssrani_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d( + + // __lsx_vssrani_d_q + // vd, vj, ui7 + // V2DI, V2DI, V2DI, USI + v2i64_r = __lsx_vssrani_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q( + + // __lsx_vssrani_bu_h + // vd, vj, ui4 + // UV16QI, UV16QI, V16QI, USI + v16u8_r = __lsx_vssrani_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h( + + // __lsx_vssrani_hu_w + // vd, vj, ui5 + // UV8HI, UV8HI, V8HI, USI + v8u16_r = __lsx_vssrani_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w( + + // __lsx_vssrani_wu_d + // vd, vj, ui6 + // UV4SI, UV4SI, V4SI, USI + v4u32_r = __lsx_vssrani_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d( + + // __lsx_vssrani_du_q + // vd, vj, ui7 + // UV2DI, UV2DI, V2DI, USI + v2u64_r = __lsx_vssrani_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q( + + // __lsx_vssrarni_b_h + // vd, vj, ui4 + // V16QI, V16QI, V16QI, USI + v16i8_r = __lsx_vssrarni_b_h(v16i8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h( + + // __lsx_vssrarni_h_w + // vd, vj, 
ui5 + // V8HI, V8HI, V8HI, USI + v8i16_r = __lsx_vssrarni_h_w(v8i16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w( + + // __lsx_vssrarni_w_d + // vd, vj, ui6 + // V4SI, V4SI, V4SI, USI + v4i32_r = __lsx_vssrarni_w_d(v4i32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d( + + // __lsx_vssrarni_d_q + // vd, vj, ui7 + // V2DI, V2DI, V2DI, USI + v2i64_r = __lsx_vssrarni_d_q(v2i64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q( + + // __lsx_vssrarni_bu_h + // vd, vj, ui4 + // UV16QI, UV16QI, V16QI, USI + v16u8_r = __lsx_vssrarni_bu_h(v16u8_a, v16i8_b, ui4); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h( + + // __lsx_vssrarni_hu_w + // vd, vj, ui5 + // UV8HI, UV8HI, V8HI, USI + v8u16_r = __lsx_vssrarni_hu_w(v8u16_a, v8i16_b, ui5); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w( + + // __lsx_vssrarni_wu_d + // vd, vj, ui6 + // UV4SI, UV4SI, V4SI, USI + v4u32_r = __lsx_vssrarni_wu_d(v4u32_a, v4i32_b, ui6); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d( + + // __lsx_vssrarni_du_q + // vd, vj, ui7 + // UV2DI, UV2DI, V2DI, USI + v2u64_r = __lsx_vssrarni_du_q(v2u64_a, v2i64_b, ui7); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q( + + // __lsx_vpermi_w + // vd, vj, ui8 + // V4SI, V4SI, V4SI, USI + v4i32_r = __lsx_vpermi_w(v4i32_a, v4i32_b, ui8); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vpermi.w( + + // __lsx_vld + // vd, rj, si12 + // V16QI, CVPOINTER, SI + v16i8_r = __lsx_vld(&v16i8_a, si12); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vld( + + // __lsx_vst + // vd, rj, si12 + // VOID, V16QI, CVPOINTER, SI + __lsx_vst(v16i8_a, &v16i8_b, 0); // CHECK: call void @llvm.loongarch.lsx.vst( + + // __lsx_vssrlrn_b_h + // vd, vj, vk + // V16QI, V8HI, V8HI + v16i8_r = __lsx_vssrlrn_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h( + + // __lsx_vssrlrn_h_w + // vd, vj, vk + // V8HI, V4SI, V4SI + v8i16_r = __lsx_vssrlrn_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w( + + // __lsx_vssrlrn_w_d + // vd, vj, vk + // V4SI, V2DI, V2DI + v4i32_r = __lsx_vssrlrn_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d( + + // __lsx_vssrln_b_h + // vd, vj, vk + // V16QI, V8HI, V8HI + v16i8_r = __lsx_vssrln_b_h(v8i16_a, v8i16_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h( + + // __lsx_vssrln_h_w + // vd, vj, vk + // V8HI, V4SI, V4SI + v8i16_r = __lsx_vssrln_h_w(v4i32_a, v4i32_b); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w( + + // __lsx_vssrln_w_d + // vd, vj, vk + // V4SI, V2DI, V2DI + v4i32_r = __lsx_vssrln_w_d(v2i64_a, v2i64_b); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d( + + // __lsx_vorn_v + // vd, vj, vk + // V16QI, V16QI, V16QI + v16i8_r = __lsx_vorn_v(v16i8_a, v16i8_b); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vorn.v( + + // __lsx_vldi + // vd, i13 + // V2DI, HI + v2i64_r = __lsx_vldi(i13); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vldi( + + // __lsx_vshuf_b + // vd, vj, vk, va + // V16QI, V16QI, V16QI, V16QI + v16i8_r = __lsx_vshuf_b(v16i8_a, v16i8_b, v16i8_c); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vshuf.b( + + // __lsx_vldx + // vd, rj, rk + // V16QI, CVPOINTER, DI + v16i8_r = __lsx_vldx(&v16i8_a, i64_d); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vldx( + + // __lsx_vstx + // vd, rj, rk + // VOID, V16QI, CVPOINTER, DI + __lsx_vstx(v16i8_a, &v16i8_b, i64_d); // CHECK: call void @llvm.loongarch.lsx.vstx( + + // __lsx_vextl_qu_du + // 
vd, vj + // UV2DI, UV2DI + v2u64_r = __lsx_vextl_qu_du(v2u64_a); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du( + + // __lsx_bnz_v + // rd, vj + // SI, UV16QI + i32_r = __lsx_bnz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.v( + + // __lsx_bz_v + // rd, vj + // SI, UV16QI + i32_r = __lsx_bz_v(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.v( + + // __lsx_bnz_b + // rd, vj + // SI, UV16QI + i32_r = __lsx_bnz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.b( + + // __lsx_bnz_h + // rd, vj + // SI, UV8HI + i32_r = __lsx_bnz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.h( + + // __lsx_bnz_w + // rd, vj + // SI, UV4SI + i32_r = __lsx_bnz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.w( + + // __lsx_bnz_d + // rd, vj + // SI, UV2DI + i32_r = __lsx_bnz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bnz.d( + + // __lsx_bz_b + // rd, vj + // SI, UV16QI + i32_r = __lsx_bz_b(v16u8_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.b( + + // __lsx_bz_h + // rd, vj + // SI, UV8HI + i32_r = __lsx_bz_h(v8u16_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.h( + + // __lsx_bz_w + // rd, vj + // SI, UV4SI + i32_r = __lsx_bz_w(v4u32_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.w( + + // __lsx_bz_d + // rd, vj + // SI, UV2DI + i32_r = __lsx_bz_d(v2u64_a); // CHECK: call i32 @llvm.loongarch.lsx.bz.d( + + v16i8_r = __lsx_vsrlrni_b_h(v16i8_a, v16i8_b, 2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h( + + v8i16_r = __lsx_vsrlrni_h_w(v8i16_a, v8i16_b, 2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w( + + v4i32_r = __lsx_vsrlrni_w_d(v4i32_a, v4i32_b, 2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d( + + v2i64_r = __lsx_vsrlrni_d_q(v2i64_a, v2i64_b, 2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q( + + v16i8_r = __lsx_vrepli_b(2); // CHECK: call <16 x i8> @llvm.loongarch.lsx.vrepli.b( + + v8i16_r = __lsx_vrepli_h(2); // CHECK: call <8 x i16> @llvm.loongarch.lsx.vrepli.h( + + v4i32_r = __lsx_vrepli_w(2); // CHECK: call <4 x i32> @llvm.loongarch.lsx.vrepli.w( + + v2i64_r = __lsx_vrepli_d(2); // CHECK: call <2 x i64> @llvm.loongarch.lsx.vrepli.d( +} diff --git a/clang/test/CodeGen/loongarch-inline-asm-modifiers.c b/clang/test/CodeGen/loongarch-inline-asm-modifiers.c new file mode 100644 index 000000000000..412eca2bdde8 --- /dev/null +++ b/clang/test/CodeGen/loongarch-inline-asm-modifiers.c @@ -0,0 +1,50 @@ +// RUN: %clang -target loongarch64-unknown-linux-gnu -S -o - -emit-llvm %s \ +// RUN: | FileCheck %s + +// This checks that the frontend will accept inline asm operand modifiers + +int printf(const char*, ...); + +typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); +typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); + +// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,$1;\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4)) +// CHECK: %{{[0-9]+}} = call i32 asm "ld.w $0,${1:D};\0A", "=r,*m"(ptr elementtype(i32) getelementptr inbounds (i32, ptr @b, i64 4)) +// CHECK: %{{[0-9]+}} = call <2 x i64> asm "vldi ${0:w},1", "=f" +// CHECK: %{{[0-9]+}} = call <4 x i64> asm "xldi ${0:u},1", "=f" +int b[8] = {0,1,2,3,4,5,6,7}; +int main() +{ + int i; + v2i64 v2i64_r; + v4i64 v4i64_r; + + // The first word. 
Notice, no 'D' + {asm ( + "ld.w %0,%1;\n" + : "=r" (i) + : "m" (*(b+4)));} + + printf("%d\n",i); + + // The second word + {asm ( + "ld.w %0,%D1;\n" + : "=r" (i) + : "m" (*(b+4)) + );} + + // LSX registers + { asm("vldi %w0,1" + : "=f"(v2i64_r)); } + + printf("%d\n", i); + + // LASX registers + { asm("xldi %u0,1" + : "=f"(v4i64_r)); } + + printf("%d\n",i); + + return 1; +} diff --git a/clang/test/CodeGen/loongarch-inline-asm.c b/clang/test/CodeGen/loongarch-inline-asm.c new file mode 100644 index 000000000000..1f995ac792bd --- /dev/null +++ b/clang/test/CodeGen/loongarch-inline-asm.c @@ -0,0 +1,31 @@ +// REQUIRES: loongarch-registered-target +// RUN: %clang_cc1 -triple loongarch64-linux-gnu -emit-llvm -o - %s | FileCheck %s + +int data; + +void m () { + asm("ld.w $r1, %0" :: "m"(data)); + // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*m"(ptr elementtype(i32) @data) +} + +void ZC () { + asm("ll.w $r1, %0" :: "ZC"(data)); + // CHECK: call void asm sideeffect "ll.w $$r1, $0", "*^ZC"(ptr elementtype(i32) @data) +} + +void ZB () { + asm("amadd_db.w $zero, $r1, %0" :: "ZB"(data)); + // CHECK: call void asm sideeffect "amadd_db.w $$zero, $$r1, $0", "*^ZB"(ptr elementtype(i32) @data) +} + +void R () { + asm("ld.w $r1, %0" :: "R"(data)); + // CHECK: call void asm sideeffect "ld.w $$r1, $0", "*R"(ptr elementtype(i32) @data) +} + +int *p; +void preld () { + asm("preld 0, %0, 2" :: "r"(p)); + // CHECK: %0 = load ptr, ptr @p, align 8 + // CHECK: call void asm sideeffect "preld 0, $0, 2", "r"(ptr %0) +} diff --git a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91269.cpp b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91269.cpp new file mode 100644 index 000000000000..289d7a430bd4 --- /dev/null +++ b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91269.cpp @@ -0,0 +1,35 @@ +/// Ported from https://reviews.llvm.org/D91269. + +// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 -emit-llvm %s -o - | FileCheck %s + +struct empty_float2 { struct {}; float f; float g; }; + +// CHECK: define{{.*}} float @_Z14f_empty_float212empty_float2(float{{[^,]*}}, float{{[^,]*}}) +// CHECK: { [4 x i8], float, float } +float f_empty_float2(struct empty_float2 a) { + return a.g; +} + +struct empty_double2 { struct {}; double f; double g; }; + +// CHECK: define{{.*}} double @_Z15f_empty_double213empty_double2(double{{[^,]*}}, double{{[^,]*}}) +// CHECK: { [8 x i8], double, double } +double f_empty_double2(struct empty_double2 a) { + return a.g; +} + +struct empty_float_double { struct {}; float f; double g; }; + +// CHECK: define{{.*}} double @_Z20f_empty_float_double18empty_float_double(float{{[^,]*}}, double{{[^,]*}}) +// CHECK: { [4 x i8], float, double } +double f_empty_float_double(struct empty_float_double a) { + return a.g; +} + +struct empty_double_float { struct {}; double f; float g; }; + +// CHECK: define{{.*}} double @_Z20f_empty_double_float18empty_double_float(double{{[^,]*}}, float{{[^,]*}}) +// CHECK: { [8 x i8], double, float } +double f_empty_double_float(struct empty_double_float a) { + return a.g; +} diff --git a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91278.cpp b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91278.cpp new file mode 100644 index 000000000000..4934fe018dc6 --- /dev/null +++ b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-D91278.cpp @@ -0,0 +1,26 @@ +/// Ported from https://reviews.llvm.org/D91278. 
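+/// Illustrative sketch (an editor's assumption, not part of the ported test): +/// under the lp64 hard-float ABI exercised here, the leading empty struct is +/// ignored for argument passing, so a struct whose only non-empty member is a +/// _Complex float travels like two scalar floats. A hypothetical caller: +/// float use_empty_complex_f(void) { +/// struct empty_complex_f x = {{}, 1.0f}; +/// return f_empty_complex_f(x); // real/imag parts arrive in $fa0/$fa1 +/// }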
+ +// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +struct empty_complex_f { + struct {}; + float _Complex fc; +}; + +// CHECK: define{{.*}} float @_Z17f_empty_complex_f15empty_complex_f(float{{[^,]*}}, float{{[^,]*}}) +// CHECK: { [4 x i8], float, float } +float f_empty_complex_f(struct empty_complex_f a) { + return __imag__ a.fc; +} + +struct empty_complex_d { + struct {}; + double _Complex fc; +}; + +// CHECK: define{{.*}} double @_Z17f_empty_complex_d15empty_complex_d(double{{[^,]*}}, double{{[^,]*}}) +// CHECK: { [8 x i8], double, double } +double f_empty_complex_d(struct empty_complex_d a) { + return __imag__ a.fc; +} diff --git a/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp new file mode 100644 index 000000000000..dc5ffaf089c9 --- /dev/null +++ b/clang/test/CodeGenCXX/LoongArch/abi-lp64d-struct-inherit.cpp @@ -0,0 +1,95 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-abi lp64 \ +// RUN: -emit-llvm %s -o - | FileCheck %s + +#include <stdint.h> + +/// Ensure that fields inherited from a parent struct are treated in the same +/// way as fields directly in the child for the purposes of LoongArch ABI rules. + +struct parent1_int32_s { + int32_t i1; +}; + +struct child1_int32_s : parent1_int32_s { + int32_t i2; +}; + +// CHECK-LABEL: define{{.*}} i64 @_Z30int32_int32_struct_inheritance14child1_int32_s(i64 %a.coerce) +struct child1_int32_s int32_int32_struct_inheritance(struct child1_int32_s a) { + return a; +} + +struct parent2_int32_s { + int32_t i1; +}; + +struct child2_float_s : parent2_int32_s { + float f1; +}; + +// CHECK-LABEL: define{{.*}} { i32, float } @_Z30int32_float_struct_inheritance14child2_float_s(i32 %0, float %1) +struct child2_float_s int32_float_struct_inheritance(struct child2_float_s a) { + return a; +} + +struct parent3_float_s { + float f1; +}; + +struct child3_int64_s : parent3_float_s { + int64_t i1; +}; + +// CHECK-LABEL: define{{.*}} { float, i64 } @_Z30float_int64_struct_inheritance14child3_int64_s(float %0, i64 %1) +struct child3_int64_s float_int64_struct_inheritance(struct child3_int64_s a) { + return a; +} + +struct parent4_double_s { + double d1; +}; + +struct child4_double_s : parent4_double_s { + double d1; +}; + +// CHECK-LABEL: define{{.*}} { double, double } @_Z32double_double_struct_inheritance15child4_double_s(double %0, double %1) +struct child4_double_s double_double_struct_inheritance(struct child4_double_s a) { + return a; +} + +/// When virtual inheritance is used, the resulting struct isn't eligible for +/// passing in registers. + +struct parent5_virtual_s { + int32_t i1; +}; + +struct child5_virtual_s : virtual parent5_virtual_s { + float f1; +}; + +// CHECK-LABEL: define{{.*}} void @_ZN16child5_virtual_sC1EOS_(ptr{{.*}} %this, ptr{{.*}} dereferenceable(12) %0) +struct child5_virtual_s int32_float_virtual_struct_inheritance(struct child5_virtual_s a) { + return a; +} + +/// Check for correct lowering in the presence of diamond inheritance.
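+/// (Expository note added by the editor, an assumption rather than part of +/// the original test: grandchild_6_s below inherits parent6_float_s through +/// two non-virtual paths, so for ABI purposes it flattens to the equivalent of +/// struct { float f1_via_child6a; float f1_via_child6b; }; +/// and is passed and returned as { float, float }, matching the CHECK line.)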
+ +struct parent6_float_s { + float f1; +}; + +struct child6a_s : parent6_float_s { +}; + +struct child6b_s : parent6_float_s { +}; + +struct grandchild_6_s : child6a_s, child6b_s { +}; + +// CHECK-LABEL: define{{.*}} { float, float } @_Z38float_float_diamond_struct_inheritance14grandchild_6_s(float %0, float %1) +struct grandchild_6_s float_float_diamond_struct_inheritance(struct grandchild_6_s a) { + return a; +} diff --git a/clang/test/Driver/baremetal.cpp b/clang/test/Driver/baremetal.cpp index 7c11fe67155a..56eb5b708020 100644 --- a/clang/test/Driver/baremetal.cpp +++ b/clang/test/Driver/baremetal.cpp @@ -105,7 +105,7 @@ // CHECK-SYSROOT-INC-NOT: "-internal-isystem" "include" // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ -// RUN: -target aarch64-none-elf \ +// RUN: -target aarch64-none-elf --sysroot= \ // RUN: | FileCheck --check-prefix=CHECK-AARCH64-NO-HOST-INC %s // Verify that the bare metal driver does not include any host system paths: // CHECK-AARCH64-NO-HOST-INC: InstalledDir: [[INSTALLEDDIR:.+]] diff --git a/clang/test/Driver/hexagon-toolchain-linux.c b/clang/test/Driver/hexagon-toolchain-linux.c index 05ae1733992d..986c2dd616e1 100644 --- a/clang/test/Driver/hexagon-toolchain-linux.c +++ b/clang/test/Driver/hexagon-toolchain-linux.c @@ -100,7 +100,7 @@ // ----------------------------------------------------------------------------- // internal-isystem for linux with and without musl // ----------------------------------------------------------------------------- -// RUN: %clang -### -target hexagon-unknown-linux-musl \ +// RUN: %clang -### -target hexagon-unknown-linux-musl --sysroot= \ // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: %s 2>&1 \ @@ -110,7 +110,7 @@ // CHECK008-SAME: {{^}} "-internal-isystem" "[[RESOURCE]]/include" // CHECK008-SAME: {{^}} "-internal-externc-isystem" "[[INSTALLED_DIR]]/../target/hexagon/include" -// RUN: %clang -### -target hexagon-unknown-linux \ +// RUN: %clang -### -target hexagon-unknown-linux --sysroot= \ // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: %s 2>&1 \ diff --git a/clang/test/Driver/loongarch-alignment-feature.c b/clang/test/Driver/loongarch-alignment-feature.c new file mode 100644 index 000000000000..2270ff536250 --- /dev/null +++ b/clang/test/Driver/loongarch-alignment-feature.c @@ -0,0 +1,8 @@ +// RUN: %clang -target loongarch64-unknown-linux-gnu -mno-strict-align -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-UNALIGNED < %t %s + +// RUN: %clang -target loongarch64-unknown-linux-gnu -mstrict-align -### %s 2> %t +// RUN: FileCheck --check-prefix=CHECK-ALIGNED < %t %s + +// CHECK-UNALIGNED: "-target-feature" "+unaligned-access" +// CHECK-ALIGNED: "-target-feature" "-unaligned-access" diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c new file mode 100644 index 000000000000..196862229e82 --- /dev/null +++ b/clang/test/Driver/loongarch-march.c @@ -0,0 +1,15 @@ +/// This test checks the CPU models supported by LoongArch.
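+/// For example, a hypothetical invocation mirroring the RUN lines below: +/// clang --target=loongarch64 -march=la464 -### hello.c +/// is expected to pass "-target-cpu" "la464" and "-target-abi" "lp64" to cc1.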
+ +// RUN: %clang --target=loongarch64 -march=la264 -emit-llvm -### %s 2> %t +// RUN: FileCheck --check-prefix=LA264 < %t %s +// RUN: %clang --target=loongarch64 -march=la364 -emit-llvm -### %s 2> %t +// RUN: FileCheck --check-prefix=LA364 < %t %s +// RUN: %clang --target=loongarch64 -march=la464 -emit-llvm -### %s 2> %t +// RUN: FileCheck --check-prefix=LA464 < %t %s +// RUN: %clang --target=loongarch64 -march=xxx -emit-llvm -### %s 2> %t +// RUN: FileCheck --check-prefix=INVALID < %t %s + +// LA264: "-target-cpu" "la264"{{.*}}"-target-abi" "lp64" +// LA364: "-target-cpu" "la364"{{.*}}"-target-abi" "lp64" +// LA464: "-target-cpu" "la464"{{.*}}"-target-abi" "lp64" +// INVALID: error: unknown target CPU 'xxx' diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c new file mode 100644 index 000000000000..a32853d17f80 --- /dev/null +++ b/clang/test/Preprocessor/init-loongarch.c @@ -0,0 +1,10 @@ +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | grep loongarch | FileCheck %s + +// CHECK: #define __loongarch64 1 +// CHECK-NEXT: #define __loongarch__ 1 +// CHECK-NEXT: #define __loongarch_double_float 1 +// CHECK-NEXT: #define __loongarch_fpr 64 +// CHECK-NEXT: #define __loongarch_frlen 64 +// CHECK-NEXT: #define __loongarch_grlen 64 +// CHECK-NEXT: #define __loongarch_hard_float 1 +// CHECK-NEXT: #define __loongarch_lp64 1 diff --git a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp index 75928d912dd4..3350ee3fe3d1 100644 --- a/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp +++ b/clang/unittests/Interpreter/ExceptionTests/InterpreterExceptionTest.cpp @@ -104,6 +104,11 @@ extern "C" int throw_exception() { if (Triple.isPPC()) return; + // FIXME: LoongArch64 fails due to `Symbols not found: + // [DW.ref.__gxx_personality_v0]` + if (Triple.isLoongArch64()) + return; + // FIXME: ARM fails due to `Not implemented relocation type!` if (Triple.isARM()) return; diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index 4612e4d8b9af..8c3ef42b98bb 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -16,6 +16,7 @@ set(SPARCV9 sparcv9) set(WASM32 wasm32) set(WASM64 wasm64) set(VE ve) +set(LOONGARCH64 loongarch64) if(APPLE) set(ARM64 arm64) @@ -29,7 +30,7 @@ set(ALL_SANITIZER_COMMON_SUPPORTED_ARCH ${X86} ${X86_64} ${PPC64} ${RISCV64} set(ALL_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64} ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${LOONGARCH64}) -set(ALL_DFSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64}) +set(ALL_DFSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${LOONGARCH64}) if(ANDROID) set(OS_NAME "Android") @@ -38,11 +39,11 @@ else() endif() if(OS_NAME MATCHES "Linux") - set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${S390X}) + set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${S390X} ${LOONGARCH64}) elseif (OS_NAME MATCHES "Windows") set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64}) elseif(OS_NAME MATCHES "Android|OHOS") - set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}) + set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${LOONGARCH64}) else() set(ALL_FUZZER_SUPPORTED_ARCH ${X86_64} ${ARM64}) endif() @@ -52,30 +53,30 @@ if(APPLE) set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64}) else() set(ALL_LSAN_SUPPORTED_ARCH
${X86} ${X86_64} ${MIPS64} ${ARM64} ${ARM32} - ${PPC64} ${S390X} ${RISCV64} ${HEXAGON}) + ${PPC64} ${S390X} ${RISCV64} ${HEXAGON} ${LOONGARCH64}) endif() -set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}) +set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X} ${LOONGARCH64}) set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64}) set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64}) set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64} ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} - ${RISCV32} ${RISCV64}) -set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}) + ${RISCV32} ${RISCV64} ${LOONGARCH64}) +set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X} ${LOONGARCH64}) set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64} - ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}) + ${MIPS32} ${MIPS64} ${PPC64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${LOONGARCH64}) set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64} - ${HEXAGON}) + ${HEXAGON} ${LOONGARCH64}) set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS64} - ${HEXAGON}) + ${HEXAGON} ${LOONGARCH64}) set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS32} - ${MIPS64} ${PPC64} ${HEXAGON}) + ${MIPS64} ${PPC64} ${HEXAGON} ${LOONGARCH64}) set(ALL_SCUDO_STANDALONE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} ${PPC64} ${HEXAGON}) if(APPLE) set(ALL_XRAY_SUPPORTED_ARCH ${X86_64}) else() set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} - powerpc64le ${HEXAGON}) + powerpc64le ${HEXAGON} ${LOONGARCH64}) endif() set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64}) diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake index 8675f5689b99..6478977f0d16 100644 --- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake @@ -151,6 +151,7 @@ macro(detect_target_arch) check_symbol_exists(__aarch64__ "" __AARCH64) check_symbol_exists(__x86_64__ "" __X86_64) check_symbol_exists(__i386__ "" __I386) + check_symbol_exists(__loongarch__ "" __LOONGARCH) check_symbol_exists(__mips__ "" __MIPS) check_symbol_exists(__mips64__ "" __MIPS64) check_symbol_exists(__powerpc__ "" __PPC) @@ -179,6 +180,14 @@ macro(detect_target_arch) endif() elseif(__I386) add_default_target_arch(i386) + elseif(__LOONGARCH) + if(CMAKE_SIZEOF_VOID_P EQUAL "4") + add_default_target_arch(loongarch32) + elseif(CMAKE_SIZEOF_VOID_P EQUAL "8") + add_default_target_arch(loongarch64) + else() + message(FATAL_ERROR "Unsupported pointer size for LoongArch") + endif() elseif(__MIPS64) # must be checked before __MIPS add_default_target_arch(mips64) elseif(__MIPS) diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake index f61b962bdb24..462b0be4cfb5 100644 --- a/compiler-rt/cmake/base-config-ix.cmake +++ b/compiler-rt/cmake/base-config-ix.cmake @@ -208,6 +208,8 @@ macro(test_targets) test_target_arch(x86_64 "" "") endif() endif() + elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64") + test_target_arch(loongarch64 "" "") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le|ppc64le") test_target_arch(powerpc64le "" "-m64") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc") @@ -253,6 +255,8 @@ macro(test_targets) test_target_arch(wasm64 "" 
"--target=wasm64-unknown-unknown") elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "ve") test_target_arch(ve "__ve__" "--target=ve-unknown-none") + elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64") + test_target_arch(loongarch64 "" "") endif() set(COMPILER_RT_OS_SUFFIX "") endif() diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index 439abc713bad..dfa213597a86 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -61,6 +61,7 @@ set(SPARCV9 sparcv9) set(WASM32 wasm32) set(WASM64 wasm64) set(VE ve) +set(LOONGARCH64 loongarch64) if(APPLE) set(ARM64 arm64 arm64e) @@ -72,7 +73,7 @@ set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR} ${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64} ${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9} - ${WASM32} ${WASM64} ${VE}) + ${WASM32} ${WASM64} ${VE} ${LOONGARCH64}) include(CompilerRTUtils) include(CompilerRTDarwinUtils) diff --git a/compiler-rt/cmake/crt-config-ix.cmake b/compiler-rt/cmake/crt-config-ix.cmake index f737e4eba822..01b1fec9128c 100644 --- a/compiler-rt/cmake/crt-config-ix.cmake +++ b/compiler-rt/cmake/crt-config-ix.cmake @@ -28,9 +28,10 @@ set(PPC64 powerpc64 powerpc64le) set(RISCV32 riscv32) set(RISCV64 riscv64) set(VE ve) +set(LOONGARCH64 loongarch64) set(ALL_CRT_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} - ${PPC64} ${MIPS32} ${RISCV32} ${RISCV64} ${VE} ${HEXAGON}) + ${PPC64} ${MIPS32} ${RISCV32} ${RISCV64} ${VE} ${HEXAGON} ${LOONGARCH64}) include(CompilerRTUtils) diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index 0a1a4a5535df..6f522e0709c8 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -39,7 +39,7 @@ # if defined(__i386) && SANITIZER_LINUX # define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1" -# elif defined(__mips__) && SANITIZER_LINUX +# elif (defined(__mips__) || defined(__loongarch__)) && SANITIZER_LINUX # define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.2" # endif diff --git a/compiler-rt/lib/asan/asan_mapping.h b/compiler-rt/lib/asan/asan_mapping.h index aeadb9d94ebd..2dd6cb54881b 100644 --- a/compiler-rt/lib/asan/asan_mapping.h +++ b/compiler-rt/lib/asan/asan_mapping.h @@ -167,6 +167,8 @@ # define ASAN_SHADOW_OFFSET_DYNAMIC # elif defined(__mips__) # define ASAN_SHADOW_OFFSET_CONST 0x0aaa0000 +# elif defined(__loongarch__) +# define ASAN_SHADOW_OFFSET_CONST 0x0aaa0000 # elif SANITIZER_FREEBSD # define ASAN_SHADOW_OFFSET_CONST 0x40000000 # elif SANITIZER_NETBSD @@ -201,6 +203,8 @@ # define ASAN_SHADOW_OFFSET_CONST 0x0000100000000000 # elif defined(__mips64) # define ASAN_SHADOW_OFFSET_CONST 0x0000002000000000 +# elif defined(__loongarch64) +# define ASAN_SHADOW_OFFSET_CONST 0x0000002000000000 # elif defined(__sparc__) # define ASAN_SHADOW_OFFSET_CONST 0x0000080000000000 # elif SANITIZER_LOONGARCH64 diff --git a/compiler-rt/lib/asan/tests/asan_test.cpp b/compiler-rt/lib/asan/tests/asan_test.cpp index e392ea90dbc2..ec8d6b9d1b21 100644 --- a/compiler-rt/lib/asan/tests/asan_test.cpp +++ b/compiler-rt/lib/asan/tests/asan_test.cpp @@ -621,9 +621,9 @@ NOINLINE void SigLongJmpFunc1(sigjmp_buf buf) { siglongjmp(buf, 1); } -#if !defined(__ANDROID__) && !defined(__arm__) && !defined(__aarch64__) && \ - !defined(__mips__) && !defined(__mips64) && !defined(__s390__) && \ - !defined(__riscv) +# if !defined(__ANDROID__) && !defined(__arm__) && !defined(__aarch64__) && \ + !defined(__mips__) && 
!defined(__mips64) && !defined(__s390__) && \ + !defined(__riscv) && !defined(__loongarch__) NOINLINE void BuiltinLongJmpFunc1(jmp_buf buf) { // create three red zones for these two stack objects. int a; @@ -645,10 +645,10 @@ TEST(AddressSanitizer, BuiltinLongJmpTest) { TouchStackFunc(); } } -#endif // !defined(__ANDROID__) && !defined(__arm__) && - // !defined(__aarch64__) && !defined(__mips__) - // !defined(__mips64) && !defined(__s390__) - // !defined(__riscv) +# endif // !defined(__ANDROID__) && !defined(__arm__) && + // !defined(__aarch64__) && !defined(__mips__) + // !defined(__mips64) && !defined(__s390__) + // !defined(__riscv) && !defined(__loongarch__) TEST(AddressSanitizer, UnderscopeLongJmpTest) { static jmp_buf buf; diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index ec668e294d6d..7c924754511d 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -14,7 +14,7 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(LLVM_COMMON_CMAKE_UTILS "${COMPILER_RT_SOURCE_DIR}/../cmake") - # Add path for custom modules +# Add path for custom modules list(INSERT CMAKE_MODULE_PATH 0 "${COMPILER_RT_SOURCE_DIR}/cmake" "${COMPILER_RT_SOURCE_DIR}/cmake/Modules" @@ -627,6 +627,14 @@ set(mips64_SOURCES ${GENERIC_TF_SOURCES} ${mips_SOURCES}) set(mips64el_SOURCES ${GENERIC_TF_SOURCES} ${mips_SOURCES}) +set(loongarch_SOURCES + loongarch/fp_mode.c + ${GENERIC_SOURCES} + ${GENERIC_TF_SOURCES} +) +set(loongarch64_SOURCES + ${loongarch_SOURCES} +) set(powerpc_SOURCES ${GENERIC_SOURCES}) diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index 9816940b504a..bcc5922e073b 100644 --- a/compiler-rt/lib/builtins/clear_cache.c +++ b/compiler-rt/lib/builtins/clear_cache.c @@ -91,6 +91,8 @@ void __clear_cache(void *start, void *end) { #else compilerrt_abort(); #endif +#elif defined(__linux__) && defined(__loongarch__) + __asm__ volatile("ibar 0"); #elif defined(__linux__) && defined(__mips__) const uintptr_t start_int = (uintptr_t)start; const uintptr_t end_int = (uintptr_t)end; diff --git a/compiler-rt/lib/builtins/loongarch/fp_mode.c b/compiler-rt/lib/builtins/loongarch/fp_mode.c new file mode 100644 index 000000000000..31877fb02bd5 --- /dev/null +++ b/compiler-rt/lib/builtins/loongarch/fp_mode.c @@ -0,0 +1,59 @@ +//=== lib/builtins/loongarch/fp_mode.c - Floating-point mode utilities -*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "../fp_mode.h" + +#define LOONGARCH_TONEAREST 0x0000 +#define LOONGARCH_TOWARDZERO 0x0100 +#define LOONGARCH_UPWARD 0x0200 +#define LOONGARCH_DOWNWARD 0x0300 + +#define LOONGARCH_RMODE_MASK (LOONGARCH_TONEAREST | LOONGARCH_TOWARDZERO | \ + LOONGARCH_UPWARD | LOONGARCH_DOWNWARD) + +#define LOONGARCH_INEXACT 0x10000 + +CRT_FE_ROUND_MODE __fe_getround(void) { +#if __loongarch_frlen != 0 + int fcsr; +# ifdef __clang__ + __asm__ __volatile__("movfcsr2gr %0, $fcsr0" : "=r" (fcsr)); +# else + __asm__ __volatile__("movfcsr2gr %0, $r0" : "=r" (fcsr)); +# endif + fcsr &= LOONGARCH_RMODE_MASK; + switch (fcsr) { + case LOONGARCH_TOWARDZERO: + return CRT_FE_TOWARDZERO; + case LOONGARCH_DOWNWARD: + return CRT_FE_DOWNWARD; + case LOONGARCH_UPWARD: + return CRT_FE_UPWARD; + case LOONGARCH_TONEAREST: + default: + return CRT_FE_TONEAREST; + } +#else + return CRT_FE_TONEAREST; +#endif +} + +int __fe_raise_inexact(void) { +#if __loongarch_frlen != 0 + int fcsr; +# ifdef __clang__ + __asm__ __volatile__("movfcsr2gr %0, $fcsr0" : "=r" (fcsr)); + __asm__ __volatile__( + "movgr2fcsr $fcsr0, %0" :: "r" (fcsr | LOONGARCH_INEXACT)); +# else + __asm__ __volatile__("movfcsr2gr %0, $r0" : "=r" (fcsr)); + __asm__ __volatile__( + "movgr2fcsr $r0, %0" :: "r" (fcsr | LOONGARCH_INEXACT)); +# endif +#endif + return 0; +} diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp index f12f7aa61bc4..7f4e8ef91c44 100644 --- a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp @@ -149,8 +149,8 @@ inline ALWAYS_INLINE uintptr_t GetPreviousInstructionPc(uintptr_t PC) { ALWAYS_INLINE uintptr_t TracePC::GetNextInstructionPc(uintptr_t PC) { #if defined(__mips__) return PC + 8; -#elif defined(__powerpc__) || defined(__sparc__) || defined(__arm__) || \ - defined(__aarch64__) +#elif defined(__powerpc__) || defined(__sparc__) || defined(__arm__) || \ + defined(__aarch64__) || defined(__loongarch__) return PC + 4; #else return PC + 1; diff --git a/compiler-rt/lib/interception/tests/CMakeLists.txt b/compiler-rt/lib/interception/tests/CMakeLists.txt index 37bf99edaf06..6d4988aae576 100644 --- a/compiler-rt/lib/interception/tests/CMakeLists.txt +++ b/compiler-rt/lib/interception/tests/CMakeLists.txt @@ -1,6 +1,6 @@ include(CompilerRTCompile) -filter_available_targets(INTERCEPTION_UNITTEST_SUPPORTED_ARCH x86_64 i386 mips64 mips64el) +filter_available_targets(INTERCEPTION_UNITTEST_SUPPORTED_ARCH x86_64 i386 mips64 mips64el loongarch64) set(INTERCEPTION_UNITTESTS interception_linux_test.cpp diff --git a/compiler-rt/lib/lsan/lsan_allocator.cpp b/compiler-rt/lib/lsan/lsan_allocator.cpp index 43928ad294e2..525e1e1fd696 100644 --- a/compiler-rt/lib/lsan/lsan_allocator.cpp +++ b/compiler-rt/lib/lsan/lsan_allocator.cpp @@ -28,7 +28,7 @@ extern "C" void *memset(void *ptr, int value, uptr num); namespace __lsan { #if defined(__i386__) || defined(__arm__) static const uptr kMaxAllowedMallocSize = 1ULL << 30; -#elif defined(__mips64) || defined(__aarch64__) +#elif defined(__mips64) || defined(__aarch64__) || defined(__loongarch64) static const uptr kMaxAllowedMallocSize = 4ULL << 30; #else static const uptr kMaxAllowedMallocSize = 8ULL << 30; diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 94bb3cca0083..a427d2e17527 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ 
b/compiler-rt/lib/lsan/lsan_common.cpp @@ -251,6 +251,8 @@ static inline bool MaybeUserPointer(uptr p) { return ((p >> 47) == 0); # elif defined(__mips64) return ((p >> 40) == 0); +# elif defined(__loongarch64) + return ((p >> 40) == 0); # elif defined(__aarch64__) // Accept up to 48 bit VMA. return ((p >> 48) == 0); diff --git a/compiler-rt/lib/lsan/lsan_common.h b/compiler-rt/lib/lsan/lsan_common.h index 2fc038b8fd14..3b9fb5c99add 100644 --- a/compiler-rt/lib/lsan/lsan_common.h +++ b/compiler-rt/lib/lsan/lsan_common.h @@ -37,7 +37,7 @@ # define CAN_SANITIZE_LEAKS 0 #elif (SANITIZER_LINUX || SANITIZER_APPLE) && (SANITIZER_WORDSIZE == 64) && \ (defined(__x86_64__) || defined(__mips64) || defined(__aarch64__) || \ - defined(__powerpc64__) || defined(__s390x__)) + defined(__powerpc64__) || defined(__s390x__) || defined(__loongarch64)) # define CAN_SANITIZE_LEAKS 1 #elif defined(__i386__) && (SANITIZER_LINUX || SANITIZER_APPLE) # define CAN_SANITIZE_LEAKS 1 diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h index 1a4cc8c0c975..2bb6dcd12a28 100644 --- a/compiler-rt/lib/msan/msan.h +++ b/compiler-rt/lib/msan/msan.h @@ -60,8 +60,32 @@ const MappingDesc kMemoryLayout[] = { {0x00c000000000ULL, 0x00e200000000ULL, MappingDesc::INVALID, "invalid"}, {0x00e200000000ULL, 0x00ffffffffffULL, MappingDesc::APP, "app-3"}}; -#define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x8000000000ULL) -#define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x2000000000ULL) +# define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x8000000000ULL) +# define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x2000000000ULL) + +#elif SANITIZER_LINUX && defined(__loongarch64) + +// LOONGARCH64 maps: +// - 0x0000000000-0x0200000000: the program's own segments +// - 0xa200000000-0xc000000000: PIE program segments +// - 0xe200000000-0xffffffffff: library segments.
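+// Worked example (illustrative only, added by the editor rather than taken +// from the original comment), using the MEM_TO_SHADOW/SHADOW_TO_ORIGIN +// macros defined below this table: +// uptr app = 0x00e234567890ULL; // falls in "app-3" +// uptr shadow = app ^ 0x8000000000ULL; // 0x006234567890, in "shadow-3" +// uptr origin = shadow + 0x2000000000ULL; // 0x008234567890, in "origin-3"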
+const MappingDesc kMemoryLayout[] = { + {0x000000000000ULL, 0x000200000000ULL, MappingDesc::APP, "app-1"}, + {0x000200000000ULL, 0x002200000000ULL, MappingDesc::INVALID, "invalid"}, + {0x002200000000ULL, 0x004000000000ULL, MappingDesc::SHADOW, "shadow-2"}, + {0x004000000000ULL, 0x004200000000ULL, MappingDesc::INVALID, "invalid"}, + {0x004200000000ULL, 0x006000000000ULL, MappingDesc::ORIGIN, "origin-2"}, + {0x006000000000ULL, 0x006200000000ULL, MappingDesc::INVALID, "invalid"}, + {0x006200000000ULL, 0x008000000000ULL, MappingDesc::SHADOW, "shadow-3"}, + {0x008000000000ULL, 0x008200000000ULL, MappingDesc::SHADOW, "shadow-1"}, + {0x008200000000ULL, 0x00a000000000ULL, MappingDesc::ORIGIN, "origin-3"}, + {0x00a000000000ULL, 0x00a200000000ULL, MappingDesc::ORIGIN, "origin-1"}, + {0x00a200000000ULL, 0x00c000000000ULL, MappingDesc::APP, "app-2"}, + {0x00c000000000ULL, 0x00e200000000ULL, MappingDesc::INVALID, "invalid"}, + {0x00e200000000ULL, 0x00ffffffffffULL, MappingDesc::APP, "app-3"}}; + +# define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x8000000000ULL) +# define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x2000000000ULL) #elif SANITIZER_LINUX && defined(__aarch64__) diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp index 0d5e85329850..4ae11b75f663 100644 --- a/compiler-rt/lib/msan/msan_allocator.cpp +++ b/compiler-rt/lib/msan/msan_allocator.cpp @@ -44,7 +44,7 @@ struct MsanMapUnmapCallback { } }; -#if defined(__mips64) +#if defined(__mips64) || defined(__loongarch64) static const uptr kMaxAllowedMallocSize = 2UL << 30; struct AP32 { diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp index c4a9e88655fe..3649b8f67db8 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -1742,7 +1742,7 @@ void InitializeInterceptors() { INTERCEPT_FUNCTION(dlerror); INTERCEPT_FUNCTION(dl_iterate_phdr); INTERCEPT_FUNCTION(getrusage); -#if defined(__mips__) +#if defined(__mips__) || defined(__loongarch__) INTERCEPT_FUNCTION_VER(pthread_create, "GLIBC_2.2"); #else INTERCEPT_FUNCTION(pthread_create); diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index 8babe8799f65..2f33b5e9d6cd 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -3161,6 +3161,8 @@ static void GetPathToLoadable(char *buf, size_t sz) { static const char basename[] = "libmsan_loadable.mips64.so"; #elif defined(__mips64) static const char basename[] = "libmsan_loadable.mips64el.so"; +#elif defined(__loongarch64) + static const char basename[] = "libmsan_loadable.loongarch64.so"; #elif defined(__aarch64__) static const char basename[] = "libmsan_loadable.aarch64.so"; #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S index 05192485d597..379328950bfb 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S @@ -1,11 +1,11 @@ -#if defined(__loongarch_lp64) && defined(__linux__) +#if defined(__loongarch64) && defined(__linux__) #include "sanitizer_common/sanitizer_asm.h" ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA) ASM_HIDDEN(_ZN14__interception10real_vforkE) -.bss +.section 
.bss .type _ZN14__interception10real_vforkE, @object .size _ZN14__interception10real_vforkE, 8 _ZN14__interception10real_vforkE: @@ -44,7 +44,7 @@ ASM_WRAPPER_NAME(vfork): // $a0 != 0 => parent process. Clear stack shadow. // put old $sp to $a0 addi.d $a0, $sp, 16 - bl %plt(COMMON_INTERCEPTOR_HANDLE_VFORK) + bl COMMON_INTERCEPTOR_HANDLE_VFORK .L_exit: // Restore $ra diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc index 7e7628ea0c1c..316286bce493 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc @@ -2512,7 +2512,7 @@ PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { # if !SANITIZER_ANDROID && !SANITIZER_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ - SANITIZER_RISCV64) + SANITIZER_RISCV64 || defined(__loongarch64)) if (data) { if (request == ptrace_setregs) { PRE_READ((void *)data, struct_user_regs_struct_sz); @@ -2534,7 +2534,7 @@ POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) { # if !SANITIZER_ANDROID && !SANITIZER_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ - SANITIZER_RISCV64) + SANITIZER_RISCV64 || defined(__loongarch64)) if (res >= 0 && data) { // Note that this is different from the interceptor in // sanitizer_common_interceptors.inc. diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 7d1315255005..795d4b84889d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -14,6 +14,12 @@ #include "sanitizer_interface_internal.h" // OHOS_LOCAL #include "sanitizer_platform.h" +#if defined(__loongarch__) +# define __ARCH_WANT_RENAMEAT 1 +# define SC_ADDRERR_RD (1 << 30) +# define SC_ADDRERR_WR (1 << 31) +#endif + #if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \ SANITIZER_SOLARIS @@ -80,6 +86,7 @@ #if SANITIZER_LINUX && defined(__loongarch__) # include +#include #endif #if SANITIZER_FREEBSD @@ -1140,13 +1147,15 @@ uptr GetMaxVirtualAddress() { return (1ULL << 38) - 1; # elif SANITIZER_MIPS64 return (1ULL << 40) - 1; // 0x000000ffffffffffUL; -# elif defined(__s390x__) +# elif defined(__loongarch64) + return (1ULL << 40) - 1; // 0x000000ffffffffffUL; +# elif defined(__s390x__) return (1ULL << 53) - 1; // 0x001fffffffffffffUL; -#elif defined(__sparc__) +# elif defined(__sparc__) return ~(uptr)0; -# else +# else return (1ULL << 47) - 1; // 0x00007fffffffffffUL; -# endif +# endif #else // SANITIZER_WORDSIZE == 32 # if defined(__s390__) return (1ULL << 31) - 1; // 0x7fffffff; @@ -1472,7 +1481,62 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, : "memory"); return res; } -#elif defined(__aarch64__) +# elif defined(__loongarch__) && SANITIZER_LINUX +uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, + int *parent_tidptr, void *newtls, int *child_tidptr) { + long long res; + if (!fn || !child_stack) + return -EINVAL; + CHECK_EQ(0, (uptr)child_stack % 16); + child_stack = (char *)child_stack - 2 * sizeof(unsigned long long); + ((unsigned long long *)child_stack)[0] = (uptr)fn; + ((unsigned long long *)child_stack)[1] = (uptr)arg; + + register int 
__flags __asm__("r4") = flags; + register void *__child_stack __asm__("r5") = child_stack; + register int *__parent_tidptr __asm__("r6") = parent_tidptr; + register void *__newtls __asm__("r7") = newtls; + register int *__child_tidptr __asm__("r8") = child_tidptr; + + __asm__ __volatile__( + /* $a0 = syscall($a7 = SYSCALL(clone), + * $a0 = flags, + * $a1 = child_stack, + * $a2 = parent_tidptr, + * $a3 = new_tls, + * $a4 = child_tyidptr) + */ + + /* Do the system call */ + "addi.d $a7, $r0, %1\n" + "syscall 0\n" + + "move %0, $a0" + : "=r"(res) + : "i"(__NR_clone), "r"(__flags), "r"(__child_stack), "r"(__parent_tidptr), + "r"(__newtls), "r"(__child_tidptr) + : "memory"); + if (res != 0) { + return res; + } + __asm__ __volatile__( + /* In the child, now. Call "fn(arg)". */ + "ld.d $a6, $sp, 0\n" + "ld.d $a0, $sp, 8\n" + + "jirl $r1, $a6, 0\n" + + /* Call _exit($v0) */ + "addi.d $a7, $r0, %1\n" + "syscall 0\n" + + "move %0, $a0" + : "=r"(res) + : "i"(__NR_exit) + : "r1", "memory"); + return res; +} +# elif defined(__aarch64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { register long long res __asm__("x0"); @@ -1523,12 +1587,12 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, : "x30", "memory"); return res; } -#elif defined(__powerpc64__) +# elif defined(__powerpc64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { long long res; // Stack frame structure. -#if SANITIZER_PPC64V1 +# if SANITIZER_PPC64V1 // Back chain == 0 (SP + 112) // Frame (112 bytes): // Parameter save area (SP + 48), 8 doublewords @@ -1538,20 +1602,20 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, // LR save area (SP + 16) // CR save area (SP + 8) // Back chain (SP + 0) -# define FRAME_SIZE 112 -# define FRAME_TOC_SAVE_OFFSET 40 -#elif SANITIZER_PPC64V2 +# define FRAME_SIZE 112 +# define FRAME_TOC_SAVE_OFFSET 40 +# elif SANITIZER_PPC64V2 // Back chain == 0 (SP + 32) // Frame (32 bytes): // TOC save area (SP + 24) // LR save area (SP + 16) // CR save area (SP + 8) // Back chain (SP + 0) -# define FRAME_SIZE 32 -# define FRAME_TOC_SAVE_OFFSET 24 -#else -# error "Unsupported PPC64 ABI" -#endif +# define FRAME_SIZE 32 +# define FRAME_TOC_SAVE_OFFSET 24 +# else +# error "Unsupported PPC64 ABI" +# endif if (!fn || !child_stack) return -EINVAL; CHECK_EQ(0, (uptr)child_stack % 16); @@ -1564,72 +1628,62 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, register void *__newtls __asm__("r8") = newtls; register int *__ctidptr __asm__("r9") = child_tidptr; - __asm__ __volatile__( - /* fn and arg are saved across the syscall */ - "mr 28, %5\n\t" - "mr 27, %8\n\t" - - /* syscall - r0 == __NR_clone - r3 == flags - r4 == child_stack - r5 == parent_tidptr - r6 == newtls - r7 == child_tidptr */ - "mr 3, %7\n\t" - "mr 5, %9\n\t" - "mr 6, %10\n\t" - "mr 7, %11\n\t" - "li 0, %3\n\t" - "sc\n\t" - - /* Test if syscall was successful */ - "cmpdi cr1, 3, 0\n\t" - "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" - "bne- cr1, 1f\n\t" - - /* Set up stack frame */ - "li 29, 0\n\t" - "stdu 29, -8(1)\n\t" - "stdu 1, -%12(1)\n\t" - /* Do the function call */ - "std 2, %13(1)\n\t" -#if SANITIZER_PPC64V1 - "ld 0, 0(28)\n\t" - "ld 2, 8(28)\n\t" - "mtctr 0\n\t" -#elif SANITIZER_PPC64V2 - "mr 12, 28\n\t" - "mtctr 12\n\t" -#else -# error "Unsupported PPC64 ABI" -#endif - "mr 3, 27\n\t" - 
"bctrl\n\t" - "ld 2, %13(1)\n\t" - - /* Call _exit(r3) */ - "li 0, %4\n\t" - "sc\n\t" - - /* Return to parent */ - "1:\n\t" - "mr %0, 3\n\t" - : "=r" (res) - : "0" (-1), - "i" (EINVAL), - "i" (__NR_clone), - "i" (__NR_exit), - "r" (__fn), - "r" (__cstack), - "r" (__flags), - "r" (__arg), - "r" (__ptidptr), - "r" (__newtls), - "r" (__ctidptr), - "i" (FRAME_SIZE), - "i" (FRAME_TOC_SAVE_OFFSET) - : "cr0", "cr1", "memory", "ctr", "r0", "r27", "r28", "r29"); + __asm__ __volatile__( + /* fn and arg are saved across the syscall */ + "mr 28, %5\n\t" + "mr 27, %8\n\t" + + /* syscall + r0 == __NR_clone + r3 == flags + r4 == child_stack + r5 == parent_tidptr + r6 == newtls + r7 == child_tidptr */ + "mr 3, %7\n\t" + "mr 5, %9\n\t" + "mr 6, %10\n\t" + "mr 7, %11\n\t" + "li 0, %3\n\t" + "sc\n\t" + + /* Test if syscall was successful */ + "cmpdi cr1, 3, 0\n\t" + "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" + "bne- cr1, 1f\n\t" + + /* Set up stack frame */ + "li 29, 0\n\t" + "stdu 29, -8(1)\n\t" + "stdu 1, -%12(1)\n\t" + /* Do the function call */ + "std 2, %13(1)\n\t" +# if SANITIZER_PPC64V1 + "ld 0, 0(28)\n\t" + "ld 2, 8(28)\n\t" + "mtctr 0\n\t" +# elif SANITIZER_PPC64V2 + "mr 12, 28\n\t" + "mtctr 12\n\t" +# else +# error "Unsupported PPC64 ABI" +# endif + "mr 3, 27\n\t" + "bctrl\n\t" + "ld 2, %13(1)\n\t" + + /* Call _exit(r3) */ + "li 0, %4\n\t" + "sc\n\t" + + /* Return to parent */ + "1:\n\t" + "mr %0, 3\n\t" + : "=r"(res) + : "0"(-1), "i"(EINVAL), "i"(__NR_clone), "i"(__NR_exit), "r"(__fn), + "r"(__cstack), "r"(__flags), "r"(__arg), "r"(__ptidptr), "r"(__newtls), + "r"(__ctidptr), "i"(FRAME_SIZE), "i"(FRAME_TOC_SAVE_OFFSET) + : "cr0", "cr1", "memory", "ctr", "r0", "r27", "r28", "r29"); return res; } #elif defined(__i386__) @@ -1645,56 +1699,53 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ((unsigned int *)child_stack)[2] = (uptr)fn; ((unsigned int *)child_stack)[3] = (uptr)arg; __asm__ __volatile__( - /* %eax = syscall(%eax = SYSCALL(clone), - * %ebx = flags, - * %ecx = child_stack, - * %edx = parent_tidptr, - * %esi = new_tls, - * %edi = child_tidptr) - */ + /* %eax = syscall(%eax = SYSCALL(clone), + * %ebx = flags, + * %ecx = child_stack, + * %edx = parent_tidptr, + * %esi = new_tls, + * %edi = child_tidptr) + */ - /* Obtain flags */ - "movl (%%ecx), %%ebx\n" - /* Do the system call */ - "pushl %%ebx\n" - "pushl %%esi\n" - "pushl %%edi\n" - /* Remember the flag value. */ - "movl %%ebx, (%%ecx)\n" - "int $0x80\n" - "popl %%edi\n" - "popl %%esi\n" - "popl %%ebx\n" - - /* if (%eax != 0) - * return; - */ - - "test %%eax,%%eax\n" - "jnz 1f\n" - - /* terminate the stack frame */ - "xorl %%ebp,%%ebp\n" - /* Call FN. */ - "call *%%ebx\n" -#ifdef PIC - "call here\n" - "here:\n" - "popl %%ebx\n" - "addl $_GLOBAL_OFFSET_TABLE_+[.-here], %%ebx\n" -#endif - /* Call exit */ - "movl %%eax, %%ebx\n" - "movl %2, %%eax\n" - "int $0x80\n" - "1:\n" - : "=a" (res) - : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)), - "c"(child_stack), - "d"(parent_tidptr), - "S"(newtls), - "D"(child_tidptr) - : "memory"); + /* Obtain flags */ + "movl (%%ecx), %%ebx\n" + /* Do the system call */ + "pushl %%ebx\n" + "pushl %%esi\n" + "pushl %%edi\n" + /* Remember the flag value. */ + "movl %%ebx, (%%ecx)\n" + "int $0x80\n" + "popl %%edi\n" + "popl %%esi\n" + "popl %%ebx\n" + + /* if (%eax != 0) + * return; + */ + + "test %%eax,%%eax\n" + "jnz 1f\n" + + /* terminate the stack frame */ + "xorl %%ebp,%%ebp\n" + /* Call FN. 
*/ + "call *%%ebx\n" +# ifdef PIC + "call here\n" + "here:\n" + "popl %%ebx\n" + "addl $_GLOBAL_OFFSET_TABLE_+[.-here], %%ebx\n" +# endif + /* Call exit */ + "movl %%eax, %%ebx\n" + "movl %2, %%eax\n" + "int $0x80\n" + "1:\n" + : "=a"(res) + : "a"(SYSCALL(clone)), "i"(SYSCALL(exit)), "c"(child_stack), + "d"(parent_tidptr), "S"(newtls), "D"(child_tidptr) + : "memory"); return res; } #elif defined(__arm__) @@ -1713,22 +1764,22 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, register int *r4 __asm__("r4") = child_tidptr; register int r7 __asm__("r7") = __NR_clone; -#if __ARM_ARCH > 4 || defined (__ARM_ARCH_4T__) -# define ARCH_HAS_BX -#endif -#if __ARM_ARCH > 4 -# define ARCH_HAS_BLX -#endif +# if __ARM_ARCH > 4 || defined(__ARM_ARCH_4T__) +# define ARCH_HAS_BX +# endif +# if __ARM_ARCH > 4 +# define ARCH_HAS_BLX +# endif -#ifdef ARCH_HAS_BX -# ifdef ARCH_HAS_BLX -# define BLX(R) "blx " #R "\n" -# else -# define BLX(R) "mov lr, pc; bx " #R "\n" -# endif -#else -# define BLX(R) "mov lr, pc; mov pc," #R "\n" -#endif +# ifdef ARCH_HAS_BX +# ifdef ARCH_HAS_BLX +# define BLX(R) "blx " # R "\n" +# else +# define BLX(R) "mov lr, pc; bx " # R "\n" +# endif +# else +# define BLX(R) "mov lr, pc; mov pc," # R "\n" +# endif __asm__ __volatile__( /* %r0 = syscall(%r7 = SYSCALL(clone), @@ -1766,7 +1817,7 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, #endif #endif // SANITIZER_LINUX -#if SANITIZER_LINUX +# if SANITIZER_LINUX int internal_uname(struct utsname *buf) { return internal_syscall(SYSCALL(uname), buf); } @@ -1968,6 +2019,19 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { #endif } return SignalContext::Unknown; +# elif defined(__loongarch__) +#if SANITIZER_ANDROID + u32 flags = ucontext->uc_mcontext.sc_flags; +#elif SANITIZER_OHOS + u32 flags = ucontext->uc_mcontext.flags; +#else + u32 flags = ucontext->uc_mcontext.__flags; +#endif + if (flags & SC_ADDRERR_RD) + return SignalContext::Read; + if (flags & SC_ADDRERR_WR) + return SignalContext::Write; + return SignalContext::Unknown; #elif defined(__arm__) static const uptr FSR_WRITE = 1U << 11; uptr fsr = ucontext->uc_mcontext.error_code; @@ -1980,17 +2044,17 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { #elif defined(__sparc__) // Decode the instruction to determine the access type. // From OpenSolaris $SRC/uts/sun4/os/trap.c (get_accesstype). -#if SANITIZER_SOLARIS +# if SANITIZER_SOLARIS uptr pc = ucontext->uc_mcontext.gregs[REG_PC]; -#else +# else // Historical BSDism here. struct sigcontext *scontext = (struct sigcontext *)context; -#if defined(__arch64__) +# if defined(__arch64__) uptr pc = scontext->sigc_regs.tpc; -#else +# else uptr pc = scontext->si_regs.pc; -#endif -#endif +# endif +# endif u32 instr = *(u32 *)pc; return (instr >> 21) & 1 ? Write: Read; #elif defined(__riscv) @@ -2001,7 +2065,7 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { #endif unsigned faulty_instruction = *(uint16_t *)pc; -#if defined(__riscv_compressed) +# if defined(__riscv_compressed) if ((faulty_instruction & 0x3) != 0x3) { // it's a compressed instruction // set op_bits to the instruction bits [1, 0, 15, 14, 13] unsigned op_bits = @@ -2009,29 +2073,29 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { unsigned rd = faulty_instruction & 0xF80; // bits 7-11, inclusive switch (op_bits) { case 0b10'010: // c.lwsp (rd != x0) -#if __riscv_xlen == 64 +# if __riscv_xlen == 64 case 0b10'011: // c.ldsp (rd != x0) #endif return rd ? 
SignalContext::Read : SignalContext::Unknown; case 0b00'010: // c.lw -#if __riscv_flen >= 32 && __riscv_xlen == 32 +# if __riscv_flen >= 32 && __riscv_xlen == 32 case 0b10'011: // c.flwsp -#endif -#if __riscv_flen >= 32 || __riscv_xlen == 64 +# endif +# if __riscv_flen >= 32 || __riscv_xlen == 64 case 0b00'011: // c.flw / c.ld -#endif -#if __riscv_flen == 64 +# endif +# if __riscv_flen == 64 case 0b00'001: // c.fld case 0b10'001: // c.fldsp #endif return SignalContext::Read; case 0b00'110: // c.sw case 0b10'110: // c.swsp -#if __riscv_flen >= 32 || __riscv_xlen == 64 +# if __riscv_flen >= 32 || __riscv_xlen == 64 case 0b00'111: // c.fsw / c.sd case 0b10'111: // c.fswsp / c.sdsp -#endif -#if __riscv_flen == 64 +# endif +# if __riscv_flen == 64 case 0b00'101: // c.fsd case 0b10'101: // c.fsdsp #endif @@ -2040,7 +2104,7 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { return SignalContext::Unknown; } } -#endif +# endif unsigned opcode = faulty_instruction & 0x7f; // lower 7 bits unsigned funct3 = (faulty_instruction >> 12) & 0x7; // bits 12-14, inclusive @@ -2050,9 +2114,9 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { case 0b000: // lb case 0b001: // lh case 0b010: // lw -#if __riscv_xlen == 64 +# if __riscv_xlen == 64 case 0b011: // ld -#endif +# endif case 0b100: // lbu case 0b101: // lhu return SignalContext::Read; @@ -2064,18 +2128,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { case 0b000: // sb case 0b001: // sh case 0b010: // sw -#if __riscv_xlen == 64 +# if __riscv_xlen == 64 case 0b011: // sd #endif return SignalContext::Write; default: return SignalContext::Unknown; } -#if __riscv_flen >= 32 +# if __riscv_flen >= 32 case 0b0000111: // floating-point loads switch (funct3) { case 0b010: // flw -#if __riscv_flen == 64 +# if __riscv_flen == 64 case 0b011: // fld #endif return SignalContext::Read; @@ -2085,18 +2149,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { case 0b0100111: // floating-point stores switch (funct3) { case 0b010: // fsw -#if __riscv_flen == 64 +# if __riscv_flen == 64 case 0b011: // fsd #endif return SignalContext::Write; default: return SignalContext::Unknown; } -#endif +# endif default: return SignalContext::Unknown; } -#else +# else (void)ucontext; return Unknown; // FIXME: Implement. 
#endif @@ -2254,16 +2318,31 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.gregs[30]; *sp = ucontext->uc_mcontext.gregs[29]; -#elif defined(__s390__) +# elif defined(__loongarch__) + ucontext_t *ucontext = (ucontext_t *)context; +#if SANITIZER_ANDROID + *pc = ucontext->uc_mcontext.sc_pc; + *bp = ucontext->uc_mcontext.sc_regs[22]; + *sp = ucontext->uc_mcontext.sc_regs[3]; +#elif SANITIZER_OHOS + *pc = ucontext->uc_mcontext.pc; + *bp = ucontext->uc_mcontext.gregs[22]; + *sp = ucontext->uc_mcontext.gregs[3]; +#else + *pc = ucontext->uc_mcontext.__pc; + *bp = ucontext->uc_mcontext.__gregs[22]; + *sp = ucontext->uc_mcontext.__gregs[3]; +#endif +# elif defined(__s390__) ucontext_t *ucontext = (ucontext_t*)context; -# if defined(__s390x__) +# if defined(__s390x__) *pc = ucontext->uc_mcontext.psw.addr; -# else +# else *pc = ucontext->uc_mcontext.psw.addr & 0x7fffffff; -# endif +# endif *bp = ucontext->uc_mcontext.gregs[11]; *sp = ucontext->uc_mcontext.gregs[15]; -#elif defined(__riscv) +# elif defined(__riscv) ucontext_t *ucontext = (ucontext_t*)context; # if SANITIZER_FREEBSD *pc = ucontext->uc_mcontext.mc_gpregs.gp_sepc; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h index 1b1b98fe4989..6d1a082d2d30 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h @@ -77,9 +77,9 @@ uptr internal_arch_prctl(int option, uptr arg2); // internal_sigaction instead. int internal_sigaction_norestorer(int signum, const void *act, void *oldact); void internal_sigdelset(__sanitizer_sigset_t *set, int signum); -#if defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) || \ - defined(__powerpc64__) || defined(__s390__) || defined(__i386__) || \ - defined(__arm__) || SANITIZER_RISCV64 +# if defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) || \ + defined(__powerpc64__) || defined(__s390__) || defined(__i386__) || \ + defined(__arm__) || SANITIZER_RISCV64 || defined(__loongarch__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr); #endif @@ -152,6 +152,9 @@ inline void ReleaseMemoryPagesToOSAndZeroFill(uptr beg, uptr end) { "rdhwr %0,$29\n" \ ".set pop\n" : "=r"(__v)); \ __v; }) +#elif defined (__loongarch__) +# define __get_tls() \ + ({ void** __v; __asm__("move %0, $tp" : "=r"(__v)); __v; }) #elif defined(__riscv) # define __get_tls() \ ({ void** __v; __asm__("mv %0, tp" : "=r"(__v)); __v; }) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index 4f330ffa10b0..db14197fe22f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -265,7 +265,9 @@ static uptr ThreadDescriptorSizeFallback() { #elif defined(__mips__) // TODO(sagarthakur): add more values as per different glibc versions. val = FIRST_32_SECOND_64(1152, 1776); -#elif SANITIZER_RISCV64 +# elif defined(__loongarch64) + val = 1776; +# elif SANITIZER_RISCV64 int major; int minor; int patch; @@ -280,10 +282,10 @@ static uptr ThreadDescriptorSizeFallback() { val = 1936; // tested against glibc 2.32 } -#elif defined(__aarch64__) +# elif defined(__aarch64__) // The sizeof (struct pthread) is the same from GLIBC 2.17 to 2.22. 
val = 1776; -#elif defined(__powerpc64__) +# elif defined(__powerpc64__) val = 1776; // from glibc.ppc64le 2.20-8.fc21 #endif return val; @@ -304,17 +306,20 @@ uptr ThreadDescriptorSize() { return val; } -#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 +# if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 || \ + defined(__loongarch__) // TlsPreTcbSize includes size of struct pthread_descr and size of tcb // head structure. It lies before the static tls blocks. static uptr TlsPreTcbSize() { #if defined(__mips__) const uptr kTcbHead = 16; // sizeof (tcbhead_t) -#elif defined(__powerpc64__) +# elif defined(__loongarch__) + const uptr kTcbHead = 16; // sizeof (tcbhead_t) +# elif defined(__powerpc64__) const uptr kTcbHead = 88; // sizeof (tcbhead_t) -#elif SANITIZER_RISCV64 +# elif SANITIZER_RISCV64 const uptr kTcbHead = 16; // sizeof (tcbhead_t) -#endif +# endif const uptr kTlsAlign = 16; const uptr kTlsPreTcbSize = RoundUpTo(ThreadDescriptorSize() + kTcbHead, kTlsAlign); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index 9469f0587f7f..20f9c81f69f6 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -278,12 +278,6 @@ # define SANITIZER_RISCV64 0 #endif -#if defined(__loongarch_lp64) -# define SANITIZER_LOONGARCH64 1 -#else -# define SANITIZER_LOONGARCH64 0 -#endif - // By default we allow to use SizeClassAllocator64 on 64-bit platform. // But in some cases (e.g. AArch64's 39-bit address space) SizeClassAllocator64 // does not work well and we need to fallback to SizeClassAllocator32. @@ -294,7 +288,7 @@ SANITIZER_FUCHSIA # define SANITIZER_CAN_USE_ALLOCATOR64 1 # elif defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ - defined(__arm__) || SANITIZER_RISCV64 || defined(__hexagon__) + defined(__arm__) || SANITIZER_RISCV64 || defined(__hexagon__) || defined(__loongarch64) # define SANITIZER_CAN_USE_ALLOCATOR64 0 # else # define SANITIZER_CAN_USE_ALLOCATOR64 (SANITIZER_WORDSIZE == 64) @@ -304,7 +298,7 @@ // The range of addresses which can be returned my mmap. // FIXME: this value should be different on different platforms. Larger values // will still work but will consume more memory for TwoLevelByteMap. -#if defined(__mips__) +#if defined(__mips__) || defined(__loongarch__) # if SANITIZER_GO && defined(__mips64) # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 47) # else @@ -359,6 +353,21 @@ # define SANITIZER_POINTER_FORMAT_LENGTH FIRST_32_SECOND_64(8, 12) #endif +#if defined(__loongarch__) +# define SANITIZER_LOONGARCH 1 +# if defined(__loongarch64) +# define SANITIZER_LOONGARCH32 0 +# define SANITIZER_LOONGARCH64 1 +# else +# define SANITIZER_LOONGARCH32 1 +# define SANITIZER_LOONGARCH64 0 +# endif +#else +# define SANITIZER_LOONGARCH 0 +# define SANITIZER_LOONGARCH32 0 +# define SANITIZER_LOONGARCH64 0 +#endif + /// \macro MSC_PREREQ /// \brief Is the compiler MSVC of at least the specified version? 
/// The common \param version values to check for are: diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 81febbdca434..b1eedc2434da 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -281,8 +281,8 @@ #if SI_LINUX_NOT_ANDROID && !SI_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__) || SANITIZER_RISCV64) -#define SANITIZER_INTERCEPT_PTRACE 1 + defined(__s390__) || SANITIZER_RISCV64 || defined(__loongarch__)) +# define SANITIZER_INTERCEPT_PTRACE 1 #else #define SANITIZER_INTERCEPT_PTRACE 0 #endif @@ -503,7 +503,8 @@ #define SANITIZER_INTERCEPT_MEMALIGN (!SI_FREEBSD && !SI_MAC && !SI_NETBSD) #define SANITIZER_INTERCEPT___LIBC_MEMALIGN SI_GLIBC #define SANITIZER_INTERCEPT_PVALLOC (SI_GLIBC || SI_ANDROID) -#define SANITIZER_INTERCEPT_CFREE (SI_GLIBC && !SANITIZER_RISCV64) +#define SANITIZER_INTERCEPT_CFREE \ + (SI_GLIBC && !SANITIZER_RISCV64 && !SANITIZER_LOONGARCH) #define SANITIZER_INTERCEPT_REALLOCARRAY SI_POSIX #define SANITIZER_INTERCEPT_ALIGNED_ALLOC (!SI_MAC) #define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC && !SI_NETBSD) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 839d31a8b75c..c9778f207e78 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -103,7 +103,7 @@ # endif # if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \ - defined(__hexagon__) || SANITIZER_RISCV64 + defined(__hexagon__) || SANITIZER_RISCV64 || defined(__loongarch64) # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; @@ -157,7 +157,7 @@ typedef struct user_fpregs elf_fpregset_t; #include #include #include -#if defined(__mips64) +#if defined(__mips64) || defined(__loongarch64) # include #endif #include @@ -279,13 +279,13 @@ namespace __sanitizer { #if SANITIZER_LINUX && !SANITIZER_ANDROID // Use pre-computed size of struct ustat to avoid which // has been removed from glibc 2.28. -#if defined(__aarch64__) || defined(__s390x__) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) || \ - defined(__x86_64__) || SANITIZER_RISCV64 -#define SIZEOF_STRUCT_USTAT 32 +# if defined(__aarch64__) || defined(__s390x__) || defined(__mips64) || \ + defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) || \ + defined(__x86_64__) || SANITIZER_RISCV64 || defined(__loongarch64) +# define SIZEOF_STRUCT_USTAT 32 # elif defined(__arm__) || defined(__i386__) || defined(__mips__) || \ defined(__powerpc__) || defined(__s390__) || defined(__sparc__) || \ - defined(__hexagon__) + defined(__hexagon__) || defined(__loongarch__) # define SIZEOF_STRUCT_USTAT 20 # elif defined(__loongarch__) // Not used. 
The minimum Glibc version available for LoongArch is 2.36 @@ -364,35 +364,38 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); const int wordexp_wrde_dooffs = WRDE_DOOFFS; # endif // !SANITIZER_ANDROID -#if SANITIZER_LINUX && !SANITIZER_ANDROID && \ - (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__) || SANITIZER_RISCV64) -#if defined(__mips64) || defined(__powerpc64__) || defined(__arm__) +# if SANITIZER_LINUX && !SANITIZER_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ + defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ + defined(__s390__) || SANITIZER_RISCV64 || defined(__loongarch64)) +# if defined(__mips64) || defined(__powerpc64__) || defined(__arm__) unsigned struct_user_regs_struct_sz = sizeof(struct pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(elf_fpregset_t); -#elif SANITIZER_RISCV64 +# elif SANITIZER_RISCV64 unsigned struct_user_regs_struct_sz = sizeof(struct user_regs_struct); unsigned struct_user_fpregs_struct_sz = sizeof(struct __riscv_q_ext_state); -#elif defined(__aarch64__) +# elif defined(__aarch64__) unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpsimd_state); -#elif defined(__s390__) +# elif defined(__loongarch64) + unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs); + unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fp_state); +# elif defined(__s390__) unsigned struct_user_regs_struct_sz = sizeof(struct _user_regs_struct); unsigned struct_user_fpregs_struct_sz = sizeof(struct _user_fpregs_struct); -#else +# else unsigned struct_user_regs_struct_sz = sizeof(struct user_regs_struct); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpregs_struct); -#endif // __mips64 || __powerpc64__ || __aarch64__ -#if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \ - defined(__aarch64__) || defined(__arm__) || defined(__s390__) || \ - SANITIZER_RISCV64 +# endif // __mips64 || __powerpc64__ || __aarch64__ || __loongarch64 +# if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \ + defined(__aarch64__) || defined(__arm__) || defined(__s390__) || \ + SANITIZER_RISCV64 || defined(__loongarch64) unsigned struct_user_fpxregs_struct_sz = 0; #else unsigned struct_user_fpxregs_struct_sz = sizeof(struct user_fpxregs_struct); #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__ -// || __s390__ -#ifdef __arm__ + // || __s390__ || __loongarch64 +# ifdef __arm__ unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE; #else unsigned struct_user_vfpregs_struct_sz = 0; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index a13117a330ec..600e87ee0e4d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -106,21 +106,24 @@ const unsigned struct_kernel_stat_sz = ? FIRST_32_SECOND_64(104, 128) : FIRST_32_SECOND_64((_MIPS_SIM == _ABIN32) ? 
160 : 144, 216); const unsigned struct_kernel_stat64_sz = 104; -#elif defined(__s390__) && !defined(__s390x__) +# elif defined(__loongarch__) +const unsigned struct_kernel_stat_sz = 128; +const unsigned struct_kernel_stat64_sz = 128; +# elif defined(__s390__) && !defined(__s390x__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; -#elif defined(__s390x__) +# elif defined(__s390x__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 0; -#elif defined(__sparc__) && defined(__arch64__) +# elif defined(__sparc__) && defined(__arch64__) const unsigned struct___old_kernel_stat_sz = 0; const unsigned struct_kernel_stat_sz = 104; const unsigned struct_kernel_stat64_sz = 144; -#elif defined(__sparc__) && !defined(__arch64__) +# elif defined(__sparc__) && !defined(__arch64__) const unsigned struct___old_kernel_stat_sz = 0; const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; -#elif SANITIZER_RISCV64 +# elif SANITIZER_RISCV64 const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 0; // RISCV64 does not use stat64 # elif defined(__hexagon__) @@ -580,6 +583,11 @@ union __sanitizer_pthread_attr_t { #if SANITIZER_ANDROID # if SANITIZER_MIPS typedef unsigned long __sanitizer_sigset_t[16 / sizeof(unsigned long)]; +# elif SANITIZER_LOONGARCH64 +struct __sanitizer_sigset_t { + // The size is determined by looking at sizeof of real sigset_t on linux. + uptr val[128 / (sizeof(uptr) * 8)]; +}; # else typedef unsigned long __sanitizer_sigset_t; # endif @@ -680,7 +688,7 @@ struct __sanitizer_sigaction { }; # endif // !SANITIZER_ANDROID -# if defined(__mips__) +# if defined(__mips__) || defined(__loongarch__) # define __SANITIZER_KERNEL_NSIG 128 # else # define __SANITIZER_KERNEL_NSIG 64 @@ -848,7 +856,7 @@ typedef void __sanitizer_FILE; #if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__) || SANITIZER_RISCV64) + defined(__s390__) || SANITIZER_RISCV64 || defined(__loongarch64)) extern unsigned struct_user_regs_struct_sz; extern unsigned struct_user_fpregs_struct_sz; extern unsigned struct_user_fpxregs_struct_sz; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_ring_buffer.h b/compiler-rt/lib/sanitizer_common/sanitizer_ring_buffer.h index f22e40cac284..2f067f6674d3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_ring_buffer.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_ring_buffer.h @@ -84,21 +84,24 @@ template class CompactRingBuffer { // Top byte of long_ stores the buffer size in pages. // Lower bytes store the address of the next buffer element. 
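// A small worked example of the packing described above (a sketch assuming
// 4 KB pages, i.e. kPageSizeBits == 12): for a 4-page buffer whose next
// free slot lives at 0x7f0012340008,
//   long_            == (4ull << kSizeShift) | 0x7f0012340008
//   GetStorageSize() == (long_ >> 56) << 12  ->  4 pages == 16384 bytes
//   next pointer     == SignExtend(long_ & kNextMask)
// The hunk below derives kPageSizeBits from GetPageSizeCached() at run time
// because LoongArch kernels default to 16 KB pages, where a hard-coded 4096
// no longer matches the real mapping granularity.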
- static constexpr int kPageSizeBits = 12;
 static constexpr int kSizeShift = 56;
 static constexpr int kSizeBits = 64 - kSizeShift;
 static constexpr uptr kNextMask = (1ULL << kSizeShift) - 1;
- uptr GetStorageSize() const { return (long_ >> kSizeShift) << kPageSizeBits; }
+ uptr GetStorageSize() const {
+ unsigned kPageSizeBits = Log2(GetPageSizeCached());
+ return (long_ >> kSizeShift) << kPageSizeBits;
+ }
 static uptr SignExtend(uptr x) { return ((sptr)x) << kSizeBits >> kSizeBits; }
 void Init(void *storage, uptr size) {
+ unsigned kPageSizeBits = Log2(GetPageSizeCached());
 CHECK_EQ(sizeof(CompactRingBuffer), sizeof(void *));
 CHECK(IsPowerOfTwo(size));
 CHECK_GE(size, 1 << kPageSizeBits);
 CHECK_LE(size, 128 << kPageSizeBits);
- CHECK_EQ(size % 4096, 0);
+ CHECK_EQ(size % GetPageSizeCached(), 0);
 CHECK_EQ(size % sizeof(T), 0);
 uptr st = (uptr)storage;
 CHECK_EQ(st % (size * 2), 0);
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp index 937afb1645d0..f3b4a7d1bf2f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.cpp
@@ -124,9 +124,9 @@ void BufferedStackTrace::UnwindFast(uptr pc, uptr bp, uptr stack_top,
 #elif defined(__loongarch__) || defined(__riscv)
 // frame[-1] contains the return address
 uhwptr pc1 = frame[-1];
-#else
+# else
 uhwptr pc1 = STRIP_PAC_PC((void *)frame[1]);
-#endif
+# endif
 // Let's assume that any pointer in the 0th page (i.e. <0x1000 on i386 and
 // x86_64) is invalid and stop unwinding here. If we're adding support for
 // a platform where this isn't true, we need to reconsider this check.
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h index ee996c3e07ea..aaa6565689e9 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h
@@ -22,8 +22,8 @@ struct BufferedStackTrace;
 static const u32 kStackTraceMax = 255;
-#if SANITIZER_LINUX && defined(__mips__)
-# define SANITIZER_CAN_FAST_UNWIND 0
+#if (SANITIZER_LINUX && defined(__mips__))
+# define SANITIZER_CAN_FAST_UNWIND 0
 #elif SANITIZER_WINDOWS
 # define SANITIZER_CAN_FAST_UNWIND 0
 #else
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 556fdfaaa89e..e69f84165042 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
@@ -16,22 +16,25 @@
 #if SANITIZER_LINUX && \
 (defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) || \
 defined(__powerpc64__) || defined(__s390__) || defined(__i386__) || \
- defined(__arm__) || SANITIZER_RISCV64)
-
-#include "sanitizer_stoptheworld.h"
-
-#include "sanitizer_platform_limits_posix.h"
-#include "sanitizer_atomic.h"
-
-#include <errno.h>
-#include <sched.h> // for CLONE_* definitions
-#include <stddef.h>
-#include <sys/prctl.h> // for PR_* definitions
-#include <sys/ptrace.h> // for PTRACE_* definitions
-#include <sys/types.h> // for pid_t
-#include <sys/uio.h> // for iovec
-#include <elf.h> // for NT_PRSTATUS
-#if (defined(__aarch64__) || SANITIZER_RISCV64) && !SANITIZER_ANDROID
+ defined(__arm__) || SANITIZER_RISCV64 || defined(__loongarch__))
+
+# include "sanitizer_atomic.h"
+# include "sanitizer_platform_limits_posix.h"
+# include "sanitizer_stoptheworld.h"
+
+# if defined(__loongarch__)
+# include
+# endif
+
+# include <elf.h> // for NT_PRSTATUS
+# include <errno.h>
+# include <sched.h> // for CLONE_* definitions
+# include <stddef.h>
+# include <sys/prctl.h> // for PR_* definitions
+# include <sys/ptrace.h> // for PTRACE_* definitions
+# include <sys/types.h> // for pid_t
+# include <sys/uio.h> // for iovec
+# if (defined(__aarch64__) || SANITIZER_RISCV64) && !SANITIZER_ANDROID
 // GLIBC 2.20+ sys/user does not include asm/ptrace.h
 #if SANITIZER_OHOS
 // Do not include asm/sigcontext.h on behalf of asm/ptrace.h
@@ -41,7 +44,7 @@
 # include <asm/ptrace.h>
 #endif
 #include <sys/user.h> // for user_regs_struct
-# if (SANITIZER_ANDROID || SANITIZER_OHOS) && SANITIZER_MIPS
+# if ((SANITIZER_ANDROID || SANITIZER_OHOS) && SANITIZER_MIPS) || SANITIZER_LOONGARCH
 # include <asm/reg.h> // for mips SP register in sys/user.h
 # endif
 # include <sys/wait.h> // for signal-related stuff
@@ -518,29 +521,40 @@ typedef struct user regs_struct;
 # define REG_SP regs[EF_REG29]
 # endif
-#elif defined(__aarch64__)
+# elif defined(__loongarch__)
+typedef struct user_regs_struct regs_struct;
+static constexpr uptr kExtraRegs[] = {0};
+# define ARCH_IOVEC_FOR_GETREGSET
+
+# if SANITIZER_ANDROID
+# define REG_SP regs[3]
+# elif SANITIZER_LOONGARCH
+# define REG_SP gpr[3]
+# endif
+
+# elif defined(__aarch64__)
 typedef struct user_pt_regs regs_struct;
-#define REG_SP sp
+# define REG_SP sp
 static constexpr uptr kExtraRegs[] = {0};
-#define ARCH_IOVEC_FOR_GETREGSET
+# define ARCH_IOVEC_FOR_GETREGSET
-#elif SANITIZER_RISCV64
+# elif SANITIZER_RISCV64
 typedef struct user_regs_struct regs_struct;
 // sys/ucontext.h already defines REG_SP as 2. Undefine it first.
-#undef REG_SP
-#define REG_SP sp
+# undef REG_SP
+# define REG_SP sp
 static constexpr uptr kExtraRegs[] = {0};
-#define ARCH_IOVEC_FOR_GETREGSET
+# define ARCH_IOVEC_FOR_GETREGSET
-#elif defined(__s390__)
+# elif defined(__s390__)
 typedef _user_regs_struct regs_struct;
-#define REG_SP gprs[15]
+# define REG_SP gprs[15]
 static constexpr uptr kExtraRegs[] = {0};
-#define ARCH_IOVEC_FOR_GETREGSET
+# define ARCH_IOVEC_FOR_GETREGSET
-#else
-#error "Unsupported architecture"
-#endif // SANITIZER_ANDROID && defined(__arm__)
+# else
+# error "Unsupported architecture"
+# endif // SANITIZER_ANDROID && defined(__arm__)
 tid_t SuspendedThreadsListLinux::GetThreadID(uptr index) const {
 CHECK_LT(index, thread_ids_.size());
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp index b13e2dc9e332..958966e7af03 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp
@@ -99,14 +99,14 @@ void DTLS_Destroy() {
 // "Dynamic thread vector pointers point 0x8000 past the start of each
 // TLS block." (sysdeps/<arch>/dl-tls.h)
 static const uptr kDtvOffset = 0x8000;
-#elif defined(__riscv)
+# elif defined(__riscv)
 // This is glibc's TLS_DTV_OFFSET:
 // "Dynamic thread vector pointers point 0x800 past the start of each
 // TLS block."
(sysdeps/riscv/dl-tls.h) static const uptr kDtvOffset = 0x800; -#else +# else static const uptr kDtvOffset = 0; -#endif +# endif DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res, uptr static_tls_begin, uptr static_tls_end) { diff --git a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt index 8b1d2df63ea1..8c8382aa4cd8 100644 --- a/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt +++ b/compiler-rt/lib/sanitizer_common/tests/CMakeLists.txt @@ -3,7 +3,7 @@ include(CompilerRTCompile) clang_compiler_add_cxx_check() # FIXME: use SANITIZER_COMMON_SUPPORTED_ARCH here -filter_available_targets(SANITIZER_UNITTEST_SUPPORTED_ARCH x86_64 i386 mips64 mips64el riscv64 sparcv9 sparc) +filter_available_targets(SANITIZER_UNITTEST_SUPPORTED_ARCH x86_64 i386 mips64 mips64el riscv64 sparcv9 sparc loongarch64) if(APPLE) darwin_filter_host_archs(SANITIZER_UNITTEST_SUPPORTED_ARCH SANITIZER_UNITTEST_SUPPORTED_ARCH) endif() diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp index ad78782f9856..fbc50682e571 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_allocator_test.cpp @@ -162,9 +162,11 @@ static const u64 kAddressSpaceSize = 1ULL << 39; static const u64 kAddressSpaceSize = 1ULL << 53; #elif defined(__s390__) static const u64 kAddressSpaceSize = 1ULL << 31; -#else +# elif defined(__loongarch64) +static const u64 kAddressSpaceSize = 1ULL << 40; +# else static const u64 kAddressSpaceSize = 1ULL << 32; -#endif +# endif static const uptr kRegionSizeLog = FIRST_32_SECOND_64(20, 24); diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cpp index 91ec2f9e229a..cbaefe1c476f 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_ring_buffer_test.cpp @@ -10,7 +10,9 @@ // //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_ring_buffer.h" + #include "gtest/gtest.h" +#include "sanitizer_common/sanitizer_common.h" namespace __sanitizer { @@ -84,9 +86,10 @@ CompactRingBuffer *AllocCompactRingBuffer(size_t count) { TEST(CompactRingBuffer, int64) { const size_t page_sizes[] = {1, 2, 4, 128}; + size_t page_size = GetPageSizeCached(); for (size_t pages : page_sizes) { - size_t count = 4096 * pages / sizeof(int64_t); + size_t count = page_size * pages / sizeof(int64_t); auto R = AllocCompactRingBuffer(count); int64_t top = count * 3 + 13; for (int64_t i = 0; i < top; ++i) R->push(i); diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cpp index e8d590a503a9..a9dd0669ccb9 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stacktrace_test.cpp @@ -44,7 +44,7 @@ class FastUnwindTest : public ::testing::Test { uhwptr fake_bottom; BufferedStackTrace trace; -#if defined(__riscv) +#if defined(__loongarch__) || defined(__riscv) const uptr kFpOffset = 4; const uptr kBpOffset = 2; #else diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt index af3c9a2de500..cf03c784faa1 100644 --- 
a/compiler-rt/lib/tsan/rtl/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt
@@ -133,6 +133,7 @@ if(APPLE)
 add_asm_sources(TSAN_ASM_SOURCES
 tsan_rtl_amd64.S
 tsan_rtl_aarch64.S
+ tsan_rtl_loongarch64.S
 )
 set(TSAN_LINK_LIBS ${SANITIZER_COMMON_LINK_LIBS})
@@ -223,6 +224,8 @@ else()
 add_asm_sources(TSAN_ASM_SOURCES
 tsan_rtl_mips64.S
 )
+ elseif(arch MATCHES "loongarch64")
+ add_asm_sources(TSAN_ASM_SOURCES tsan_rtl_loongarch64.S)
 elseif(arch MATCHES "s390x")
 add_asm_sources(TSAN_ASM_SOURCES
 tsan_rtl_s390x.S
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 6b0e255f6db3..ec9ccb223890 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -72,8 +72,8 @@ struct ucontext_t {
 #endif
 #if defined(__x86_64__) || defined(__mips__) || SANITIZER_PPC64V1 || \
- defined(__s390x__)
-#define PTHREAD_ABI_BASE "GLIBC_2.3.2"
+ defined(__s390x__) || defined(__loongarch__)
+# define PTHREAD_ABI_BASE "GLIBC_2.3.2"
 #elif defined(__aarch64__) || SANITIZER_PPC64V2
 #define PTHREAD_ABI_BASE "GLIBC_2.17"
 #endif
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h index 7c13c7335136..f3edd602e92b 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h
@@ -115,6 +115,43 @@ struct MappingMips64_40 {
 static const uptr kVdsoBeg = 0xfffff00000ull;
 };
+/*
+ * TODO: same as mips64; needs to change in the future
+C/C++ on linux/loongarch64 (40-bit VMA)
+0000 0000 00 - 0100 0000 00: - (4 GB)
+0100 0000 00 - 0200 0000 00: main binary (4 GB)
+0200 0000 00 - 1200 0000 00: - (64 GB)
+1200 0000 00 - 2200 0000 00: shadow (64 GB)
+2200 0000 00 - 4000 0000 00: - (120 GB)
+4000 0000 00 - 5000 0000 00: metainfo (memory blocks and sync objects) (64 GB)
+5000 0000 00 - aa00 0000 00: - (360 GB)
+aa00 0000 00 - ab00 0000 00: main binary (PIE) (4 GB)
+ab00 0000 00 - b000 0000 00: - (20 GB)
+b000 0000 00 - b200 0000 00: traces (8 GB)
+b200 0000 00 - fe00 0000 00: - (304 GB)
+fe00 0000 00 - ff00 0000 00: heap (4 GB)
+ff00 0000 00 - ff80 0000 00: - (2 GB)
+ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB)
+*/
+struct MappingLoongArch64_40 {
+ static const uptr kMetaShadowBeg = 0x4000000000ull;
+ static const uptr kMetaShadowEnd = 0x5000000000ull;
+ static const uptr kShadowBeg = 0x1200000000ull;
+ static const uptr kShadowEnd = 0x2200000000ull;
+ static const uptr kHeapMemBeg = 0xfe00000000ull;
+ static const uptr kHeapMemEnd = 0xff00000000ull;
+ static const uptr kLoAppMemBeg = 0x0100000000ull;
+ static const uptr kLoAppMemEnd = 0x0200000000ull;
+ static const uptr kMidAppMemBeg = 0xaa00000000ull;
+ static const uptr kMidAppMemEnd = 0xab00000000ull;
+ static const uptr kHiAppMemBeg = 0xff80000000ull;
+ static const uptr kHiAppMemEnd = 0xffffffffffull;
+ static const uptr kShadowMsk = 0xf800000000ull;
+ static const uptr kShadowXor = 0x0800000000ull;
+ static const uptr kShadowAdd = 0x0000000000ull;
+ static const uptr kVdsoBeg = 0xfffff00000ull;
+};
+
 /*
 C/C++ on Darwin/iOS/ARM64 (36-bit VMA, 64 GB VM)
 0000 0000 00 - 0100 0000 00: - (4 GB)
@@ -610,6 +646,8 @@ ALWAYS_INLINE auto SelectMapping(Arg arg) {
 }
 # elif defined(__mips64)
 return Func::template Apply<MappingMips64_40>(arg);
+# elif defined(__loongarch64)
+ return Func::template Apply<MappingLoongArch64_40>(arg);
 # elif defined(__s390x__)
 return Func::template Apply<MappingS390x>(arg);
 # else
@@ -623,6 +661,7 @@ template <typename Func> void ForEachMapping() {
 Func::template Apply();
 Func::template Apply();
+ Func::template Apply<MappingLoongArch64_40>();
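// A quick plausibility check of the MappingLoongArch64_40 constants above
// (a sketch; assumes the current scheme of four 4-byte shadow cells per
// 8-byte app granule, i.e. two shadow bytes per app byte):
//   app ranges: lo 4 GB + mid/PIE 4 GB + heap 4 GB + hi <2 GB  ~= 14 GB
//   shadow needed ~= 2 * 14 GB = 28 GB
//   shadow window  = 0x2200000000 - 0x1200000000 = 64 GB  -> fits
//   meta window    = 0x5000000000 - 0x4000000000 = 64 GB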
Func::template Apply(); Func::template Apply(); Func::template Apply(); diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp index 807f6be2eee3..48ad47270e8a 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -384,13 +384,15 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { return mangled_sp ^ xor_key; #elif defined(__mips__) return mangled_sp; -#elif defined(__s390x__) +# elif defined(__loongarch__) + return mangled_sp; +# elif defined(__s390x__) // tcbhead_t.stack_guard uptr xor_key = ((uptr *)__builtin_thread_pointer())[5]; return mangled_sp ^ xor_key; -#else - #error "Unknown platform" -#endif +# else +# error "Unknown platform" +# endif } #if SANITIZER_NETBSD @@ -414,6 +416,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { # define LONG_JMP_SP_ENV_SLOT 1 # elif defined(__s390x__) # define LONG_JMP_SP_ENV_SLOT 9 +# elif defined(__loongarch64) +# define LONG_JMP_SP_ENV_SLOT 1 # else # define LONG_JMP_SP_ENV_SLOT 6 # endif diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index f3702a52f6c5..e38c578abe67 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -528,7 +528,7 @@ static void StartBackgroundThread() { // OHOS_LOCAL end } -#ifndef __mips__ +# if !(defined(__mips__) || defined(__loongarch__)) static void StopBackgroundThread() { // OHOS_LOCAL begin if (!flags()->disable_background_thread) { @@ -768,7 +768,7 @@ void MaybeSpawnBackgroundThread() { // On MIPS, TSan initialization is run before // __pthread_initialize_minimal_internal() is finished, so we can not spawn // new threads. -#if !SANITIZER_GO && !defined(__mips__) +#if !SANITIZER_GO && !(defined(__mips__) || defined(__loongarch__)) static atomic_uint32_t bg_thread = {}; if (atomic_load(&bg_thread, memory_order_relaxed) == 0 && atomic_exchange(&bg_thread, 1, memory_order_relaxed) == 0) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index e1e121e2ee07..bd79d7870d02 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -56,7 +56,8 @@ namespace __tsan { #if !SANITIZER_GO struct MapUnmapCallback; -#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) +# if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) || \ + defined(__loongarch64) struct AP32 { static const uptr kSpaceBeg = 0; diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_loongarch64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_loongarch64.S new file mode 100644 index 000000000000..8da65e890fd2 --- /dev/null +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_loongarch64.S @@ -0,0 +1,149 @@ +#include "sanitizer_common/sanitizer_asm.h" + +.section .text + +.hidden __tsan_setjmp +.comm _ZN14__interception11real_setjmpE,8,8 +.globl setjmp +.type setjmp, @function +setjmp: + + // Save env parameters + addi.d $r3,$r3,-24 + st.d $r1,$r3,16 + + // Save jmp_buf + st.d $r4,$r3,0 + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi.d $r4,$r3,24 + + // call tsan interceptor + bl __tsan_setjmp + + // Restore jmp_buf + ld.d $r4,$r3,0 + + // Load libc setjmp to r20 + la $r20,_ZN14__interception11real_setjmpE + + // Restore env parameters + ld.d $r1,$r3,16 + addi.d $r3,$r3,24 + + # tail jump to libc setjmp + ld.d $r20,$r20,0 + jr $r20 + +.size setjmp, .-setjmp + +.hidden __tsan_setjmp +.globl _setjmp +.comm _ZN14__interception12real__setjmpE,8,8 +.type 
_setjmp, @function +_setjmp: + + // Save env parameters + addi.d $r3,$r3,-24 + st.d $r1,$r3,16 + + // Save jmp_buf + st.d $r4,$r3,0 + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi.d $r4,$r3,24 + + // call tsan interceptor + bl __tsan_setjmp + + // Restore jmp_buf + ld.d $r4,$r3,0 + + // Load libc _setjmp to r20 + la $r20,_ZN14__interception12real__setjmpE + + // Restore env parameters + ld.d $r1,$r3,16 + addi.d $r3,$r3,24 + + // tail jump to libc _setjmp + ld.d $r20,$r20,0 + jr $r20 + +.size _setjmp, .-_setjmp + +.hidden __tsan_setjmp +.globl sigsetjmp +.comm _ZN14__interception14real_sigsetjmpE,8,8 +.type sigsetjmp, @function +sigsetjmp: + + // Save env parameters + addi.d $r3,$r3,-32 + st.d $r1,$r3,24 + + // Save jmp_buf and savesig + st.d $r4,$r3,0 + st.d $r5,$r3,8 + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi.d $r4,$r3,32 + + // call tsan interceptor + bl __tsan_setjmp + + // Restore jmp_buf and savesig + ld.d $r4,$r3,0 + ld.d $r5,$r3,8 + + // Load libc sigsetjmp to r20 + la $r20,_ZN14__interception14real_sigsetjmpE + + // Restore env parameters + ld.d $r1,$r3,24 + addi.d $r3,$r3,32 + + // tail jump to libc sigsetjmp + ld.d $r20,$r20,0 + jr $r20 + +.size sigsetjmp, .-sigsetjmp + +.hidden __tsan_setjmp +.comm _ZN14__interception16real___sigsetjmpE,8,8 +.globl __sigsetjmp +.type __sigsetjmp, @function +__sigsetjmp: + + // Save env parameters + addi.d $r3,$r3,-32 + st.d $r1,$r3,24 + + // Save jmp_buf and savesig + st.d $r4,$r3,0 + st.d $r5,$r3,8 + + // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` + addi.d $r4,$r3,32 + + // call tsan interceptor + bl __tsan_setjmp + + // Restore jmp_buf and savesig + ld.d $r4,$r3,0 + ld.d $r5,$r3,8 + + // Load libc __sigsetjmp in r20 + la $r20,_ZN14__interception16real___sigsetjmpE + + // Restore env parameters + ld.d $r1,$r3,24 + addi.d $r3,$r3,32 + + // tail jump to libc __sigsetjmp + ld.d $r20,$r20,0 + jr $r20 + +.size __sigsetjmp, .-__sigsetjmp + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/xray/CMakeLists.txt b/compiler-rt/lib/xray/CMakeLists.txt index 731de2cde74f..3e4a3ef02052 100644 --- a/compiler-rt/lib/xray/CMakeLists.txt +++ b/compiler-rt/lib/xray/CMakeLists.txt @@ -47,6 +47,11 @@ set(aarch64_SOURCES xray_trampoline_AArch64.S ) +set(loongarch64_SOURCES + xray_loongarch.cpp + xray_trampoline_loongarch.S + ) + set(mips_SOURCES xray_mips.cpp xray_trampoline_mips.S @@ -117,6 +122,7 @@ set(XRAY_ALL_SOURCE_FILES ${arm_SOURCES} ${armhf_SOURCES} ${hexagon_SOURCES} + ${loongarch64_SOURCES} ${mips_SOURCES} ${mipsel_SOURCES} ${mips64_SOURCES} diff --git a/compiler-rt/lib/xray/tests/CMakeLists.txt b/compiler-rt/lib/xray/tests/CMakeLists.txt index 2db21d43ffb5..4ca9ca171143 100644 --- a/compiler-rt/lib/xray/tests/CMakeLists.txt +++ b/compiler-rt/lib/xray/tests/CMakeLists.txt @@ -66,6 +66,7 @@ if (NOT APPLE) ${LLVM_TESTINGSUPPORT_LDFLAGS} XRAY_UNITTEST_LINK_FLAGS) append_list_if(COMPILER_RT_HAS_LLVMTESTINGSUPPORT ${LLVM_TESTINGSUPPORT_LIBLIST} XRAY_UNITTEST_LINK_FLAGS) + list(APPEND XRAY_UNITTEST_LINK_FLAGS -lLLVMXRay -lLLVMSupport -lLLVMDemangle -lLLVMTestingSupport) else() # We add the library directories one at a time in our CFLAGS. 
foreach (DIR ${LLVM_LIBRARY_DIR})
diff --git a/compiler-rt/lib/xray/xray_interface.cpp b/compiler-rt/lib/xray/xray_interface.cpp index 73e67618c9d5..206f53308d39 100644
--- a/compiler-rt/lib/xray/xray_interface.cpp
+++ b/compiler-rt/lib/xray/xray_interface.cpp
@@ -50,6 +50,8 @@ static const int16_t cSledLength = 28;
 static const int16_t cSledLength = 48;
 #elif SANITIZER_MIPS64
 static const int16_t cSledLength = 64;
+#elif SANITIZER_LOONGARCH64
+static const int16_t cSledLength = 48;
 #elif defined(__powerpc64__)
 static const int16_t cSledLength = 8;
 #elif defined(__hexagon__)
diff --git a/compiler-rt/lib/xray/xray_loongarch.cpp b/compiler-rt/lib/xray/xray_loongarch.cpp new file mode 100644 index 000000000000..379526b5aabf
--- /dev/null
+++ b/compiler-rt/lib/xray/xray_loongarch.cpp
@@ -0,0 +1,172 @@
+//===-- xray_loongarch.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of loongarch-specific routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+ PO_ADDID = 0x02c00000, // addi.d rd, rj, imm
+ PO_SD = 0x29c00000, // st.d rd, base, offset
+ PO_LU12IW = 0x14000000, // lu12i.w rd, imm
+ PO_ORI = 0x03800000, // ori rd, rs, imm
+ PO_LU32ID = 0x16000000, // lu32i.d rd, imm
+ PO_LU52ID = 0x03000000, // lu52i.d rd, rj, imm
+ PO_JIRL = 0x4c000000, // jirl rd, rj, 0
+ PO_LD = 0x28c00000, // ld.d rd, base, offset
+ PO_B48 = 0x50003000, // b #48
+};
+
+enum RegNum : uint32_t {
+ RN_T0 = 0xC,
+ RN_T1 = 0xD,
+ RN_RA = 0x1,
+ RN_SP = 0x3,
+};
+
+// addi.d lu52i.d ori ld.d st.d
+inline static uint32_t
+encodeInstruction_i12(uint32_t Opcode, uint32_t Rd, uint32_t Rj,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Opcode | Rj << 5 | Rd | Imm << 10);
+}
+
+// lu12i.w lu32i.d
+inline static uint32_t
+encodeInstruction_si20(uint32_t Opcode, uint32_t Rd,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Opcode | Rd | Imm << 5);
+}
+
+// jirl
+inline static uint32_t
+encodeInstruction_si16(uint32_t Opcode, uint32_t Rd, uint32_t Rj,
+ uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+ return (Opcode | Rj << 5 | Rd | Imm << 10);
+}
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+ // When |Enable| == true,
+ // We replace the following compile-time stub (sled):
+ //
+ // xray_sled_n:
+ // B .tmpN
+ // 11 NOPs (44 bytes)
+ // .tmpN
+ //
+ // With the following runtime patch:
+ // xray_sled_n (64-bit):
+ // addi.d sp,sp, -16 ;create stack frame
+ // st.d ra, sp, 8 ;save return address
+ // lu12i.w t0,%%abs_hi20(__xray_FunctionEntry/Exit)
+ // ori t0,t0,%%abs_lo12(__xray_FunctionEntry/Exit)
+ // lu32i.d t0,%%abs64_lo20(__xray_FunctionEntry/Exit)
+ // lu52i.d t0,t0,%%abs64_hi12(__xray_FunctionEntry/Exit)
+ // lu12i.w t1,%%abs_hi20(function_id)
+ // ori t1,t1,%%abs_lo12(function_id) ;pass function id
+ // jirl ra, t0, 0 ;call Tracing hook
+ // ld.d ra, sp, 8 ;restore return address
+ // addi.d sp, sp, 16 ;delete stack frame
+ //
+ // Replacement of the first 4-byte instruction should be the last and atomic
+ // operation, so that the user code which reaches the sled concurrently
+ // either jumps over the whole sled, or executes the whole sled when the
+ // latter is ready.
+ //
+ // When |Enable|==false, we set back the first instruction in the sled to be
+ // B #48
+
+ uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
+ if (Enable) {
+ uint32_t LoTracingHookAddr = reinterpret_cast<uint64_t>(TracingHook) & 0xfff;
+ uint32_t HiTracingHookAddr =
+ (reinterpret_cast<uint64_t>(TracingHook) >> 12) & 0xfffff;
+ uint32_t HigherTracingHookAddr =
+ (reinterpret_cast<uint64_t>(TracingHook) >> 32) & 0xfffff;
+ uint32_t HighestTracingHookAddr =
+ (reinterpret_cast<uint64_t>(TracingHook) >> 52) & 0xfff;
+ uint32_t LoFunctionID = FuncId & 0xfff;
+ uint32_t HiFunctionID = (FuncId >> 12) & 0xfffff;
+ Address[1] = encodeInstruction_i12(PatchOpcodes::PO_SD, RegNum::RN_RA,
+ RegNum::RN_SP, 0x8);
+ Address[2] = encodeInstruction_si20(PatchOpcodes::PO_LU12IW, RegNum::RN_T0,
+ HiTracingHookAddr);
+ Address[3] = encodeInstruction_i12(PatchOpcodes::PO_ORI, RegNum::RN_T0,
+ RegNum::RN_T0, LoTracingHookAddr);
+ Address[4] = encodeInstruction_si20(PatchOpcodes::PO_LU32ID, RegNum::RN_T0,
+ HigherTracingHookAddr);
+ Address[5] = encodeInstruction_i12(PatchOpcodes::PO_LU52ID, RegNum::RN_T0,
+ RegNum::RN_T0, HighestTracingHookAddr);
+ Address[6] = encodeInstruction_si20(PatchOpcodes::PO_LU12IW, RegNum::RN_T1,
+ HiFunctionID);
+ Address[7] = encodeInstruction_i12(PatchOpcodes::PO_ORI, RegNum::RN_T1,
+ RegNum::RN_T1, LoFunctionID);
+ Address[8] = encodeInstruction_si16(PatchOpcodes::PO_JIRL, RegNum::RN_RA,
+ RegNum::RN_T0, 0);
+ Address[9] = encodeInstruction_i12(PatchOpcodes::PO_LD, RegNum::RN_RA,
+ RegNum::RN_SP, 0x8);
+ Address[10] = encodeInstruction_i12(PatchOpcodes::PO_ADDID, RegNum::RN_SP,
+ RegNum::RN_SP, 0x10);
+ uint32_t CreateStackSpace = encodeInstruction_i12(
+ PatchOpcodes::PO_ADDID, RegNum::RN_SP, RegNum::RN_SP, 0xff0);
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace,
+ std::memory_order_release);
+ } else {
+ std::atomic_store_explicit(
+ reinterpret_cast<std::atomic<uint32_t> *>(Address),
+ uint32_t(PatchOpcodes::PO_B48), std::memory_order_release);
+ }
+ return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled,
+ void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, Trampoline);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: In the future we'd need to distinguish between non-tail exits and
+ // tail exits for better information preservation.
+ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in loongarch?
+ return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+ // FIXME: Implement in loongarch?
+ return false; +} +} // namespace __xray + +extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT { + // FIXME: this will have to be implemented in the trampoline assembly file +} diff --git a/compiler-rt/lib/xray/xray_trampoline_loongarch.S b/compiler-rt/lib/xray/xray_trampoline_loongarch.S new file mode 100644 index 000000000000..5c93cdfa8212 --- /dev/null +++ b/compiler-rt/lib/xray/xray_trampoline_loongarch.S @@ -0,0 +1,126 @@ +#include "../sanitizer_common/sanitizer_asm.h" + +//===-- xray_trampoline_loongarch.s -----------------------------*- ASM -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This implements the loongarch-specific assembler for the trampolines. +// +//===----------------------------------------------------------------------===// + + .text + .file "xray_trampoline_loongarch.S" + .globl __xray_FunctionEntry + .p2align 2 + .type __xray_FunctionEntry,@function +__xray_FunctionEntry: + .cfi_startproc + // Save argument registers before doing any actual work. + .cfi_def_cfa_offset 136 + addi.d $sp, $sp, -136 + st.d $ra, $sp, 128 + .cfi_offset 1, -8 + st.d $a7, $sp, 120 + st.d $a6, $sp, 112 + st.d $a5, $sp, 104 + st.d $a4, $sp, 96 + st.d $a3, $sp, 88 + st.d $a2, $sp, 80 + st.d $a1, $sp, 72 + st.d $a0, $sp, 64 + fst.d $f7, $sp, 56 + fst.d $f6, $sp, 48 + fst.d $f5, $sp, 40 + fst.d $f4, $sp, 32 + fst.d $f3, $sp, 24 + fst.d $f2, $sp, 16 + fst.d $f1, $sp, 8 + fst.d $f0, $sp, 0 + + + la.got $t2, _ZN6__xray19XRayPatchedFunctionE + ld.d $t2, $t2, 0 + + beqz $t2, FunctionEntry_restore + + // a1=0 means that we are tracing an entry event + move $a1, $zero + // Function ID is in t1 (the first parameter). + move $a0, $t1 + jirl $ra, $t2, 0 + +FunctionEntry_restore: + // Restore argument registers + fld.d $f0, $sp, 0 + fld.d $f1, $sp, 8 + fld.d $f2, $sp, 16 + fld.d $f3, $sp, 24 + fld.d $f4, $sp, 32 + fld.d $f5, $sp, 40 + fld.d $f6, $sp, 48 + fld.d $f7, $sp, 56 + ld.d $a0, $sp, 64 + ld.d $a1, $sp, 72 + ld.d $a2, $sp, 80 + ld.d $a3, $sp, 88 + ld.d $a4, $sp, 96 + ld.d $a5, $sp, 104 + ld.d $a6, $sp, 112 + ld.d $a7, $sp, 120 + ld.d $ra, $sp, 128 + addi.d $sp, $sp, 136 + jr $ra +FunctionEntry_end: + .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry + .cfi_endproc + + .text + .globl __xray_FunctionExit + .p2align 2 + .type __xray_FunctionExit,@function +__xray_FunctionExit: + .cfi_startproc + // Save return registers before doing any actual work. + .cfi_def_cfa_offset 48 + addi.d $sp, $sp, -48 + st.d $ra, $sp, 40 + .cfi_offset 1, -8 + st.d $fp, $sp, 32 + st.d $a1, $sp, 24 + st.d $a0, $sp, 16 + fst.d $f1, $sp, 8 + fst.d $f0, $sp, 0 + + la.got $t2, _ZN6__xray19XRayPatchedFunctionE + ld.d $t2, $t2, 0 + + beqz $t2, FunctionExit_restore + + // a1=1 means that we are tracing an exit event + ori $a1, $zero, 1 + // Function ID is in t1 (the first parameter). 
+ move $a0, $t1 + jirl $ra, $t2, 0 + +FunctionExit_restore: + // Restore return registers + fld.d $f0, $sp, 0 + fld.d $f1, $sp, 8 + ld.d $a1, $sp, 24 + ld.d $a0, $sp, 16 + ld.d $fp, $sp, 32 + ld.d $ra, $sp, 40 + addi.d $sp, $sp, 48 + jr $ra + +FunctionExit_end: + .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit + .cfi_endproc + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/xray/xray_tsc.h b/compiler-rt/lib/xray/xray_tsc.h index 58347dca5f7a..c685e6cbe346 100644 --- a/compiler-rt/lib/xray/xray_tsc.h +++ b/compiler-rt/lib/xray/xray_tsc.h @@ -43,7 +43,7 @@ inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { #elif defined(__powerpc64__) #include "xray_powerpc64.inc" #elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ - defined(__hexagon__) + defined(__hexagon__) || defined(__loongarch__) // Emulated TSC. // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does // not have a constant frequency like TSC on x86(_64), it may go faster diff --git a/compiler-rt/test/asan/CMakeLists.txt b/compiler-rt/test/asan/CMakeLists.txt index 91f4bc675d79..536f2f4f1cc5 100644 --- a/compiler-rt/test/asan/CMakeLists.txt +++ b/compiler-rt/test/asan/CMakeLists.txt @@ -14,7 +14,7 @@ if(OS_NAME MATCHES "Windows" AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND endif() macro(get_bits_for_arch arch bits) - if (${arch} MATCHES "x86_64|powerpc64|powerpc64le|aarch64|arm64|mips64|mips64el|s390x|sparcv9|riscv64") + if (${arch} MATCHES "x86_64|powerpc64|powerpc64le|aarch64|arm64|mips64|mips64el|s390x|sparcv9|riscv64|loongarch64") set(${bits} 64) elseif (${arch} MATCHES "i386|arm|mips|mipsel|sparc") set(${bits} 32) diff --git a/compiler-rt/test/asan/TestCases/Linux/leak_check_segv.cpp b/compiler-rt/test/asan/TestCases/Linux/leak_check_segv.cpp index 2a2010f7ab0f..d4cdc73e1db0 100644 --- a/compiler-rt/test/asan/TestCases/Linux/leak_check_segv.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/leak_check_segv.cpp @@ -1,18 +1,23 @@ // Test that SIGSEGV during leak checking does not crash the process. // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s // REQUIRES: leak-detection -#include +#include #include +#include #include -#include +#include char data[10 * 1024 * 1024]; int main() { void *p = malloc(10 * 1024 * 1024); + long pagesz_minus_one = sysconf(_SC_PAGESIZE) - 1; // surprise-surprise! 
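// The rewrite below swaps the hard-coded 4095/~4095 for arithmetic on the
// real page size, since LoongArch kernels commonly run with 16 KB pages.
// Rounding an address up to the next page boundary with pagesz_minus_one
// (a sketch, assuming a power-of-two page size from sysconf):
//   p = 0x12345678, page size 16384  ->  pagesz_minus_one = 0x3fff
//   (0x12345678 + 0x3fff) & ~0x3fff == 0x12348000   // 16 KB aligned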
- mprotect((void*)(((unsigned long)p + 4095) & ~4095), 16 * 1024, PROT_NONE); - mprotect((void*)(((unsigned long)data + 4095) & ~4095), 16 * 1024, PROT_NONE); + mprotect((void *)(((unsigned long)p + pagesz_minus_one) & ~pagesz_minus_one), + 16 * 1024, PROT_NONE); + mprotect( + (void *)(((unsigned long)data + pagesz_minus_one) & ~pagesz_minus_one), + 16 * 1024, PROT_NONE); __lsan_do_leak_check(); fprintf(stderr, "DONE\n"); } diff --git a/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp b/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp index 21743cfdd9af..b7455e693ab9 100644 --- a/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/ptrace.cpp @@ -1,6 +1,7 @@ // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316 // XFAIL: android // XFAIL: mips +// XFAIL: loongarch // // RUN: %clangxx_asan -O0 %s -o %t && %run %t // RUN: %clangxx_asan -DPOSITIVE -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s diff --git a/compiler-rt/test/asan/TestCases/Linux/segv_read_write.c b/compiler-rt/test/asan/TestCases/Linux/segv_read_write.c index b1379703ed86..d6c4fb18819b 100644 --- a/compiler-rt/test/asan/TestCases/Linux/segv_read_write.c +++ b/compiler-rt/test/asan/TestCases/Linux/segv_read_write.c @@ -1,7 +1,7 @@ // RUN: %clangxx_asan -std=c++11 -O0 %s -o %t // RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=READ // RUN: not %run %t write 2>&1 | FileCheck %s --check-prefix=WRITE -// UNSUPPORTED: powerpc64,mips,s390 +// UNSUPPORTED: powerpc64,mips,s390,loongarch #include diff --git a/compiler-rt/test/asan/lit.cfg.py b/compiler-rt/test/asan/lit.cfg.py index 3fb929e3b16a..38c004207995 100644 --- a/compiler-rt/test/asan/lit.cfg.py +++ b/compiler-rt/test/asan/lit.cfg.py @@ -202,7 +202,7 @@ if not config.arm_thumb: # Turn on leak detection on 64-bit Linux. 
leak_detection_android = config.android and 'android-thread-properties-api' in config.available_features and (config.target_arch in ['x86_64', 'i386', 'i686', 'aarch64']) -leak_detection_linux = (config.host_os == 'Linux') and (not config.android) and (config.target_arch in ['x86_64', 'i386', 'riscv64']) +leak_detection_linux = (config.host_os == 'Linux') and (not config.android) and (config.target_arch in ['x86_64', 'i386', 'riscv64', 'loongarch64']) leak_detection_mac = (config.host_os == 'Darwin') and (config.apple_platform == 'osx') leak_detection_netbsd = (config.host_os == 'NetBSD') and (config.target_arch in ['x86_64', 'i386']) if leak_detection_android or leak_detection_linux or leak_detection_mac or leak_detection_netbsd: diff --git a/compiler-rt/test/builtins/Unit/addtf3_test.c b/compiler-rt/test/builtins/Unit/addtf3_test.c index 82a802022318..fe2e2c80f655 100644 --- a/compiler-rt/test/builtins/Unit/addtf3_test.c +++ b/compiler-rt/test/builtins/Unit/addtf3_test.c @@ -66,7 +66,8 @@ int main() return 1; #if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) || \ - defined(i386) || defined(__x86_64__) + defined(i386) || defined(__x86_64__) || (defined(__loongarch__) && \ + __loongarch_frlen != 0) // Rounding mode tests on supported architectures const long double m = 1234.0L, n = 0.01L; diff --git a/compiler-rt/test/builtins/Unit/subtf3_test.c b/compiler-rt/test/builtins/Unit/subtf3_test.c index c06a0baba3c3..377ae95a9a7d 100644 --- a/compiler-rt/test/builtins/Unit/subtf3_test.c +++ b/compiler-rt/test/builtins/Unit/subtf3_test.c @@ -59,7 +59,8 @@ int main() return 1; #if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) || \ - defined(i386) || defined(__x86_64__) + defined(i386) || defined(__x86_64__) || (defined(__loongarch__) && \ + __loongarch_frlen != 0) // Rounding mode tests on supported architectures const long double m = 1234.02L, n = 0.01L; diff --git a/compiler-rt/test/fuzzer/disable-leaks.test b/compiler-rt/test/fuzzer/disable-leaks.test index 1c65884e3213..fc762d29295f 100644 --- a/compiler-rt/test/fuzzer/disable-leaks.test +++ b/compiler-rt/test/fuzzer/disable-leaks.test @@ -1,5 +1,5 @@ REQUIRES: lsan -UNSUPPORTED: aarch64 +UNSUPPORTED: aarch64, loongarch RUN: %cpp_compiler %S/AccumulateAllocationsTest.cpp -o %t-AccumulateAllocationsTest RUN: %run %t-AccumulateAllocationsTest -detect_leaks=1 -runs=100000 2>&1 | FileCheck %s --check-prefix=ACCUMULATE_ALLOCS ACCUMULATE_ALLOCS: INFO: libFuzzer disabled leak detection after every mutation diff --git a/compiler-rt/test/fuzzer/exit_on_src_pos.test b/compiler-rt/test/fuzzer/exit_on_src_pos.test index 881d31443985..9aca2330f830 100644 --- a/compiler-rt/test/fuzzer/exit_on_src_pos.test +++ b/compiler-rt/test/fuzzer/exit_on_src_pos.test @@ -6,6 +6,7 @@ # Test does not complete on Armv7 Thumb build bot UNSUPPORTED: thumb +UNSUPPORTED: loongarch RUN: %cpp_compiler -O0 %S/SimpleTest.cpp -o %t-SimpleTest.exe -mllvm -use-unknown-locations=Disable RUN: %cpp_compiler -O0 %S/ShrinkControlFlowTest.cpp -o %t-ShrinkControlFlowTest.exe diff --git a/compiler-rt/test/fuzzer/fork-ubsan.test b/compiler-rt/test/fuzzer/fork-ubsan.test index 16be90de292a..09af1f9542fe 100644 --- a/compiler-rt/test/fuzzer/fork-ubsan.test +++ b/compiler-rt/test/fuzzer/fork-ubsan.test @@ -1,4 +1,4 @@ -# UNSUPPORTED: darwin, freebsd, aarch64 +# UNSUPPORTED: darwin, freebsd, aarch64, loongarch # Tests how the fork mode works together with ubsan. 
RUN: %cpp_compiler %S/IntegerOverflowTest.cpp -o %t-IntegerOverflowTest -fsanitize=signed-integer-overflow -fno-sanitize-recover=signed-integer-overflow RUN: not %run %t-IntegerOverflowTest -fork=1 -ignore_crashes=1 -runs=10000 2>&1 | FileCheck %s --check-prefix=UBSAN_FORK diff --git a/compiler-rt/test/lsan/TestCases/strace_test.cpp b/compiler-rt/test/lsan/TestCases/strace_test.cpp index 18c809ca3296..2b4835dcf9c2 100644 --- a/compiler-rt/test/lsan/TestCases/strace_test.cpp +++ b/compiler-rt/test/lsan/TestCases/strace_test.cpp @@ -5,6 +5,7 @@ // FIXME: This technically works in practice but cannot be tested because the // fatal-error caused adb to failed. Could not be captured to stderr to lit-check. // XFAIL: android +// UNSUPPORTED: loongarch #include #include diff --git a/compiler-rt/test/lsan/TestCases/swapcontext.cpp b/compiler-rt/test/lsan/TestCases/swapcontext.cpp index f78867cc0695..5920e6ec543b 100644 --- a/compiler-rt/test/lsan/TestCases/swapcontext.cpp +++ b/compiler-rt/test/lsan/TestCases/swapcontext.cpp @@ -5,7 +5,7 @@ // RUN: %env_lsan_opts= %run %t 2>&1 // RUN: %env_lsan_opts= not %run %t foo 2>&1 | FileCheck %s // Missing 'getcontext' and 'makecontext' on Android. -// UNSUPPORTED: arm,aarch64,powerpc64,android +// UNSUPPORTED: arm,aarch64,powerpc64,android,loongarch #include "sanitizer_common/sanitizer_ucontext.h" #include diff --git a/compiler-rt/test/lsan/TestCases/use_registers.cpp b/compiler-rt/test/lsan/TestCases/use_registers.cpp index d7852d4e0613..224360edb02b 100644 --- a/compiler-rt/test/lsan/TestCases/use_registers.cpp +++ b/compiler-rt/test/lsan/TestCases/use_registers.cpp @@ -31,6 +31,10 @@ extern "C" void *registers_thread_func(void *arg) { asm("move $16, %0" : : "r"(p)); +#elif defined(__loongarch__) + asm("move $r23, %0" + : + : "r"(p)); #elif defined(__arm__) asm("mov r5, %0" : diff --git a/compiler-rt/test/lsan/lit.common.cfg.py b/compiler-rt/test/lsan/lit.common.cfg.py index 88c557549b38..a0421d74331a 100644 --- a/compiler-rt/test/lsan/lit.common.cfg.py +++ b/compiler-rt/test/lsan/lit.common.cfg.py @@ -76,7 +76,7 @@ config.substitutions.append( ("%clangxx_lsan ", build_invocation(clang_lsan_cxxf # LeakSanitizer tests are currently supported on # Android{aarch64, x86, x86_64}, x86-64 Linux, PowerPC64 Linux, arm Linux, mips64 Linux, s390x Linux and x86_64 Darwin. supported_android = config.android and config.target_arch in ['x86_64', 'i386', 'aarch64'] and 'android-thread-properties-api' in config.available_features -supported_linux = (not config.android) and config.host_os == 'Linux' and config.host_arch in ['aarch64', 'x86_64', 'ppc64', 'ppc64le', 'mips64', 'riscv64', 'arm', 'armhf', 'armv7l', 's390x'] +supported_linux = (not config.android) and config.host_os == 'Linux' and config.host_arch in ['aarch64', 'x86_64', 'ppc64', 'ppc64le', 'mips64', 'riscv64', 'arm', 'armhf', 'armv7l', 's390x', 'loongarch64'] supported_darwin = config.host_os == 'Darwin' and config.target_arch in ['x86_64'] supported_netbsd = config.host_os == 'NetBSD' and config.target_arch in ['x86_64', 'i386'] if not (supported_android or supported_linux or supported_darwin or supported_netbsd): diff --git a/compiler-rt/test/msan/allocator_mapping.cpp b/compiler-rt/test/msan/allocator_mapping.cpp index 533128f9a0f4..6bc4db38e152 100644 --- a/compiler-rt/test/msan/allocator_mapping.cpp +++ b/compiler-rt/test/msan/allocator_mapping.cpp @@ -8,7 +8,7 @@ // This test only makes sense for the 64-bit allocator. The 32-bit allocator // does not have a fixed mapping.
Exclude platforms that use the 32-bit // allocator. -// UNSUPPORTED: target-is-mips64,target-is-mips64el,aarch64 +// UNSUPPORTED: target-is-mips64,target-is-mips64el,aarch64,loongarch #include #include diff --git a/compiler-rt/test/msan/lit.cfg.py b/compiler-rt/test/msan/lit.cfg.py index 8ec1614be130..2565fca233c1 100644 --- a/compiler-rt/test/msan/lit.cfg.py +++ b/compiler-rt/test/msan/lit.cfg.py @@ -44,7 +44,7 @@ if config.host_os not in ['Linux', 'NetBSD', 'FreeBSD']: # For mips64, mips64el we have forced store_context_size to 1 because these # archs use slow unwinder which is not async signal safe. Therefore we only # check the first frame since store_context size is 1. -if config.host_arch in ['mips64', 'mips64el']: +if config.host_arch in ['mips64', 'mips64el', 'loongarch64']: config.substitutions.append( ('CHECK-%short-stack', 'CHECK-SHORT-STACK')) else: config.substitutions.append( ('CHECK-%short-stack', 'CHECK-FULL-STACK')) diff --git a/compiler-rt/test/msan/mmap.cpp b/compiler-rt/test/msan/mmap.cpp index 2e7e883c863a..d493f9c3be67 100644 --- a/compiler-rt/test/msan/mmap.cpp +++ b/compiler-rt/test/msan/mmap.cpp @@ -18,7 +18,7 @@ bool AddrIsApp(void *p) { return (addr >= 0x000000000000ULL && addr < 0x010000000000ULL) || (addr >= 0x510000000000ULL && addr < 0x600000000000ULL) || (addr >= 0x700000000000ULL && addr < 0x800000000000ULL); -#elif defined(__mips64) +#elif defined(__mips64) || defined(__loongarch64) return (addr >= 0x0000000000ULL && addr <= 0x0200000000ULL) || (addr >= 0xa200000000ULL && addr <= 0xc000000000ULL) || addr >= 0xe200000000ULL; @@ -60,7 +60,7 @@ bool AddrIsApp(void *p) { int main() { // Large enough to quickly exhaust the entire address space. -#if defined(__mips64) || defined(__aarch64__) +#if defined(__mips64) || defined(__aarch64__) || defined(__loongarch64) const size_t kMapSize = 0x100000000ULL; #else const size_t kMapSize = 0x1000000000ULL; diff --git a/compiler-rt/test/msan/mmap_below_shadow.cpp b/compiler-rt/test/msan/mmap_below_shadow.cpp index 46d948c9a5eb..97c416b3004c 100644 --- a/compiler-rt/test/msan/mmap_below_shadow.cpp +++ b/compiler-rt/test/msan/mmap_below_shadow.cpp @@ -21,7 +21,7 @@ int main(void) { #elif defined(__x86_64__) uintptr_t hint = 0x4f0000000000ULL; const uintptr_t app_start = 0x600000000000ULL; -#elif defined (__mips64) +#elif defined(__mips64) || defined(__loongarch64) uintptr_t hint = 0x4f00000000ULL; const uintptr_t app_start = 0x6000000000ULL; #elif defined (__powerpc64__) diff --git a/compiler-rt/test/msan/param_tls_limit.cpp b/compiler-rt/test/msan/param_tls_limit.cpp index d6ff48c1bc58..4cd0d4147b69 100644 --- a/compiler-rt/test/msan/param_tls_limit.cpp +++ b/compiler-rt/test/msan/param_tls_limit.cpp @@ -5,9 +5,9 @@ // RUN: %clangxx_msan -fsanitize-memory-track-origins -O0 %s -o %t && %run %t // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -O0 %s -o %t && %run %t // -// AArch64 and LoongArch fail with: // void f801(S<801>): Assertion `__msan_test_shadow(&s, sizeof(s)) == -1' failed -// XFAIL: aarch64 +// XFAIL: aarch64 || loongarch // When passing huge structs by value, SystemZ uses pointers, therefore this // test in its present form is unfortunately not applicable. // ABI says: "A struct or union of any other size .
Replace such an diff --git a/compiler-rt/test/msan/poison_in_signal.cpp b/compiler-rt/test/msan/poison_in_signal.cpp index 5e833e516db9..5eaf0598d9e2 100644 --- a/compiler-rt/test/msan/poison_in_signal.cpp +++ b/compiler-rt/test/msan/poison_in_signal.cpp @@ -1,8 +1,9 @@ // Stress test of poisoning from signal handler. - // RUN: %clangxx_msan -std=c++11 -O2 %s -o %t && %run %t // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -std=c++11 -O2 %s -o %t && %run %t // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -fsanitize-memory-use-after-dtor -std=c++11 -O2 %s -o %t && %run %t +// +// UNSUPPORTED: loongarch #include #include diff --git a/compiler-rt/test/msan/strlen_of_shadow.cpp b/compiler-rt/test/msan/strlen_of_shadow.cpp index 5e7c89c7b59f..8dbfe91f8fff 100644 --- a/compiler-rt/test/msan/strlen_of_shadow.cpp +++ b/compiler-rt/test/msan/strlen_of_shadow.cpp @@ -15,7 +15,7 @@ const char *mem_to_shadow(const char *p) { #if defined(__x86_64__) return (char *)((uintptr_t)p ^ 0x500000000000ULL); -#elif defined (__mips64) +#elif defined(__mips64) || defined(__loongarch64) return (char *)((uintptr_t)p ^ 0x8000000000ULL); #elif defined(__powerpc64__) #define LINEARIZE_MEM(mem) \ diff --git a/compiler-rt/test/msan/vararg.cpp b/compiler-rt/test/msan/vararg.cpp index e1a7b1266165..ef4e40c36540 100644 --- a/compiler-rt/test/msan/vararg.cpp +++ b/compiler-rt/test/msan/vararg.cpp @@ -16,10 +16,11 @@ // Check that shadow and origin are passed through va_args. -// Copying origins on AArch64, MIPS and PowerPC isn't supported yet. +// Copying origins on AArch64, LoongArch, MIPS and PowerPC isn't supported yet. // XFAIL: aarch64 // XFAIL: mips // XFAIL: powerpc64 +// XFAIL: loongarch #include #include diff --git a/compiler-rt/test/msan/vector_select.cpp b/compiler-rt/test/msan/vector_select.cpp index 0cf116497801..8173b864ee59 100644 --- a/compiler-rt/test/msan/vector_select.cpp +++ b/compiler-rt/test/msan/vector_select.cpp @@ -11,7 +11,7 @@ __m128d select(bool b, __m128d c, __m128d d) { return b ? 
c : d; } -#elif defined (__mips64) || defined (__powerpc64__) +#elif defined(__mips64) || defined(__powerpc64__) || defined(__loongarch64) typedef double __w64d __attribute__ ((vector_size(16))); __w64d select(bool b, __w64d c, __w64d d) diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/ptrace.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/ptrace.cpp index a2d32439e500..dec96b401998 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/ptrace.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/ptrace.cpp @@ -21,6 +21,9 @@ // GLIBC 2.20+ sys/user does not include asm/ptrace.h #include #endif +#ifdef __loongarch64 +# include +#endif int main(void) { pid_t pid; @@ -120,6 +123,24 @@ int main(void) { printf("%x\n", fpregs.fpc); #endif // (__s390__) +#if (__loongarch64) + struct iovec regset_io; + + struct user_pt_regs regs; + regset_io.iov_base = ®s; + regset_io.iov_len = sizeof(regs); + res = ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRSTATUS, (void *)®set_io); + assert(!res); + + struct user_fp_state fpregs; + regset_io.iov_base = &fpregs; + regset_io.iov_len = sizeof(fpregs); + res = ptrace(PTRACE_GETREGSET, pid, (void *)NT_FPREGSET, (void *)®set_io); + assert(!res); + if (fpregs.fcsr) + printf("%lx\n", fpregs.fcsr); +#endif // (__loongarch64) + siginfo_t siginfo; res = ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo); assert(!res); diff --git a/compiler-rt/test/sanitizer_common/print_address.h b/compiler-rt/test/sanitizer_common/print_address.h index 49b960ebbb2a..ab57bee128d3 100644 --- a/compiler-rt/test/sanitizer_common/print_address.h +++ b/compiler-rt/test/sanitizer_common/print_address.h @@ -8,7 +8,7 @@ void print_address(const char *str, int n, ...) { while (n--) { void *p = va_arg(ap, void *); #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ - defined(__s390x__) || (defined(__riscv) && __riscv_xlen == 64) + defined(__s390x__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__loongarch__) // On FreeBSD, the %p conversion specifier works as 0x%x and thus does not // match to the format used in the diagnotic message. fprintf(stderr, "0x%012lx ", (unsigned long) p); diff --git a/compiler-rt/test/tsan/map32bit.cpp b/compiler-rt/test/tsan/map32bit.cpp index 0f8236292be7..4077ef78e8a7 100644 --- a/compiler-rt/test/tsan/map32bit.cpp +++ b/compiler-rt/test/tsan/map32bit.cpp @@ -12,6 +12,7 @@ // XFAIL: aarch64 // XFAIL: powerpc64 // XFAIL: s390x +// XFAIL: loongarch // MAP_32BIT doesn't exist on OS X and NetBSD. 
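The ptrace.cpp hunk above follows the Linux regset convention: LoongArch has no legacy PTRACE_GETREGS/PTRACE_GETFPREGS requests, so both register sets are read through PTRACE_GETREGSET, with a struct iovec describing the destination buffer and NT_PRSTATUS or NT_FPREGSET selecting the set. A minimal standalone sketch of that pattern, with Gprs standing in for the kernel's user_pt_regs layout (error handling elided):

#include <cstdint>
#include <elf.h>        // NT_PRSTATUS
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>    // struct iovec

// Stand-in for struct user_pt_regs from <asm/ptrace.h>.
struct Gprs {
  uint64_t regs[32];
  uint64_t orig_a0, csr_era, csr_badv;
  uint64_t reserved[10];
};

long readGprs(pid_t pid, Gprs &out) {
  struct iovec io;
  io.iov_base = &out;
  io.iov_len = sizeof(out); // the kernel shrinks this to the bytes written
  return ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRSTATUS, &io);
}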
// UNSUPPORTED: darwin,netbsd diff --git a/compiler-rt/test/tsan/mmap_large.cpp b/compiler-rt/test/tsan/mmap_large.cpp index 1d4c73252832..e6d1b11821b0 100644 --- a/compiler-rt/test/tsan/mmap_large.cpp +++ b/compiler-rt/test/tsan/mmap_large.cpp @@ -19,6 +19,8 @@ int main() { const size_t kLog2Size = 39; #elif defined(__mips64) || defined(__aarch64__) const size_t kLog2Size = 32; +#elif defined(__loongarch64) + const size_t kLog2Size = 32; #elif defined(__powerpc64__) const size_t kLog2Size = 39; #elif defined(__s390x__) diff --git a/compiler-rt/test/tsan/test.h b/compiler-rt/test/tsan/test.h index efd66cbf91a4..2afd217d81ed 100644 --- a/compiler-rt/test/tsan/test.h +++ b/compiler-rt/test/tsan/test.h @@ -73,6 +73,8 @@ unsigned long long monotonic_clock_ns() { const int kPCInc = 1; #elif defined(__sparc__) || defined(__mips__) const int kPCInc = 8; +#elif defined(__loongarch__) +const int kPCInc = 4; #else const int kPCInc = 4; #endif diff --git a/compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp b/compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp index 757f81a8babb..a531622864ee 100644 --- a/compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp @@ -6,7 +6,7 @@ // RUN: XRAY_OPTIONS="patch_premain=true verbosity=1 xray_logfile_base=arg0-arg1-logging-" %run %t // // TODO: Support these in ARM and PPC -// XFAIL: arm || aarch64 || mips +// XFAIL: arm || aarch64 || mips || loongarch // UNSUPPORTED: powerpc64le #include "xray/xray_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp b/compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp index 48544c392390..0d7c9a21e99c 100644 --- a/compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp @@ -11,7 +11,7 @@ // RUN: rm -f arg1-logger-* // // At the time of writing, the ARM trampolines weren't written yet. -// XFAIL: arm || aarch64 || mips +// XFAIL: arm || aarch64 || mips || loongarch // See the mailing list discussion of r296998. 
// UNSUPPORTED: powerpc64le diff --git a/compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp b/compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp index d8dd62247bff..26129a830742 100644 --- a/compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp @@ -4,7 +4,7 @@ // RUN: rm -f log-args-this-* // RUN: XRAY_OPTIONS="patch_premain=true verbosity=1 xray_logfile_base=log-args-this-" %run %t // -// XFAIL: FreeBSD || arm || aarch64 || mips +// XFAIL: FreeBSD || arm || aarch64 || mips || loongarch // UNSUPPORTED: powerpc64le #include "xray/xray_interface.h" #include diff --git a/compiler-rt/test/xray/TestCases/Posix/argv0-log-file-name.cpp b/compiler-rt/test/xray/TestCases/Posix/argv0-log-file-name.cpp index bd48693d37cd..f364151ebf45 100644 --- a/compiler-rt/test/xray/TestCases/Posix/argv0-log-file-name.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/argv0-log-file-name.cpp @@ -7,6 +7,7 @@ // RUN: rm xray-log.argv0-log-file-name.* xray.log.file.name // UNSUPPORTED: target-is-mips64,target-is-mips64el +// UNSUPPORTED: target-is-loongarch64 #include #include diff --git a/compiler-rt/test/xray/TestCases/Posix/coverage-sample.cpp b/compiler-rt/test/xray/TestCases/Posix/coverage-sample.cpp index 1903ad6fb23f..70dfd46422b7 100644 --- a/compiler-rt/test/xray/TestCases/Posix/coverage-sample.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/coverage-sample.cpp @@ -6,6 +6,7 @@ // RUN: XRAY_OPTIONS="patch_premain=false" %run %t | FileCheck %s // UNSUPPORTED: target-is-mips64,target-is-mips64el +// UNSUPPORTED: target-is-loongarch64 #include "xray/xray_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/fixedsize-logging.cpp b/compiler-rt/test/xray/TestCases/Posix/fixedsize-logging.cpp index e4462c8b45ef..d9cdad5ba222 100644 --- a/compiler-rt/test/xray/TestCases/Posix/fixedsize-logging.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fixedsize-logging.cpp @@ -8,6 +8,7 @@ // RUN: rm fixedsize-logging-* // UNSUPPORTED: target-is-mips64,target-is-mips64el +// UNSUPPORTED: target-is-loongarch64 #include diff --git a/compiler-rt/test/xray/TestCases/Posix/func-id-utils.cpp b/compiler-rt/test/xray/TestCases/Posix/func-id-utils.cpp index ab0c5b01cc11..b2631f1bc5a9 100644 --- a/compiler-rt/test/xray/TestCases/Posix/func-id-utils.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/func-id-utils.cpp @@ -7,6 +7,7 @@ // RUN: XRAY_OPTIONS="patch_premain=false" %run %t // UNSUPPORTED: target-is-mips64,target-is-mips64el +// UNSUPPORTED: target-is-loongarch64 #include "xray/xray_interface.h" #include diff --git a/compiler-rt/test/xray/TestCases/Posix/logging-modes.cpp b/compiler-rt/test/xray/TestCases/Posix/logging-modes.cpp index f839ba5e5f50..2302995c0faa 100644 --- a/compiler-rt/test/xray/TestCases/Posix/logging-modes.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/logging-modes.cpp @@ -5,6 +5,7 @@ // RUN: %run %t | FileCheck %s // // UNSUPPORTED: target-is-mips64,target-is-mips64el +// UNSUPPORTED: target-is-loongarch64 #include "xray/xray_interface.h" #include "xray/xray_log_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/optional-inmemory-log.cpp b/compiler-rt/test/xray/TestCases/Posix/optional-inmemory-log.cpp index a32c8746640f..59d4c53c20fe 100644 --- a/compiler-rt/test/xray/TestCases/Posix/optional-inmemory-log.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/optional-inmemory-log.cpp @@ -9,6 +9,7 @@ // RUN: rm -f optional-inmemory-log.xray-* // UNSUPPORTED: 
target-is-mips64,target-is-mips64el +// UNSUPPORTED: target-is-loongarch64 #include diff --git a/compiler-rt/test/xray/TestCases/Posix/patching-unpatching.cpp b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching.cpp index 978a897ac544..267c431f8570 100644 --- a/compiler-rt/test/xray/TestCases/Posix/patching-unpatching.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/patching-unpatching.cpp @@ -7,6 +7,7 @@ // RUN: XRAY_OPTIONS="patch_premain=false" %run %t 2>&1 | FileCheck %s // UNSUPPORTED: target-is-mips64,target-is-mips64el +// UNSUPPORTED: target-is-loongarch64 #include "xray/xray_interface.h" diff --git a/compiler-rt/test/xray/TestCases/Posix/pic_test.cpp b/compiler-rt/test/xray/TestCases/Posix/pic_test.cpp index fbf6bdcd4a7d..161567b64496 100644 --- a/compiler-rt/test/xray/TestCases/Posix/pic_test.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/pic_test.cpp @@ -10,6 +10,7 @@ // RUN: rm -f pic-test-logging-* // UNSUPPORTED: target-is-mips64,target-is-mips64el +// UNSUPPORTED: target-is-loongarch64 #include diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp index 3f3ed5baf1f4..39fa5a6bdd74 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp @@ -82,6 +82,8 @@ bool test() { # endif # elif defined(__sparc64__) half_size(); +# elif defined(__loongarch__) + full_size(); # elif defined(_WIN32) full_size(); # else diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index e3f0c6dfecdc..0b22fdbe3be6 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -5099,7 +5099,7 @@ template <> struct FloatData { #if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ - defined(__wasm__) || defined(__riscv) + defined(__wasm__) || defined(__riscv) || defined(__loongarch__) static const size_t mangled_size = 32; #elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) static const size_t mangled_size = 16; diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h index 5e9de90f649f..f69fe89e9a26 100644 --- a/libunwind/include/__libunwind_config.h +++ b/libunwind/include/__libunwind_config.h @@ -30,6 +30,7 @@ #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV 64 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_VE 143 #define _LIBUNWIND_HIGHEST_DWARF_REGISTER_S390X 83 +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH 64 #if defined(_LIBUNWIND_IS_NATIVE_ONLY) # if defined(__linux__) @@ -166,6 +167,16 @@ # define _LIBUNWIND_CONTEXT_SIZE 34 # define _LIBUNWIND_CURSOR_SIZE 46 # define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_S390X +#elif defined(__loongarch__) +#define _LIBUNWIND_TARGET_LOONGARCH 1 +#if __loongarch_grlen == 64 +#define _LIBUNWIND_CONTEXT_SIZE 65 +#define _LIBUNWIND_CURSOR_SIZE 77 +#else +#error "Unsupported LoongArch ABI" +#endif +#define _LIBUNWIND_HIGHEST_DWARF_REGISTER \ + _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH # else # error "Unsupported architecture." 
# endif @@ -185,6 +196,7 @@ # define _LIBUNWIND_TARGET_RISCV 1 # define _LIBUNWIND_TARGET_VE 1 # define _LIBUNWIND_TARGET_S390X 1 +#define _LIBUNWIND_TARGET_LOONGARCH 1 # define _LIBUNWIND_CONTEXT_SIZE 167 # define _LIBUNWIND_CURSOR_SIZE 179 # define _LIBUNWIND_HIGHEST_DWARF_REGISTER 287 diff --git a/libunwind/include/libunwind.h b/libunwind/include/libunwind.h index b01348d8c929..3393221dced6 100644 --- a/libunwind/include/libunwind.h +++ b/libunwind/include/libunwind.h @@ -1231,4 +1231,72 @@ enum { // 68-83 Vector Registers %v16-%v31 }; +// LoongArch registers. +enum { + UNW_LOONGARCH_R0 = 0, + UNW_LOONGARCH_R1 = 1, + UNW_LOONGARCH_R2 = 2, + UNW_LOONGARCH_R3 = 3, + UNW_LOONGARCH_R4 = 4, + UNW_LOONGARCH_R5 = 5, + UNW_LOONGARCH_R6 = 6, + UNW_LOONGARCH_R7 = 7, + UNW_LOONGARCH_R8 = 8, + UNW_LOONGARCH_R9 = 9, + UNW_LOONGARCH_R10 = 10, + UNW_LOONGARCH_R11 = 11, + UNW_LOONGARCH_R12 = 12, + UNW_LOONGARCH_R13 = 13, + UNW_LOONGARCH_R14 = 14, + UNW_LOONGARCH_R15 = 15, + UNW_LOONGARCH_R16 = 16, + UNW_LOONGARCH_R17 = 17, + UNW_LOONGARCH_R18 = 18, + UNW_LOONGARCH_R19 = 19, + UNW_LOONGARCH_R20 = 20, + UNW_LOONGARCH_R21 = 21, + UNW_LOONGARCH_R22 = 22, + UNW_LOONGARCH_R23 = 23, + UNW_LOONGARCH_R24 = 24, + UNW_LOONGARCH_R25 = 25, + UNW_LOONGARCH_R26 = 26, + UNW_LOONGARCH_R27 = 27, + UNW_LOONGARCH_R28 = 28, + UNW_LOONGARCH_R29 = 29, + UNW_LOONGARCH_R30 = 30, + UNW_LOONGARCH_R31 = 31, + UNW_LOONGARCH_F0 = 32, + UNW_LOONGARCH_F1 = 33, + UNW_LOONGARCH_F2 = 34, + UNW_LOONGARCH_F3 = 35, + UNW_LOONGARCH_F4 = 36, + UNW_LOONGARCH_F5 = 37, + UNW_LOONGARCH_F6 = 38, + UNW_LOONGARCH_F7 = 39, + UNW_LOONGARCH_F8 = 40, + UNW_LOONGARCH_F9 = 41, + UNW_LOONGARCH_F10 = 42, + UNW_LOONGARCH_F11 = 43, + UNW_LOONGARCH_F12 = 44, + UNW_LOONGARCH_F13 = 45, + UNW_LOONGARCH_F14 = 46, + UNW_LOONGARCH_F15 = 47, + UNW_LOONGARCH_F16 = 48, + UNW_LOONGARCH_F17 = 49, + UNW_LOONGARCH_F18 = 50, + UNW_LOONGARCH_F19 = 51, + UNW_LOONGARCH_F20 = 52, + UNW_LOONGARCH_F21 = 53, + UNW_LOONGARCH_F22 = 54, + UNW_LOONGARCH_F23 = 55, + UNW_LOONGARCH_F24 = 56, + UNW_LOONGARCH_F25 = 57, + UNW_LOONGARCH_F26 = 58, + UNW_LOONGARCH_F27 = 59, + UNW_LOONGARCH_F28 = 60, + UNW_LOONGARCH_F29 = 61, + UNW_LOONGARCH_F30 = 62, + UNW_LOONGARCH_F31 = 63, +}; + #endif diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index ac9d8f908602..85eeb11dfe30 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -40,6 +40,7 @@ enum { REGISTERS_RISCV, REGISTERS_VE, REGISTERS_S390X, + REGISTERS_LOONGARCH, }; #if defined(_LIBUNWIND_TARGET_I386) @@ -5123,6 +5124,271 @@ inline const char *Registers_s390x::getRegisterName(int regNum) { } #endif // _LIBUNWIND_TARGET_S390X +#if defined(_LIBUNWIND_TARGET_LOONGARCH) +/// Registers_loongarch holds the register state of a thread in a 64-bit +/// LoongArch process. 
+class _LIBUNWIND_HIDDEN Registers_loongarch {public: + Registers_loongarch(); + Registers_loongarch(const void *registers); + + bool validRegister(int num) const; + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value); + bool validFloatRegister(int num) const; + unw_fpreg_t getFloatRegister(int num) const; + void setFloatRegister(int num, unw_fpreg_t value); + bool validVectorRegister(int num) const; + v128 getVectorRegister(int num) const; + void setVectorRegister(int num, v128 value); + static const char *getRegisterName(int num); + void jumpto(); + static constexpr int lastDwarfRegNum() { + return _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH; + } + static int getArch() { return REGISTERS_LOONGARCH; } + + uint64_t getSP() const { return _registers.__r[3]; } + void setSP(uint64_t value) { _registers.__r[3] = value; } + uint64_t getIP() const { return _registers.__pc; } + void setIP(uint64_t value) { _registers.__pc = value; } + +private: + struct loongarch_thread_state_t { + uint64_t __r[32]; + uint64_t __pc; + }; + + loongarch_thread_state_t _registers; +#if __loongarch_frlen == 64 + double _floats[32]; +#endif +}; + +inline Registers_loongarch::Registers_loongarch(const void *registers) { + static_assert((check_fit<Registers_loongarch, unw_context_t>::does_fit), + "loongarch registers do not fit into unw_context_t"); + memcpy(&_registers, registers, sizeof(_registers)); + static_assert(sizeof(_registers) == 0x108, + "expected float registers to be at offset 264"); +#if __loongarch_frlen == 64 + memcpy(_floats, static_cast<const uint8_t *>(registers) + sizeof(_registers), + sizeof(_floats)); +#endif +} + +inline Registers_loongarch::Registers_loongarch() { + memset(&_registers, 0, sizeof(_registers)); +#if __loongarch_frlen == 64 + memset(&_floats, 0, sizeof(_floats)); +#endif +} + +inline bool Registers_loongarch::validRegister(int regNum) const { + if (regNum == UNW_REG_IP || regNum == UNW_REG_SP) + return true; + if (regNum < 0 || regNum > UNW_LOONGARCH_F31) + return false; + return true; +} + +inline uint64_t Registers_loongarch::getRegister(int regNum) const { + if (regNum >= UNW_LOONGARCH_R0 && regNum <= UNW_LOONGARCH_R31) + return _registers.__r[regNum - UNW_LOONGARCH_R0]; + + if (regNum == UNW_REG_IP) + return _registers.__pc; + if (regNum == UNW_REG_SP) + return _registers.__r[3]; + _LIBUNWIND_ABORT("unsupported loongarch register"); +} + +inline void Registers_loongarch::setRegister(int regNum, uint64_t value) { + if (regNum >= UNW_LOONGARCH_R0 && regNum <= UNW_LOONGARCH_R31) + _registers.__r[regNum - UNW_LOONGARCH_R0] = value; + else if (regNum == UNW_REG_IP) + _registers.__pc = value; + else if (regNum == UNW_REG_SP) + _registers.__r[3] = value; + else + _LIBUNWIND_ABORT("unsupported loongarch register"); +} + +inline const char *Registers_loongarch::getRegisterName(int regNum) { + switch (regNum) { + case UNW_REG_IP: + return "$pc"; + case UNW_REG_SP: + return "$sp"; + case UNW_LOONGARCH_R0: + return "$r0"; + case UNW_LOONGARCH_R1: + return "$r1"; + case UNW_LOONGARCH_R2: + return "$r2"; + case UNW_LOONGARCH_R3: + return "$r3"; + case UNW_LOONGARCH_R4: + return "$r4"; + case UNW_LOONGARCH_R5: + return "$r5"; + case UNW_LOONGARCH_R6: + return "$r6"; + case UNW_LOONGARCH_R7: + return "$r7"; + case UNW_LOONGARCH_R8: + return "$r8"; + case UNW_LOONGARCH_R9: + return "$r9"; + case UNW_LOONGARCH_R10: + return "$r10"; + case UNW_LOONGARCH_R11: + return "$r11"; + case UNW_LOONGARCH_R12: + return "$r12"; + case UNW_LOONGARCH_R13: + return "$r13"; + case UNW_LOONGARCH_R14: + return "$r14"; + case
UNW_LOONGARCH_R15: + return "$r15"; + case UNW_LOONGARCH_R16: + return "$r16"; + case UNW_LOONGARCH_R17: + return "$r17"; + case UNW_LOONGARCH_R18: + return "$r18"; + case UNW_LOONGARCH_R19: + return "$r19"; + case UNW_LOONGARCH_R20: + return "$r20"; + case UNW_LOONGARCH_R21: + return "$r21"; + case UNW_LOONGARCH_R22: + return "$r22"; + case UNW_LOONGARCH_R23: + return "$r23"; + case UNW_LOONGARCH_R24: + return "$r24"; + case UNW_LOONGARCH_R25: + return "$r25"; + case UNW_LOONGARCH_R26: + return "$r26"; + case UNW_LOONGARCH_R27: + return "$r27"; + case UNW_LOONGARCH_R28: + return "$r28"; + case UNW_LOONGARCH_R29: + return "$r29"; + case UNW_LOONGARCH_R30: + return "$r30"; + case UNW_LOONGARCH_R31: + return "$r31"; + case UNW_LOONGARCH_F0: + return "$f0"; + case UNW_LOONGARCH_F1: + return "$f1"; + case UNW_LOONGARCH_F2: + return "$f2"; + case UNW_LOONGARCH_F3: + return "$f3"; + case UNW_LOONGARCH_F4: + return "$f4"; + case UNW_LOONGARCH_F5: + return "$f5"; + case UNW_LOONGARCH_F6: + return "$f6"; + case UNW_LOONGARCH_F7: + return "$f7"; + case UNW_LOONGARCH_F8: + return "$f8"; + case UNW_LOONGARCH_F9: + return "$f9"; + case UNW_LOONGARCH_F10: + return "$f10"; + case UNW_LOONGARCH_F11: + return "$f11"; + case UNW_LOONGARCH_F12: + return "$f12"; + case UNW_LOONGARCH_F13: + return "$f13"; + case UNW_LOONGARCH_F14: + return "$f14"; + case UNW_LOONGARCH_F15: + return "$f15"; + case UNW_LOONGARCH_F16: + return "$f16"; + case UNW_LOONGARCH_F17: + return "$f17"; + case UNW_LOONGARCH_F18: + return "$f18"; + case UNW_LOONGARCH_F19: + return "$f19"; + case UNW_LOONGARCH_F20: + return "$f20"; + case UNW_LOONGARCH_F21: + return "$f21"; + case UNW_LOONGARCH_F22: + return "$f22"; + case UNW_LOONGARCH_F23: + return "$f23"; + case UNW_LOONGARCH_F24: + return "$f24"; + case UNW_LOONGARCH_F25: + return "$f25"; + case UNW_LOONGARCH_F26: + return "$f26"; + case UNW_LOONGARCH_F27: + return "$f27"; + case UNW_LOONGARCH_F28: + return "$f28"; + case UNW_LOONGARCH_F29: + return "$f29"; + case UNW_LOONGARCH_F30: + return "$f30"; + case UNW_LOONGARCH_F31: + return "$f31"; + default: + return "unknown register"; + } +} + +inline bool Registers_loongarch::validFloatRegister(int regNum) const { + if (regNum < UNW_LOONGARCH_F0 || regNum > UNW_LOONGARCH_F31) + return false; + return true; +} + +inline unw_fpreg_t Registers_loongarch::getFloatRegister(int regNum) const { +#if __loongarch_frlen == 64 + assert(validFloatRegister(regNum)); + return _floats[regNum - UNW_LOONGARCH_F0]; +#else + _LIBUNWIND_ABORT("libunwind not built with float support"); +#endif +} + +inline void Registers_loongarch::setFloatRegister(int regNum, unw_fpreg_t value) { +#if __loongarch_frlen == 64 + assert(validFloatRegister(regNum)); + _floats[regNum - UNW_LOONGARCH_F0] = value; +#else + _LIBUNWIND_ABORT("libunwind not built with float support"); +#endif +} + +inline bool Registers_loongarch::validVectorRegister(int) const { + return false; +} + +inline v128 Registers_loongarch::getVectorRegister(int) const { + _LIBUNWIND_ABORT("loongarch vector support not implemented"); +} + +inline void Registers_loongarch::setVectorRegister(int, v128) { + _LIBUNWIND_ABORT("loongarch vector support not implemented"); +} +#endif //_LIBUNWIND_TARGET_LOONGARCH } // namespace libunwind diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index b8bd9bc59010..ee535fffb131 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -1066,6 +1066,10 @@ private: } #endif +#if defined(_LIBUNWIND_TARGET_LOONGARCH) + int 
stepWithCompactEncoding(Registers_loongarch &) { return UNW_EINVAL; } +#endif + #if defined(_LIBUNWIND_TARGET_SPARC) int stepWithCompactEncoding(Registers_sparc &) { return UNW_EINVAL; } #endif @@ -1142,6 +1146,12 @@ private: } #endif +#if defined(_LIBUNWIND_TARGET_LOONGARCH) + bool compactSaysUseDwarf(Registers_loongarch &, uint32_t *) const { + return true; + } +#endif + #if defined(_LIBUNWIND_TARGET_SPARC) bool compactSaysUseDwarf(Registers_sparc &, uint32_t *) const { return true; } #endif @@ -1226,6 +1236,12 @@ private: } #endif +#if defined(_LIBUNWIND_TARGET_LOONGARCH) + compact_unwind_encoding_t dwarfEncoding(Registers_loongarch &) const { + return 0; + } +#endif + #if defined(_LIBUNWIND_TARGET_SPARC) compact_unwind_encoding_t dwarfEncoding(Registers_sparc &) const { return 0; } #endif diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index 749db2357159..371ec46f5370 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -8,6 +8,12 @@ #include "assembly.h" +#define FROM_0_TO_15 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 +#define FROM_16_TO_31 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + +#define FROM_0_TO_31 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +#define FROM_32_TO_63 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 + #if defined(_AIX) .toc #else @@ -1111,38 +1117,9 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind21Registers_mips_newabi6jumptoEv) .set noreorder .set nomacro #ifdef __mips_hard_float - ldc1 $f0, (8 * 35)($4) - ldc1 $f1, (8 * 36)($4) - ldc1 $f2, (8 * 37)($4) - ldc1 $f3, (8 * 38)($4) - ldc1 $f4, (8 * 39)($4) - ldc1 $f5, (8 * 40)($4) - ldc1 $f6, (8 * 41)($4) - ldc1 $f7, (8 * 42)($4) - ldc1 $f8, (8 * 43)($4) - ldc1 $f9, (8 * 44)($4) - ldc1 $f10, (8 * 45)($4) - ldc1 $f11, (8 * 46)($4) - ldc1 $f12, (8 * 47)($4) - ldc1 $f13, (8 * 48)($4) - ldc1 $f14, (8 * 49)($4) - ldc1 $f15, (8 * 50)($4) - ldc1 $f16, (8 * 51)($4) - ldc1 $f17, (8 * 52)($4) - ldc1 $f18, (8 * 53)($4) - ldc1 $f19, (8 * 54)($4) - ldc1 $f20, (8 * 55)($4) - ldc1 $f21, (8 * 56)($4) - ldc1 $f22, (8 * 57)($4) - ldc1 $f23, (8 * 58)($4) - ldc1 $f24, (8 * 59)($4) - ldc1 $f25, (8 * 60)($4) - ldc1 $f26, (8 * 61)($4) - ldc1 $f27, (8 * 62)($4) - ldc1 $f28, (8 * 63)($4) - ldc1 $f29, (8 * 64)($4) - ldc1 $f30, (8 * 65)($4) - ldc1 $f31, (8 * 66)($4) + .irp i,FROM_0_TO_31 + ldc1 $f\i, (280+8*\i)($4) + .endr #endif // restore hi and lo ld $8, (8 * 33)($4) @@ -1154,32 +1131,9 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind21Registers_mips_newabi6jumptoEv) ld $2, (8 * 2)($4) ld $3, (8 * 3)($4) // skip a0 for now - ld $5, (8 * 5)($4) - ld $6, (8 * 6)($4) - ld $7, (8 * 7)($4) - ld $8, (8 * 8)($4) - ld $9, (8 * 9)($4) - ld $10, (8 * 10)($4) - ld $11, (8 * 11)($4) - ld $12, (8 * 12)($4) - ld $13, (8 * 13)($4) - ld $14, (8 * 14)($4) - ld $15, (8 * 15)($4) - ld $16, (8 * 16)($4) - ld $17, (8 * 17)($4) - ld $18, (8 * 18)($4) - ld $19, (8 * 19)($4) - ld $20, (8 * 20)($4) - ld $21, (8 * 21)($4) - ld $22, (8 * 22)($4) - ld $23, (8 * 23)($4) - ld $24, (8 * 24)($4) - ld $25, (8 * 25)($4) - ld $26, (8 * 26)($4) - ld $27, (8 * 27)($4) - ld $28, (8 * 28)($4) - ld $29, (8 * 29)($4) - ld $30, (8 * 30)($4) + .irp i,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + ld $\i, (8 * \i)($4) + .endr // load new pc into ra ld $31, (8 * 32)($4) // jump to ra, load a0 in the delay slot @@ -1267,72 +1221,20 @@ 
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_sparc6jumptoEv) .p2align 2 DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_riscv6jumptoEv) # if defined(__riscv_flen) - FLOAD f0, (RISCV_FOFFSET + RISCV_FSIZE * 0)(a0) - FLOAD f1, (RISCV_FOFFSET + RISCV_FSIZE * 1)(a0) - FLOAD f2, (RISCV_FOFFSET + RISCV_FSIZE * 2)(a0) - FLOAD f3, (RISCV_FOFFSET + RISCV_FSIZE * 3)(a0) - FLOAD f4, (RISCV_FOFFSET + RISCV_FSIZE * 4)(a0) - FLOAD f5, (RISCV_FOFFSET + RISCV_FSIZE * 5)(a0) - FLOAD f6, (RISCV_FOFFSET + RISCV_FSIZE * 6)(a0) - FLOAD f7, (RISCV_FOFFSET + RISCV_FSIZE * 7)(a0) - FLOAD f8, (RISCV_FOFFSET + RISCV_FSIZE * 8)(a0) - FLOAD f9, (RISCV_FOFFSET + RISCV_FSIZE * 9)(a0) - FLOAD f10, (RISCV_FOFFSET + RISCV_FSIZE * 10)(a0) - FLOAD f11, (RISCV_FOFFSET + RISCV_FSIZE * 11)(a0) - FLOAD f12, (RISCV_FOFFSET + RISCV_FSIZE * 12)(a0) - FLOAD f13, (RISCV_FOFFSET + RISCV_FSIZE * 13)(a0) - FLOAD f14, (RISCV_FOFFSET + RISCV_FSIZE * 14)(a0) - FLOAD f15, (RISCV_FOFFSET + RISCV_FSIZE * 15)(a0) - FLOAD f16, (RISCV_FOFFSET + RISCV_FSIZE * 16)(a0) - FLOAD f17, (RISCV_FOFFSET + RISCV_FSIZE * 17)(a0) - FLOAD f18, (RISCV_FOFFSET + RISCV_FSIZE * 18)(a0) - FLOAD f19, (RISCV_FOFFSET + RISCV_FSIZE * 19)(a0) - FLOAD f20, (RISCV_FOFFSET + RISCV_FSIZE * 20)(a0) - FLOAD f21, (RISCV_FOFFSET + RISCV_FSIZE * 21)(a0) - FLOAD f22, (RISCV_FOFFSET + RISCV_FSIZE * 22)(a0) - FLOAD f23, (RISCV_FOFFSET + RISCV_FSIZE * 23)(a0) - FLOAD f24, (RISCV_FOFFSET + RISCV_FSIZE * 24)(a0) - FLOAD f25, (RISCV_FOFFSET + RISCV_FSIZE * 25)(a0) - FLOAD f26, (RISCV_FOFFSET + RISCV_FSIZE * 26)(a0) - FLOAD f27, (RISCV_FOFFSET + RISCV_FSIZE * 27)(a0) - FLOAD f28, (RISCV_FOFFSET + RISCV_FSIZE * 28)(a0) - FLOAD f29, (RISCV_FOFFSET + RISCV_FSIZE * 29)(a0) - FLOAD f30, (RISCV_FOFFSET + RISCV_FSIZE * 30)(a0) - FLOAD f31, (RISCV_FOFFSET + RISCV_FSIZE * 31)(a0) + .irp i,FROM_0_TO_31 + FLOAD f\i, (RISCV_FOFFSET + RISCV_FSIZE * \i)(a0) + .endr # endif // x0 is zero ILOAD x1, (RISCV_ISIZE * 0)(a0) // restore pc into ra - ILOAD x2, (RISCV_ISIZE * 2)(a0) - ILOAD x3, (RISCV_ISIZE * 3)(a0) - ILOAD x4, (RISCV_ISIZE * 4)(a0) - ILOAD x5, (RISCV_ISIZE * 5)(a0) - ILOAD x6, (RISCV_ISIZE * 6)(a0) - ILOAD x7, (RISCV_ISIZE * 7)(a0) - ILOAD x8, (RISCV_ISIZE * 8)(a0) - ILOAD x9, (RISCV_ISIZE * 9)(a0) + .irp i,2,3,4,5,6,7,8,9 + ILOAD x\i, (RISCV_ISIZE * \i)(a0) + .endr // skip a0 for now - ILOAD x11, (RISCV_ISIZE * 11)(a0) - ILOAD x12, (RISCV_ISIZE * 12)(a0) - ILOAD x13, (RISCV_ISIZE * 13)(a0) - ILOAD x14, (RISCV_ISIZE * 14)(a0) - ILOAD x15, (RISCV_ISIZE * 15)(a0) - ILOAD x16, (RISCV_ISIZE * 16)(a0) - ILOAD x17, (RISCV_ISIZE * 17)(a0) - ILOAD x18, (RISCV_ISIZE * 18)(a0) - ILOAD x19, (RISCV_ISIZE * 19)(a0) - ILOAD x20, (RISCV_ISIZE * 20)(a0) - ILOAD x21, (RISCV_ISIZE * 21)(a0) - ILOAD x22, (RISCV_ISIZE * 22)(a0) - ILOAD x23, (RISCV_ISIZE * 23)(a0) - ILOAD x24, (RISCV_ISIZE * 24)(a0) - ILOAD x25, (RISCV_ISIZE * 25)(a0) - ILOAD x26, (RISCV_ISIZE * 26)(a0) - ILOAD x27, (RISCV_ISIZE * 27)(a0) - ILOAD x28, (RISCV_ISIZE * 28)(a0) - ILOAD x29, (RISCV_ISIZE * 29)(a0) - ILOAD x30, (RISCV_ISIZE * 30)(a0) - ILOAD x31, (RISCV_ISIZE * 31)(a0) + .irp i,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + ILOAD x\i, (RISCV_ISIZE * \i)(a0) + .endr ILOAD x10, (RISCV_ISIZE * 10)(a0) // restore a0 ret // jump to ra @@ -1351,22 +1253,9 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_s390x6jumptoEv) lg %r1, 8(%r2) // Restore FPRs - ld %f0, 144(%r2) - ld %f1, 152(%r2) - ld %f2, 160(%r2) - ld %f3, 168(%r2) - ld %f4, 176(%r2) - ld %f5, 184(%r2) - ld %f6, 192(%r2) - ld %f7, 200(%r2) - ld %f8, 
208(%r2) - ld %f9, 216(%r2) - ld %f10, 224(%r2) - ld %f11, 232(%r2) - ld %f12, 240(%r2) - ld %f13, 248(%r2) - ld %f14, 256(%r2) - ld %f15, 264(%r2) + .irp i,FROM_0_TO_15 + ld %f\i, (144+8*\i)(%r2) + .endr // Restore GPRs - skipping %r0 and %r1 lmg %r2, %r15, 32(%r2) @@ -1374,6 +1263,36 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_s390x6jumptoEv) // Return to PSWA (was loaded into %r1 above) br %r1 +#elif defined(__loongarch__) && __loongarch_grlen == 64 + +// +// void libunwind::Registers_loongarch::jumpto() +// +// On entry: +// thread_state pointer is in $a0($r4) +// + .p2align 2 +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind19Registers_loongarch6jumptoEv) +# if __loongarch_frlen == 64 + .irp i,FROM_0_TO_31 + fld.d $f\i, $a0, (8 * 33 + 8 * \i) + .endr +# endif + + // $r0 is zero + .irp i,1,2,3 + ld.d $r\i, $a0, (8 * \i) + .endr + // skip $a0 for now + .irp i,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + ld.d $r\i, $a0, (8 * \i) + .endr + + ld.d $ra, $a0, (8 * 32) // load new pc into $ra + ld.d $a0, $a0, (8 * 4) // restore $a0 last + + jr $ra + #endif #endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S index c5bfd128cdf6..f46b5463956d 100644 --- a/libunwind/src/UnwindRegistersSave.S +++ b/libunwind/src/UnwindRegistersSave.S @@ -8,6 +8,12 @@ #include "assembly.h" +#define FROM_0_TO_15 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 +#define FROM_16_TO_31 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + +#define FROM_0_TO_31 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +#define FROM_32_TO_63 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63 + #if defined(_AIX) .toc #else @@ -318,37 +324,9 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) .set noat .set noreorder .set nomacro - sd $1, (8 * 1)($4) - sd $2, (8 * 2)($4) - sd $3, (8 * 3)($4) - sd $4, (8 * 4)($4) - sd $5, (8 * 5)($4) - sd $6, (8 * 6)($4) - sd $7, (8 * 7)($4) - sd $8, (8 * 8)($4) - sd $9, (8 * 9)($4) - sd $10, (8 * 10)($4) - sd $11, (8 * 11)($4) - sd $12, (8 * 12)($4) - sd $13, (8 * 13)($4) - sd $14, (8 * 14)($4) - sd $15, (8 * 15)($4) - sd $16, (8 * 16)($4) - sd $17, (8 * 17)($4) - sd $18, (8 * 18)($4) - sd $19, (8 * 19)($4) - sd $20, (8 * 20)($4) - sd $21, (8 * 21)($4) - sd $22, (8 * 22)($4) - sd $23, (8 * 23)($4) - sd $24, (8 * 24)($4) - sd $25, (8 * 25)($4) - sd $26, (8 * 26)($4) - sd $27, (8 * 27)($4) - sd $28, (8 * 28)($4) - sd $29, (8 * 29)($4) - sd $30, (8 * 30)($4) - sd $31, (8 * 31)($4) + .irp i,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + sd $\i, (8 * \i)($4) + .endr # Store return address to pc sd $31, (8 * 32)($4) # hi and lo @@ -357,38 +335,9 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) mflo $8 sd $8, (8 * 34)($4) #ifdef __mips_hard_float - sdc1 $f0, (8 * 35)($4) - sdc1 $f1, (8 * 36)($4) - sdc1 $f2, (8 * 37)($4) - sdc1 $f3, (8 * 38)($4) - sdc1 $f4, (8 * 39)($4) - sdc1 $f5, (8 * 40)($4) - sdc1 $f6, (8 * 41)($4) - sdc1 $f7, (8 * 42)($4) - sdc1 $f8, (8 * 43)($4) - sdc1 $f9, (8 * 44)($4) - sdc1 $f10, (8 * 45)($4) - sdc1 $f11, (8 * 46)($4) - sdc1 $f12, (8 * 47)($4) - sdc1 $f13, (8 * 48)($4) - sdc1 $f14, (8 * 49)($4) - sdc1 $f15, (8 * 50)($4) - sdc1 $f16, (8 * 51)($4) - sdc1 $f17, (8 * 52)($4) - sdc1 $f18, (8 * 53)($4) - sdc1 $f19, (8 * 54)($4) - sdc1 $f20, (8 * 55)($4) - sdc1 $f21, (8 * 56)($4) - sdc1 $f22, (8 * 57)($4) - sdc1 $f23, (8 * 58)($4) - sdc1 $f24, (8 * 59)($4) - sdc1 $f25, (8 * 
60)($4) - sdc1 $f26, (8 * 61)($4) - sdc1 $f27, (8 * 62)($4) - sdc1 $f28, (8 * 63)($4) - sdc1 $f29, (8 * 64)($4) - sdc1 $f30, (8 * 65)($4) - sdc1 $f31, (8 * 66)($4) + .irp i,FROM_0_TO_31 + sdc1 $f\i, (280+8*\i)($4) + .endr #endif jr $31 # return UNW_ESUCCESS @@ -1184,71 +1133,14 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) # DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) ISTORE x1, (RISCV_ISIZE * 0)(a0) // store ra as pc - ISTORE x1, (RISCV_ISIZE * 1)(a0) - ISTORE x2, (RISCV_ISIZE * 2)(a0) - ISTORE x3, (RISCV_ISIZE * 3)(a0) - ISTORE x4, (RISCV_ISIZE * 4)(a0) - ISTORE x5, (RISCV_ISIZE * 5)(a0) - ISTORE x6, (RISCV_ISIZE * 6)(a0) - ISTORE x7, (RISCV_ISIZE * 7)(a0) - ISTORE x8, (RISCV_ISIZE * 8)(a0) - ISTORE x9, (RISCV_ISIZE * 9)(a0) - ISTORE x10, (RISCV_ISIZE * 10)(a0) - ISTORE x11, (RISCV_ISIZE * 11)(a0) - ISTORE x12, (RISCV_ISIZE * 12)(a0) - ISTORE x13, (RISCV_ISIZE * 13)(a0) - ISTORE x14, (RISCV_ISIZE * 14)(a0) - ISTORE x15, (RISCV_ISIZE * 15)(a0) - ISTORE x16, (RISCV_ISIZE * 16)(a0) - ISTORE x17, (RISCV_ISIZE * 17)(a0) - ISTORE x18, (RISCV_ISIZE * 18)(a0) - ISTORE x19, (RISCV_ISIZE * 19)(a0) - ISTORE x20, (RISCV_ISIZE * 20)(a0) - ISTORE x21, (RISCV_ISIZE * 21)(a0) - ISTORE x22, (RISCV_ISIZE * 22)(a0) - ISTORE x23, (RISCV_ISIZE * 23)(a0) - ISTORE x24, (RISCV_ISIZE * 24)(a0) - ISTORE x25, (RISCV_ISIZE * 25)(a0) - ISTORE x26, (RISCV_ISIZE * 26)(a0) - ISTORE x27, (RISCV_ISIZE * 27)(a0) - ISTORE x28, (RISCV_ISIZE * 28)(a0) - ISTORE x29, (RISCV_ISIZE * 29)(a0) - ISTORE x30, (RISCV_ISIZE * 30)(a0) - ISTORE x31, (RISCV_ISIZE * 31)(a0) + .irp i,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + ISTORE x\i, (RISCV_ISIZE * \i)(a0) + .endr # if defined(__riscv_flen) - FSTORE f0, (RISCV_FOFFSET + RISCV_FSIZE * 0)(a0) - FSTORE f1, (RISCV_FOFFSET + RISCV_FSIZE * 1)(a0) - FSTORE f2, (RISCV_FOFFSET + RISCV_FSIZE * 2)(a0) - FSTORE f3, (RISCV_FOFFSET + RISCV_FSIZE * 3)(a0) - FSTORE f4, (RISCV_FOFFSET + RISCV_FSIZE * 4)(a0) - FSTORE f5, (RISCV_FOFFSET + RISCV_FSIZE * 5)(a0) - FSTORE f6, (RISCV_FOFFSET + RISCV_FSIZE * 6)(a0) - FSTORE f7, (RISCV_FOFFSET + RISCV_FSIZE * 7)(a0) - FSTORE f8, (RISCV_FOFFSET + RISCV_FSIZE * 8)(a0) - FSTORE f9, (RISCV_FOFFSET + RISCV_FSIZE * 9)(a0) - FSTORE f10, (RISCV_FOFFSET + RISCV_FSIZE * 10)(a0) - FSTORE f11, (RISCV_FOFFSET + RISCV_FSIZE * 11)(a0) - FSTORE f12, (RISCV_FOFFSET + RISCV_FSIZE * 12)(a0) - FSTORE f13, (RISCV_FOFFSET + RISCV_FSIZE * 13)(a0) - FSTORE f14, (RISCV_FOFFSET + RISCV_FSIZE * 14)(a0) - FSTORE f15, (RISCV_FOFFSET + RISCV_FSIZE * 15)(a0) - FSTORE f16, (RISCV_FOFFSET + RISCV_FSIZE * 16)(a0) - FSTORE f17, (RISCV_FOFFSET + RISCV_FSIZE * 17)(a0) - FSTORE f18, (RISCV_FOFFSET + RISCV_FSIZE * 18)(a0) - FSTORE f19, (RISCV_FOFFSET + RISCV_FSIZE * 19)(a0) - FSTORE f20, (RISCV_FOFFSET + RISCV_FSIZE * 20)(a0) - FSTORE f21, (RISCV_FOFFSET + RISCV_FSIZE * 21)(a0) - FSTORE f22, (RISCV_FOFFSET + RISCV_FSIZE * 22)(a0) - FSTORE f23, (RISCV_FOFFSET + RISCV_FSIZE * 23)(a0) - FSTORE f24, (RISCV_FOFFSET + RISCV_FSIZE * 24)(a0) - FSTORE f25, (RISCV_FOFFSET + RISCV_FSIZE * 25)(a0) - FSTORE f26, (RISCV_FOFFSET + RISCV_FSIZE * 26)(a0) - FSTORE f27, (RISCV_FOFFSET + RISCV_FSIZE * 27)(a0) - FSTORE f28, (RISCV_FOFFSET + RISCV_FSIZE * 28)(a0) - FSTORE f29, (RISCV_FOFFSET + RISCV_FSIZE * 29)(a0) - FSTORE f30, (RISCV_FOFFSET + RISCV_FSIZE * 30)(a0) - FSTORE f31, (RISCV_FOFFSET + RISCV_FSIZE * 31)(a0) + .irp i,FROM_0_TO_31 + FSTORE f\i, (RISCV_FOFFSET + RISCV_FSIZE * \i)(a0) + .endr # endif li a0, 0 // return UNW_ESUCCESS @@ -1275,27 +1167,37 @@ 
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) stg %r14, 8(%r2) // Save FPRs - std %f0, 144(%r2) - std %f1, 152(%r2) - std %f2, 160(%r2) - std %f3, 168(%r2) - std %f4, 176(%r2) - std %f5, 184(%r2) - std %f6, 192(%r2) - std %f7, 200(%r2) - std %f8, 208(%r2) - std %f9, 216(%r2) - std %f10, 224(%r2) - std %f11, 232(%r2) - std %f12, 240(%r2) - std %f13, 248(%r2) - std %f14, 256(%r2) - std %f15, 264(%r2) + .irp i,FROM_0_TO_15 + std %f\i, (144+8*\i)(%r2) + .endr // Return UNW_ESUCCESS lghi %r2, 0 br %r14 +#elif defined(__loongarch__) && __loongarch_grlen == 64 + +# +# extern int __unw_getcontext(unw_context_t* thread_state) +# +# On entry: +# thread_state pointer is in $a0($r4) +# +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + .irp i,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + st.d $r\i, $a0, (8*\i) + .endr + st.d $r1, $a0, (8 * 32) // store $ra to pc + +# if __loongarch_frlen == 64 + .irp i,FROM_0_TO_31 + fst.d $f\i, $a0, (8 * 33 + 8 * \i) + .endr +# endif + + move $a0, $zero // UNW_ESUCCESS + jr $ra + #endif WEAK_ALIAS(__unw_getcontext, unw_getcontext) diff --git a/libunwind/src/config.h b/libunwind/src/config.h index cc41b817acf6..4bbac951624f 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -115,7 +115,7 @@ #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \ (!defined(__APPLE__) && defined(__arm__)) || defined(__aarch64__) || \ defined(__mips__) || defined(__riscv) || defined(__hexagon__) || \ - defined(__sparc__) || defined(__s390x__) + defined(__sparc__) || defined(__s390x__) || defined(__loongarch__) #if !defined(_LIBUNWIND_BUILD_SJLJ_APIS) #define _LIBUNWIND_BUILD_ZERO_COST_APIS #endif diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index 491ba882891d..e0e98c04aabe 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -77,6 +77,8 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, # define REGISTER_KIND Registers_ve #elif defined(__s390x__) # define REGISTER_KIND Registers_s390x +#elif defined(__loongarch__) && __loongarch_grlen == 64 +#define REGISTER_KIND Registers_loongarch #else # error Architecture not supported #endif diff --git a/libunwind/test/unw_resume.pass.cpp b/libunwind/test/unw_resume.pass.cpp new file mode 100644 index 000000000000..76273e4a8ef0 --- /dev/null +++ b/libunwind/test/unw_resume.pass.cpp @@ -0,0 +1,34 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Ensure that unw_resume() resumes execution at the stack frame identified by +// cursor. + +// TODO: Investigate this failure on AIX system. +// XFAIL: target={{.*}}-aix{{.*}} + +// TODO: Figure out why this fails with Memory Sanitizer. 
+// XFAIL: msan + +#include <libunwind.h> + +void test_unw_resume() { + unw_context_t context; + unw_cursor_t cursor; + + unw_getcontext(&context); + unw_init_local(&cursor, &context); + unw_step(&cursor); + unw_resume(&cursor); +} + +int main() { + test_unw_resume(); + return 0; +} diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp new file mode 100644 index 000000000000..50eab54ac41f --- /dev/null +++ b/lld/ELF/Arch/LoongArch.cpp @@ -0,0 +1,473 @@ +//===- LoongArch.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" +#include "OutputSections.h" +#include "Symbols.h" +#include "SyntheticSections.h" +#include "Target.h" +#include "Thunks.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; +using namespace lld; +using namespace lld::elf; + +namespace { +#define REL_STACK_MAX_SIZE 16 +struct RelStack { + uint64_t stack[REL_STACK_MAX_SIZE] = {}; + int top = -1; + void push_back(uint64_t e) { + if (top + 1 < REL_STACK_MAX_SIZE) { + top++; + stack[top] = e; + } else { + report_fatal_error("relocation stack overflow, top = " + Twine(top)); + } + } + + uint64_t pop_back_val() { + uint64_t e; + if (top >= 0) { + e = stack[top]; + top--; + } else { + report_fatal_error("relocation stack underflow, top = " + Twine(top)); + } + return e; + } +}; +// lld relocates input sections on multiple worker threads, so every worker +// needs its own stack for evaluating the stack-based (SOP) relocations +// (that expression model is sketched below).
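Each fixup under the old LoongArch psABI arrives as a postfix expression over this per-thread stack: R_LARCH_SOP_PUSH_* relocations push operands, R_LARCH_SOP_{NOT,ADD,SUB,SL,SR,AND,IF_ELSE} combine them, and a closing R_LARCH_SOP_POP_32_* writes the result into an instruction field, as the relocate() switch further down shows. A toy evaluator of that model, independent of the lld sources (names are illustrative):

#include <cstdint>
#include <utility>
#include <vector>

enum class Sop { Push, Add, Sub, Shl, Sar };

// Evaluate one fixup's run of SOP relocations; the return value is what a
// closing R_LARCH_SOP_POP_32_* reloc would encode into the instruction.
uint64_t evalSop(const std::vector<std::pair<Sop, uint64_t>> &ops) {
  std::vector<uint64_t> stk;
  for (const auto &[op, imm] : ops) {
    if (op == Sop::Push) {
      stk.push_back(imm);
      continue;
    }
    uint64_t rhs = stk.back();
    stk.pop_back();
    uint64_t &lhs = stk.back();
    switch (op) {
    case Sop::Add: lhs += rhs; break;
    case Sop::Sub: lhs -= rhs; break;
    case Sop::Shl: lhs <<= rhs; break;
    case Sop::Sar: lhs = uint64_t(int64_t(lhs) >> rhs); break;
    default: break; // Sop::Push handled above
    }
  }
  return stk.back();
}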
+__thread RelStack relStack; + +template <class ELFT> class LoongArch final : public TargetInfo {public: + LoongArch(); + uint32_t calcEFlags() const override; + RelExpr getRelExpr(RelType type, const Symbol &s, + const uint8_t *loc) const override; + RelType getDynRel(RelType type) const override; + bool convertAbsRelToPC(RelType &type) const override; + void writeGotHeader(uint8_t *buf) const override; + void writeGotPlt(uint8_t *buf, const Symbol &s) const override; + void writePltHeader(uint8_t *buf) const override; + void writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const override; + void relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const override; +}; +} // namespace + +template <class ELFT> LoongArch<ELFT>::LoongArch() { + // .got[0] = _DYNAMIC + gotHeaderEntriesNum = 1; + // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map + gotPltHeaderEntriesNum = 2; + defaultMaxPageSize = 65536; + gotEntrySize = sizeof(typename ELFT::uint); + pltEntrySize = 16; + ipltEntrySize = 16; + pltHeaderSize = 32; + copyRel = R_LARCH_COPY; + pltRel = R_LARCH_JUMP_SLOT; + relativeRel = R_LARCH_RELATIVE; + iRelativeRel = R_LARCH_IRELATIVE; + // _GLOBAL_OFFSET_TABLE_ is relative to .got + gotBaseSymInGotPlt = false; + + if (ELFT::Is64Bits) { + symbolicRel = R_LARCH_64; + tlsGotRel = R_LARCH_TLS_TPREL64; + tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64; + tlsOffsetRel = R_LARCH_TLS_DTPREL64; + defaultImageBase = 0x120000000; + } else { + symbolicRel = R_LARCH_32; + tlsGotRel = R_LARCH_TLS_TPREL32; + tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32; + tlsOffsetRel = R_LARCH_TLS_DTPREL32; + } + gotRel = symbolicRel; +} + +template <class ELFT> static TargetInfo *getTargetInfo() { + static LoongArch<ELFT> target; + return &target; +} + +TargetInfo *elf::getLoongArch32TargetInfo() { return getTargetInfo<ELF32LE>(); } +TargetInfo *elf::getLoongArch64TargetInfo() { return getTargetInfo<ELF64LE>(); } + +template <class ELFT> +RelExpr LoongArch<ELFT>::getRelExpr(RelType type, const Symbol &s, + const uint8_t *loc) const { + switch (type) { + case R_LARCH_64: + case R_LARCH_32: + return R_ABS; + case R_LARCH_SOP_PUSH_PCREL: + case R_LARCH_32_PCREL: + case R_LARCH_64_PCREL: + return R_PC; + case R_LARCH_SOP_PUSH_TLS_GOT: + return R_GOT_OFF; + case R_LARCH_SOP_PUSH_TLS_GD: + return R_TLSGD_GOT; + case R_LARCH_SOP_PUSH_TLS_TPREL: + return R_TPREL; + case R_LARCH_SOP_PUSH_GPREL: + return R_LARCH_GOTREL; + case R_LARCH_SOP_PUSH_PLT_PCREL: + return R_PLT_PC; + default: + return R_LARCH_ABS; + } +} + +template <class ELFT> static uint32_t getEFlags(InputFile *F) { + return cast<ObjFile<ELFT>>(F)->getObj().getHeader().e_flags; +} + +static Twine getAbiName(uint32_t eflags) { + switch (eflags) { + case EF_LARCH_ABI_LP64: + return Twine("LP64"); + case EF_LARCH_ABI_LP32: + return Twine("LP32"); + case EF_LARCH_ABI_LPX32: + return Twine("LPX32"); + default: + return Twine("Unknown ABI"); + } +} + +template <class ELFT> uint32_t LoongArch<ELFT>::calcEFlags() const { + assert(!ctx->objectFiles.empty()); + + uint32_t target = getEFlags<ELFT>(ctx->objectFiles.front()); + + for (InputFile *f : ctx->objectFiles) { + uint32_t eflags = getEFlags<ELFT>(f); + if (eflags != EF_LARCH_ABI_LP64 && eflags != EF_LARCH_ABI_LP32 && + eflags != EF_LARCH_ABI_LPX32) + error(toString(f) + ": unrecognized e_flags: " + Twine(eflags)); + if (eflags != target) + error(toString(f) + ": ABI '" + getAbiName(eflags) + + "' is incompatible with target ABI '" + getAbiName(target) + "'"); + } + + return target; +} + +template <class ELFT> RelType LoongArch<ELFT>::getDynRel(RelType type) const { + if (type == R_LARCH_32 || type == R_LARCH_64) + return type; + return R_LARCH_NONE; +} + +// NOTE:
R_LARCH_{32,64}_PCREL are part of psABI v2. Here we only use them in +// lld to work around the well-known .eh_frame encoding issue. Please DO NOT use +// them directly in handwritten assembly as binutils do not support these types. +template <class ELFT> bool LoongArch<ELFT>::convertAbsRelToPC(RelType &type) const { + switch (type) { + case R_LARCH_32: + type = R_LARCH_32_PCREL; + return true; + case R_LARCH_64: + type = R_LARCH_64_PCREL; + return true; + } + return false; +} + +template <class ELFT> void LoongArch<ELFT>::writeGotHeader(uint8_t *buf) const { + if (ELFT::Is64Bits) + write64le(buf, mainPart->dynamic->getVA()); + else + write32le(buf, mainPart->dynamic->getVA()); +} + +template <class ELFT> +void LoongArch<ELFT>::writeGotPlt(uint8_t *buf, const Symbol &s) const { + if (ELFT::Is64Bits) + write64le(buf, in.plt->getVA()); + else + write32le(buf, in.plt->getVA()); +} + +/* Bias by 0x800 so the high 20 bits round to nearest: the low 12 bits are + applied as a signed immediate and may carry into the high part. */ +static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } +static uint32_t lo12(uint32_t val) { return val & 0xfff; } + +template <class ELFT> void LoongArch<ELFT>::writePltHeader(uint8_t *buf) const { + uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); + + /* pcaddu12i $t2, %hi(%pcrel(.got.plt)) + sub.[wd] $t1, $t1, $t3 + ld.[wd] $t3, $t2, %lo(%pcrel(.got.plt)) # _dl_runtime_resolve + addi.[wd] $t1, $t1, -(PLT_HEADER_SIZE + 12) + 4 + addi.[wd] $t0, $t2, %lo(%pcrel(.got.plt)) + srli.[wd] $t1, $t1, log2(16 / GOT_ENTRY_SIZE) + ld.[wd] $t0, $t0, GOT_ENTRY_SIZE + jirl $r0, $t3, 0 */ + + if (ELFT::Is64Bits) { + write32le(buf + 0, 0x1c00000e | hi20(offset) << 5); + write32le(buf + 4, 0x0011bdad); + write32le(buf + 8, 0x28c001cf | lo12(offset) << 10); + write32le(buf + 12, 0x02c001ad | ((-(pltHeaderSize + 12) + 4) & 0xfff) + << 10); + write32le(buf + 16, 0x02c001cc | lo12(offset) << 10); + write32le(buf + 20, 0x004501ad | 0x400); + write32le(buf + 24, 0x28c0018c | gotEntrySize << 10); + write32le(buf + 28, 0x4c0001e0); + } else { + write32le(buf + 0, 0x1c00000e | hi20(offset) << 5); + write32le(buf + 4, 0x00113dad); + write32le(buf + 8, 0x288001cf | lo12(offset) << 10); + write32le(buf + 12, 0x028001ad | ((-(pltHeaderSize + 12)) & 0xfff) << 10); + write32le(buf + 16, 0x028001cc | lo12(offset) << 10); + write32le(buf + 20, 0x004481ad | 0x800); + write32le(buf + 24, 0x2880018c | gotEntrySize << 10); + write32le(buf + 28, 0x4c0001e0); + } + + return; +} + +template <class ELFT> +void LoongArch<ELFT>::writePlt(uint8_t *buf, const Symbol &sym, + uint64_t pltEntryAddr) const { + uint32_t offset = sym.getGotPltVA() - pltEntryAddr; + + /* pcaddu12i $t3, %hi(%pcrel(.got.plt entry)) + ld.[wd] $t3, $t3, %lo(%pcrel(.got.plt entry)) + pcaddu12i $t1, 0 + jirl $r0, $t3, 0 */ + + write32le(buf, 0x1c00000f | hi20(offset) << 5); + if (ELFT::Is64Bits) + write32le(buf + 4, 0x28c001ef | lo12(offset) << 10); + else + write32le(buf + 4, 0x288001ef | lo12(offset) << 10); + write32le(buf + 8, 0x1c00000d); + write32le(buf + 12, 0x4c0001e0); + + return; +} + +// Extract bits v[hi:lo], where range is inclusive, and hi must be < 63. +static uint32_t extractBits(uint64_t v, uint32_t hi, uint32_t lo) { + return (v & ((1ULL << (hi + 1)) - 1)) >> lo; +} + +// Clear bits v[hi:lo] to 0, where range is inclusive, and hi must be +// < 32.
+static uint32_t cleanInstrImm(uint32_t v, uint32_t hi, uint32_t lo) { + return v & ~((((1ULL << (hi + 1)) - 1) >> lo) << lo); +} + +template <class ELFT> +void LoongArch<ELFT>::relocate(uint8_t *loc, const Relocation &rel, + uint64_t val) const { + switch (rel.type) { + case R_LARCH_32_PCREL: + checkInt(loc, val, 32, rel); + LLVM_FALLTHROUGH; + case R_LARCH_32: + write32le(loc, val); + break; + case R_LARCH_TLS_DTPREL32: + write32le(loc, val); + break; + case R_LARCH_64_PCREL: + case R_LARCH_64: + write64le(loc, val); + return; + case R_LARCH_TLS_DTPREL64: + write64le(loc, val); + break; + case R_LARCH_TLS_DTPMOD32: + write32le(loc, val); + break; + case R_LARCH_TLS_DTPMOD64: + write64le(loc, val); + break; + case R_LARCH_MARK_LA: + case R_LARCH_MARK_PCREL: + case R_LARCH_NONE: + break; + case R_LARCH_SOP_PUSH_PCREL: + case R_LARCH_SOP_PUSH_ABSOLUTE: + case R_LARCH_SOP_PUSH_GPREL: + case R_LARCH_SOP_PUSH_TLS_TPREL: + case R_LARCH_SOP_PUSH_TLS_GOT: + case R_LARCH_SOP_PUSH_TLS_GD: + case R_LARCH_SOP_PUSH_PLT_PCREL: + relStack.push_back(val); + break; + case R_LARCH_SOP_PUSH_DUP: { + uint64_t opr1 = relStack.pop_back_val(); + relStack.push_back(opr1); + relStack.push_back(opr1); + } break; + case R_LARCH_SOP_ASSERT: { + uint64_t opr1 = relStack.pop_back_val(); + assert(opr1 == 0 && "R_LARCH_SOP_ASSERT relocation failed."); + (void)opr1; // keep -Wunused-variable quiet when asserts are disabled + } break; + case R_LARCH_SOP_NOT: { + uint64_t opr1 = relStack.pop_back_val(); + relStack.push_back(!opr1); + } break; + case R_LARCH_SOP_SUB: { + uint64_t opr2 = relStack.pop_back_val(); + uint64_t opr1 = relStack.pop_back_val(); + relStack.push_back(opr1 - opr2); + } break; + case R_LARCH_SOP_SL: { + uint64_t opr2 = relStack.pop_back_val(); + uint64_t opr1 = relStack.pop_back_val(); + relStack.push_back(opr1 << opr2); + } break; + case R_LARCH_SOP_SR: { + uint64_t opr2 = relStack.pop_back_val(); + uint64_t opr1 = relStack.pop_back_val(); + relStack.push_back((int64_t)opr1 >> opr2); + } break; + case R_LARCH_SOP_ADD: { + uint64_t opr2 = relStack.pop_back_val(); + uint64_t opr1 = relStack.pop_back_val(); + relStack.push_back(opr1 + opr2); + } break; + case R_LARCH_SOP_AND: { + uint64_t opr2 = relStack.pop_back_val(); + uint64_t opr1 = relStack.pop_back_val(); + relStack.push_back(opr1 & opr2); + } break; + case R_LARCH_SOP_IF_ELSE: { + uint64_t opr3 = relStack.pop_back_val(); + uint64_t opr2 = relStack.pop_back_val(); + uint64_t opr1 = relStack.pop_back_val(); + relStack.push_back(opr1 ?
opr2 : opr3); + } break; + case R_LARCH_SOP_POP_32_S_10_5: { + uint64_t opr1 = relStack.pop_back_val(); + checkInt(loc, static_cast<int64_t>(opr1), 5, rel); + uint32_t imm10_5 = extractBits(opr1, 4, 0) << 10; + uint32_t ins = cleanInstrImm(read32le(loc), 14, 10); + write32le(loc, ins | imm10_5); + } break; + case R_LARCH_SOP_POP_32_S_10_12: { + uint64_t opr1 = relStack.pop_back_val(); + checkInt(loc, static_cast<int64_t>(opr1), 12, rel); + uint32_t imm10_12 = extractBits(opr1, 11, 0) << 10; + uint32_t ins = cleanInstrImm(read32le(loc), 21, 10); + write32le(loc, ins | imm10_12); + } break; + case R_LARCH_SOP_POP_32_S_10_16: { + uint64_t opr1 = relStack.pop_back_val(); + checkInt(loc, static_cast<int64_t>(opr1), 16, rel); + uint32_t imm10_16 = extractBits(opr1, 15, 0) << 10; + uint32_t ins = cleanInstrImm(read32le(loc), 25, 10); + write32le(loc, ins | imm10_16); + } break; + case R_LARCH_SOP_POP_32_S_10_16_S2: { + int64_t opr1 = (int64_t)relStack.pop_back_val(); + checkInt(loc, static_cast<int64_t>(opr1), 18, rel); + checkAlignment(loc, opr1, 4, rel); + uint32_t imm10_16 = extractBits(opr1, 17, 2) << 10; + uint32_t ins = cleanInstrImm(read32le(loc), 25, 10); + write32le(loc, ins | imm10_16); + } break; + case R_LARCH_SOP_POP_32_U_10_12: { + uint64_t opr1 = relStack.pop_back_val(); + checkUInt(loc, opr1, 12, rel); + uint32_t imm10_12 = extractBits(opr1, 11, 0) << 10; + uint32_t ins = cleanInstrImm(read32le(loc), 21, 10); + write32le(loc, ins | imm10_12); + } break; + case R_LARCH_SOP_POP_32_S_5_20: { + uint64_t opr1 = relStack.pop_back_val(); + checkInt(loc, static_cast<int64_t>(opr1), 20, rel); + uint32_t imm5_20 = extractBits(opr1, 19, 0) << 5; + uint32_t ins = cleanInstrImm(read32le(loc), 24, 5); + write32le(loc, ins | imm5_20); + } break; + case R_LARCH_SOP_POP_32_S_0_5_10_16_S2: { + uint64_t opr1 = relStack.pop_back_val(); + checkInt(loc, static_cast<int64_t>(opr1), 23, rel); + checkAlignment(loc, opr1, 4, rel); + uint32_t imm0_5 = extractBits(opr1, 22, 18); + uint32_t imm10_16 = extractBits(opr1, 17, 2) << 10; + uint32_t ins = cleanInstrImm(read32le(loc), 4, 0); + ins = cleanInstrImm(ins, 25, 10); + write32le(loc, ins | imm0_5 | imm10_16); + } break; + case R_LARCH_SOP_POP_32_S_0_10_10_16_S2: { + uint64_t opr1 = relStack.pop_back_val(); + checkInt(loc, static_cast<int64_t>(opr1), 28, rel); + checkAlignment(loc, opr1, 4, rel); + uint32_t imm0_10 = extractBits(opr1, 27, 18); + uint32_t imm10_16 = extractBits(opr1, 17, 2) << 10; + uint32_t ins = cleanInstrImm(read32le(loc), 25, 0); + write32le(loc, ins | imm0_10 | imm10_16); + } break; + case R_LARCH_SOP_POP_32_U: { + uint64_t opr1 = relStack.pop_back_val(); + checkUInt(loc, opr1, 32, rel); + write32le(loc, (uint32_t)opr1); + } break; + case R_LARCH_ADD8: + *loc += val; + break; + case R_LARCH_ADD16: + write16le(loc, read16le(loc) + val); + break; + case R_LARCH_ADD24: { + uint32_t v = (read16le(loc) | *(loc + 2) << 16) + val; + write16le(loc, v); + *(loc + 2) = v >> 16; + } break; + case R_LARCH_ADD32: + write32le(loc, read32le(loc) + val); + break; + case R_LARCH_ADD64: + write64le(loc, read64le(loc) + val); + break; + case R_LARCH_SUB8: + *loc -= val; + break; + case R_LARCH_SUB16: + write16le(loc, read16le(loc) - val); + break; + case R_LARCH_SUB24: { + uint32_t v = (read16le(loc) | *(loc + 2) << 16) - val; + write16le(loc, v); + *(loc + 2) = v >> 16; + } break; + case R_LARCH_SUB32: + write32le(loc, read32le(loc) - val); + break; + case R_LARCH_SUB64: + write64le(loc, read64le(loc) - val); + break; + // GNU C++ vtable hierarchy + case R_LARCH_GNU_VTINHERIT: + // GNU C++ vtable member usage + case R_LARCH_GNU_VTENTRY: + break; + default: + error(getErrorLocation(loc) + "unrecognized
+  case R_LARCH_ADD8:
+    *loc += val;
+    break;
+  case R_LARCH_ADD16:
+    write16le(loc, read16le(loc) + val);
+    break;
+  case R_LARCH_ADD24: {
+    // The field is three bytes wide: update the low half-word and the third
+    // byte separately so the following byte is not clobbered.
+    uint32_t v = (read16le(loc) | *(loc + 2) << 16) + val;
+    write16le(loc, v);
+    *(loc + 2) = v >> 16;
+  } break;
+  case R_LARCH_ADD32:
+    write32le(loc, read32le(loc) + val);
+    break;
+  case R_LARCH_ADD64:
+    write64le(loc, read64le(loc) + val);
+    break;
+  case R_LARCH_SUB8:
+    *loc -= val;
+    break;
+  case R_LARCH_SUB16:
+    write16le(loc, read16le(loc) - val);
+    break;
+  case R_LARCH_SUB24: {
+    uint32_t v = (read16le(loc) | *(loc + 2) << 16) - val;
+    write16le(loc, v);
+    *(loc + 2) = v >> 16;
+  } break;
+  case R_LARCH_SUB32:
+    write32le(loc, read32le(loc) - val);
+    break;
+  case R_LARCH_SUB64:
+    write64le(loc, read64le(loc) - val);
+    break;
+  // GNU C++ vtable hierarchy
+  case R_LARCH_GNU_VTINHERIT:
+  // GNU C++ vtable member usage
+  case R_LARCH_GNU_VTENTRY:
+    break;
+  default:
+    error(getErrorLocation(loc) + "unrecognized reloc " + Twine(rel.type));
+  }
+}
diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index b37035d3e742..6d12da1f4b08 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -13,6 +13,7 @@ add_lld_library(lldELF
   Arch/ARM.cpp
   Arch/AVR.cpp
   Arch/Hexagon.cpp
+  Arch/LoongArch.cpp
   Arch/Mips.cpp
   Arch/MipsArchTree.cpp
   Arch/MSP430.cpp
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index abcc8a984a65..fab3b452efe0 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -158,6 +158,7 @@ static std::tuple<ELFKind, uint16_t> parseEmulation(StringRef emul) {
           .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU})
           .Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9})
           .Case("msp430elf", {ELF32LEKind, EM_MSP430})
+          .Case("elf64loongarch", {ELF64LEKind, EM_LOONGARCH})
           .Default({ELFNoneKind, EM_NONE});
 
   if (ret.first == ELFNoneKind)
@@ -1000,7 +1001,7 @@ static bool getIsRela(opt::InputArgList &args) {
   // Otherwise use the psABI defined relocation entry format.
   uint16_t m = config->emachine;
   return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || m == EM_PPC ||
-         m == EM_PPC64 || m == EM_RISCV || m == EM_X86_64;
+         m == EM_PPC64 || m == EM_RISCV || m == EM_X86_64 || m == EM_LOONGARCH;
 }
 
 static void parseClangOption(StringRef opt, const Twine &msg) {
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 473809b05e9c..8691bab8b322 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1518,6 +1518,9 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) {
     return EM_AVR;
   case Triple::hexagon:
     return EM_HEXAGON;
+  case Triple::loongarch32:
+  case Triple::loongarch64:
+    return EM_LOONGARCH;
   case Triple::mips:
   case Triple::mipsel:
   case Triple::mips64:
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 8fe36eca6a4b..aa279bd482cf 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -597,6 +597,7 @@ static int64_t getTlsTpOffset(const Symbol &s) {
     // data and 0xf000 of the program's TLS segment.
     return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1)) - 0x7000;
   case EM_RISCV:
+  case EM_LOONGARCH:
     return s.getVA(0) + (tls->p_vaddr & (tls->p_align - 1));
 
   // Variant 2.
@@ -616,6 +617,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
                                             const Symbol &sym, RelExpr expr) {
   switch (expr) {
   case R_ABS:
+  case R_LARCH_ABS:
   case R_DTPREL:
   case R_RELAX_TLS_LD_TO_LE_ABS:
   case R_RELAX_GOT_PC_NOPIC:
@@ -637,6 +639,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
   case R_GOTREL:
   case R_PPC64_RELAX_TOC:
     return sym.getVA(a) - in.got->getVA();
+  case R_LARCH_GOTREL:
+    return sym.getGotVA() - in.got->getVA();
   case R_GOTPLTREL:
     return sym.getVA(a) - in.gotPlt->getVA();
   case R_GOTPLT:
@@ -902,7 +906,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
     // R_ABS/R_DTPREL and some other relocations can be used from non-SHF_ALLOC
    // sections.
     if (expr == R_ABS || expr == R_DTPREL || expr == R_GOTPLTREL ||
-        expr == R_RISCV_ADD) {
+        expr == R_RISCV_ADD || expr == R_LARCH_ABS) {
       target.relocateNoSym(bufLoc, type, SignExtend64<bits>(sym.getVA(addend)));
       continue;
     }
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 277c57505bb2..93841d7d9a4c 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -200,7 +200,7 @@ static bool needsPlt(RelExpr expr) {
 static bool needsGot(RelExpr expr) {
   return oneof<R_GOT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF,
                R_MIPS_GOT_OFF32, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTPLT,
-               R_AARCH64_GOT_PAGE>(expr);
+               R_AARCH64_GOT_PAGE, R_LARCH_GOTREL>(expr);
 }
 
 // True if this expression is of the form Sym - X, where X is a position in the
@@ -211,7 +211,6 @@ static bool isRelExpr(RelExpr expr) {
                R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC>(expr);
 }
 
-
 static RelExpr toPlt(RelExpr expr) {
   switch (expr) {
   case R_PPC64_CALL:
@@ -518,8 +517,7 @@ int64_t RelocationScanner::computeAddend(const RelTy &rel, RelExpr expr,
 }
 
 // Custom error message if Sym is defined in a discarded section.
-template <class ELFT>
-static std::string maybeReportDiscarded(Undefined &sym) {
+template <class ELFT> static std::string maybeReportDiscarded(Undefined &sym) {
   auto *file = dyn_cast_or_null<ObjFile<ELFT>>(sym.file);
   if (!file || !sym.discardedSecIdx ||
       file->getSections()[sym.discardedSecIdx] != &InputSection::discarded)
@@ -931,10 +929,18 @@ static bool canDefineSymbolInExecutable(Symbol &sym) {
   // If the symbol has default visibility the symbol defined in the
   // executable will preempt it.
   // Note that we want the visibility of the shared symbol itself, not
+  // the visibility of the symbol in the output file we are producing.
+  // Use `!= STV_PROTECTED` but not `== STV_DEFAULT`. This is a partial backport
+  // from llvm-16.
+  if (config->emachine == EM_LOONGARCH) {
+    if ((sym.stOther & 0x3) != STV_PROTECTED)
+      return true;
+  } else {
   // the visibility of the symbol in the output file we are producing. That is
   // why we use Sym.stOther.
-  if ((sym.stOther & 0x3) == STV_DEFAULT)
+  if ((sym.stOther & 0x3) == STV_DEFAULT)
     return true;
+  }
 
   // If we are allowed to break address equality of functions, defining
   // a plt entry will allow the program to call the function in the
@@ -962,7 +968,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type,
           R_MIPS_GOTREL, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC,
           R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
           R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT,
-          R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e))
+          R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE, R_LARCH_GOTREL,
+          R_LARCH_ABS>(e))
     return true;
 
   // These never do, except if the entire file is position dependent or if
@@ -1003,7 +1010,7 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type,
   // We set the final symbols values for linker script defined symbols later.
   // They always can be computed as a link time constant.
   if (sym.scriptDefined)
-    return true;
+    return true;
 
   error("relocation " + toString(type) + " cannot refer to absolute symbol: " +
         toString(sym) + getLocation(sec, sym, relOff));
@@ -1049,6 +1056,7 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset,
   if (canWrite) {
     RelType rel = target.getDynRel(type);
     if (expr == R_GOT || (rel == target.symbolicRel && !sym.isPreemptible)) {
+
       addRelativeReloc(sec, offset, sym, addend, expr, type);
       return;
     } else if (rel != 0) {
@@ -1140,6 +1148,8 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset,
     }
   }
+
+
   errorOrWarn("relocation " + toString(type) + " cannot be used against " +
               (sym.getName().empty() ? "local symbol"
                                      : "symbol '" + toString(sym) + "'") +
@@ -1194,13 +1204,13 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
     return 1;
   }
 
-  // ARM, Hexagon and RISC-V do not support GD/LD to IE/LE relaxation. For
-  // PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
-  // relaxation as well.
-  bool toExecRelax = !config->shared && config->emachine != EM_ARM &&
-                     config->emachine != EM_HEXAGON &&
-                     config->emachine != EM_RISCV &&
-                     !c.file->ppc64DisableTLSRelax;
+  // ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
+  // relaxation. For PPC64, if the file has missing
+  // R_PPC64_TLSGD/R_PPC64_TLSLD, disable relaxation as well.
+  bool toExecRelax =
+      !config->shared && config->emachine != EM_ARM &&
+      config->emachine != EM_HEXAGON && config->emachine != EM_LOONGARCH &&
+      config->emachine != EM_RISCV && !c.file->ppc64DisableTLSRelax;
 
   // If we are producing an executable and the symbol is non-preemptable, it
   // must be defined and the code sequence can be relaxed to use Local-Exec.
@@ -1215,8 +1225,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym,
   // being suitable for being dynamically loaded via dlopen. GOT[e0] is the
   // module index, with a special value of 0 for the current module. GOT[e1] is
   // unused. There only needs to be one module index entry.
-  if (oneof<R_TLSLD_GOT, R_TLSLD_GOTPLT, R_TLSLD_PC, R_TLSLD_HINT>(
-          expr)) {
+  if (oneof<R_TLSLD_GOT, R_TLSLD_GOTPLT, R_TLSLD_PC, R_TLSLD_HINT>(expr)) {
     // Local-Dynamic relocs can be relaxed to Local-Exec.
     if (toExecRelax) {
       c.relocations.push_back(
@@ -1404,10 +1413,10 @@ template <class RelTy> void RelocationScanner::scanOne(RelTy *&i) {
       // R_HEX_GD_PLT_B22_PCREL (call a@GDPLT) is transformed into
       // call __tls_get_addr even if the symbol is non-preemptible.
       if (!(config->emachine == EM_HEXAGON &&
-            (type == R_HEX_GD_PLT_B22_PCREL ||
-             type == R_HEX_GD_PLT_B22_PCREL_X ||
-             type == R_HEX_GD_PLT_B32_PCREL_X)))
-        expr = fromPlt(expr);
+            (type == R_HEX_GD_PLT_B22_PCREL ||
+             type == R_HEX_GD_PLT_B22_PCREL_X ||
+             type == R_HEX_GD_PLT_B32_PCREL_X)))
+        expr = fromPlt(expr);
     } else if (!isAbsoluteValue(sym)) {
       expr = target.adjustGotPcExpr(type, addend, relocatedAddr);
     }
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index f70d255ba229..4ec807b7c0a7 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -87,6 +87,8 @@ enum RelExpr {
   R_AARCH64_TLSDESC_PAGE,
   R_ARM_PCA,
   R_ARM_SBREL,
+  R_LARCH_ABS,
+  R_LARCH_GOTREL,
   R_MIPS_GOTREL,
   R_MIPS_GOT_GP,
   R_MIPS_GOT_GP_PC,
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index e37faf90e132..0c296fab1990 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -438,6 +438,7 @@ static std::pair<ELFKind, uint16_t> parseBfdName(StringRef s) {
       .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV})
       .Case("elf64-sparc", {ELF64BEKind, EM_SPARCV9})
       .Case("elf32-msp430", {ELF32LEKind, EM_MSP430})
+      .Case("elf64-loongarch", {ELF64LEKind, EM_LOONGARCH})
       .Default({ELFNoneKind, EM_NONE});
 }
diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp
index 7bc5121eabe4..da8e7785a2b7 100644
--- a/lld/ELF/Target.cpp
+++ b/lld/ELF/Target.cpp
@@ -62,6 +62,10 @@ TargetInfo *elf::getTarget() {
     return getAVRTargetInfo();
   case EM_HEXAGON:
     return getHexagonTargetInfo();
+  case EM_LOONGARCH:
+    if (config->ekind == ELF32LEKind)
+      return getLoongArch32TargetInfo();
+    return getLoongArch64TargetInfo();
   case EM_MIPS:
     switch (config->ekind) {
     case ELF32LEKind:
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index 5ec2d85f64e8..1c9ac377a6eb 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -185,6 +185,8 @@ TargetInfo *getAMDGPUTargetInfo();
 TargetInfo *getARMTargetInfo();
 TargetInfo *getAVRTargetInfo();
 TargetInfo *getHexagonTargetInfo();
+TargetInfo *getLoongArch32TargetInfo();
+TargetInfo *getLoongArch64TargetInfo();
 TargetInfo *getMSP430TargetInfo();
 TargetInfo *getPPC64TargetInfo();
 TargetInfo *getPPCTargetInfo();
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index e27e656d43c7..9199919d3ac0 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -174,9 +174,9 @@ static Defined *addOptionalRegular(StringRef name, SectionBase *sec,
   return cast<Defined>(s);
 }
 
-static Defined *addAbsolute(StringRef name) {
+static Defined *addAbsolute(StringRef name, SectionBase *section = nullptr) {
   Symbol *sym = symtab->addSymbol(Defined{nullptr, name, STB_GLOBAL, STV_HIDDEN,
-                                          STT_NOTYPE, 0, 0, nullptr});
+                                          STT_NOTYPE, 0, 0, section});
   sym->isUsedInRegularObj = true;
   return cast<Defined>(sym);
 }
@@ -236,6 +236,16 @@ void elf::addReservedSymbols() {
     s->resolve(Defined{/*file=*/nullptr, StringRef(), STB_GLOBAL, STV_HIDDEN,
                        STT_NOTYPE, gotOff, /*size=*/0, Out::elfHeader});
     ElfSym::globalOffsetTable = cast<Defined>(s);
+  } else if (config->emachine == EM_LOONGARCH) {
+    // _GLOBAL_OFFSET_TABLE_ is generated by the LoongArch LLVM backend when
+    // accessing a global variable. That is fine for normal compilation and
+    // linking, but it does not work for the LTO scenario, because lld has an
+    // ordering problem between when the linker-defined symbol
+    // `_GLOBAL_OFFSET_TABLE_` is created and when the output ELF file from LTO
+    // is loaded. See https://github.com/llvm/llvm-project/issues/38982.
+    // So define this symbol when it is not found in symtab.
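+    // As with the generic _GLOBAL_OFFSET_TABLE_ handling above, the symbol is
+    // defined relative to Out::elfHeader so that its value is computed
+    // against the image base at link time.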
+ addAbsolute(gotSymName, Out::elfHeader); + ElfSym::globalOffsetTable = cast(symtab->find(gotSymName)); } // __ehdr_start is the location of ELF file headers. Note that we define diff --git a/lld/test/ELF/Inputs/loongarch.s b/lld/test/ELF/Inputs/loongarch.s new file mode 100644 index 000000000000..7ba110fbd8ac --- /dev/null +++ b/lld/test/ELF/Inputs/loongarch.s @@ -0,0 +1,3 @@ +.global _start +_start: + nop diff --git a/lld/test/ELF/loongarch-branch.s b/lld/test/ELF/loongarch-branch.s new file mode 100644 index 000000000000..7c27b9c4e180 --- /dev/null +++ b/lld/test/ELF/loongarch-branch.s @@ -0,0 +1,42 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o +# RUN: obj2yaml %t.o | FileCheck %s --check-prefix=CHECK-RELOC + +# CHECK-RELOC: Relocations: +# CHECK-RELOC: - Symbol: foo +# CHECK-RELOC: Type: R_LARCH_SOP_PUSH_PCREL +# CHECK-RELOC: - Type: R_LARCH_SOP_POP_32_S_10_16_S2 +# CHECK-RELOC: - Offset: 0x4 +# CHECK-RELOC: Symbol: bar +# CHECK-RELOC: Type: R_LARCH_SOP_PUSH_PCREL +# CHECK-RELOC: - Offset: 0x4 +# CHECK-RELOC: Type: R_LARCH_SOP_POP_32_S_10_16_S2 + +# RUN: ld.lld %t.o --defsym foo=_start+0x4 --defsym bar=_start -o %t +# RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=OBJ + +# OBJ: 00 04 00 58 beq $zero, $zero, 4 +# OBJ: 00 fc ff 5f bne $zero, $zero, -4 + +# RUN: ld.lld %t.o --defsym foo=_start+0x1FFFC --defsym bar=_start+4-0x20000 \ +# RUN: -o %t.limits +# RUN: llvm-objdump -d %t.limits | FileCheck --check-prefix=LIMITS %s +# LIMITS: 00 fc ff 59 beq $zero, $zero, 131068 +# LIMITS-NEXT: 00 00 00 5e bne $zero, $zero, -131072 + +# RUN: not ld.lld %t.o --defsym foo=_start+0x20000 \ +# RUN: --defsym bar=_start+4-0x20004 -o /dev/null 2>&1 \ +# RUN: | FileCheck --check-prefix=ERROR-RANGE %s +# ERROR-RANGE: relocation R_LARCH_SOP_POP_32_S_10_16_S2 out of range: 131072 is not in [-131072, 131071] +# ERROR-RANGE: relocation R_LARCH_SOP_POP_32_S_10_16_S2 out of range: -131076 is not in [-131072, 131071] + +# RUN: not ld.lld %t.o --defsym foo=_start+1 --defsym bar=_start-1 \ +# RUN: -o /dev/null 2>&1 | FileCheck --check-prefix=ERROR-ALIGN %s +# ERROR-ALIGN: improper alignment for relocation R_LARCH_SOP_POP_32_S_10_16_S2: 0x1 is not aligned to 4 bytes +# ERROR-ALIGN-NEXT: improper alignment for relocation R_LARCH_SOP_POP_32_S_10_16_S2: 0xFFFFFFFFFFFFFFFB is not aligned to 4 bytes + +.global _start +_start: + beq $r0, $r0, foo + bne $r0, $r0, bar diff --git a/lld/test/ELF/loongarch-eflags-diff-abi-err.test b/lld/test/ELF/loongarch-eflags-diff-abi-err.test new file mode 100644 index 000000000000..bae0f308564a --- /dev/null +++ b/lld/test/ELF/loongarch-eflags-diff-abi-err.test @@ -0,0 +1,24 @@ +# REQUIRES: loongarch + +# RUN: yaml2obj --docnum 1 %s -o %t1.o +# RUN: yaml2obj --docnum 2 %s -o %t2.o +# RUN: not ld.lld %t1.o %t2.o -o %t 2>&1 | FileCheck %s +# +# CHECK: {{.*}}: ABI 'LPX32' is incompatible with target ABI 'LP32' +# +#t1.o +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH + Flags: [ EF_LARCH_ABI_LP32 ] +# t2.o +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH + Flags: [ EF_LARCH_ABI_LPX32 ] diff --git a/lld/test/ELF/loongarch-eflags-lp32.test b/lld/test/ELF/loongarch-eflags-lp32.test new file mode 100644 index 000000000000..d081e7fb0901 --- /dev/null +++ b/lld/test/ELF/loongarch-eflags-lp32.test @@ -0,0 +1,29 @@ +#.global _start +#_start: +# nop +# +# REQUIRES: loongarch +# RUN: yaml2obj %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: 
llvm-readelf -h %t | FileCheck %s +# Verify the LoongArch LP32 ABI. +# CHECK: Flags: 0x1 +# +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH + Flags: [ EF_LARCH_ABI_LP32 ] +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x0000000000000010 + Content: '00004003' +Symbols: + - Name: _start + Section: .text + Binding: STB_GLOBAL +... diff --git a/lld/test/ELF/loongarch-eflags-lp64.s b/lld/test/ELF/loongarch-eflags-lp64.s new file mode 100644 index 000000000000..0775feeb4ce4 --- /dev/null +++ b/lld/test/ELF/loongarch-eflags-lp64.s @@ -0,0 +1,8 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t +# RUN: llvm-mc -filetype=obj -triple=loongarch64 %S/Inputs/loongarch.s -o %t2 +# RUN: ld.lld %t2 %t -o %t3 +# RUN: llvm-readelf -h %t3 | FileCheck %s +# Verify the LoongArch LP64 ABI. +# CHECK: Flags: 0x3, LP64 diff --git a/lld/test/ELF/loongarch-eflags-lpx32.test b/lld/test/ELF/loongarch-eflags-lpx32.test new file mode 100644 index 000000000000..27b2fc20f972 --- /dev/null +++ b/lld/test/ELF/loongarch-eflags-lpx32.test @@ -0,0 +1,29 @@ +#.global _start +#_start: +# nop +# +# REQUIRES: loongarch +# RUN: yaml2obj %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -h %t | FileCheck %s +# Verify the LoongArch LPX32 ABI. +# CHECK: Flags: 0x2 +# +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH + Flags: [ EF_LARCH_ABI_LPX32 ] +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x0000000000000010 + Content: '00004003' +Symbols: + - Name: _start + Section: .text + Binding: STB_GLOBAL +... diff --git a/lld/test/ELF/loongarch-got-reloc.s b/lld/test/ELF/loongarch-got-reloc.s new file mode 100644 index 000000000000..a1df5c876aae --- /dev/null +++ b/lld/test/ELF/loongarch-got-reloc.s @@ -0,0 +1,67 @@ +# REQUIRES: loongarch +# Check la.got relocation calculation. In this case, la.global will be expanded to la.got. 
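+# la.got expands to a pcaddu12i/ld.d pair: pcaddu12i materializes the high
+# bits of the GOT entry's PC-relative address and ld.d loads the entry using
+# the remaining low 12 bits; the EXE_DIS checks below verify the operand
+# arithmetic for both instructions.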
+
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o
+# RUN: llvm-readobj --relocations %t.o | FileCheck -check-prefix=RELOC %s
+# RUN: ld.lld %t.o -o %t.exe
+# RUN: llvm-objdump --section-headers -t %t.exe | FileCheck -check-prefix=EXE_SYM %s
+# RUN: llvm-objdump -s --section=.got %t.exe | FileCheck -check-prefix=EXE_GOT %s
+# RUN: llvm-objdump -d %t.exe | FileCheck -check-prefix=EXE_DIS %s
+# RUN: llvm-readobj --relocations %t.exe | FileCheck -check-prefix=NORELOC %s
+
+.text
+.globl _start
+_start:
+    la.global $r12, value
+
+.data
+value:
+    .word 1
+
+# RELOC: Relocations [
+# RELOC-NEXT: Section (3) .rela.text {
+# RELOC-NEXT: 0x0 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x800
+# RELOC-NEXT: 0x0 R_LARCH_SOP_PUSH_GPREL value 0x0
+# RELOC-NEXT: 0x0 R_LARCH_SOP_ADD - 0x0
+# RELOC-NEXT: 0x0 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC
+# RELOC-NEXT: 0x0 R_LARCH_SOP_SR - 0x0
+# RELOC-NEXT: 0x0 R_LARCH_SOP_POP_32_S_5_20 - 0x0
+# RELOC-NEXT: 0x4 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x4
+# RELOC-NEXT: 0x4 R_LARCH_SOP_PUSH_GPREL value 0x0
+# RELOC-NEXT: 0x4 R_LARCH_SOP_ADD - 0x0
+# RELOC-NEXT: 0x4 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x804
+# RELOC-NEXT: 0x4 R_LARCH_SOP_PUSH_GPREL value 0x0
+# RELOC-NEXT: 0x4 R_LARCH_SOP_ADD - 0x0
+# RELOC-NEXT: 0x4 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC
+# RELOC-NEXT: 0x4 R_LARCH_SOP_SR - 0x0
+# RELOC-NEXT: 0x4 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC
+# RELOC-NEXT: 0x4 R_LARCH_SOP_SL - 0x0
+# RELOC-NEXT: 0x4 R_LARCH_SOP_SUB - 0x0
+# RELOC-NEXT: 0x4 R_LARCH_SOP_POP_32_S_10_12 - 0x0
+# RELOC-NEXT: }
+# RELOC-NEXT: ]
+
+# EXE_SYM: Sections:
+# EXE_SYM: Idx Name Size VMA Type
+# EXE_SYM: 2 .got 00000010 00000001200201d8 DATA
+# EXE_SYM: SYMBOL TABLE:
+# EXE_SYM: 00000001200201d8 l .got 0000000000000000 .hidden _GLOBAL_OFFSET_TABLE_
+# ^---- .got
+
+# EXE_GOT: Contents of section .got:
+# EXE_GOT-NEXT: 1200201d8 00000000 00000000 f0010320 01000000
+# ^ ^---------value
+# +-- .dynamic address (if present)
+
+# pcaddu12i rd,(%pcrel(_GLOBAL_OFFSET_TABLE_+0x800)+%gprel(symbol))>>12
+# value_GotAddr=%gprel(symbol) = 0x1200201e0-0x1200201d8 = 8
+# (0x1200201d8+0x800-0x1200101d0+8)>>12 = 16
+# EXE_DIS: 1200101d0: 0c 02 00 1c pcaddu12i $r12, 16
+
+# ld.d rd,rd,%pcrel(_GLOBAL_OFFSET_TABLE_+4)+%gprel(symbol)-((%pcrel(
+# _GLOBAL_OFFSET_TABLE_+4+0x800)+%gprel(symbol))>>12<<12)
+# (0x1200201d8+4-0x1200101d4)+8-((0x1200201d8+4+0x800-0x1200101d4)+8)>>12<<12 = 16
+# EXE_DIS-NEXT: 1200101d4: 8c 41 c0 28 ld.d $r12, $r12, 16
+
+# NORELOC: Relocations [
+# NORELOC-NEXT: ]
diff --git a/lld/test/ELF/loongarch-ifunc.s b/lld/test/ELF/loongarch-ifunc.s
new file mode 100644
index 000000000000..2fabb12e5b4a
--- /dev/null
+++ b/lld/test/ELF/loongarch-ifunc.s
@@ -0,0 +1,35 @@
+# REQUIRES: loongarch
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o
+# RUN: ld.lld -pie %t.o -o %t
+# RUN: llvm-readobj -r -x .got.plt %t | FileCheck --check-prefix=RELOC %s
+# RUN: llvm-readelf -s %t | FileCheck --check-prefix=SYM %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=DIS %s
+
+# RELOC: .rela.dyn {
+# RELOC-NEXT: 0x30390 R_LARCH_IRELATIVE - 0x10260
+# RELOC-NEXT: }
+# RELOC: Hex dump of section '.got.plt':
+# RELOC-NEXT: 0x00030390 00000000 00000000
+
+# SYM: 0000000000010260 0 IFUNC GLOBAL DEFAULT {{.*}} ifunc
+
+# DIS: <ifunc>:
+# DIS-NEXT: 10260: jirl $zero, $ra, 0
+# DIS: <_start>:
+# DIS-NEXT: 10264: bl 12
+# DIS: Disassembly of section .iplt:
+# DIS: <.iplt>:
+# DIS-NEXT: 10270: pcaddu12i $r15, 32
+# DIS-NEXT: ld.d $r15, $r15, 288
+# DIS-NEXT: pcaddu12i $r13, 0
+# DIS-NEXT: jirl $zero, $r15, 0
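+
+# The .iplt stub above loads the target address from the .got.plt slot that
+# the loader fills in when it processes the R_LARCH_IRELATIVE relocation,
+# then jumps through $r15.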
+
+.text
+.globl ifunc
+.type ifunc, @gnu_indirect_function
+ifunc:
+    jr $ra
+
+.globl _start
+_start:
+    bl ifunc
diff --git a/lld/test/ELF/loongarch-plt.s b/lld/test/ELF/loongarch-plt.s
new file mode 100644
index 000000000000..e7c54adfee25
--- /dev/null
+++ b/lld/test/ELF/loongarch-plt.s
@@ -0,0 +1,83 @@
+# REQUIRES: loongarch
+# RUN: echo '.globl bar, weak; .type bar,@function; .type weak,@function; bar: weak:' > %t1.s
+
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 %t1.s -o %t1.64.o
+# RUN: ld.lld -shared %t1.64.o -soname=t1.64.so -o %t1.64.so
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.64.o
+# RUN: ld.lld %t.64.o %t1.64.so -o %t.64
+# RUN: llvm-readelf -S -s %t.64 | FileCheck --check-prefixes=SEC,NM %s
+# RUN: llvm-readobj -r %t.64 | FileCheck --check-prefix=RELOC64 %s
+# RUN: llvm-readelf -x .got.plt %t.64 | FileCheck --check-prefix=GOTPLT64 %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t.64 | FileCheck --check-prefixes=DIS,DIS64 %s
+
+# SEC: .plt PROGBITS {{0*}}1200102f0
+
+## A canonical PLT has a non-zero st_value. bar and weak are called but their
+## addresses are not taken, so a canonical PLT is not necessary.
+# NM: {{0*}}00000000 0 FUNC GLOBAL DEFAULT UND bar
+# NM: {{0*}}00000000 0 FUNC WEAK DEFAULT UND weak
+
+## The .got.plt slots relocated by .rela.plt point to .plt
+## This is required by glibc.
+# RELOC64: .rela.plt {
+# RELOC64-NEXT: 0x120030420 R_LARCH_JUMP_SLOT bar 0x0
+# RELOC64-NEXT: 0x120030428 R_LARCH_JUMP_SLOT weak 0x0
+# RELOC64-NEXT: }
+# GOTPLT64: section '.got.plt'
+# GOTPLT64-NEXT: 0x120030410 00000000 00000000 00000000 00000000
+# GOTPLT64-NEXT: 0x120030420 f0020120 01000000 f0020120 01000000
+
+# DIS: <_start>:
+## foo - . = 0x1200102e0-0x1200102d0 = 16
+# DIS-NEXT: 1200102d0: bl 16
+## bar@plt - . = 0x120010310-0x1200102d4 = 60
+# DIS-NEXT: 1200102d4: bl 60
+## bar@plt - . = 0x120010310-0x1200102d8 = 56
+# DIS-NEXT: 1200102d8: bl 56
+## weak@plt - . = 0x120010320-0x1200102dc = 68
+# DIS-NEXT: 1200102dc: bl 68
+# DIS: <foo>:
+# DIS-NEXT: 1200102e0: jirl $zero, $ra, 0
+
+## 120030400 .got.plt
+## 1200102f0 .plt
+# DIS: Disassembly of section .plt:
+# DIS: <.plt>:
+## hi20(.got.plt - .plt + 0x800) = (0x120030410 - 0x1200102f0 + 0x800)>>12 = 0x20920 >> 12 = 0x20
+# DIS-NEXT: pcaddu12i $r14, 32
+# DIS-NEXT: sub.d $r13, $r13, $r15
+## lo12(.got.plt - .plt) = (0x120030410 - 0x1200102f0) & 0xfff = 0x20120 & 0xfff = 0x120
+# DIS64-NEXT: ld.d $r15, $r14, 288
+# DIS64-NEXT: addi.d $r13, $r13, -40
+## lo12(.got.plt - .plt) = (0x120030410 - 0x1200102f0) & 0xfff = 0x20120 & 0xfff = 0x120
+# DIS64-NEXT: addi.d $r12, $r14, 288
+# DIS64-NEXT: srli.d $r13, $r13, 1
+# DIS64-NEXT: ld.d $r12, $r12, 8
+# DIS-NEXT: jirl $zero, $r15, 0
+
+## hi20(&.got.plt[bar]-.) = (0x120030420 - 0x120010310 + 0x800) >> 12 = 0x20910 >> 12 = 0x20
+# DIS: 120010310: pcaddu12i $r15, 32
+## lo12(&.got.plt[bar]-.) = (0x120030420 - 0x120010310) & 0xfff = 0x20110 & 0xfff = 0x110
+# DIS64-NEXT: ld.d $r15, $r15, 272
+# DIS-NEXT: pcaddu12i $r13, 0
+# DIS-NEXT: jirl $zero, $r15, 0
+
+## hi20(&.got.plt[weak]-.) = (0x120030428 - 0x120010320 + 0x800) >> 12 = 0x20908 >> 12 = 0x20
+# DIS: 120010320: pcaddu12i $r15, 32
+## lo12(&.got.plt[weak]-.) = (0x120030428 - 0x120010320) & 0xfff = 0x20108 & 0xfff = 0x108
+# DIS64-NEXT: ld.d $r15, $r15, 264
+# DIS-NEXT: pcaddu12i $r13, 0
+# DIS-NEXT: jirl $zero, $r15, 0
+
+.global _start, foo, bar
+.weak weak
+
+_start:
+    bl foo
+    bl bar
+    bl bar@plt
+    bl weak
+
+## foo is local and non-preemptible, so no PLT entry is generated.
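+## The DIS checks above confirm this: the bl to foo resolves to a direct
+## PC-relative branch (foo - . = 16) with no PLT indirection.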
+foo:
+    jr $ra
diff --git a/lld/test/ELF/loongarch-relative.s b/lld/test/ELF/loongarch-relative.s
new file mode 100644
index 000000000000..cb1456054bc2
--- /dev/null
+++ b/lld/test/ELF/loongarch-relative.s
@@ -0,0 +1,59 @@
+// XFAIL: loongarch
+// REQUIRES: loongarch
+// Test that we create R_LARCH_RELATIVE relocations for the dynamic linker
+// but don't put any symbols in the dynamic symbol table.
+
+// RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o
+// RUN: ld.lld -shared %t.o -o %t.so
+// RUN: llvm-readobj -t -r -dyn-symbols %t.so | FileCheck %s
+
+// CHECK: Relocations [
+// CHECK-NEXT: Section ({{.*}}) .rela.dyn {
+// CHECK-NEXT: 0x[[FOO_ADDR:.*]] R_LARCH_RELATIVE - 0x[[FOO_ADDR]]
+// CHECK-NEXT: 0x[[BAR_ADDR:.*]] R_LARCH_RELATIVE - 0x[[BAR_ADDR]]
+// CHECK-NEXT: 0x10008 R_LARCH_RELATIVE - 0x10005
+// CHECK-NEXT: 0x{{.*}} R_LARCH_RELATIVE - 0x[[FOO_ADDR]]
+// CHECK-NEXT: 0x{{.*}} R_LARCH_32 external 0x0
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+
+// CHECK: Symbols [
+// CHECK: Name: foo
+// CHECK-NEXT: Value: 0x[[FOO_ADDR]]
+// CHECK: Name: bar
+// CHECK-NEXT: Value: 0x[[BAR_ADDR]]
+// CHECK: ]
+
+// CHECK: DynamicSymbols [
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name:
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Local
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: Undefined
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: external
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Binding: Global
+// CHECK-NEXT: Type: None
+// CHECK-NEXT: Other: 0
+// CHECK-NEXT: Section: Undefined
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+
+        .data
+foo:
+        .long foo
+
+        .hidden bar
+        .globl bar
+bar:
+        .long bar
+        .long bar + 1
+        .long foo
+
+        .long external
diff --git a/lld/test/ELF/loongarch-tls-gd.s b/lld/test/ELF/loongarch-tls-gd.s
new file mode 100644
index 000000000000..a742169b6f6c
--- /dev/null
+++ b/lld/test/ELF/loongarch-tls-gd.s
@@ -0,0 +1,137 @@
+# REQUIRES: loongarch
+
+# RUN: llvm-mc -filetype=obj -triple=loongarch64-unknown-linux %s -o %t.o
+# RUN: llvm-readobj --relocations %t.o | FileCheck -check-prefix=InputRelocs %s
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-objdump -s --section=.got %t | FileCheck -check-prefix=GOT %s
+# RUN: ld.lld -shared %t.o -o %t.so
+# RUN: llvm-readobj --relocations %t.so | FileCheck -check-prefix=OutputRelocs %s
+# RUN: llvm-objdump --section-headers -t %t.so | FileCheck -check-prefix=SO_SYM %s
+# RUN: llvm-objdump -s --section=.got %t.so | FileCheck -check-prefix=SO_GOT %s
+# RUN: llvm-objdump -d %t.so | FileCheck --check-prefixes=DIS %s
+
+# Test the handling of the global-dynamic TLS model. The dynamic loader
+# resolves the module index via R_LARCH_TLS_DTPMODNN. For an executable we
+# write the module index 1 and the offset into the TLS block directly into
+# the GOT. For a shared library we can only write the offset into the TLS
+# block directly if the symbol is non-preemptible.
+
+# la.tls.ld is an alias for la.tls.gd, so we only check la.tls.gd.
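+
+# Each GD-model access reserves two consecutive GOT slots: one for the module
+# index (R_LARCH_TLS_DTPMOD64) and one for the symbol's offset within its
+# module's TLS block (R_LARCH_TLS_DTPREL64). The GOT and OutputRelocs checks
+# below show how the slots are filled for the local symbol x and the
+# preemptible symbol z.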
+ +.globl _start +_start: + la.tls.gd $r12, x + la.tls.gd $r15, z + +.section .tdata +.local x +x: +.byte 10 + +.global z +z: +.long 10 + +# InputRelocs: Relocations [ +# InputRelocs-NEXT: Section (3) .rela.text { +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x800 +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_PUSH_TLS_GD x 0x0 +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_ADD - 0x0 +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_POP_32_S_5_20 - 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x4 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_PUSH_TLS_GD x 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_ADD - 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x804 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_PUSH_TLS_GD x 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_ADD - 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_SL - 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_SUB - 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_POP_32_S_10_12 - 0x0 +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x800 +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_PUSH_TLS_GD z 0x0 +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_ADD - 0x0 +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_POP_32_S_5_20 - 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x4 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_PUSH_TLS_GD z 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_ADD - 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x804 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_PUSH_TLS_GD z 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_ADD - 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# InputRelocs-NEXT: 0xC R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# InputRelocs-NEXT: 0xC R_LARCH_SOP_SL - 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_SUB - 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_POP_32_S_10_12 - 0x0 +# InputRelocs-NEXT: } +# InputRelocs-NEXT: ] + + +# For an executable we write the module index 1 and the offset +# into the TLS directly into the GOT. 
+ +# GOT: Contents of section .got: +# GOT-NEXT: 120020218 00000000 00000000 01000000 00000000 {{.*}} +# ^ ^-----x-module-id +# +-.dynamic address(not exist) +# GOT-NEXT: 120020228 01000000 00000000 01000000 00000000 {{.*}} +# ^--x-offset ^-----z-module-id +# GOT-NEXT: 120020238 00000000 00000000 {{.*}} +# ^--z-offset + +# OutputRelocs: Relocations [ +# OutputRelocs-NEXT: Section (5) .rela.dyn { +# OutputRelocs-NEXT: 0x203F0 R_LARCH_TLS_DTPMOD64 - 0x0 +# OutputRelocs-NEXT: 0x203E0 R_LARCH_TLS_DTPMOD64 z 0x0 +# OutputRelocs-NEXT: 0x203E8 R_LARCH_TLS_DTPREL64 z 0x0 +# OutputRelocs-NEXT: } +# OutputRelocs-NEXT: ] + +# SO_SYM: Sections: +# SO_SYM: Idx Name Size VMA Type +# SO_SYM: 7 .tdata 00000005 0000000000020330 DATA +# SO_SYM: 8 .dynamic 000000a0 0000000000020338 +# SO_SYM: 9 .got 00000028 00000000000203d8 DATA +# SO_SYM: SYMBOL TABLE: +# SO_SYM: {{0*}} l .tdata {{0*}} x +# _GLOBAL_OFFSET_TABLE_ 0x203d8 +# SO_SYM: 00000000000203d8 l .got {{0*}} .hidden _GLOBAL_OFFSET_TABLE_ +# SO_SYM: 0000000000020338 l .dynamic {{0*}} .hidden _DYNAMIC +# SO_SYM: 0000000000000001 g .tdata {{0*}} z + +# For a shared library we can only write the offset into the TLS directly + +# SO_GOT: Contents of section .got: +# SO_GOT-NEXT: 203d8 38030200 00000000 00000000 00000000 +# ^ ^-----x-module-id +# +---.dynamic address +# SO_GOT-NEXT: 203e8 00000000 00000000 00000000 00000000 +# ^---x-offset ^-----z-module-id +# SO_GOT-NEXT: 203f8 00000000 00000000 +# ^---z-offset + +# %tlsgd(x)=8; %tlsgd(z)=24 + +# la.tls.gd rd, symbol will be expanded to such instructions: +# pcaddu12i rd,(%pcrel(_GLOBAL_OFFSET_TABLE_+0x800)+%tlsgd(symbol))>>12 +# addi.d rd,rd,%pcrel(_GLOBAL_OFFSET_TABLE_+4)+%tlsgd(symbol) - \ +# ((%pcrel(_GLOBAL_OFFSET_TABLE_+4+0x800)+%tlsgd(symbol))>>12<<12) + +# la.tls.gd $r12, x +# (0x203d8+0x800-0x10320+8)>>12 = 16 +# DIS: 10320: 0c 02 00 1c pcaddu12i $r12, 16 +# (0x203d8+4-0x10320+8)-((0x203d8+4+0x800-0x10320)+8)>>12<<12 = 0xC4 = 196 TODO +# DIS-NEXT: 10324: 8c 41 c3 02 addi.d $r12, $r12, 208 + +# la.tls.gd $r15, z +# (0x203d8+0x800-0x10328+8)>>12 = 16 +# DIS-NEXT: 10328: 0f 02 00 1c pcaddu12i $r15, 16 +# (0x203d8+4-0x10328+24)-((0x203d8+4+0x800-0x10328)+24)>>12<<12 = 0xCC = 204 TODO +# DIS-NEXT: 1032c: ef e1 c2 02 addi.d $r15, $r15, 184 diff --git a/lld/test/ELF/loongarch-tls-ie.s b/lld/test/ELF/loongarch-tls-ie.s new file mode 100644 index 000000000000..a0037b958e0e --- /dev/null +++ b/lld/test/ELF/loongarch-tls-ie.s @@ -0,0 +1,61 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.64.o +# RUN: llvm-objdump -r %t.64.o | FileCheck --check-prefix=RELOCS %s +## loongarch64 IE +# RUN: ld.lld -shared %t.64.o -o %t.64.so +# RUN: llvm-readobj -r -d %t.64.so | FileCheck --check-prefix=IE64-REL %s +# RUN: llvm-objdump -d --no-show-raw-insn %t.64.so | FileCheck --check-prefixes=IE,IE64 %s + +# RELOCS: RELOCATION RECORDS FOR [.text]: +# RELOCS-NEXT: OFFSET TYPE VALUE +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_+0x800 +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_PUSH_TLS_GOT y +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_ADD *ABS* +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_PUSH_ABSOLUTE *ABS*+0xc +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_SR *ABS* +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_POP_32_S_5_20 *ABS* +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_+0x4 +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_PUSH_TLS_GOT y +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_ADD *ABS* +# RELOCS-NEXT: {{[0-9abcdef]*}} 
R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_+0x804 +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_PUSH_TLS_GOT y +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_ADD *ABS* +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_PUSH_ABSOLUTE *ABS*+0xc +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_SR *ABS* +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_PUSH_ABSOLUTE *ABS*+0xc +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_SL *ABS* +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_SUB *ABS* +# RELOCS-NEXT: {{[0-9abcdef]*}} R_LARCH_SOP_POP_32_S_10_12 *ABS* + +# IE64-REL: .rela.dyn { +# IE64-REL-NEXT: 0x203B0 R_LARCH_TLS_TPREL64 - 0x4 +# IE64-REL-NEXT: 0x203A8 R_LARCH_TLS_TPREL64 y 0x0 +# IE64-REL-NEXT: } + +## loongarch64: &.got[y] - . = 0x203A8 - . = 0x100C8 = 4096*16+200 +# IE: 102e0: pcaddu12i $r4, 16 +# IE64-NEXT: ld.d $r4, $r4, 200 +# IE-NEXT: addi.w $r5, $zero, 10 +# IE-NEXT: stx.w $r5, $tp, $r4 +## loongarch64: &.got[z] - . = 0x203B0 - . = 0x100C0 = 4096*16+192 +# IE: 102f0: pcaddu12i $r6, 16 +# IE64-NEXT: ld.d $r6, $r6, 192 +# IE-NEXT: addi.w $r7, $zero, 100 +# IE-NEXT: stx.w $r7, $tp, $r6 + +la.tls.ie $r4, y +addi.w $r5, $zero, 10 +stx.w $r5, $tp, $r4 +la.tls.ie $r6, z +addi.w $r7, $zero, 100 +stx.w $r7, $tp, $r6 + +.section .tbss +.globl y +y: +.word 0 +.size y, 4 +z: +.word 0 +.size z, 4 diff --git a/lld/test/ELF/loongarch-tls-le.s b/lld/test/ELF/loongarch-tls-le.s new file mode 100644 index 000000000000..cdd6bcd7cd32 --- /dev/null +++ b/lld/test/ELF/loongarch-tls-le.s @@ -0,0 +1,130 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o +# RUN: llvm-readobj --relocations %t.o | FileCheck -check-prefix=InputRelocs %s +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readobj --relocations %t | FileCheck -check-prefix=OutputRelocs %s +# RUN: llvm-objdump --section-headers -t %t | FileCheck -check-prefix=SO_SYM %s +# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=DIS %s + +# Test the handling of the Local-Exec TLS model. TLS can be resolved +# statically for an application. 
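+
+# Each la.tls.le below expands to a lu12i.w/ori/lu32i.d/lu52i.d sequence that
+# materializes the symbol's full 64-bit $tp-relative offset as a link-time
+# constant; the exact expansion and the per-symbol %tprel values are spelled
+# out before the DIS checks.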
+ +.globl _start +_start: + la.tls.le $r12, x + la.tls.le $r13, y + la.tls.le $r15, z + 0x100 + +.section .tdata +.local x +x: +.byte 10 + +.globl y +y: +.word 10 + +.local z +z: +.long 10 + +# InputRelocs: Relocations [ +# InputRelocs-NEXT: Section (3) .rela.text { +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_PUSH_TLS_TPREL x 0x0 +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_PUSH_ABSOLUTE - 0x20 +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_SL - 0x0 +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_PUSH_ABSOLUTE - 0x2C +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x0 R_LARCH_SOP_POP_32_S_5_20 - 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_PUSH_TLS_TPREL x 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_PUSH_ABSOLUTE - 0xFFF +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_AND - 0x0 +# InputRelocs-NEXT: 0x4 R_LARCH_SOP_POP_32_U_10_12 - 0x0 +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_PUSH_TLS_TPREL x 0x0 +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_SL - 0x0 +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_PUSH_ABSOLUTE - 0x2C +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x8 R_LARCH_SOP_POP_32_S_5_20 - 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_PUSH_TLS_TPREL x 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_PUSH_ABSOLUTE - 0x34 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0xC R_LARCH_SOP_POP_32_S_10_12 - 0x0 +# InputRelocs-NEXT: 0x10 R_LARCH_SOP_PUSH_TLS_TPREL y 0x0 +# InputRelocs-NEXT: 0x10 R_LARCH_SOP_PUSH_ABSOLUTE - 0x20 +# InputRelocs-NEXT: 0x10 R_LARCH_SOP_SL - 0x0 +# InputRelocs-NEXT: 0x10 R_LARCH_SOP_PUSH_ABSOLUTE - 0x2C +# InputRelocs-NEXT: 0x10 R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x10 R_LARCH_SOP_POP_32_S_5_20 - 0x0 +# InputRelocs-NEXT: 0x14 R_LARCH_SOP_PUSH_TLS_TPREL y 0x0 +# InputRelocs-NEXT: 0x14 R_LARCH_SOP_PUSH_ABSOLUTE - 0xFFF +# InputRelocs-NEXT: 0x14 R_LARCH_SOP_AND - 0x0 +# InputRelocs-NEXT: 0x14 R_LARCH_SOP_POP_32_U_10_12 - 0x0 +# InputRelocs-NEXT: 0x18 R_LARCH_SOP_PUSH_TLS_TPREL y 0x0 +# InputRelocs-NEXT: 0x18 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# InputRelocs-NEXT: 0x18 R_LARCH_SOP_SL - 0x0 +# InputRelocs-NEXT: 0x18 R_LARCH_SOP_PUSH_ABSOLUTE - 0x2C +# InputRelocs-NEXT: 0x18 R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x18 R_LARCH_SOP_POP_32_S_5_20 - 0x0 +# InputRelocs-NEXT: 0x1C R_LARCH_SOP_PUSH_TLS_TPREL y 0x0 +# InputRelocs-NEXT: 0x1C R_LARCH_SOP_PUSH_ABSOLUTE - 0x34 +# InputRelocs-NEXT: 0x1C R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x1C R_LARCH_SOP_POP_32_S_10_12 - 0x0 +# InputRelocs-NEXT: 0x20 R_LARCH_SOP_PUSH_TLS_TPREL z 0x100 +# InputRelocs-NEXT: 0x20 R_LARCH_SOP_PUSH_ABSOLUTE - 0x20 +# InputRelocs-NEXT: 0x20 R_LARCH_SOP_SL - 0x0 +# InputRelocs-NEXT: 0x20 R_LARCH_SOP_PUSH_ABSOLUTE - 0x2C +# InputRelocs-NEXT: 0x20 R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x20 R_LARCH_SOP_POP_32_S_5_20 - 0x0 +# InputRelocs-NEXT: 0x24 R_LARCH_SOP_PUSH_TLS_TPREL z 0x100 +# InputRelocs-NEXT: 0x24 R_LARCH_SOP_PUSH_ABSOLUTE - 0xFFF +# InputRelocs-NEXT: 0x24 R_LARCH_SOP_AND - 0x0 +# InputRelocs-NEXT: 0x24 R_LARCH_SOP_POP_32_U_10_12 - 0x0 +# InputRelocs-NEXT: 0x28 R_LARCH_SOP_PUSH_TLS_TPREL z 0x100 +# InputRelocs-NEXT: 0x28 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# InputRelocs-NEXT: 0x28 R_LARCH_SOP_SL - 0x0 +# InputRelocs-NEXT: 0x28 R_LARCH_SOP_PUSH_ABSOLUTE - 0x2C +# InputRelocs-NEXT: 0x28 R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x28 R_LARCH_SOP_POP_32_S_5_20 - 0x0 +# InputRelocs-NEXT: 0x2C R_LARCH_SOP_PUSH_TLS_TPREL z 0x100 +# InputRelocs-NEXT: 0x2C R_LARCH_SOP_PUSH_ABSOLUTE - 0x34 +# InputRelocs-NEXT: 0x2C R_LARCH_SOP_SR - 0x0 +# InputRelocs-NEXT: 0x2C 
R_LARCH_SOP_POP_32_S_10_12 - 0x0 +# InputRelocs-NEXT: } +# InputRelocs-NEXT: ] + +# Local-Exec creates no +# OutputRelocs: Relocations [ +# OutputRelocs-NEXT: ] + +# SO_SYM: Sections: +# SO_SYM: Idx Name Size VMA Type +# SO_SYM: 2 .tdata 00000009 0000000120020230 DATA +# SO_SYM: SYMBOL TABLE: +# SO_SYM: 0000000000000000 l .tdata {{0*}} x +# SO_SYM: 0000000000000005 l .tdata {{0*}} z +# SO_SYM: 0000000000000001 g .tdata {{0*}} y + +# %tprel(x+addend)=0 ; %tprel(y+addend)=1 ; %tprel(z+0x100)=0x105=261 + +# la.tls.le rd, symbol + addend +# Load $tp-relative offset of TLS symbol +# will be expanded to such instructions: +# lu12i.w rd,%tprel(symbol + addend)<<32>>44 +# ori rd,rd,%tprel(symbol + addend)&0xfff +# lu32i.d rd,%tprel(symbol + addend)<<12>>44 +# lu52i.d rd,rd,%tprel(symbol + addend)>>52 + +# DIS: 120010200: 0c 00 00 14 lu12i.w $r12, 0 +# DIS-NEXT: 120010204: 8c 01 80 03 ori $r12, $r12, 0 +# DIS-NEXT: 120010208: 0c 00 00 16 lu32i.d $r12, 0 +# DIS-NEXT: 12001020c: 8c 01 00 03 lu52i.d $r12, $r12, 0 +# DIS-NEXT: 120010210: 0d 00 00 14 lu12i.w $r13, 0 +# DIS-NEXT: 120010214: ad 05 80 03 ori $r13, $r13, 1 +# DIS-NEXT: 120010218: 0d 00 00 16 lu32i.d $r13, 0 +# DIS-NEXT: 12001021c: ad 01 00 03 lu52i.d $r13, $r13, 0 +# DIS-NEXT: 120010220: 0f 00 00 14 lu12i.w $r15, 0 +# DIS-NEXT: 120010224: ef 15 84 03 ori $r15, $r15, 261 +# DIS-NEXT: 120010228: 0f 00 00 16 lu32i.d $r15, 0 +# DIS-NEXT: 12001022c: ef 01 00 03 lu52i.d $r15, $r15, 0 diff --git a/lld/test/ELF/loongarch_eh_frame.s b/lld/test/ELF/loongarch_eh_frame.s new file mode 100644 index 000000000000..6e9769ffeb73 --- /dev/null +++ b/lld/test/ELF/loongarch_eh_frame.s @@ -0,0 +1,23 @@ +# REQUIRES: loongarch + +# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so + +.globl _start +_start: + nop + +.section foo,"ax",@progbits +.cfi_startproc + nop +.cfi_endproc + +.section bar,"ax",@progbits +.cfi_startproc + nop +.cfi_endproc + +.section dah,"ax",@progbits +.cfi_startproc + nop +.cfi_endproc diff --git a/lld/test/ELF/loongarch_not_relative_eh_frame.test b/lld/test/ELF/loongarch_not_relative_eh_frame.test new file mode 100644 index 000000000000..5143a5b8b22e --- /dev/null +++ b/lld/test/ELF/loongarch_not_relative_eh_frame.test @@ -0,0 +1,49 @@ +# REQUIRES: loongarch + +# RUN: yaml2obj %s -o %t.o +# RUN: ld.lld -pie %t.o -o %t +# RUN: llvm-readelf -r %t | FileCheck %s +# +# CHECK-NOT: {{.*}} R_LARCH_RELATIVE {{.*}} +# +# t.o +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH + Flags: [ EF_LARCH_ABI_LP64 ] + SectionHeaderStringTable: .strtab +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x10 + Content: '00004003' + - Name: .eh_frame + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + AddressAlign: 0x8 + Content: 1000000000000000017A5200017801010C0C030018000000180000000000000000000000040000000000000000000000 + - Name: .rela.eh_frame + Type: SHT_RELA + Link: .symtab + AddressAlign: 0x8 + Info: .eh_frame + Relocations: + - Offset: 0x1C + Symbol: _start + Type: R_LARCH_64 + - Type: SectionHeaderTable + Sections: + - Name: .strtab + - Name: .text + - Name: .eh_frame + - Name: .rela.eh_frame + - Name: .symtab +Symbols: + - Name: _start + Section: .text + Binding: STB_GLOBAL +... 
diff --git a/lld/test/ELF/loongarch_preempt_hidden_sym.s b/lld/test/ELF/loongarch_preempt_hidden_sym.s
new file mode 100644
index 000000000000..a25f0558cf02
--- /dev/null
+++ b/lld/test/ELF/loongarch_preempt_hidden_sym.s
@@ -0,0 +1,12 @@
+# REQUIRES: loongarch
+
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o
+# RUN: ld.lld -pie %t.o -o %t.so
+
+# CHECK-NOT: error: cannot preempt symbol: hidden
+
+.globl hidden
+.hidden hidden
+hidden:
+.rodata
+.long hidden
diff --git a/lld/test/ELF/lto/loongarch-global-offset-table.ll b/lld/test/ELF/lto/loongarch-global-offset-table.ll
new file mode 100644
index 000000000000..74b71e753748
--- /dev/null
+++ b/lld/test/ELF/lto/loongarch-global-offset-table.ll
@@ -0,0 +1,26 @@
+; REQUIRES: loongarch
+
+; RUN: llvm-as %s -o %t.o
+; RUN: ld.lld %t.o -shared -o %t.so --no-undefined
+; RUN: llvm-readelf -S -s %t.so | FileCheck %s
+
+;; Check that the linker-reserved symbol `_GLOBAL_OFFSET_TABLE_` is created
+;; during LTO and points to the beginning of the .got section.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"
+target triple = "loongarch64-unknown-elf"
+
+@g = global i32 1
+
+define i32 @foo() {
+  %res = load i32, ptr @g
+  ret i32 %res
+}
+
+; CHECK: Section Headers:
+; CHECK: [Nr] Name Type Address
+; CHECK: [{{.+}}] .got PROGBITS [[GOT:[0-9a-z]+]]
+
+; CHECK: Symbol table '.symtab' contains {{.+}} entries:
+; CHECK: Num: Value Size Type Bind Vis Ndx Name
+; CHECK: {{.+}}: [[GOT]] 0 NOTYPE LOCAL HIDDEN {{.+}} _GLOBAL_OFFSET_TABLE_
diff --git a/lld/test/ELF/lto/loongarch.ll b/lld/test/ELF/lto/loongarch.ll
new file mode 100644
index 000000000000..f38ac3c8f634
--- /dev/null
+++ b/lld/test/ELF/lto/loongarch.ll
@@ -0,0 +1,10 @@
+; REQUIRES: loongarch
+
+; RUN: llvm-as %s -o %t.o
+; RUN: ld.lld %t.o -o %t
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128"
+target triple = "loongarch64-unknown-elf"
+
+define void @f() {
+  ret void
+}
diff --git a/lld/test/ELF/x86-64-dyn-rel-error5.s b/lld/test/ELF/x86-64-dyn-rel-error5.s
index 495104509492..bfd60ef2f8ac 100644
--- a/lld/test/ELF/x86-64-dyn-rel-error5.s
+++ b/lld/test/ELF/x86-64-dyn-rel-error5.s
@@ -1,7 +1,7 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
-# RUN: not ld.lld -pie %t.o -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,PIE %s
-# RUN: not ld.lld -shared %t.o -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,SHARED %s
+# RUN: not ld.lld -pie %t.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld -shared %t.o -o /dev/null 2>&1 | FileCheck %s
 
 ## Check we don't create dynamic relocations in a writable section,
 ## if the number of bits is smaller than the wordsize.
@@ -17,8 +17,7 @@ hidden: # CHECK: error: relocation R_X86_64_16 cannot be used against local symbol; recompile with -fPIC # CHECK: error: relocation R_X86_64_32 cannot be used against local symbol; recompile with -fPIC -# PIE: error: cannot preempt symbol: hidden -# SHARED: error: relocation R_X86_64_32 cannot be used against symbol 'hidden'; recompile with -fPIC +# CHECK: error: relocation R_X86_64_32 cannot be used against symbol 'hidden'; recompile with -fPIC .data .byte local # R_X86_64_8 diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index 96a1d652573f..1b1f1e781240 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -68,6 +68,7 @@ llvm_config.feature_config( 'ARM': 'arm', 'AVR': 'avr', 'Hexagon': 'hexagon', + 'LoongArch': 'loongarch', 'Mips': 'mips', 'MSP430': 'msp430', 'PowerPC': 'ppc', diff --git a/llvm-build/Makefile b/llvm-build/Makefile index ecb982228b0e..b6b1bae7d9fc 100644 --- a/llvm-build/Makefile +++ b/llvm-build/Makefile @@ -83,6 +83,9 @@ else ifeq ($(ARCH),x86_64) ARCH_CFLAGS = else +ifeq ($(ARCH),loongarch64) +ARCH_CFLAGS = +else $(warning *** warning: ARCH $(ARCH) has not been tested yet, use with cautions!) ARCH_CFLAGS = endif @@ -103,6 +106,9 @@ else ifeq ($(ARCH),x86_64) CFLAGS = -march=x86-64 -O2 -Wall -fstack-protector-strong -D_FORTIFY_SOURCE=2 -Wl,-z,relro,-z,now,-z,noexecstack else +ifeq ($(ARCH),loongarch64) +CFLAGS = -march=loongarch64 -Wall -fstack-protector-strong -D_FORTIFY_SOURCE=2 -Wl,-z,relro,-z,now,-z,noexecstack +else CFLAGS = -march=armv7-a -O2 -Wall -fstack-protector-strong -D_FORTIFY_SOURCE=2 -Wl,-z,relro,-z,now,-z,noexecstack endif endif diff --git a/llvm-build/build.py b/llvm-build/build.py index ee12969b8e24..1cc96b12b344 100755 --- a/llvm-build/build.py +++ b/llvm-build/build.py @@ -59,6 +59,7 @@ class BuildConfig(): self.no_build_riscv64 = args.skip_build or args.no_build_riscv64 self.no_build_mipsel = args.skip_build or args.no_build_mipsel self.no_build_x86_64 = args.skip_build or args.no_build_x86_64 + self.no_build_loongarch64 = args.skip_build or args.no_build_loongarch64 self.build_ncurses = args.build_ncurses self.build_libedit = args.build_libedit self.build_lldb_static = args.build_lldb_static @@ -73,7 +74,7 @@ class BuildConfig(): self.enable_check_abi = args.enable_check_abi self.discover_paths() - self.TARGETS = 'AArch64;ARM;BPF;Mips;RISCV;X86' + self.TARGETS = 'AArch64;ARM;BPF;Mips;RISCV;X86;LoongArch' self.ORIG_ENV = dict(os.environ) self.VERSION = None # autodetected @@ -183,6 +184,12 @@ class BuildConfig(): default=False, help='Omit build os target: x86_64.') + parser.add_argument( + '--no-build-loongarch64', + action='store_true', + default=False, + help='Omit build os target: loongarch64.') + parser.add_argument( '--no-lto', action='store_true', @@ -1165,6 +1172,13 @@ class SysrootComposer(BuildUtils): self.build_musl_libs(product_name, target_cpu, target_name, multi_lib_dir, sysroot_multi_lib_dir, ld_musl_lib, gn_args) + if target_cpu == 'loongarch64': + gn_args += ' is_2k1500=true musl_target_multilib=la264' + multi_lib_dir = os.path.join(ohos_lib_dir, 'la264') + sysroot_multi_lib_dir = os.path.join(sysroot_lib_dir, 'la264') + ld_musl_lib = os.path.join(sysroot_multi_lib_dir, 'ld-musl-{}.so.1'.format(ld_arch)) + self.build_musl_libs(product_name, target_cpu, target_name, multi_lib_dir, + sysroot_multi_lib_dir, ld_musl_lib, gn_args) def install_linux_headers(self, arch, target): dir_suffix = arch @@ -1295,7 +1309,9 @@ class LlvmLibs(BuildUtils): ('riscv64', self.open_ohos_triple('riscv64'), '', ''), 
('mipsel', self.open_ohos_triple('mipsel'), '', ''), ('mipsel', self.open_ohos_triple('mipsel'), '-mnan=legacy', 'nanlegacy'), - ('x86_64', self.open_ohos_triple('x86_64'), '', ''),] + ('x86_64', self.open_ohos_triple('x86_64'), '', ''), + ('loongarch64', self.open_ohos_triple('loongarch64'), '', ''), + ('loongarch64', self.open_ohos_triple('loongarch64'), '-march=la264', 'la264'),] cc = os.path.join(llvm_install, 'bin', 'clang') cxx = os.path.join(llvm_install, 'bin', 'clang++') @@ -1313,7 +1329,7 @@ class LlvmLibs(BuildUtils): if llvm_build != llvm_triple: continue - has_lldb_tools = arch not in ['riscv64', 'mipsel'] + has_lldb_tools = arch not in ['riscv64', 'mipsel', 'loongarch64'] defines = self.base_cmake_defines() ldflags = [] @@ -1329,7 +1345,8 @@ class LlvmLibs(BuildUtils): llvm_path = self.merge_out_path('llvm_make') arch_list = [self.liteos_triple('arm'), self.open_ohos_triple('arm'), self.open_ohos_triple('aarch64'), self.open_ohos_triple('riscv64'), - self.open_ohos_triple('mipsel'), self.open_ohos_triple('x86_64')] + self.open_ohos_triple('mipsel'), self.open_ohos_triple('x86_64'), + self.open_ohos_triple('loongarch64')] libcxx_ndk_install = self.merge_out_path('libcxx-ndk') self.check_create_dir(libcxx_ndk_install) @@ -2559,6 +2576,9 @@ def main(): if not build_config.no_build_x86_64: configs.append(('x86_64', build_utils.open_ohos_triple('x86_64'))) + if not build_config.no_build_loongarch64: + configs.append(('loongarch64', build_utils.open_ohos_triple('loongarch64'))) + if build_config.build_ncurses: llvm_libs.build_ncurses(llvm_make, llvm_install) diff --git a/llvm-build/build_musl.sh b/llvm-build/build_musl.sh index 7f7f44c774e2..39a0d23ac893 100755 --- a/llvm-build/build_musl.sh +++ b/llvm-build/build_musl.sh @@ -87,6 +87,9 @@ elif [ $TARGET_TRIPLE == "riscv64-linux-ohos" ]; then elif [ $TARGET_TRIPLE == "x86_64-linux-ohos" ]; then TARGET_USER="linux_user" TARGETS_PREFIX="x86_64" +elif [ $TARGET_TRIPLE == "loongarch64-linux-ohos" ]; then + TARGET_USER="linux_user" + TARGETS_PREFIX="loongarch64" else TARGET_USER="linux_user" TARGETS_PREFIX="aarch64" @@ -109,7 +112,7 @@ make musl_header_install_for_${TARGET_USER} CLANG="${CLANG_BIN_ROOT}/clang" TOPD # build musl_libs if ((make_libs == 1)); then if [ $TARGET_TRIPLE == "aarch64-linux-ohos" ] || [ $TARGET_TRIPLE == "riscv64-linux-ohos" ] || \ - [ $TARGET_TRIPLE == "x86_64-linux-ohos" ]; then + [ $TARGET_TRIPLE == "x86_64-linux-ohos" ] || [ $TARGET_TRIPLE == "loongarch64-linux-ohos" ]; then make CLANG="${CLANG_BIN_ROOT}/clang" TOPDIR=${TOPDIR} SYSROOTDIR=${OUT}/sysroot MUSLCOPYDIR=${OUT}/musl_build \ TARGETS=${TARGET_USER} TARGET=${TARGET_TRIPLE} ARCH=${TARGETS_PREFIX} -f Makefile else diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 2bbdfcae6ae8..427ec2e6a7f2 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -373,6 +373,7 @@ set(LLVM_ALL_TARGETS BPF Hexagon Lanai + LoongArch Mips MSP430 NVPTX diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 83512760d8dd..6beb25017ee5 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -498,6 +498,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "riscv64") set(LLVM_NATIVE_ARCH RISCV) elseif (LLVM_NATIVE_ARCH STREQUAL "m68k") set(LLVM_NATIVE_ARCH M68k) +elseif (LLVM_NATIVE_ARCH MATCHES "loongarch") + set(LLVM_NATIVE_ARCH LoongArch) else () message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") endif () diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess index 60d3f588d6f7..255257d40b63 100644 --- 
a/llvm/cmake/config.guess +++ b/llvm/cmake/config.guess @@ -1021,6 +1021,9 @@ EOF x86_64:Linux:*:*) echo x86_64-unknown-linux-gnu exit ;; + loongarch64:Linux:*:*) + echo loongarch64-unknown-linux-gnu + exit ;; xtensa*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 72657016efcf..27c77ef4226a 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -233,6 +233,7 @@ public: GNUX32, GNUILP32, CODE16, + GNUABILPX32, EABI, EABIHF, Android, @@ -823,6 +824,16 @@ public: getArch() == Triple::aarch64_32; } + /// Tests whether the target is LoongArch 32-bit + bool isLoongArch32() const { + return getArch() == Triple::loongarch32; + } + + /// Tests whether the target is LoongArch 64-bit. + bool isLoongArch64() const { + return getArch() == Triple::loongarch64; + } + /// Tests whether the target is AArch64 and pointers are the size specified by /// \p PointerWidth. bool isAArch64(int PointerWidth) const { diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 556fe9c6a1a0..8cfcbb3d9d8c 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -902,22 +902,13 @@ enum { // LoongArch Specific e_flags enum : unsigned { - // Reference: https://github.com/loongson/LoongArch-Documentation. - // The last commit hash (main branch) is - // 99016636af64d02dee05e39974d4c1e55875c45b. - // Note that there is an open PR - // https://github.com/loongson/LoongArch-Documentation/pull/47 - // talking about using 0x1, 0x2, 0x3 for ILP32S/F/D and use EI_CLASS to - // distinguish LP64 and ILP32. If this PR get merged, we will update - // the definition here. - // Base ABI Types. - EF_LOONGARCH_BASE_ABI_LP64S = 0x1, // LP64 soft-float ABI - EF_LOONGARCH_BASE_ABI_LP64F = 0x2, // LP64 single-float ABI - EF_LOONGARCH_BASE_ABI_LP64D = 0x3, // LP64 double-float ABI - EF_LOONGARCH_BASE_ABI_ILP32S = 0x5, // ILP32 soft-float ABI - EF_LOONGARCH_BASE_ABI_ILP32F = 0x6, // ILP32 single-float ABI - EF_LOONGARCH_BASE_ABI_ILP32D = 0x7, // ILP32 double-float ABI - EF_LOONGARCH_BASE_ABI_MASK = 0x7, // Mask for selecting base ABI + // FIXME: Change these when all ABIs definition were finalized. + // See current definitions: + // https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html#_e_flags_identifies_abi_type_and_version + EF_LARCH_ABI = 0x0003, + EF_LARCH_ABI_LP32 = 0x0001, + EF_LARCH_ABI_LPX32 = 0x0002, + EF_LARCH_ABI_LP64 = 0x0003, }; // ELF Relocation types for LoongArch diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def index 8cbfe2fe4235..d540e2b89bea 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def @@ -60,3 +60,9 @@ ELF_RELOC(R_LARCH_SUB32, 55) ELF_RELOC(R_LARCH_SUB64, 56) ELF_RELOC(R_LARCH_GNU_VTINHERIT, 57) ELF_RELOC(R_LARCH_GNU_VTENTRY, 58) + +// NOTE: R_LARCH_{32,64}_PCREL are part of psABI v2. Here we only use them in +// lld to workaround the well-known .eh_frame encoding issue. Please DO NOT use +// them directly in handwritten assembly as binutils do not support these types. 
+ELF_RELOC(R_LARCH_32_PCREL, 99) +ELF_RELOC(R_LARCH_64_PCREL, 109) diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 6d4f6222af44..226601b204c5 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -5099,7 +5099,7 @@ template <> struct FloatData { #if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ - defined(__wasm__) || defined(__riscv) + defined(__wasm__) || defined(__riscv) || defined(__loongarch__) static const size_t mangled_size = 32; #elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) static const size_t mangled_size = 16; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h index c5c2780bc9ee..5620df0790a9 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -330,6 +330,42 @@ public: JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; +// @brief LoongArch64 support. +class OrcLoongArch64 { +public: + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 40; + static constexpr unsigned StubSize = 32; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0x120; + /// Write the resolver code into the given memory. The user is + /// responsible for allocating the memory and setting permissions. + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); + + /// Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverFnAddr, + unsigned NumTrampolines); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); +}; + // @brief riscv64 support. // // RISC-V 64 supports lazy JITing. 
diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt index 5151f9125b94..468d663796ed 100644 --- a/llvm/include/llvm/IR/CMakeLists.txt +++ b/llvm/include/llvm/IR/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(LLVM IntrinsicsARM.h -gen-intrinsic-enums -intrinsic-prefix=arm) tablegen(LLVM IntrinsicsBPF.h -gen-intrinsic-enums -intrinsic-prefix=bpf) tablegen(LLVM IntrinsicsDirectX.h -gen-intrinsic-enums -intrinsic-prefix=dx) tablegen(LLVM IntrinsicsHexagon.h -gen-intrinsic-enums -intrinsic-prefix=hexagon) +tablegen(LLVM IntrinsicsLoongArch.h -gen-intrinsic-enums -intrinsic-prefix=loongarch) tablegen(LLVM IntrinsicsMips.h -gen-intrinsic-enums -intrinsic-prefix=mips) tablegen(LLVM IntrinsicsNVPTX.h -gen-intrinsic-enums -intrinsic-prefix=nvvm) tablegen(LLVM IntrinsicsPowerPC.h -gen-intrinsic-enums -intrinsic-prefix=ppc) diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index 0a8d27aad58a..3f78eb41f8f8 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -269,6 +269,7 @@ public: Constraint_Uy, Constraint_X, Constraint_Z, + Constraint_ZB, Constraint_ZC, Constraint_Zy, diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index d46fa4fbf5b5..76f2d66b74d6 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2053,3 +2053,4 @@ include "llvm/IR/IntrinsicsRISCV.td" include "llvm/IR/IntrinsicsSPIRV.td" include "llvm/IR/IntrinsicsVE.td" include "llvm/IR/IntrinsicsDirectX.td" +include "llvm/IR/IntrinsicsLoongArch.td" diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td new file mode 100644 index 000000000000..42c4e371db07 --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td @@ -0,0 +1,3657 @@ +//===- IntrinsicsLoongArch.td - Defines LoongArch intrinsics ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the LoongArch-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "loongarch" in { // All intrinsics start with "llvm.loongarch.". 
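+
+// Illustration: each record below binds a Clang builtin name to the LLVM
+// intrinsic it lowers to, so from C the builtin applies directly to Clang
+// vector types. A minimal user-side sketch (the typedef is illustrative;
+// real code would include the lsxintrin.h header from this patch instead):
+//
+//   typedef signed char v16i8 __attribute__((vector_size(16)));
+//   v16i8 clo_bytes(v16i8 v) { return __builtin_lsx_vclo_b(v); }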
+ +//===----------------------------------------------------------------------===// +// LoongArch LSX + +def int_loongarch_lsx_vclo_b : ClangBuiltin<"__builtin_lsx_vclo_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vclo_h : ClangBuiltin<"__builtin_lsx_vclo_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vclo_w : ClangBuiltin<"__builtin_lsx_vclo_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vclo_d : ClangBuiltin<"__builtin_lsx_vclo_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vflogb_s : ClangBuiltin<"__builtin_lsx_vflogb_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vflogb_d : ClangBuiltin<"__builtin_lsx_vflogb_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vpickve2gr_b : ClangBuiltin<"__builtin_lsx_vpickve2gr_b">, + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickve2gr_h : ClangBuiltin<"__builtin_lsx_vpickve2gr_h">, + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickve2gr_w : ClangBuiltin<"__builtin_lsx_vpickve2gr_w">, + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickve2gr_d : ClangBuiltin<"__builtin_lsx_vpickve2gr_d">, + Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vpickve2gr_bu : ClangBuiltin<"__builtin_lsx_vpickve2gr_bu">, + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickve2gr_hu : ClangBuiltin<"__builtin_lsx_vpickve2gr_hu">, + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickve2gr_wu : ClangBuiltin<"__builtin_lsx_vpickve2gr_wu">, + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickve2gr_du : ClangBuiltin<"__builtin_lsx_vpickve2gr_du">, + Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vreplvei_b : ClangBuiltin<"__builtin_lsx_vreplvei_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplvei_h : ClangBuiltin<"__builtin_lsx_vreplvei_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplvei_w : ClangBuiltin<"__builtin_lsx_vreplvei_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplvei_d : ClangBuiltin<"__builtin_lsx_vreplvei_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmskltz_b : ClangBuiltin<"__builtin_lsx_vmskltz_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmskltz_h : ClangBuiltin<"__builtin_lsx_vmskltz_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmskltz_w : ClangBuiltin<"__builtin_lsx_vmskltz_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmskltz_d : ClangBuiltin<"__builtin_lsx_vmskltz_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfmadd_s : ClangBuiltin<"__builtin_lsx_vfmadd_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfmadd_d : ClangBuiltin<"__builtin_lsx_vfmadd_d">, + Intrinsic<[llvm_v2f64_ty], 
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfmsub_s : ClangBuiltin<"__builtin_lsx_vfmsub_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfmsub_d : ClangBuiltin<"__builtin_lsx_vfmsub_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfnmadd_s : ClangBuiltin<"__builtin_lsx_vfnmadd_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfnmadd_d : ClangBuiltin<"__builtin_lsx_vfnmadd_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfnmsub_s : ClangBuiltin<"__builtin_lsx_vfnmsub_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfnmsub_d : ClangBuiltin<"__builtin_lsx_vfnmsub_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_caf_s : ClangBuiltin<"__builtin_lsx_vfcmp_caf_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_caf_d : ClangBuiltin<"__builtin_lsx_vfcmp_caf_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_cor_s : ClangBuiltin<"__builtin_lsx_vfcmp_cor_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_cor_d : ClangBuiltin<"__builtin_lsx_vfcmp_cor_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_cun_s : ClangBuiltin<"__builtin_lsx_vfcmp_cun_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_cun_d : ClangBuiltin<"__builtin_lsx_vfcmp_cun_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_cune_s : ClangBuiltin<"__builtin_lsx_vfcmp_cune_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_cune_d : ClangBuiltin<"__builtin_lsx_vfcmp_cune_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_cueq_s : ClangBuiltin<"__builtin_lsx_vfcmp_cueq_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_cueq_d : ClangBuiltin<"__builtin_lsx_vfcmp_cueq_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_ceq_s : ClangBuiltin<"__builtin_lsx_vfcmp_ceq_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_ceq_d : ClangBuiltin<"__builtin_lsx_vfcmp_ceq_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_cne_s : ClangBuiltin<"__builtin_lsx_vfcmp_cne_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_cne_d : ClangBuiltin<"__builtin_lsx_vfcmp_cne_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_clt_s : ClangBuiltin<"__builtin_lsx_vfcmp_clt_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_clt_d : ClangBuiltin<"__builtin_lsx_vfcmp_clt_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], 
[IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_cult_s : ClangBuiltin<"__builtin_lsx_vfcmp_cult_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_cult_d : ClangBuiltin<"__builtin_lsx_vfcmp_cult_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_cle_s : ClangBuiltin<"__builtin_lsx_vfcmp_cle_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_cle_d : ClangBuiltin<"__builtin_lsx_vfcmp_cle_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_cule_s : ClangBuiltin<"__builtin_lsx_vfcmp_cule_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_cule_d : ClangBuiltin<"__builtin_lsx_vfcmp_cule_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_saf_s : ClangBuiltin<"__builtin_lsx_vfcmp_saf_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_saf_d : ClangBuiltin<"__builtin_lsx_vfcmp_saf_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_sor_s : ClangBuiltin<"__builtin_lsx_vfcmp_sor_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_sor_d : ClangBuiltin<"__builtin_lsx_vfcmp_sor_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_sun_s : ClangBuiltin<"__builtin_lsx_vfcmp_sun_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_sun_d : ClangBuiltin<"__builtin_lsx_vfcmp_sun_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_sune_s : ClangBuiltin<"__builtin_lsx_vfcmp_sune_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_sune_d : ClangBuiltin<"__builtin_lsx_vfcmp_sune_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_sueq_s : ClangBuiltin<"__builtin_lsx_vfcmp_sueq_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_sueq_d : ClangBuiltin<"__builtin_lsx_vfcmp_sueq_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_seq_s : ClangBuiltin<"__builtin_lsx_vfcmp_seq_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_seq_d : ClangBuiltin<"__builtin_lsx_vfcmp_seq_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_sne_s : ClangBuiltin<"__builtin_lsx_vfcmp_sne_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_sne_d : ClangBuiltin<"__builtin_lsx_vfcmp_sne_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_slt_s : ClangBuiltin<"__builtin_lsx_vfcmp_slt_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_slt_d : ClangBuiltin<"__builtin_lsx_vfcmp_slt_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_sult_s : ClangBuiltin<"__builtin_lsx_vfcmp_sult_s">, + 
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_sult_d : ClangBuiltin<"__builtin_lsx_vfcmp_sult_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_sle_s : ClangBuiltin<"__builtin_lsx_vfcmp_sle_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_sle_d : ClangBuiltin<"__builtin_lsx_vfcmp_sle_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcmp_sule_s : ClangBuiltin<"__builtin_lsx_vfcmp_sule_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcmp_sule_d : ClangBuiltin<"__builtin_lsx_vfcmp_sule_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbitsel_v : ClangBuiltin<"__builtin_lsx_vbitsel_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vshuf_b : ClangBuiltin<"__builtin_lsx_vshuf_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vldrepl_b : ClangBuiltin<"__builtin_lsx_vldrepl_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; +def int_loongarch_lsx_vldrepl_h : ClangBuiltin<"__builtin_lsx_vldrepl_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; +def int_loongarch_lsx_vldrepl_w : ClangBuiltin<"__builtin_lsx_vldrepl_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; +def int_loongarch_lsx_vldrepl_d : ClangBuiltin<"__builtin_lsx_vldrepl_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>; + +def int_loongarch_lsx_vstelm_b : ClangBuiltin<"__builtin_lsx_vstelm_b">, + Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; +def int_loongarch_lsx_vstelm_h : ClangBuiltin<"__builtin_lsx_vstelm_h">, + Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; +def int_loongarch_lsx_vstelm_w : ClangBuiltin<"__builtin_lsx_vstelm_w">, + Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; +def int_loongarch_lsx_vstelm_d : ClangBuiltin<"__builtin_lsx_vstelm_d">, + Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>; + +def int_loongarch_lsx_vldx : ClangBuiltin<"__builtin_lsx_vldx">, + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty], + [IntrReadMem, IntrArgMemOnly]>; + +def int_loongarch_lsx_vstx : ClangBuiltin<"__builtin_lsx_vstx">, + Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty], + [IntrArgMemOnly]>; + +def int_loongarch_lsx_vaddwev_d_w : ClangBuiltin<"__builtin_lsx_vaddwev_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_w_h : ClangBuiltin<"__builtin_lsx_vaddwev_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_h_b : ClangBuiltin<"__builtin_lsx_vaddwev_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_q_d : ClangBuiltin<"__builtin_lsx_vaddwev_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsubwev_d_w : ClangBuiltin<"__builtin_lsx_vsubwev_d_w">, + Intrinsic<[llvm_v2i64_ty], 
[llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwev_w_h : ClangBuiltin<"__builtin_lsx_vsubwev_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwev_h_b : ClangBuiltin<"__builtin_lsx_vsubwev_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwev_q_d : ClangBuiltin<"__builtin_lsx_vsubwev_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + + +def int_loongarch_lsx_vaddwod_d_w : ClangBuiltin<"__builtin_lsx_vaddwod_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_w_h : ClangBuiltin<"__builtin_lsx_vaddwod_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_h_b : ClangBuiltin<"__builtin_lsx_vaddwod_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_q_d : ClangBuiltin<"__builtin_lsx_vaddwod_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsubwod_d_w : ClangBuiltin<"__builtin_lsx_vsubwod_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwod_w_h : ClangBuiltin<"__builtin_lsx_vsubwod_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwod_h_b : ClangBuiltin<"__builtin_lsx_vsubwod_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwod_q_d : ClangBuiltin<"__builtin_lsx_vsubwod_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vaddwev_d_wu : ClangBuiltin<"__builtin_lsx_vaddwev_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_w_hu : ClangBuiltin<"__builtin_lsx_vaddwev_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_h_bu : ClangBuiltin<"__builtin_lsx_vaddwev_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_q_du : ClangBuiltin<"__builtin_lsx_vaddwev_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsubwev_d_wu : ClangBuiltin<"__builtin_lsx_vsubwev_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwev_w_hu : ClangBuiltin<"__builtin_lsx_vsubwev_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwev_h_bu : ClangBuiltin<"__builtin_lsx_vsubwev_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwev_q_du : ClangBuiltin<"__builtin_lsx_vsubwev_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vaddwod_d_wu : ClangBuiltin<"__builtin_lsx_vaddwod_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_w_hu : ClangBuiltin<"__builtin_lsx_vaddwod_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_h_bu : ClangBuiltin<"__builtin_lsx_vaddwod_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_q_du : 
ClangBuiltin<"__builtin_lsx_vaddwod_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsubwod_d_wu : ClangBuiltin<"__builtin_lsx_vsubwod_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwod_w_hu : ClangBuiltin<"__builtin_lsx_vsubwod_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwod_h_bu : ClangBuiltin<"__builtin_lsx_vsubwod_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubwod_q_du : ClangBuiltin<"__builtin_lsx_vsubwod_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vaddwev_d_wu_w : ClangBuiltin<"__builtin_lsx_vaddwev_d_wu_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_w_hu_h : ClangBuiltin<"__builtin_lsx_vaddwev_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_h_bu_b : ClangBuiltin<"__builtin_lsx_vaddwev_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwev_q_du_d : ClangBuiltin<"__builtin_lsx_vaddwev_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vaddwod_d_wu_w : ClangBuiltin<"__builtin_lsx_vaddwod_d_wu_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_w_hu_h : ClangBuiltin<"__builtin_lsx_vaddwod_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_h_bu_b : ClangBuiltin<"__builtin_lsx_vaddwod_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vaddwod_q_du_d : ClangBuiltin<"__builtin_lsx_vaddwod_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vhaddw_qu_du : ClangBuiltin<"__builtin_lsx_vhaddw_qu_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhsubw_qu_du : ClangBuiltin<"__builtin_lsx_vhsubw_qu_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vhaddw_q_d : ClangBuiltin<"__builtin_lsx_vhaddw_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhsubw_q_d : ClangBuiltin<"__builtin_lsx_vhsubw_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmuh_b : ClangBuiltin<"__builtin_lsx_vmuh_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmuh_h : ClangBuiltin<"__builtin_lsx_vmuh_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmuh_w : ClangBuiltin<"__builtin_lsx_vmuh_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmuh_d : ClangBuiltin<"__builtin_lsx_vmuh_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmuh_bu : ClangBuiltin<"__builtin_lsx_vmuh_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmuh_hu : ClangBuiltin<"__builtin_lsx_vmuh_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def 
int_loongarch_lsx_vmuh_wu : ClangBuiltin<"__builtin_lsx_vmuh_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmuh_du : ClangBuiltin<"__builtin_lsx_vmuh_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmulwev_d_w : ClangBuiltin<"__builtin_lsx_vmulwev_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_w_h : ClangBuiltin<"__builtin_lsx_vmulwev_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_h_b : ClangBuiltin<"__builtin_lsx_vmulwev_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_q_d : ClangBuiltin<"__builtin_lsx_vmulwev_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmulwod_d_w : ClangBuiltin<"__builtin_lsx_vmulwod_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_w_h : ClangBuiltin<"__builtin_lsx_vmulwod_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_h_b : ClangBuiltin<"__builtin_lsx_vmulwod_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_q_d : ClangBuiltin<"__builtin_lsx_vmulwod_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmulwev_d_wu : ClangBuiltin<"__builtin_lsx_vmulwev_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_w_hu : ClangBuiltin<"__builtin_lsx_vmulwev_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_h_bu : ClangBuiltin<"__builtin_lsx_vmulwev_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_q_du : ClangBuiltin<"__builtin_lsx_vmulwev_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmulwod_d_wu : ClangBuiltin<"__builtin_lsx_vmulwod_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_w_hu : ClangBuiltin<"__builtin_lsx_vmulwod_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_h_bu : ClangBuiltin<"__builtin_lsx_vmulwod_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_q_du : ClangBuiltin<"__builtin_lsx_vmulwod_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmulwev_d_wu_w : ClangBuiltin<"__builtin_lsx_vmulwev_d_wu_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_w_hu_h : ClangBuiltin<"__builtin_lsx_vmulwev_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_h_bu_b : ClangBuiltin<"__builtin_lsx_vmulwev_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwev_q_du_d : ClangBuiltin<"__builtin_lsx_vmulwev_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmulwod_d_wu_w : ClangBuiltin<"__builtin_lsx_vmulwod_d_wu_w">, + 
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_w_hu_h : ClangBuiltin<"__builtin_lsx_vmulwod_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_h_bu_b : ClangBuiltin<"__builtin_lsx_vmulwod_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmulwod_q_du_d : ClangBuiltin<"__builtin_lsx_vmulwod_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaddwev_d_w : ClangBuiltin<"__builtin_lsx_vmaddwev_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_w_h : ClangBuiltin<"__builtin_lsx_vmaddwev_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_h_b : ClangBuiltin<"__builtin_lsx_vmaddwev_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_q_d : ClangBuiltin<"__builtin_lsx_vmaddwev_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaddwod_d_w : ClangBuiltin<"__builtin_lsx_vmaddwod_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_w_h : ClangBuiltin<"__builtin_lsx_vmaddwod_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_h_b : ClangBuiltin<"__builtin_lsx_vmaddwod_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_q_d : ClangBuiltin<"__builtin_lsx_vmaddwod_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaddwev_d_wu : ClangBuiltin<"__builtin_lsx_vmaddwev_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_w_hu : ClangBuiltin<"__builtin_lsx_vmaddwev_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_h_bu : ClangBuiltin<"__builtin_lsx_vmaddwev_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_q_du : ClangBuiltin<"__builtin_lsx_vmaddwev_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaddwod_d_wu : ClangBuiltin<"__builtin_lsx_vmaddwod_d_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_w_hu : ClangBuiltin<"__builtin_lsx_vmaddwod_w_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_h_bu : ClangBuiltin<"__builtin_lsx_vmaddwod_h_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_q_du : ClangBuiltin<"__builtin_lsx_vmaddwod_q_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaddwev_d_wu_w : ClangBuiltin<"__builtin_lsx_vmaddwev_d_wu_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_w_hu_h : 
ClangBuiltin<"__builtin_lsx_vmaddwev_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_h_bu_b : ClangBuiltin<"__builtin_lsx_vmaddwev_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwev_q_du_d : ClangBuiltin<"__builtin_lsx_vmaddwev_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaddwod_d_wu_w : ClangBuiltin<"__builtin_lsx_vmaddwod_d_wu_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_w_hu_h : ClangBuiltin<"__builtin_lsx_vmaddwod_w_hu_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_h_bu_b : ClangBuiltin<"__builtin_lsx_vmaddwod_h_bu_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaddwod_q_du_d : ClangBuiltin<"__builtin_lsx_vmaddwod_q_du_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrln_b_h : ClangBuiltin<"__builtin_lsx_vsrln_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrln_h_w : ClangBuiltin<"__builtin_lsx_vsrln_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrln_w_d : ClangBuiltin<"__builtin_lsx_vsrln_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsran_b_h : ClangBuiltin<"__builtin_lsx_vsran_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsran_h_w : ClangBuiltin<"__builtin_lsx_vsran_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsran_w_d : ClangBuiltin<"__builtin_lsx_vsran_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrlrn_b_h : ClangBuiltin<"__builtin_lsx_vsrlrn_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlrn_h_w : ClangBuiltin<"__builtin_lsx_vsrlrn_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlrn_w_d : ClangBuiltin<"__builtin_lsx_vsrlrn_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrarn_b_h : ClangBuiltin<"__builtin_lsx_vsrarn_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrarn_h_w : ClangBuiltin<"__builtin_lsx_vsrarn_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrarn_w_d : ClangBuiltin<"__builtin_lsx_vsrarn_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrln_b_h : ClangBuiltin<"__builtin_lsx_vssrln_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrln_h_w : ClangBuiltin<"__builtin_lsx_vssrln_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrln_w_d : ClangBuiltin<"__builtin_lsx_vssrln_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssran_b_h : 
ClangBuiltin<"__builtin_lsx_vssran_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssran_h_w : ClangBuiltin<"__builtin_lsx_vssran_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssran_w_d : ClangBuiltin<"__builtin_lsx_vssran_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrlrn_b_h : ClangBuiltin<"__builtin_lsx_vssrlrn_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrn_h_w : ClangBuiltin<"__builtin_lsx_vssrlrn_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrn_w_d : ClangBuiltin<"__builtin_lsx_vssrlrn_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrarn_b_h : ClangBuiltin<"__builtin_lsx_vssrarn_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarn_h_w : ClangBuiltin<"__builtin_lsx_vssrarn_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarn_w_d : ClangBuiltin<"__builtin_lsx_vssrarn_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrln_bu_h : ClangBuiltin<"__builtin_lsx_vssrln_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrln_hu_w : ClangBuiltin<"__builtin_lsx_vssrln_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrln_wu_d : ClangBuiltin<"__builtin_lsx_vssrln_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssran_bu_h : ClangBuiltin<"__builtin_lsx_vssran_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssran_hu_w : ClangBuiltin<"__builtin_lsx_vssran_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssran_wu_d : ClangBuiltin<"__builtin_lsx_vssran_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrlrn_bu_h : ClangBuiltin<"__builtin_lsx_vssrlrn_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrn_hu_w : ClangBuiltin<"__builtin_lsx_vssrlrn_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrn_wu_d : ClangBuiltin<"__builtin_lsx_vssrlrn_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrarn_bu_h : ClangBuiltin<"__builtin_lsx_vssrarn_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarn_hu_w : ClangBuiltin<"__builtin_lsx_vssrarn_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarn_wu_d : ClangBuiltin<"__builtin_lsx_vssrarn_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vandn_v : ClangBuiltin<"__builtin_lsx_vandn_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vorn_v : ClangBuiltin<"__builtin_lsx_vorn_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def 
int_loongarch_lsx_vfrstp_b : ClangBuiltin<"__builtin_lsx_vfrstp_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vfrstp_h : ClangBuiltin<"__builtin_lsx_vfrstp_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vadd_q : ClangBuiltin<"__builtin_lsx_vadd_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsub_q : ClangBuiltin<"__builtin_lsx_vsub_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsigncov_b : ClangBuiltin<"__builtin_lsx_vsigncov_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vsigncov_h : ClangBuiltin<"__builtin_lsx_vsigncov_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vsigncov_w : ClangBuiltin<"__builtin_lsx_vsigncov_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vsigncov_d : ClangBuiltin<"__builtin_lsx_vsigncov_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vfcvt_h_s : ClangBuiltin<"__builtin_lsx_vfcvt_h_s">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcvt_s_d : ClangBuiltin<"__builtin_lsx_vfcvt_s_d">, + Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vffint_s_l : ClangBuiltin<"__builtin_lsx_vffint_s_l">, + Intrinsic<[llvm_v4f32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftint_w_d : ClangBuiltin<"__builtin_lsx_vftint_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrz_w_d : ClangBuiltin<"__builtin_lsx_vftintrz_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrp_w_d : ClangBuiltin<"__builtin_lsx_vftintrp_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrm_w_d : ClangBuiltin<"__builtin_lsx_vftintrm_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrne_w_d : ClangBuiltin<"__builtin_lsx_vftintrne_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbsrl_v : ClangBuiltin<"__builtin_lsx_vbsrl_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbsll_v : ClangBuiltin<"__builtin_lsx_vbsll_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrstpi_b : ClangBuiltin<"__builtin_lsx_vfrstpi_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrstpi_h : ClangBuiltin<"__builtin_lsx_vfrstpi_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vneg_b : ClangBuiltin<"__builtin_lsx_vneg_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vneg_h : ClangBuiltin<"__builtin_lsx_vneg_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vneg_w : ClangBuiltin<"__builtin_lsx_vneg_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vneg_d : 
ClangBuiltin<"__builtin_lsx_vneg_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmskgez_b : ClangBuiltin<"__builtin_lsx_vmskgez_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmsknz_b : ClangBuiltin<"__builtin_lsx_vmsknz_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrintrm_s : ClangBuiltin<"__builtin_lsx_vfrintrm_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrintrm_d : ClangBuiltin<"__builtin_lsx_vfrintrm_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrintrp_s : ClangBuiltin<"__builtin_lsx_vfrintrp_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrintrp_d : ClangBuiltin<"__builtin_lsx_vfrintrp_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrintrz_s : ClangBuiltin<"__builtin_lsx_vfrintrz_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrintrz_d : ClangBuiltin<"__builtin_lsx_vfrintrz_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrintrne_s : ClangBuiltin<"__builtin_lsx_vfrintrne_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrintrne_d : ClangBuiltin<"__builtin_lsx_vfrintrne_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vffinth_d_w : ClangBuiltin<"__builtin_lsx_vffinth_d_w">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vffintl_d_w : ClangBuiltin<"__builtin_lsx_vffintl_d_w">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrm_w_s : ClangBuiltin<"__builtin_lsx_vftintrm_w_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrm_l_d : ClangBuiltin<"__builtin_lsx_vftintrm_l_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrp_w_s : ClangBuiltin<"__builtin_lsx_vftintrp_w_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrp_l_d : ClangBuiltin<"__builtin_lsx_vftintrp_l_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrz_w_s : ClangBuiltin<"__builtin_lsx_vftintrz_w_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrz_l_d : ClangBuiltin<"__builtin_lsx_vftintrz_l_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrne_w_s : ClangBuiltin<"__builtin_lsx_vftintrne_w_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrne_l_d : ClangBuiltin<"__builtin_lsx_vftintrne_l_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftinth_l_s : ClangBuiltin<"__builtin_lsx_vftinth_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintl_l_s : ClangBuiltin<"__builtin_lsx_vftintl_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrmh_l_s : ClangBuiltin<"__builtin_lsx_vftintrmh_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrml_l_s : ClangBuiltin<"__builtin_lsx_vftintrml_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrph_l_s : 
ClangBuiltin<"__builtin_lsx_vftintrph_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrpl_l_s : ClangBuiltin<"__builtin_lsx_vftintrpl_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrzh_l_s : ClangBuiltin<"__builtin_lsx_vftintrzh_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrzl_l_s : ClangBuiltin<"__builtin_lsx_vftintrzl_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrneh_l_s : ClangBuiltin<"__builtin_lsx_vftintrneh_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrnel_l_s : ClangBuiltin<"__builtin_lsx_vftintrnel_l_s">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vexth_d_w : ClangBuiltin<"__builtin_lsx_vexth_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vexth_w_h : ClangBuiltin<"__builtin_lsx_vexth_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vexth_h_b : ClangBuiltin<"__builtin_lsx_vexth_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vexth_q_d : ClangBuiltin<"__builtin_lsx_vexth_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vexth_du_wu : ClangBuiltin<"__builtin_lsx_vexth_du_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vexth_wu_hu : ClangBuiltin<"__builtin_lsx_vexth_wu_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vexth_hu_bu : ClangBuiltin<"__builtin_lsx_vexth_hu_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vexth_qu_du : ClangBuiltin<"__builtin_lsx_vexth_qu_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvexth_du_wu : ClangBuiltin<"__builtin_lasx_xvexth_du_wu">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvexth_wu_hu : ClangBuiltin<"__builtin_lasx_xvexth_wu_hu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvexth_hu_bu : ClangBuiltin<"__builtin_lasx_xvexth_hu_bu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvexth_qu_du : ClangBuiltin<"__builtin_lasx_xvexth_qu_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsllwil_d_w : ClangBuiltin<"__builtin_lsx_vsllwil_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsllwil_w_h : ClangBuiltin<"__builtin_lsx_vsllwil_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsllwil_h_b : ClangBuiltin<"__builtin_lsx_vsllwil_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vextl_q_d : ClangBuiltin<"__builtin_lsx_vextl_q_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsllwil_du_wu : ClangBuiltin<"__builtin_lsx_vsllwil_du_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsllwil_wu_hu : ClangBuiltin<"__builtin_lsx_vsllwil_wu_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsllwil_hu_bu : ClangBuiltin<"__builtin_lsx_vsllwil_hu_bu">, + Intrinsic<[llvm_v8i16_ty], 
[llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vextl_qu_du : ClangBuiltin<"__builtin_lsx_vextl_qu_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbitclri_b : ClangBuiltin<"__builtin_lsx_vbitclri_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitclri_h : ClangBuiltin<"__builtin_lsx_vbitclri_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitclri_w : ClangBuiltin<"__builtin_lsx_vbitclri_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitclri_d : ClangBuiltin<"__builtin_lsx_vbitclri_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbitseti_b : ClangBuiltin<"__builtin_lsx_vbitseti_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitseti_h : ClangBuiltin<"__builtin_lsx_vbitseti_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitseti_w : ClangBuiltin<"__builtin_lsx_vbitseti_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitseti_d : ClangBuiltin<"__builtin_lsx_vbitseti_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbitrevi_b : ClangBuiltin<"__builtin_lsx_vbitrevi_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitrevi_h : ClangBuiltin<"__builtin_lsx_vbitrevi_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitrevi_w : ClangBuiltin<"__builtin_lsx_vbitrevi_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitrevi_d : ClangBuiltin<"__builtin_lsx_vbitrevi_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrlrni_b_h : ClangBuiltin<"__builtin_lsx_vssrlrni_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrni_h_w : ClangBuiltin<"__builtin_lsx_vssrlrni_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrni_w_d : ClangBuiltin<"__builtin_lsx_vssrlrni_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrni_d_q : ClangBuiltin<"__builtin_lsx_vssrlrni_d_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrani_b_h : ClangBuiltin<"__builtin_lsx_vsrani_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrani_h_w : ClangBuiltin<"__builtin_lsx_vsrani_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrani_w_d : ClangBuiltin<"__builtin_lsx_vsrani_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrani_d_q : ClangBuiltin<"__builtin_lsx_vsrani_d_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vextrins_b : ClangBuiltin<"__builtin_lsx_vextrins_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vextrins_h 
: ClangBuiltin<"__builtin_lsx_vextrins_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vextrins_w : ClangBuiltin<"__builtin_lsx_vextrins_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vextrins_d : ClangBuiltin<"__builtin_lsx_vextrins_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbitseli_b : ClangBuiltin<"__builtin_lsx_vbitseli_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vandi_b : ClangBuiltin<"__builtin_lsx_vandi_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vori_b : ClangBuiltin<"__builtin_lsx_vori_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vxori_b : ClangBuiltin<"__builtin_lsx_vxori_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vnori_b : ClangBuiltin<"__builtin_lsx_vnori_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vldi : ClangBuiltin<"__builtin_lsx_vldi">, + Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vrepli_b : ClangBuiltin<"__builtin_lsx_vrepli_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vrepli_h : ClangBuiltin<"__builtin_lsx_vrepli_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vrepli_w : ClangBuiltin<"__builtin_lsx_vrepli_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vrepli_d : ClangBuiltin<"__builtin_lsx_vrepli_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vpermi_w : ClangBuiltin<"__builtin_lsx_vpermi_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsadd_b : ClangBuiltin<"__builtin_lsx_vsadd_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vsadd_h : ClangBuiltin<"__builtin_lsx_vsadd_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vsadd_w : ClangBuiltin<"__builtin_lsx_vsadd_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vsadd_d : ClangBuiltin<"__builtin_lsx_vsadd_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vssub_b : ClangBuiltin<"__builtin_lsx_vssub_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssub_h : ClangBuiltin<"__builtin_lsx_vssub_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssub_w : ClangBuiltin<"__builtin_lsx_vssub_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssub_d : ClangBuiltin<"__builtin_lsx_vssub_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsadd_bu : ClangBuiltin<"__builtin_lsx_vsadd_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vsadd_hu : ClangBuiltin<"__builtin_lsx_vsadd_hu">, + Intrinsic<[llvm_v8i16_ty], 
[llvm_v8i16_ty, llvm_v8i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vsadd_wu : ClangBuiltin<"__builtin_lsx_vsadd_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vsadd_du : ClangBuiltin<"__builtin_lsx_vsadd_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vssub_bu : ClangBuiltin<"__builtin_lsx_vssub_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssub_hu : ClangBuiltin<"__builtin_lsx_vssub_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssub_wu : ClangBuiltin<"__builtin_lsx_vssub_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssub_du : ClangBuiltin<"__builtin_lsx_vssub_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vhaddw_h_b : ClangBuiltin<"__builtin_lsx_vhaddw_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhaddw_w_h : ClangBuiltin<"__builtin_lsx_vhaddw_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhaddw_d_w : ClangBuiltin<"__builtin_lsx_vhaddw_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vhsubw_h_b : ClangBuiltin<"__builtin_lsx_vhsubw_h_b">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhsubw_w_h : ClangBuiltin<"__builtin_lsx_vhsubw_w_h">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhsubw_d_w : ClangBuiltin<"__builtin_lsx_vhsubw_d_w">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vhaddw_hu_bu : ClangBuiltin<"__builtin_lsx_vhaddw_hu_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhaddw_wu_hu : ClangBuiltin<"__builtin_lsx_vhaddw_wu_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhaddw_du_wu : ClangBuiltin<"__builtin_lsx_vhaddw_du_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vhsubw_hu_bu : ClangBuiltin<"__builtin_lsx_vhsubw_hu_bu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhsubw_wu_hu : ClangBuiltin<"__builtin_lsx_vhsubw_wu_hu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vhsubw_du_wu : ClangBuiltin<"__builtin_lsx_vhsubw_du_wu">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vadda_b : ClangBuiltin<"__builtin_lsx_vadda_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vadda_h : ClangBuiltin<"__builtin_lsx_vadda_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vadda_w : ClangBuiltin<"__builtin_lsx_vadda_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vadda_d : ClangBuiltin<"__builtin_lsx_vadda_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vabsd_b : 
ClangBuiltin<"__builtin_lsx_vabsd_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vabsd_h : ClangBuiltin<"__builtin_lsx_vabsd_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vabsd_w : ClangBuiltin<"__builtin_lsx_vabsd_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vabsd_d : ClangBuiltin<"__builtin_lsx_vabsd_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vabsd_bu : ClangBuiltin<"__builtin_lsx_vabsd_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vabsd_hu : ClangBuiltin<"__builtin_lsx_vabsd_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vabsd_wu : ClangBuiltin<"__builtin_lsx_vabsd_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vabsd_du : ClangBuiltin<"__builtin_lsx_vabsd_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vavg_b : ClangBuiltin<"__builtin_lsx_vavg_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavg_h : ClangBuiltin<"__builtin_lsx_vavg_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavg_w : ClangBuiltin<"__builtin_lsx_vavg_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavg_d : ClangBuiltin<"__builtin_lsx_vavg_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vavg_bu : ClangBuiltin<"__builtin_lsx_vavg_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavg_hu : ClangBuiltin<"__builtin_lsx_vavg_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavg_wu : ClangBuiltin<"__builtin_lsx_vavg_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavg_du : ClangBuiltin<"__builtin_lsx_vavg_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vavgr_b : ClangBuiltin<"__builtin_lsx_vavgr_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavgr_h : ClangBuiltin<"__builtin_lsx_vavgr_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavgr_w : ClangBuiltin<"__builtin_lsx_vavgr_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavgr_d : ClangBuiltin<"__builtin_lsx_vavgr_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vavgr_bu : ClangBuiltin<"__builtin_lsx_vavgr_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavgr_hu : ClangBuiltin<"__builtin_lsx_vavgr_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavgr_wu : ClangBuiltin<"__builtin_lsx_vavgr_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, 
llvm_v4i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vavgr_du : ClangBuiltin<"__builtin_lsx_vavgr_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vsrlr_b : ClangBuiltin<"__builtin_lsx_vsrlr_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlr_h : ClangBuiltin<"__builtin_lsx_vsrlr_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlr_w : ClangBuiltin<"__builtin_lsx_vsrlr_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlr_d : ClangBuiltin<"__builtin_lsx_vsrlr_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrar_b : ClangBuiltin<"__builtin_lsx_vsrar_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrar_h : ClangBuiltin<"__builtin_lsx_vsrar_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrar_w : ClangBuiltin<"__builtin_lsx_vsrar_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrar_d : ClangBuiltin<"__builtin_lsx_vsrar_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfmax_s : ClangBuiltin<"__builtin_lsx_vfmax_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfmax_d : ClangBuiltin<"__builtin_lsx_vfmax_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfmin_s : ClangBuiltin<"__builtin_lsx_vfmin_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfmin_d : ClangBuiltin<"__builtin_lsx_vfmin_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfmaxa_s : ClangBuiltin<"__builtin_lsx_vfmaxa_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfmaxa_d : ClangBuiltin<"__builtin_lsx_vfmaxa_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfmina_s : ClangBuiltin<"__builtin_lsx_vfmina_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfmina_d : ClangBuiltin<"__builtin_lsx_vfmina_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfclass_s : ClangBuiltin<"__builtin_lsx_vfclass_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfclass_d : ClangBuiltin<"__builtin_lsx_vfclass_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrecip_s : ClangBuiltin<"__builtin_lsx_vfrecip_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrecip_d : ClangBuiltin<"__builtin_lsx_vfrecip_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrsqrt_s : ClangBuiltin<"__builtin_lsx_vfrsqrt_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrsqrt_d : ClangBuiltin<"__builtin_lsx_vfrsqrt_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcvtl_s_h : ClangBuiltin<"__builtin_lsx_vfcvtl_s_h">, + Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def 
int_loongarch_lsx_vfcvtl_d_s : ClangBuiltin<"__builtin_lsx_vfcvtl_d_s">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfcvth_s_h : ClangBuiltin<"__builtin_lsx_vfcvth_s_h">, + Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfcvth_d_s : ClangBuiltin<"__builtin_lsx_vfcvth_d_s">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftint_w_s : ClangBuiltin<"__builtin_lsx_vftint_w_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftint_l_d : ClangBuiltin<"__builtin_lsx_vftint_l_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftint_wu_s : ClangBuiltin<"__builtin_lsx_vftint_wu_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftint_lu_d : ClangBuiltin<"__builtin_lsx_vftint_lu_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrlri_b : ClangBuiltin<"__builtin_lsx_vsrlri_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlri_h : ClangBuiltin<"__builtin_lsx_vsrlri_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlri_w : ClangBuiltin<"__builtin_lsx_vsrlri_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlri_d : ClangBuiltin<"__builtin_lsx_vsrlri_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrari_b : ClangBuiltin<"__builtin_lsx_vsrari_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrari_h : ClangBuiltin<"__builtin_lsx_vsrari_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrari_w : ClangBuiltin<"__builtin_lsx_vsrari_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrari_d : ClangBuiltin<"__builtin_lsx_vsrari_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsat_b : ClangBuiltin<"__builtin_lsx_vsat_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsat_h : ClangBuiltin<"__builtin_lsx_vsat_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsat_w : ClangBuiltin<"__builtin_lsx_vsat_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsat_d : ClangBuiltin<"__builtin_lsx_vsat_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsat_bu : ClangBuiltin<"__builtin_lsx_vsat_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsat_hu : ClangBuiltin<"__builtin_lsx_vsat_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsat_wu : ClangBuiltin<"__builtin_lsx_vsat_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsat_du : ClangBuiltin<"__builtin_lsx_vsat_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrlni_b_h : ClangBuiltin<"__builtin_lsx_vsrlni_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlni_h_w : ClangBuiltin<"__builtin_lsx_vsrlni_h_w">, + 
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlni_w_d : ClangBuiltin<"__builtin_lsx_vsrlni_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlni_d_q : ClangBuiltin<"__builtin_lsx_vsrlni_d_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrlrni_b_h : ClangBuiltin<"__builtin_lsx_vsrlrni_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlrni_h_w : ClangBuiltin<"__builtin_lsx_vsrlrni_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlrni_w_d : ClangBuiltin<"__builtin_lsx_vsrlrni_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrlrni_d_q : ClangBuiltin<"__builtin_lsx_vsrlrni_d_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrlni_b_h : ClangBuiltin<"__builtin_lsx_vssrlni_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlni_h_w : ClangBuiltin<"__builtin_lsx_vssrlni_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlni_w_d : ClangBuiltin<"__builtin_lsx_vssrlni_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlni_d_q : ClangBuiltin<"__builtin_lsx_vssrlni_d_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrlrni_bu_h : ClangBuiltin<"__builtin_lsx_vssrlrni_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrni_hu_w : ClangBuiltin<"__builtin_lsx_vssrlrni_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrni_wu_d : ClangBuiltin<"__builtin_lsx_vssrlrni_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlrni_du_q : ClangBuiltin<"__builtin_lsx_vssrlrni_du_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrarni_b_h : ClangBuiltin<"__builtin_lsx_vsrarni_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrarni_h_w : ClangBuiltin<"__builtin_lsx_vsrarni_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrarni_w_d : ClangBuiltin<"__builtin_lsx_vsrarni_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrarni_d_q : ClangBuiltin<"__builtin_lsx_vsrarni_d_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrani_b_h : ClangBuiltin<"__builtin_lsx_vssrani_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrani_h_w : ClangBuiltin<"__builtin_lsx_vssrani_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrani_w_d : ClangBuiltin<"__builtin_lsx_vssrani_w_d">, + 
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrani_d_q : ClangBuiltin<"__builtin_lsx_vssrani_d_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrani_bu_h : ClangBuiltin<"__builtin_lsx_vssrani_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrani_hu_w : ClangBuiltin<"__builtin_lsx_vssrani_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrani_wu_d : ClangBuiltin<"__builtin_lsx_vssrani_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrani_du_q : ClangBuiltin<"__builtin_lsx_vssrani_du_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrarni_b_h : ClangBuiltin<"__builtin_lsx_vssrarni_b_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarni_h_w : ClangBuiltin<"__builtin_lsx_vssrarni_h_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarni_w_d : ClangBuiltin<"__builtin_lsx_vssrarni_w_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarni_d_q : ClangBuiltin<"__builtin_lsx_vssrarni_d_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrarni_bu_h : ClangBuiltin<"__builtin_lsx_vssrarni_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarni_hu_w : ClangBuiltin<"__builtin_lsx_vssrarni_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarni_wu_d : ClangBuiltin<"__builtin_lsx_vssrarni_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrarni_du_q : ClangBuiltin<"__builtin_lsx_vssrarni_du_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vssrlni_bu_h : ClangBuiltin<"__builtin_lsx_vssrlni_bu_h">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlni_hu_w : ClangBuiltin<"__builtin_lsx_vssrlni_hu_w">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlni_wu_d : ClangBuiltin<"__builtin_lsx_vssrlni_wu_d">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vssrlni_du_q : ClangBuiltin<"__builtin_lsx_vssrlni_du_q">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vseq_b : ClangBuiltin<"__builtin_lsx_vseq_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vseq_h : ClangBuiltin<"__builtin_lsx_vseq_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vseq_w : ClangBuiltin<"__builtin_lsx_vseq_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vseq_d : ClangBuiltin<"__builtin_lsx_vseq_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, 
llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsle_b : ClangBuiltin<"__builtin_lsx_vsle_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsle_h : ClangBuiltin<"__builtin_lsx_vsle_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsle_w : ClangBuiltin<"__builtin_lsx_vsle_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsle_d : ClangBuiltin<"__builtin_lsx_vsle_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsle_bu : ClangBuiltin<"__builtin_lsx_vsle_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsle_hu : ClangBuiltin<"__builtin_lsx_vsle_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsle_wu : ClangBuiltin<"__builtin_lsx_vsle_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsle_du : ClangBuiltin<"__builtin_lsx_vsle_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vslt_b : ClangBuiltin<"__builtin_lsx_vslt_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_h : ClangBuiltin<"__builtin_lsx_vslt_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_w : ClangBuiltin<"__builtin_lsx_vslt_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_d : ClangBuiltin<"__builtin_lsx_vslt_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vslt_bu : ClangBuiltin<"__builtin_lsx_vslt_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_hu : ClangBuiltin<"__builtin_lsx_vslt_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_wu : ClangBuiltin<"__builtin_lsx_vslt_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslt_du : ClangBuiltin<"__builtin_lsx_vslt_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vadd_b : ClangBuiltin<"__builtin_lsx_vadd_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vadd_h : ClangBuiltin<"__builtin_lsx_vadd_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vadd_w : ClangBuiltin<"__builtin_lsx_vadd_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vadd_d : ClangBuiltin<"__builtin_lsx_vadd_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vsub_b : ClangBuiltin<"__builtin_lsx_vsub_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsub_h : ClangBuiltin<"__builtin_lsx_vsub_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsub_w : ClangBuiltin<"__builtin_lsx_vsub_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsub_d : ClangBuiltin<"__builtin_lsx_vsub_d">, + Intrinsic<[llvm_v2i64_ty], 
[llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmax_b : ClangBuiltin<"__builtin_lsx_vmax_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmax_h : ClangBuiltin<"__builtin_lsx_vmax_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmax_w : ClangBuiltin<"__builtin_lsx_vmax_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmax_d : ClangBuiltin<"__builtin_lsx_vmax_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmin_b : ClangBuiltin<"__builtin_lsx_vmin_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmin_h : ClangBuiltin<"__builtin_lsx_vmin_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmin_w : ClangBuiltin<"__builtin_lsx_vmin_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmin_d : ClangBuiltin<"__builtin_lsx_vmin_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmax_bu : ClangBuiltin<"__builtin_lsx_vmax_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmax_hu : ClangBuiltin<"__builtin_lsx_vmax_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmax_wu : ClangBuiltin<"__builtin_lsx_vmax_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmax_du : ClangBuiltin<"__builtin_lsx_vmax_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmin_bu : ClangBuiltin<"__builtin_lsx_vmin_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmin_hu : ClangBuiltin<"__builtin_lsx_vmin_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmin_wu : ClangBuiltin<"__builtin_lsx_vmin_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmin_du : ClangBuiltin<"__builtin_lsx_vmin_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmul_b : ClangBuiltin<"__builtin_lsx_vmul_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmul_h : ClangBuiltin<"__builtin_lsx_vmul_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmul_w : ClangBuiltin<"__builtin_lsx_vmul_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmul_d : ClangBuiltin<"__builtin_lsx_vmul_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmadd_b : ClangBuiltin<"__builtin_lsx_vmadd_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vmadd_h : ClangBuiltin<"__builtin_lsx_vmadd_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vmadd_w : ClangBuiltin<"__builtin_lsx_vmadd_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vmadd_d : ClangBuiltin<"__builtin_lsx_vmadd_d">, + 
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vmsub_b : ClangBuiltin<"__builtin_lsx_vmsub_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vmsub_h : ClangBuiltin<"__builtin_lsx_vmsub_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vmsub_w : ClangBuiltin<"__builtin_lsx_vmsub_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vmsub_d : ClangBuiltin<"__builtin_lsx_vmsub_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vdiv_b : ClangBuiltin<"__builtin_lsx_vdiv_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vdiv_h : ClangBuiltin<"__builtin_lsx_vdiv_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vdiv_w : ClangBuiltin<"__builtin_lsx_vdiv_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vdiv_d : ClangBuiltin<"__builtin_lsx_vdiv_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmod_b : ClangBuiltin<"__builtin_lsx_vmod_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmod_h : ClangBuiltin<"__builtin_lsx_vmod_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmod_w : ClangBuiltin<"__builtin_lsx_vmod_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmod_d : ClangBuiltin<"__builtin_lsx_vmod_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vdiv_bu : ClangBuiltin<"__builtin_lsx_vdiv_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vdiv_hu : ClangBuiltin<"__builtin_lsx_vdiv_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vdiv_wu : ClangBuiltin<"__builtin_lsx_vdiv_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vdiv_du : ClangBuiltin<"__builtin_lsx_vdiv_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsll_b : ClangBuiltin<"__builtin_lsx_vsll_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsll_h : ClangBuiltin<"__builtin_lsx_vsll_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsll_w : ClangBuiltin<"__builtin_lsx_vsll_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsll_d : ClangBuiltin<"__builtin_lsx_vsll_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrl_b : ClangBuiltin<"__builtin_lsx_vsrl_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrl_h : ClangBuiltin<"__builtin_lsx_vsrl_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrl_w : ClangBuiltin<"__builtin_lsx_vsrl_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrl_d : 
ClangBuiltin<"__builtin_lsx_vsrl_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbitclr_b : ClangBuiltin<"__builtin_lsx_vbitclr_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitclr_h : ClangBuiltin<"__builtin_lsx_vbitclr_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitclr_w : ClangBuiltin<"__builtin_lsx_vbitclr_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitclr_d : ClangBuiltin<"__builtin_lsx_vbitclr_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbitset_b : ClangBuiltin<"__builtin_lsx_vbitset_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitset_h : ClangBuiltin<"__builtin_lsx_vbitset_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitset_w : ClangBuiltin<"__builtin_lsx_vbitset_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitset_d : ClangBuiltin<"__builtin_lsx_vbitset_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vpackev_b : ClangBuiltin<"__builtin_lsx_vpackev_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpackev_h : ClangBuiltin<"__builtin_lsx_vpackev_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpackev_w : ClangBuiltin<"__builtin_lsx_vpackev_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpackev_d : ClangBuiltin<"__builtin_lsx_vpackev_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vpackod_b : ClangBuiltin<"__builtin_lsx_vpackod_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpackod_h : ClangBuiltin<"__builtin_lsx_vpackod_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpackod_w : ClangBuiltin<"__builtin_lsx_vpackod_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpackod_d : ClangBuiltin<"__builtin_lsx_vpackod_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vilvl_b : ClangBuiltin<"__builtin_lsx_vilvl_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vilvl_h : ClangBuiltin<"__builtin_lsx_vilvl_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vilvl_w : ClangBuiltin<"__builtin_lsx_vilvl_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vilvl_d : ClangBuiltin<"__builtin_lsx_vilvl_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vilvh_b : ClangBuiltin<"__builtin_lsx_vilvh_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vilvh_h : ClangBuiltin<"__builtin_lsx_vilvh_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vilvh_w : ClangBuiltin<"__builtin_lsx_vilvh_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, 
llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vilvh_d : ClangBuiltin<"__builtin_lsx_vilvh_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vpickev_b : ClangBuiltin<"__builtin_lsx_vpickev_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickev_h : ClangBuiltin<"__builtin_lsx_vpickev_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickev_w : ClangBuiltin<"__builtin_lsx_vpickev_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickev_d : ClangBuiltin<"__builtin_lsx_vpickev_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vand_v : ClangBuiltin<"__builtin_lsx_vand_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vor_v : ClangBuiltin<"__builtin_lsx_vor_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vbitrev_b : ClangBuiltin<"__builtin_lsx_vbitrev_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitrev_h : ClangBuiltin<"__builtin_lsx_vbitrev_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitrev_w : ClangBuiltin<"__builtin_lsx_vbitrev_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vbitrev_d : ClangBuiltin<"__builtin_lsx_vbitrev_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmod_bu : ClangBuiltin<"__builtin_lsx_vmod_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmod_hu : ClangBuiltin<"__builtin_lsx_vmod_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmod_wu : ClangBuiltin<"__builtin_lsx_vmod_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmod_du : ClangBuiltin<"__builtin_lsx_vmod_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vpickod_b : ClangBuiltin<"__builtin_lsx_vpickod_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickod_h : ClangBuiltin<"__builtin_lsx_vpickod_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickod_w : ClangBuiltin<"__builtin_lsx_vpickod_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpickod_d : ClangBuiltin<"__builtin_lsx_vpickod_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vreplve_b : ClangBuiltin<"__builtin_lsx_vreplve_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplve_h : ClangBuiltin<"__builtin_lsx_vreplve_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplve_w : ClangBuiltin<"__builtin_lsx_vreplve_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplve_d : ClangBuiltin<"__builtin_lsx_vreplve_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsra_b : ClangBuiltin<"__builtin_lsx_vsra_b">, + 
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsra_h : ClangBuiltin<"__builtin_lsx_vsra_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsra_w : ClangBuiltin<"__builtin_lsx_vsra_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsra_d : ClangBuiltin<"__builtin_lsx_vsra_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vxor_v : ClangBuiltin<"__builtin_lsx_vxor_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vnor_v : ClangBuiltin<"__builtin_lsx_vnor_v">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfadd_s : ClangBuiltin<"__builtin_lsx_vfadd_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfadd_d : ClangBuiltin<"__builtin_lsx_vfadd_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfsub_s : ClangBuiltin<"__builtin_lsx_vfsub_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfsub_d : ClangBuiltin<"__builtin_lsx_vfsub_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfmul_s : ClangBuiltin<"__builtin_lsx_vfmul_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfmul_d : ClangBuiltin<"__builtin_lsx_vfmul_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vshuf_h : ClangBuiltin<"__builtin_lsx_vshuf_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vshuf_w : ClangBuiltin<"__builtin_lsx_vshuf_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vshuf_d : ClangBuiltin<"__builtin_lsx_vshuf_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vseqi_b : ClangBuiltin<"__builtin_lsx_vseqi_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vseqi_h : ClangBuiltin<"__builtin_lsx_vseqi_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vseqi_w : ClangBuiltin<"__builtin_lsx_vseqi_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vseqi_d : ClangBuiltin<"__builtin_lsx_vseqi_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vslei_b : ClangBuiltin<"__builtin_lsx_vslei_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslei_h : ClangBuiltin<"__builtin_lsx_vslei_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslei_w : ClangBuiltin<"__builtin_lsx_vslei_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslei_d : ClangBuiltin<"__builtin_lsx_vslei_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vslei_bu : ClangBuiltin<"__builtin_lsx_vslei_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslei_hu : 
ClangBuiltin<"__builtin_lsx_vslei_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslei_wu : ClangBuiltin<"__builtin_lsx_vslei_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslei_du : ClangBuiltin<"__builtin_lsx_vslei_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vslti_b : ClangBuiltin<"__builtin_lsx_vslti_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslti_h : ClangBuiltin<"__builtin_lsx_vslti_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslti_w : ClangBuiltin<"__builtin_lsx_vslti_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslti_d : ClangBuiltin<"__builtin_lsx_vslti_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vslti_bu : ClangBuiltin<"__builtin_lsx_vslti_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslti_hu : ClangBuiltin<"__builtin_lsx_vslti_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslti_wu : ClangBuiltin<"__builtin_lsx_vslti_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslti_du : ClangBuiltin<"__builtin_lsx_vslti_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vaddi_bu : ClangBuiltin<"__builtin_lsx_vaddi_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vaddi_hu : ClangBuiltin<"__builtin_lsx_vaddi_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vaddi_wu : ClangBuiltin<"__builtin_lsx_vaddi_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lsx_vaddi_du : ClangBuiltin<"__builtin_lsx_vaddi_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lsx_vsubi_bu : ClangBuiltin<"__builtin_lsx_vsubi_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubi_hu : ClangBuiltin<"__builtin_lsx_vsubi_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubi_wu : ClangBuiltin<"__builtin_lsx_vsubi_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsubi_du : ClangBuiltin<"__builtin_lsx_vsubi_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaxi_b : ClangBuiltin<"__builtin_lsx_vmaxi_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaxi_h : ClangBuiltin<"__builtin_lsx_vmaxi_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaxi_w : ClangBuiltin<"__builtin_lsx_vmaxi_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaxi_d : ClangBuiltin<"__builtin_lsx_vmaxi_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmini_b : ClangBuiltin<"__builtin_lsx_vmini_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def 
int_loongarch_lsx_vmini_h : ClangBuiltin<"__builtin_lsx_vmini_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmini_w : ClangBuiltin<"__builtin_lsx_vmini_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmini_d : ClangBuiltin<"__builtin_lsx_vmini_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmaxi_bu : ClangBuiltin<"__builtin_lsx_vmaxi_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaxi_hu : ClangBuiltin<"__builtin_lsx_vmaxi_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaxi_wu : ClangBuiltin<"__builtin_lsx_vmaxi_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmaxi_du : ClangBuiltin<"__builtin_lsx_vmaxi_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vmini_bu : ClangBuiltin<"__builtin_lsx_vmini_bu">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmini_hu : ClangBuiltin<"__builtin_lsx_vmini_hu">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmini_wu : ClangBuiltin<"__builtin_lsx_vmini_wu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vmini_du : ClangBuiltin<"__builtin_lsx_vmini_du">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vclz_b : ClangBuiltin<"__builtin_lsx_vclz_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vclz_h : ClangBuiltin<"__builtin_lsx_vclz_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vclz_w : ClangBuiltin<"__builtin_lsx_vclz_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vclz_d : ClangBuiltin<"__builtin_lsx_vclz_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vpcnt_b : ClangBuiltin<"__builtin_lsx_vpcnt_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpcnt_h : ClangBuiltin<"__builtin_lsx_vpcnt_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpcnt_w : ClangBuiltin<"__builtin_lsx_vpcnt_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vpcnt_d : ClangBuiltin<"__builtin_lsx_vpcnt_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfsqrt_s : ClangBuiltin<"__builtin_lsx_vfsqrt_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfsqrt_d : ClangBuiltin<"__builtin_lsx_vfsqrt_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vfrint_s : ClangBuiltin<"__builtin_lsx_vfrint_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfrint_d : ClangBuiltin<"__builtin_lsx_vfrint_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vffint_s_w : ClangBuiltin<"__builtin_lsx_vffint_s_w">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vffint_d_l : ClangBuiltin<"__builtin_lsx_vffint_d_l">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vffint_s_wu : 
ClangBuiltin<"__builtin_lsx_vffint_s_wu">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vffint_d_lu : ClangBuiltin<"__builtin_lsx_vffint_d_lu">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vftintrz_wu_s : ClangBuiltin<"__builtin_lsx_vftintrz_wu_s">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vftintrz_lu_d : ClangBuiltin<"__builtin_lsx_vftintrz_lu_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vreplgr2vr_b : ClangBuiltin<"__builtin_lsx_vreplgr2vr_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplgr2vr_h : ClangBuiltin<"__builtin_lsx_vreplgr2vr_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplgr2vr_w : ClangBuiltin<"__builtin_lsx_vreplgr2vr_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplgr2vr_d : ClangBuiltin<"__builtin_lsx_vreplgr2vr_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vinsgr2vr_b : ClangBuiltin<"__builtin_lsx_vinsgr2vr_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vinsgr2vr_h : ClangBuiltin<"__builtin_lsx_vinsgr2vr_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vinsgr2vr_w : ClangBuiltin<"__builtin_lsx_vinsgr2vr_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vinsgr2vr_d : ClangBuiltin<"__builtin_lsx_vinsgr2vr_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vfdiv_s : ClangBuiltin<"__builtin_lsx_vfdiv_s">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vfdiv_d : ClangBuiltin<"__builtin_lsx_vfdiv_d">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vslli_b : ClangBuiltin<"__builtin_lsx_vslli_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslli_h : ClangBuiltin<"__builtin_lsx_vslli_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslli_w : ClangBuiltin<"__builtin_lsx_vslli_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vslli_d : ClangBuiltin<"__builtin_lsx_vslli_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrli_b : ClangBuiltin<"__builtin_lsx_vsrli_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrli_h : ClangBuiltin<"__builtin_lsx_vsrli_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrli_w : ClangBuiltin<"__builtin_lsx_vsrli_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrli_d : ClangBuiltin<"__builtin_lsx_vsrli_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vsrai_b : ClangBuiltin<"__builtin_lsx_vsrai_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrai_h : ClangBuiltin<"__builtin_lsx_vsrai_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrai_w : 
ClangBuiltin<"__builtin_lsx_vsrai_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vsrai_d : ClangBuiltin<"__builtin_lsx_vsrai_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vshuf4i_b : ClangBuiltin<"__builtin_lsx_vshuf4i_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vshuf4i_h : ClangBuiltin<"__builtin_lsx_vshuf4i_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vshuf4i_w : ClangBuiltin<"__builtin_lsx_vshuf4i_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vshuf4i_d : ClangBuiltin<"__builtin_lsx_vshuf4i_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vrotr_b : ClangBuiltin<"__builtin_lsx_vrotr_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_vrotr_h : ClangBuiltin<"__builtin_lsx_vrotr_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_vrotr_w : ClangBuiltin<"__builtin_lsx_vrotr_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vrotr_d : ClangBuiltin<"__builtin_lsx_vrotr_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vrotri_b : ClangBuiltin<"__builtin_lsx_vrotri_b">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vrotri_h : ClangBuiltin<"__builtin_lsx_vrotri_h">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vrotri_w : ClangBuiltin<"__builtin_lsx_vrotri_w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vrotri_d : ClangBuiltin<"__builtin_lsx_vrotri_d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lsx_vld : ClangBuiltin<"__builtin_lsx_vld">, + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; + +def int_loongarch_lsx_vst : ClangBuiltin<"__builtin_lsx_vst">, + Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrArgMemOnly]>; + +def int_loongarch_lsx_bz_v : ClangBuiltin<"__builtin_lsx_bz_v">, + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_bnz_v : ClangBuiltin<"__builtin_lsx_bnz_v">, + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + +def int_loongarch_lsx_bz_b : ClangBuiltin<"__builtin_lsx_bz_b">, + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_bz_h : ClangBuiltin<"__builtin_lsx_bz_h">, + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_bz_w : ClangBuiltin<"__builtin_lsx_bz_w">, + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_bz_d : ClangBuiltin<"__builtin_lsx_bz_d">, + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +def int_loongarch_lsx_bnz_b : ClangBuiltin<"__builtin_lsx_bnz_b">, + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; +def int_loongarch_lsx_bnz_h : ClangBuiltin<"__builtin_lsx_bnz_h">, + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; +def int_loongarch_lsx_bnz_w : ClangBuiltin<"__builtin_lsx_bnz_w">, + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_bnz_d : ClangBuiltin<"__builtin_lsx_bnz_d">, + 
Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// LoongArch LASX +//===----------------------------------------------------------------------===// + +def int_loongarch_lasx_xvfmadd_s : ClangBuiltin<"__builtin_lasx_xvfmadd_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvfmadd_d : ClangBuiltin<"__builtin_lasx_xvfmadd_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvfmsub_s : ClangBuiltin<"__builtin_lasx_xvfmsub_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvfmsub_d : ClangBuiltin<"__builtin_lasx_xvfmsub_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvfnmadd_s : ClangBuiltin<"__builtin_lasx_xvfnmadd_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvfnmadd_d : ClangBuiltin<"__builtin_lasx_xvfnmadd_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvfnmsub_s : ClangBuiltin<"__builtin_lasx_xvfnmsub_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvfnmsub_d : ClangBuiltin<"__builtin_lasx_xvfnmsub_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvclo_b : ClangBuiltin<"__builtin_lasx_xvclo_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvclo_h : ClangBuiltin<"__builtin_lasx_xvclo_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvclo_w : ClangBuiltin<"__builtin_lasx_xvclo_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvclo_d : ClangBuiltin<"__builtin_lasx_xvclo_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvflogb_s : ClangBuiltin<"__builtin_lasx_xvflogb_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvflogb_d : ClangBuiltin<"__builtin_lasx_xvflogb_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpickve2gr_w : ClangBuiltin<"__builtin_lasx_xvpickve2gr_w">, + Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickve2gr_d : ClangBuiltin<"__builtin_lasx_xvpickve2gr_d">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpickve2gr_wu : ClangBuiltin<"__builtin_lasx_xvpickve2gr_wu">, + Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickve2gr_du : ClangBuiltin<"__builtin_lasx_xvpickve2gr_du">, + Intrinsic<[llvm_i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmskltz_b : ClangBuiltin<"__builtin_lasx_xvmskltz_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmskltz_h : ClangBuiltin<"__builtin_lasx_xvmskltz_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmskltz_w : ClangBuiltin<"__builtin_lasx_xvmskltz_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmskltz_d : ClangBuiltin<"__builtin_lasx_xvmskltz_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
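+ +// Note on the xvfcmp_* intrinsics that follow: conditions prefixed with 'c' +// are quiet comparisons (the invalid-operation exception is raised only for +// signaling NaN operands), while conditions prefixed with 's' are signaling +// (invalid is raised for any NaN operand). Each result lane is set to all +// ones when the condition holds and to all zeros otherwise.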
+ +def int_loongarch_lasx_xvfcmp_caf_s : ClangBuiltin<"__builtin_lasx_xvfcmp_caf_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_caf_d : ClangBuiltin<"__builtin_lasx_xvfcmp_caf_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_cor_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cor_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_cor_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cor_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_cun_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cun_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_cun_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cun_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_cune_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cune_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_cune_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cune_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_cueq_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cueq_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_cueq_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cueq_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_ceq_s : ClangBuiltin<"__builtin_lasx_xvfcmp_ceq_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_ceq_d : ClangBuiltin<"__builtin_lasx_xvfcmp_ceq_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_cne_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cne_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_cne_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cne_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_clt_s : ClangBuiltin<"__builtin_lasx_xvfcmp_clt_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_clt_d : ClangBuiltin<"__builtin_lasx_xvfcmp_clt_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_cult_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cult_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_cult_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cult_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_cle_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cle_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_cle_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cle_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfcmp_cule_s : ClangBuiltin<"__builtin_lasx_xvfcmp_cule_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfcmp_cule_d : ClangBuiltin<"__builtin_lasx_xvfcmp_cule_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def 
+
+def int_loongarch_lasx_xvfcmp_saf_s : ClangBuiltin<"__builtin_lasx_xvfcmp_saf_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_saf_d : ClangBuiltin<"__builtin_lasx_xvfcmp_saf_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sor_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sor_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sor_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sor_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sun_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sun_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sun_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sun_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sune_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sune_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sune_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sune_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sueq_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sueq_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sueq_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sueq_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_seq_s : ClangBuiltin<"__builtin_lasx_xvfcmp_seq_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_seq_d : ClangBuiltin<"__builtin_lasx_xvfcmp_seq_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sne_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sne_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sne_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sne_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_slt_s : ClangBuiltin<"__builtin_lasx_xvfcmp_slt_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_slt_d : ClangBuiltin<"__builtin_lasx_xvfcmp_slt_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sult_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sult_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sult_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sult_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sle_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sle_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sle_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sle_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcmp_sule_s : ClangBuiltin<"__builtin_lasx_xvfcmp_sule_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcmp_sule_d : ClangBuiltin<"__builtin_lasx_xvfcmp_sule_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
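+
+// Usage sketch (assuming the usual __lasx_* wrapper names from lasxintrin.h):
+// a compare yields a per-lane mask that xvbitsel_v below can use to blend:
+//   m = __lasx_xvfcmp_clt_s(a, b);   // lane i: all ones iff a[i] < b[i]
+//   r = __lasx_xvbitsel_v(y, x, m);  // take bits from x where m is set, else y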
+
+def int_loongarch_lasx_xvbitsel_v : ClangBuiltin<"__builtin_lasx_xvbitsel_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvshuf_b : ClangBuiltin<"__builtin_lasx_xvshuf_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvldrepl_b : ClangBuiltin<"__builtin_lasx_xvldrepl_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lasx_xvldrepl_h : ClangBuiltin<"__builtin_lasx_xvldrepl_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lasx_xvldrepl_w : ClangBuiltin<"__builtin_lasx_xvldrepl_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_loongarch_lasx_xvldrepl_d : ClangBuiltin<"__builtin_lasx_xvldrepl_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvstelm_b : ClangBuiltin<"__builtin_lasx_xvstelm_b">,
+  Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+def int_loongarch_lasx_xvstelm_h : ClangBuiltin<"__builtin_lasx_xvstelm_h">,
+  Intrinsic<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+def int_loongarch_lasx_xvstelm_w : ClangBuiltin<"__builtin_lasx_xvstelm_w">,
+  Intrinsic<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+def int_loongarch_lasx_xvstelm_d : ClangBuiltin<"__builtin_lasx_xvstelm_d">,
+  Intrinsic<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvldx : ClangBuiltin<"__builtin_lasx_xvldx">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty],
+            [IntrReadMem, IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvstx : ClangBuiltin<"__builtin_lasx_xvstx">,
+  Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty],
+            [IntrArgMemOnly]>;
+
+def int_loongarch_lasx_xvaddwev_d_w : ClangBuiltin<"__builtin_lasx_xvaddwev_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_w_h : ClangBuiltin<"__builtin_lasx_xvaddwev_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_h_b : ClangBuiltin<"__builtin_lasx_xvaddwev_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_q_d : ClangBuiltin<"__builtin_lasx_xvaddwev_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsubwev_d_w : ClangBuiltin<"__builtin_lasx_xvsubwev_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_w_h : ClangBuiltin<"__builtin_lasx_xvsubwev_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_h_b : ClangBuiltin<"__builtin_lasx_xvsubwev_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_q_d : ClangBuiltin<"__builtin_lasx_xvsubwev_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
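+
+// Naming note: the "ev"/"od" widening forms operate on the even-/odd-indexed
+// source elements and widen them to the destination element size, e.g.
+// xvaddwev_d_w computes dst[i] = (i64)a[2*i] + (i64)b[2*i]. A doubled suffix
+// such as _du_d marks a mixed form: unsigned first source, signed second.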
+
+def int_loongarch_lasx_xvaddwod_d_w : ClangBuiltin<"__builtin_lasx_xvaddwod_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_w_h : ClangBuiltin<"__builtin_lasx_xvaddwod_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_h_b : ClangBuiltin<"__builtin_lasx_xvaddwod_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_q_d : ClangBuiltin<"__builtin_lasx_xvaddwod_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsubwod_d_w : ClangBuiltin<"__builtin_lasx_xvsubwod_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_w_h : ClangBuiltin<"__builtin_lasx_xvsubwod_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_h_b : ClangBuiltin<"__builtin_lasx_xvsubwod_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_q_d : ClangBuiltin<"__builtin_lasx_xvsubwod_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvaddwev_d_wu : ClangBuiltin<"__builtin_lasx_xvaddwev_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_w_hu : ClangBuiltin<"__builtin_lasx_xvaddwev_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_h_bu : ClangBuiltin<"__builtin_lasx_xvaddwev_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwev_q_du : ClangBuiltin<"__builtin_lasx_xvaddwev_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsubwev_d_wu : ClangBuiltin<"__builtin_lasx_xvsubwev_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_w_hu : ClangBuiltin<"__builtin_lasx_xvsubwev_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_h_bu : ClangBuiltin<"__builtin_lasx_xvsubwev_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwev_q_du : ClangBuiltin<"__builtin_lasx_xvsubwev_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvaddwod_d_wu : ClangBuiltin<"__builtin_lasx_xvaddwod_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_w_hu : ClangBuiltin<"__builtin_lasx_xvaddwod_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_h_bu : ClangBuiltin<"__builtin_lasx_xvaddwod_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvaddwod_q_du : ClangBuiltin<"__builtin_lasx_xvaddwod_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsubwod_d_wu : ClangBuiltin<"__builtin_lasx_xvsubwod_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_w_hu : ClangBuiltin<"__builtin_lasx_xvsubwod_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsubwod_h_bu : ClangBuiltin<"__builtin_lasx_xvsubwod_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
ClangBuiltin<"__builtin_lasx_xvsubwod_q_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvaddwev_d_wu_w : ClangBuiltin<"__builtin_lasx_xvaddwev_d_wu_w">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvaddwev_w_hu_h : ClangBuiltin<"__builtin_lasx_xvaddwev_w_hu_h">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvaddwev_h_bu_b : ClangBuiltin<"__builtin_lasx_xvaddwev_h_bu_b">, + Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvaddwev_q_du_d : ClangBuiltin<"__builtin_lasx_xvaddwev_q_du_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvaddwod_d_wu_w : ClangBuiltin<"__builtin_lasx_xvaddwod_d_wu_w">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvaddwod_w_hu_h : ClangBuiltin<"__builtin_lasx_xvaddwod_w_hu_h">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvaddwod_h_bu_b : ClangBuiltin<"__builtin_lasx_xvaddwod_h_bu_b">, + Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvaddwod_q_du_d : ClangBuiltin<"__builtin_lasx_xvaddwod_q_du_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvhaddw_qu_du : ClangBuiltin<"__builtin_lasx_xvhaddw_qu_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvhsubw_qu_du : ClangBuiltin<"__builtin_lasx_xvhsubw_qu_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvhaddw_q_d : ClangBuiltin<"__builtin_lasx_xvhaddw_q_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvhsubw_q_d : ClangBuiltin<"__builtin_lasx_xvhsubw_q_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmuh_b : ClangBuiltin<"__builtin_lasx_xvmuh_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmuh_h : ClangBuiltin<"__builtin_lasx_xvmuh_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmuh_w : ClangBuiltin<"__builtin_lasx_xvmuh_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmuh_d : ClangBuiltin<"__builtin_lasx_xvmuh_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmuh_bu : ClangBuiltin<"__builtin_lasx_xvmuh_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmuh_hu : ClangBuiltin<"__builtin_lasx_xvmuh_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmuh_wu : ClangBuiltin<"__builtin_lasx_xvmuh_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmuh_du : ClangBuiltin<"__builtin_lasx_xvmuh_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmulwev_d_w : ClangBuiltin<"__builtin_lasx_xvmulwev_d_w">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmulwev_w_h : ClangBuiltin<"__builtin_lasx_xvmulwev_w_h">, + 
+
+def int_loongarch_lasx_xvmulwev_d_w : ClangBuiltin<"__builtin_lasx_xvmulwev_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_w_h : ClangBuiltin<"__builtin_lasx_xvmulwev_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_h_b : ClangBuiltin<"__builtin_lasx_xvmulwev_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_q_d : ClangBuiltin<"__builtin_lasx_xvmulwev_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwod_d_w : ClangBuiltin<"__builtin_lasx_xvmulwod_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_w_h : ClangBuiltin<"__builtin_lasx_xvmulwod_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_h_b : ClangBuiltin<"__builtin_lasx_xvmulwod_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_q_d : ClangBuiltin<"__builtin_lasx_xvmulwod_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwev_d_wu : ClangBuiltin<"__builtin_lasx_xvmulwev_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_w_hu : ClangBuiltin<"__builtin_lasx_xvmulwev_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_h_bu : ClangBuiltin<"__builtin_lasx_xvmulwev_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_q_du : ClangBuiltin<"__builtin_lasx_xvmulwev_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwod_d_wu : ClangBuiltin<"__builtin_lasx_xvmulwod_d_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_w_hu : ClangBuiltin<"__builtin_lasx_xvmulwod_w_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_h_bu : ClangBuiltin<"__builtin_lasx_xvmulwod_h_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_q_du : ClangBuiltin<"__builtin_lasx_xvmulwod_q_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwev_d_wu_w : ClangBuiltin<"__builtin_lasx_xvmulwev_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_w_hu_h : ClangBuiltin<"__builtin_lasx_xvmulwev_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_h_bu_b : ClangBuiltin<"__builtin_lasx_xvmulwev_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwev_q_du_d : ClangBuiltin<"__builtin_lasx_xvmulwev_q_du_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmulwod_d_wu_w : ClangBuiltin<"__builtin_lasx_xvmulwod_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_w_hu_h : ClangBuiltin<"__builtin_lasx_xvmulwod_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmulwod_h_bu_b : ClangBuiltin<"__builtin_lasx_xvmulwod_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
ClangBuiltin<"__builtin_lasx_xvmulwod_q_du_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmaddwev_d_w : ClangBuiltin<"__builtin_lasx_xvmaddwev_d_w">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwev_w_h : ClangBuiltin<"__builtin_lasx_xvmaddwev_w_h">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwev_h_b : ClangBuiltin<"__builtin_lasx_xvmaddwev_h_b">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwev_q_d : ClangBuiltin<"__builtin_lasx_xvmaddwev_q_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmaddwod_d_w : ClangBuiltin<"__builtin_lasx_xvmaddwod_d_w">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwod_w_h : ClangBuiltin<"__builtin_lasx_xvmaddwod_w_h">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwod_h_b : ClangBuiltin<"__builtin_lasx_xvmaddwod_h_b">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwod_q_d : ClangBuiltin<"__builtin_lasx_xvmaddwod_q_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmaddwev_d_wu : ClangBuiltin<"__builtin_lasx_xvmaddwev_d_wu">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwev_w_hu : ClangBuiltin<"__builtin_lasx_xvmaddwev_w_hu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwev_h_bu : ClangBuiltin<"__builtin_lasx_xvmaddwev_h_bu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwev_q_du : ClangBuiltin<"__builtin_lasx_xvmaddwev_q_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmaddwod_d_wu : ClangBuiltin<"__builtin_lasx_xvmaddwod_d_wu">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwod_w_hu : ClangBuiltin<"__builtin_lasx_xvmaddwod_w_hu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwod_h_bu : ClangBuiltin<"__builtin_lasx_xvmaddwod_h_bu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwod_q_du : ClangBuiltin<"__builtin_lasx_xvmaddwod_q_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmaddwev_d_wu_w : ClangBuiltin<"__builtin_lasx_xvmaddwev_d_wu_w">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwev_w_hu_h : ClangBuiltin<"__builtin_lasx_xvmaddwev_w_hu_h">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaddwev_h_bu_b : ClangBuiltin<"__builtin_lasx_xvmaddwev_h_bu_b">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def 
+
+def int_loongarch_lasx_xvmaddwev_d_wu_w : ClangBuiltin<"__builtin_lasx_xvmaddwev_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_w_hu_h : ClangBuiltin<"__builtin_lasx_xvmaddwev_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_h_bu_b : ClangBuiltin<"__builtin_lasx_xvmaddwev_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwev_q_du_d : ClangBuiltin<"__builtin_lasx_xvmaddwev_q_du_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmaddwod_d_wu_w : ClangBuiltin<"__builtin_lasx_xvmaddwod_d_wu_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_w_hu_h : ClangBuiltin<"__builtin_lasx_xvmaddwod_w_hu_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_h_bu_b : ClangBuiltin<"__builtin_lasx_xvmaddwod_h_bu_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmaddwod_q_du_d : ClangBuiltin<"__builtin_lasx_xvmaddwod_q_du_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
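+
+// Narrowing shifts: xvsrln_*/xvsran_* shift right (logical/arithmetic) and
+// truncate each element to half width; an extra "r" (xvsrlrn, xvsrarn) rounds
+// with the last bit shifted out, and a leading "ss" (xvssrln, xvssran, ...)
+// saturates the narrowed result, with "u" variants saturating as unsigned.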
+
+def int_loongarch_lasx_xvsrln_b_h : ClangBuiltin<"__builtin_lasx_xvsrln_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrln_h_w : ClangBuiltin<"__builtin_lasx_xvsrln_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrln_w_d : ClangBuiltin<"__builtin_lasx_xvsrln_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsran_b_h : ClangBuiltin<"__builtin_lasx_xvsran_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsran_h_w : ClangBuiltin<"__builtin_lasx_xvsran_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsran_w_d : ClangBuiltin<"__builtin_lasx_xvsran_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrlrn_b_h : ClangBuiltin<"__builtin_lasx_xvsrlrn_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlrn_h_w : ClangBuiltin<"__builtin_lasx_xvsrlrn_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlrn_w_d : ClangBuiltin<"__builtin_lasx_xvsrlrn_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrarn_b_h : ClangBuiltin<"__builtin_lasx_xvsrarn_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrarn_h_w : ClangBuiltin<"__builtin_lasx_xvsrarn_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrarn_w_d : ClangBuiltin<"__builtin_lasx_xvsrarn_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrln_b_h : ClangBuiltin<"__builtin_lasx_xvssrln_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrln_h_w : ClangBuiltin<"__builtin_lasx_xvssrln_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrln_w_d : ClangBuiltin<"__builtin_lasx_xvssrln_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssran_b_h : ClangBuiltin<"__builtin_lasx_xvssran_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssran_h_w : ClangBuiltin<"__builtin_lasx_xvssran_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssran_w_d : ClangBuiltin<"__builtin_lasx_xvssran_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrlrn_b_h : ClangBuiltin<"__builtin_lasx_xvssrlrn_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrn_h_w : ClangBuiltin<"__builtin_lasx_xvssrlrn_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrn_w_d : ClangBuiltin<"__builtin_lasx_xvssrlrn_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrarn_b_h : ClangBuiltin<"__builtin_lasx_xvssrarn_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarn_h_w : ClangBuiltin<"__builtin_lasx_xvssrarn_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarn_w_d : ClangBuiltin<"__builtin_lasx_xvssrarn_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrln_bu_h : ClangBuiltin<"__builtin_lasx_xvssrln_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrln_hu_w : ClangBuiltin<"__builtin_lasx_xvssrln_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrln_wu_d : ClangBuiltin<"__builtin_lasx_xvssrln_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssran_bu_h : ClangBuiltin<"__builtin_lasx_xvssran_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssran_hu_w : ClangBuiltin<"__builtin_lasx_xvssran_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssran_wu_d : ClangBuiltin<"__builtin_lasx_xvssran_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrlrn_bu_h : ClangBuiltin<"__builtin_lasx_xvssrlrn_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrn_hu_w : ClangBuiltin<"__builtin_lasx_xvssrlrn_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrlrn_wu_d : ClangBuiltin<"__builtin_lasx_xvssrlrn_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvssrarn_bu_h : ClangBuiltin<"__builtin_lasx_xvssrarn_bu_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarn_hu_w : ClangBuiltin<"__builtin_lasx_xvssrarn_hu_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssrarn_wu_d : ClangBuiltin<"__builtin_lasx_xvssrarn_wu_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvandn_v : ClangBuiltin<"__builtin_lasx_xvandn_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvorn_v : ClangBuiltin<"__builtin_lasx_xvorn_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrstp_b : ClangBuiltin<"__builtin_lasx_xvfrstp_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty],
+            [IntrNoMem]>;
+def int_loongarch_lasx_xvfrstp_h : ClangBuiltin<"__builtin_lasx_xvfrstp_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty],
+            [IntrNoMem]>;
+
+def int_loongarch_lasx_xvadd_q : ClangBuiltin<"__builtin_lasx_xvadd_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsub_q : ClangBuiltin<"__builtin_lasx_xvsub_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsigncov_b : ClangBuiltin<"__builtin_lasx_xvsigncov_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+            [IntrNoMem]>;
+def int_loongarch_lasx_xvsigncov_h : ClangBuiltin<"__builtin_lasx_xvsigncov_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+            [IntrNoMem]>;
+def int_loongarch_lasx_xvsigncov_w : ClangBuiltin<"__builtin_lasx_xvsigncov_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+            [IntrNoMem]>;
+def int_loongarch_lasx_xvsigncov_d : ClangBuiltin<"__builtin_lasx_xvsigncov_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+            [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcvt_h_s : ClangBuiltin<"__builtin_lasx_xvfcvt_h_s">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcvt_s_d : ClangBuiltin<"__builtin_lasx_xvfcvt_s_d">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvffint_s_l : ClangBuiltin<"__builtin_lasx_xvffint_s_l">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftint_w_d : ClangBuiltin<"__builtin_lasx_xvftint_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrz_w_d : ClangBuiltin<"__builtin_lasx_xvftintrz_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrp_w_d : ClangBuiltin<"__builtin_lasx_xvftintrp_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrm_w_d : ClangBuiltin<"__builtin_lasx_xvftintrm_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrne_w_d : ClangBuiltin<"__builtin_lasx_xvftintrne_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
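+
+// Rounding-mode suffixes used here and below: rz = toward zero, rp = toward
+// +inf, rm = toward -inf, rne = to nearest even; the unsuffixed xvftint_*
+// forms presumably convert using the prevailing (FCSR) rounding mode, as is
+// conventional for LoongArch FP conversions.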
+
+def int_loongarch_lasx_xvbsrl_v : ClangBuiltin<"__builtin_lasx_xvbsrl_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbsll_v : ClangBuiltin<"__builtin_lasx_xvbsll_v">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrstpi_b : ClangBuiltin<"__builtin_lasx_xvfrstpi_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrstpi_h : ClangBuiltin<"__builtin_lasx_xvfrstpi_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvneg_b : ClangBuiltin<"__builtin_lasx_xvneg_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvneg_h : ClangBuiltin<"__builtin_lasx_xvneg_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvneg_w : ClangBuiltin<"__builtin_lasx_xvneg_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvneg_d : ClangBuiltin<"__builtin_lasx_xvneg_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvmskgez_b : ClangBuiltin<"__builtin_lasx_xvmskgez_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvmsknz_b : ClangBuiltin<"__builtin_lasx_xvmsknz_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrintrm_s : ClangBuiltin<"__builtin_lasx_xvfrintrm_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrintrm_d : ClangBuiltin<"__builtin_lasx_xvfrintrm_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrintrp_s : ClangBuiltin<"__builtin_lasx_xvfrintrp_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrintrp_d : ClangBuiltin<"__builtin_lasx_xvfrintrp_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrintrz_s : ClangBuiltin<"__builtin_lasx_xvfrintrz_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrintrz_d : ClangBuiltin<"__builtin_lasx_xvfrintrz_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrintrne_s : ClangBuiltin<"__builtin_lasx_xvfrintrne_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrintrne_d : ClangBuiltin<"__builtin_lasx_xvfrintrne_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvffinth_d_w : ClangBuiltin<"__builtin_lasx_xvffinth_d_w">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvffintl_d_w : ClangBuiltin<"__builtin_lasx_xvffintl_d_w">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrm_w_s : ClangBuiltin<"__builtin_lasx_xvftintrm_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrm_l_d : ClangBuiltin<"__builtin_lasx_xvftintrm_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrp_w_s : ClangBuiltin<"__builtin_lasx_xvftintrp_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrp_l_d : ClangBuiltin<"__builtin_lasx_xvftintrp_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrz_w_s : ClangBuiltin<"__builtin_lasx_xvftintrz_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrz_l_d : ClangBuiltin<"__builtin_lasx_xvftintrz_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrne_w_s : ClangBuiltin<"__builtin_lasx_xvftintrne_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrne_l_d : ClangBuiltin<"__builtin_lasx_xvftintrne_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftinth_l_s : ClangBuiltin<"__builtin_lasx_xvftinth_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintl_l_s : ClangBuiltin<"__builtin_lasx_xvftintl_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrmh_l_s : ClangBuiltin<"__builtin_lasx_xvftintrmh_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrml_l_s : ClangBuiltin<"__builtin_lasx_xvftintrml_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrph_l_s : ClangBuiltin<"__builtin_lasx_xvftintrph_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrpl_l_s : ClangBuiltin<"__builtin_lasx_xvftintrpl_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrzh_l_s : ClangBuiltin<"__builtin_lasx_xvftintrzh_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrzl_l_s : ClangBuiltin<"__builtin_lasx_xvftintrzl_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftintrneh_l_s : ClangBuiltin<"__builtin_lasx_xvftintrneh_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftintrnel_l_s : ClangBuiltin<"__builtin_lasx_xvftintrnel_l_s">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvexth_d_w : ClangBuiltin<"__builtin_lasx_xvexth_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvexth_w_h : ClangBuiltin<"__builtin_lasx_xvexth_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvexth_h_b : ClangBuiltin<"__builtin_lasx_xvexth_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvexth_q_d : ClangBuiltin<"__builtin_lasx_xvexth_q_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsllwil_d_w : ClangBuiltin<"__builtin_lasx_xvsllwil_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsllwil_w_h : ClangBuiltin<"__builtin_lasx_xvsllwil_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsllwil_h_b : ClangBuiltin<"__builtin_lasx_xvsllwil_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsllwil_du_wu : ClangBuiltin<"__builtin_lasx_xvsllwil_du_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsllwil_wu_hu : ClangBuiltin<"__builtin_lasx_xvsllwil_wu_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsllwil_hu_bu : ClangBuiltin<"__builtin_lasx_xvsllwil_hu_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvbitclri_b : ClangBuiltin<"__builtin_lasx_xvbitclri_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitclri_h : ClangBuiltin<"__builtin_lasx_xvbitclri_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitclri_w : ClangBuiltin<"__builtin_lasx_xvbitclri_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvbitclri_d : ClangBuiltin<"__builtin_lasx_xvbitclri_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
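+
+// xvbitclri/xvbitseti/xvbitrevi clear, set, or flip a single bit in each
+// element, selected by the immediate operand.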
ClangBuiltin<"__builtin_lasx_xvbitseti_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvbitrevi_b : ClangBuiltin<"__builtin_lasx_xvbitrevi_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitrevi_h : ClangBuiltin<"__builtin_lasx_xvbitrevi_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitrevi_w : ClangBuiltin<"__builtin_lasx_xvbitrevi_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitrevi_d : ClangBuiltin<"__builtin_lasx_xvbitrevi_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvssrlrni_b_h : ClangBuiltin<"__builtin_lasx_xvssrlrni_b_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlrni_h_w : ClangBuiltin<"__builtin_lasx_xvssrlrni_h_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlrni_w_d : ClangBuiltin<"__builtin_lasx_xvssrlrni_w_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlrni_d_q : ClangBuiltin<"__builtin_lasx_xvssrlrni_d_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsrani_b_h : ClangBuiltin<"__builtin_lasx_xvsrani_b_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrani_h_w : ClangBuiltin<"__builtin_lasx_xvsrani_h_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrani_w_d : ClangBuiltin<"__builtin_lasx_xvsrani_w_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrani_d_q : ClangBuiltin<"__builtin_lasx_xvsrani_d_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvextrins_b : ClangBuiltin<"__builtin_lasx_xvextrins_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvextrins_h : ClangBuiltin<"__builtin_lasx_xvextrins_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvextrins_w : ClangBuiltin<"__builtin_lasx_xvextrins_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvextrins_d : ClangBuiltin<"__builtin_lasx_xvextrins_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvbitseli_b : ClangBuiltin<"__builtin_lasx_xvbitseli_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvandi_b : ClangBuiltin<"__builtin_lasx_xvandi_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvori_b : ClangBuiltin<"__builtin_lasx_xvori_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvxori_b : ClangBuiltin<"__builtin_lasx_xvxori_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvnori_b : ClangBuiltin<"__builtin_lasx_xvnori_b">, + Intrinsic<[llvm_v32i8_ty], 
+
+def int_loongarch_lasx_xvnori_b : ClangBuiltin<"__builtin_lasx_xvnori_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvldi : ClangBuiltin<"__builtin_lasx_xvldi">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvrepli_b : ClangBuiltin<"__builtin_lasx_xvrepli_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvrepli_h : ClangBuiltin<"__builtin_lasx_xvrepli_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvrepli_w : ClangBuiltin<"__builtin_lasx_xvrepli_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvrepli_d : ClangBuiltin<"__builtin_lasx_xvrepli_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvpermi_w : ClangBuiltin<"__builtin_lasx_xvpermi_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsadd_b : ClangBuiltin<"__builtin_lasx_xvsadd_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_h : ClangBuiltin<"__builtin_lasx_xvsadd_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_w : ClangBuiltin<"__builtin_lasx_xvsadd_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_d : ClangBuiltin<"__builtin_lasx_xvsadd_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+            [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvssub_b : ClangBuiltin<"__builtin_lasx_xvssub_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_h : ClangBuiltin<"__builtin_lasx_xvssub_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_w : ClangBuiltin<"__builtin_lasx_xvssub_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_d : ClangBuiltin<"__builtin_lasx_xvssub_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsadd_bu : ClangBuiltin<"__builtin_lasx_xvsadd_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_hu : ClangBuiltin<"__builtin_lasx_xvsadd_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_wu : ClangBuiltin<"__builtin_lasx_xvsadd_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvsadd_du : ClangBuiltin<"__builtin_lasx_xvsadd_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+            [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvssub_bu : ClangBuiltin<"__builtin_lasx_xvssub_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_hu : ClangBuiltin<"__builtin_lasx_xvssub_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_wu : ClangBuiltin<"__builtin_lasx_xvssub_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvssub_du : ClangBuiltin<"__builtin_lasx_xvssub_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
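+
+// xvhaddw/xvhsubw pair adjacent elements across the two sources, widening:
+// for _h_b, dst[i] = (i16)a[2*i+1] + (i16)b[2*i] (with "-" for xvhsubw).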
+
+def int_loongarch_lasx_xvhaddw_h_b : ClangBuiltin<"__builtin_lasx_xvhaddw_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhaddw_w_h : ClangBuiltin<"__builtin_lasx_xvhaddw_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhaddw_d_w : ClangBuiltin<"__builtin_lasx_xvhaddw_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvhsubw_h_b : ClangBuiltin<"__builtin_lasx_xvhsubw_h_b">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_w_h : ClangBuiltin<"__builtin_lasx_xvhsubw_w_h">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_d_w : ClangBuiltin<"__builtin_lasx_xvhsubw_d_w">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvhaddw_hu_bu : ClangBuiltin<"__builtin_lasx_xvhaddw_hu_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhaddw_wu_hu : ClangBuiltin<"__builtin_lasx_xvhaddw_wu_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhaddw_du_wu : ClangBuiltin<"__builtin_lasx_xvhaddw_du_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvhsubw_hu_bu : ClangBuiltin<"__builtin_lasx_xvhsubw_hu_bu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_wu_hu : ClangBuiltin<"__builtin_lasx_xvhsubw_wu_hu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvhsubw_du_wu : ClangBuiltin<"__builtin_lasx_xvhsubw_du_wu">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvadda_b : ClangBuiltin<"__builtin_lasx_xvadda_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvadda_h : ClangBuiltin<"__builtin_lasx_xvadda_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvadda_w : ClangBuiltin<"__builtin_lasx_xvadda_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvadda_d : ClangBuiltin<"__builtin_lasx_xvadda_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+            [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvabsd_b : ClangBuiltin<"__builtin_lasx_xvabsd_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_h : ClangBuiltin<"__builtin_lasx_xvabsd_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_w : ClangBuiltin<"__builtin_lasx_xvabsd_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_d : ClangBuiltin<"__builtin_lasx_xvabsd_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvabsd_bu : ClangBuiltin<"__builtin_lasx_xvabsd_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_hu : ClangBuiltin<"__builtin_lasx_xvabsd_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_wu : ClangBuiltin<"__builtin_lasx_xvabsd_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvabsd_du : ClangBuiltin<"__builtin_lasx_xvabsd_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvavg_b : ClangBuiltin<"__builtin_lasx_xvavg_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_h : ClangBuiltin<"__builtin_lasx_xvavg_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_w : ClangBuiltin<"__builtin_lasx_xvavg_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_d : ClangBuiltin<"__builtin_lasx_xvavg_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+            [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvavg_bu : ClangBuiltin<"__builtin_lasx_xvavg_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_hu : ClangBuiltin<"__builtin_lasx_xvavg_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_wu : ClangBuiltin<"__builtin_lasx_xvavg_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavg_du : ClangBuiltin<"__builtin_lasx_xvavg_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+            [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvavgr_b : ClangBuiltin<"__builtin_lasx_xvavgr_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_h : ClangBuiltin<"__builtin_lasx_xvavgr_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_w : ClangBuiltin<"__builtin_lasx_xvavgr_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_d : ClangBuiltin<"__builtin_lasx_xvavgr_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+            [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvavgr_bu : ClangBuiltin<"__builtin_lasx_xvavgr_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_hu : ClangBuiltin<"__builtin_lasx_xvavgr_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_wu : ClangBuiltin<"__builtin_lasx_xvavgr_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+            [Commutative, IntrNoMem]>;
+def int_loongarch_lasx_xvavgr_du : ClangBuiltin<"__builtin_lasx_xvavgr_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+            [Commutative, IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrlr_b : ClangBuiltin<"__builtin_lasx_xvsrlr_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlr_h : ClangBuiltin<"__builtin_lasx_xvsrlr_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlr_w : ClangBuiltin<"__builtin_lasx_xvsrlr_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlr_d : ClangBuiltin<"__builtin_lasx_xvsrlr_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
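+
+// The trailing "r" in xvsrlr/xvsrar (and in the immediate forms xvsrlri/
+// xvsrari further below) means the shift rounds: the last bit shifted out is
+// added back into the result instead of being truncated.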
+
+def int_loongarch_lasx_xvsrar_b : ClangBuiltin<"__builtin_lasx_xvsrar_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrar_h : ClangBuiltin<"__builtin_lasx_xvsrar_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrar_w : ClangBuiltin<"__builtin_lasx_xvsrar_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrar_d : ClangBuiltin<"__builtin_lasx_xvsrar_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmax_s : ClangBuiltin<"__builtin_lasx_xvfmax_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfmax_d : ClangBuiltin<"__builtin_lasx_xvfmax_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmin_s : ClangBuiltin<"__builtin_lasx_xvfmin_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfmin_d : ClangBuiltin<"__builtin_lasx_xvfmin_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmaxa_s : ClangBuiltin<"__builtin_lasx_xvfmaxa_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfmaxa_d : ClangBuiltin<"__builtin_lasx_xvfmaxa_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfmina_s : ClangBuiltin<"__builtin_lasx_xvfmina_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfmina_d : ClangBuiltin<"__builtin_lasx_xvfmina_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfclass_s : ClangBuiltin<"__builtin_lasx_xvfclass_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfclass_d : ClangBuiltin<"__builtin_lasx_xvfclass_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrecip_s : ClangBuiltin<"__builtin_lasx_xvfrecip_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrecip_d : ClangBuiltin<"__builtin_lasx_xvfrecip_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfrsqrt_s : ClangBuiltin<"__builtin_lasx_xvfrsqrt_s">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfrsqrt_d : ClangBuiltin<"__builtin_lasx_xvfrsqrt_d">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcvtl_s_h : ClangBuiltin<"__builtin_lasx_xvfcvtl_s_h">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcvtl_d_s : ClangBuiltin<"__builtin_lasx_xvfcvtl_d_s">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvfcvth_s_h : ClangBuiltin<"__builtin_lasx_xvfcvth_s_h">,
+  Intrinsic<[llvm_v8f32_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvfcvth_d_s : ClangBuiltin<"__builtin_lasx_xvfcvth_d_s">,
+  Intrinsic<[llvm_v4f64_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftint_w_s : ClangBuiltin<"__builtin_lasx_xvftint_w_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftint_l_d : ClangBuiltin<"__builtin_lasx_xvftint_l_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvftint_wu_s : ClangBuiltin<"__builtin_lasx_xvftint_wu_s">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvftint_lu_d : ClangBuiltin<"__builtin_lasx_xvftint_lu_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrlri_b : ClangBuiltin<"__builtin_lasx_xvsrlri_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlri_h : ClangBuiltin<"__builtin_lasx_xvsrlri_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlri_w : ClangBuiltin<"__builtin_lasx_xvsrlri_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlri_d : ClangBuiltin<"__builtin_lasx_xvsrlri_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrari_b : ClangBuiltin<"__builtin_lasx_xvsrari_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrari_h : ClangBuiltin<"__builtin_lasx_xvsrari_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrari_w : ClangBuiltin<"__builtin_lasx_xvsrari_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrari_d : ClangBuiltin<"__builtin_lasx_xvsrari_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsat_b : ClangBuiltin<"__builtin_lasx_xvsat_b">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_h : ClangBuiltin<"__builtin_lasx_xvsat_h">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_w : ClangBuiltin<"__builtin_lasx_xvsat_w">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_d : ClangBuiltin<"__builtin_lasx_xvsat_d">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsat_bu : ClangBuiltin<"__builtin_lasx_xvsat_bu">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_hu : ClangBuiltin<"__builtin_lasx_xvsat_hu">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_wu : ClangBuiltin<"__builtin_lasx_xvsat_wu">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsat_du : ClangBuiltin<"__builtin_lasx_xvsat_du">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_loongarch_lasx_xvsrlni_b_h : ClangBuiltin<"__builtin_lasx_xvsrlni_b_h">,
+  Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlni_h_w : ClangBuiltin<"__builtin_lasx_xvsrlni_h_w">,
+  Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlni_w_d : ClangBuiltin<"__builtin_lasx_xvsrlni_w_d">,
+  Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_loongarch_lasx_xvsrlni_d_q : ClangBuiltin<"__builtin_lasx_xvsrlni_d_q">,
+  Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>;
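+
+// The "ni" (narrow-immediate) forms shift both vector operands right by the
+// immediate, narrow each result to half width, and pack the two narrowed
+// halves into the single destination register.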
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlni_w_d : ClangBuiltin<"__builtin_lasx_xvssrlni_w_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlni_d_q : ClangBuiltin<"__builtin_lasx_xvssrlni_d_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvssrlrni_bu_h : ClangBuiltin<"__builtin_lasx_xvssrlrni_bu_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlrni_hu_w : ClangBuiltin<"__builtin_lasx_xvssrlrni_hu_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlrni_wu_d : ClangBuiltin<"__builtin_lasx_xvssrlrni_wu_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlrni_du_q : ClangBuiltin<"__builtin_lasx_xvssrlrni_du_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsrarni_b_h : ClangBuiltin<"__builtin_lasx_xvsrarni_b_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrarni_h_w : ClangBuiltin<"__builtin_lasx_xvsrarni_h_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrarni_w_d : ClangBuiltin<"__builtin_lasx_xvsrarni_w_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrarni_d_q : ClangBuiltin<"__builtin_lasx_xvsrarni_d_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvssrani_b_h : ClangBuiltin<"__builtin_lasx_xvssrani_b_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrani_h_w : ClangBuiltin<"__builtin_lasx_xvssrani_h_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrani_w_d : ClangBuiltin<"__builtin_lasx_xvssrani_w_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrani_d_q : ClangBuiltin<"__builtin_lasx_xvssrani_d_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvssrani_bu_h : ClangBuiltin<"__builtin_lasx_xvssrani_bu_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrani_hu_w : ClangBuiltin<"__builtin_lasx_xvssrani_hu_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrani_wu_d : ClangBuiltin<"__builtin_lasx_xvssrani_wu_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrani_du_q : ClangBuiltin<"__builtin_lasx_xvssrani_du_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvssrarni_b_h : ClangBuiltin<"__builtin_lasx_xvssrarni_b_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrarni_h_w : ClangBuiltin<"__builtin_lasx_xvssrarni_h_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, 
llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrarni_w_d : ClangBuiltin<"__builtin_lasx_xvssrarni_w_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrarni_d_q : ClangBuiltin<"__builtin_lasx_xvssrarni_d_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvssrarni_bu_h : ClangBuiltin<"__builtin_lasx_xvssrarni_bu_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrarni_hu_w : ClangBuiltin<"__builtin_lasx_xvssrarni_hu_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrarni_wu_d : ClangBuiltin<"__builtin_lasx_xvssrarni_wu_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrarni_du_q : ClangBuiltin<"__builtin_lasx_xvssrarni_du_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvssrlni_bu_h : ClangBuiltin<"__builtin_lasx_xvssrlni_bu_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlni_hu_w : ClangBuiltin<"__builtin_lasx_xvssrlni_hu_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlni_wu_d : ClangBuiltin<"__builtin_lasx_xvssrlni_wu_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvssrlni_du_q : ClangBuiltin<"__builtin_lasx_xvssrlni_du_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvseq_b : ClangBuiltin<"__builtin_lasx_xvseq_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvseq_h : ClangBuiltin<"__builtin_lasx_xvseq_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvseq_w : ClangBuiltin<"__builtin_lasx_xvseq_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvseq_d : ClangBuiltin<"__builtin_lasx_xvseq_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsle_b : ClangBuiltin<"__builtin_lasx_xvsle_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsle_h : ClangBuiltin<"__builtin_lasx_xvsle_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsle_w : ClangBuiltin<"__builtin_lasx_xvsle_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsle_d : ClangBuiltin<"__builtin_lasx_xvsle_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsle_bu : ClangBuiltin<"__builtin_lasx_xvsle_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsle_hu : ClangBuiltin<"__builtin_lasx_xvsle_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsle_wu : ClangBuiltin<"__builtin_lasx_xvsle_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsle_du : ClangBuiltin<"__builtin_lasx_xvsle_du">, + Intrinsic<[llvm_v4i64_ty], 
[llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvslt_b : ClangBuiltin<"__builtin_lasx_xvslt_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslt_h : ClangBuiltin<"__builtin_lasx_xvslt_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslt_w : ClangBuiltin<"__builtin_lasx_xvslt_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslt_d : ClangBuiltin<"__builtin_lasx_xvslt_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvslt_bu : ClangBuiltin<"__builtin_lasx_xvslt_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslt_hu : ClangBuiltin<"__builtin_lasx_xvslt_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslt_wu : ClangBuiltin<"__builtin_lasx_xvslt_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslt_du : ClangBuiltin<"__builtin_lasx_xvslt_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvadd_b : ClangBuiltin<"__builtin_lasx_xvadd_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lasx_xvadd_h : ClangBuiltin<"__builtin_lasx_xvadd_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lasx_xvadd_w : ClangBuiltin<"__builtin_lasx_xvadd_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lasx_xvadd_d : ClangBuiltin<"__builtin_lasx_xvadd_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lasx_xvsub_b : ClangBuiltin<"__builtin_lasx_xvsub_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsub_h : ClangBuiltin<"__builtin_lasx_xvsub_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsub_w : ClangBuiltin<"__builtin_lasx_xvsub_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsub_d : ClangBuiltin<"__builtin_lasx_xvsub_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmax_b : ClangBuiltin<"__builtin_lasx_xvmax_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmax_h : ClangBuiltin<"__builtin_lasx_xvmax_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmax_w : ClangBuiltin<"__builtin_lasx_xvmax_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmax_d : ClangBuiltin<"__builtin_lasx_xvmax_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmin_b : ClangBuiltin<"__builtin_lasx_xvmin_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmin_h : ClangBuiltin<"__builtin_lasx_xvmin_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmin_w : ClangBuiltin<"__builtin_lasx_xvmin_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, 
llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmin_d : ClangBuiltin<"__builtin_lasx_xvmin_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmax_bu : ClangBuiltin<"__builtin_lasx_xvmax_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmax_hu : ClangBuiltin<"__builtin_lasx_xvmax_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmax_wu : ClangBuiltin<"__builtin_lasx_xvmax_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmax_du : ClangBuiltin<"__builtin_lasx_xvmax_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmin_bu : ClangBuiltin<"__builtin_lasx_xvmin_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmin_hu : ClangBuiltin<"__builtin_lasx_xvmin_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmin_wu : ClangBuiltin<"__builtin_lasx_xvmin_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmin_du : ClangBuiltin<"__builtin_lasx_xvmin_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmul_b : ClangBuiltin<"__builtin_lasx_xvmul_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmul_h : ClangBuiltin<"__builtin_lasx_xvmul_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmul_w : ClangBuiltin<"__builtin_lasx_xvmul_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmul_d : ClangBuiltin<"__builtin_lasx_xvmul_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmadd_b : ClangBuiltin<"__builtin_lasx_xvmadd_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvmadd_h : ClangBuiltin<"__builtin_lasx_xvmadd_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvmadd_w : ClangBuiltin<"__builtin_lasx_xvmadd_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvmadd_d : ClangBuiltin<"__builtin_lasx_xvmadd_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvmsub_b : ClangBuiltin<"__builtin_lasx_xvmsub_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvmsub_h : ClangBuiltin<"__builtin_lasx_xvmsub_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvmsub_w : ClangBuiltin<"__builtin_lasx_xvmsub_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvmsub_d : ClangBuiltin<"__builtin_lasx_xvmsub_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvdiv_b : ClangBuiltin<"__builtin_lasx_xvdiv_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvdiv_h : 
ClangBuiltin<"__builtin_lasx_xvdiv_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvdiv_w : ClangBuiltin<"__builtin_lasx_xvdiv_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvdiv_d : ClangBuiltin<"__builtin_lasx_xvdiv_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmod_b : ClangBuiltin<"__builtin_lasx_xvmod_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmod_h : ClangBuiltin<"__builtin_lasx_xvmod_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmod_w : ClangBuiltin<"__builtin_lasx_xvmod_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmod_d : ClangBuiltin<"__builtin_lasx_xvmod_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvdiv_bu : ClangBuiltin<"__builtin_lasx_xvdiv_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvdiv_hu : ClangBuiltin<"__builtin_lasx_xvdiv_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvdiv_wu : ClangBuiltin<"__builtin_lasx_xvdiv_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvdiv_du : ClangBuiltin<"__builtin_lasx_xvdiv_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsll_b : ClangBuiltin<"__builtin_lasx_xvsll_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsll_h : ClangBuiltin<"__builtin_lasx_xvsll_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsll_w : ClangBuiltin<"__builtin_lasx_xvsll_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsll_d : ClangBuiltin<"__builtin_lasx_xvsll_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsrl_b : ClangBuiltin<"__builtin_lasx_xvsrl_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrl_h : ClangBuiltin<"__builtin_lasx_xvsrl_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrl_w : ClangBuiltin<"__builtin_lasx_xvsrl_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrl_d : ClangBuiltin<"__builtin_lasx_xvsrl_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvbitclr_b : ClangBuiltin<"__builtin_lasx_xvbitclr_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitclr_h : ClangBuiltin<"__builtin_lasx_xvbitclr_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitclr_w : ClangBuiltin<"__builtin_lasx_xvbitclr_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitclr_d : ClangBuiltin<"__builtin_lasx_xvbitclr_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvbitset_b : ClangBuiltin<"__builtin_lasx_xvbitset_b">, + 
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitset_h : ClangBuiltin<"__builtin_lasx_xvbitset_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitset_w : ClangBuiltin<"__builtin_lasx_xvbitset_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitset_d : ClangBuiltin<"__builtin_lasx_xvbitset_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpackev_b : ClangBuiltin<"__builtin_lasx_xvpackev_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpackev_h : ClangBuiltin<"__builtin_lasx_xvpackev_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpackev_w : ClangBuiltin<"__builtin_lasx_xvpackev_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpackev_d : ClangBuiltin<"__builtin_lasx_xvpackev_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpackod_b : ClangBuiltin<"__builtin_lasx_xvpackod_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpackod_h : ClangBuiltin<"__builtin_lasx_xvpackod_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpackod_w : ClangBuiltin<"__builtin_lasx_xvpackod_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpackod_d : ClangBuiltin<"__builtin_lasx_xvpackod_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvilvl_b : ClangBuiltin<"__builtin_lasx_xvilvl_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvilvl_h : ClangBuiltin<"__builtin_lasx_xvilvl_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvilvl_w : ClangBuiltin<"__builtin_lasx_xvilvl_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvilvl_d : ClangBuiltin<"__builtin_lasx_xvilvl_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvilvh_b : ClangBuiltin<"__builtin_lasx_xvilvh_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvilvh_h : ClangBuiltin<"__builtin_lasx_xvilvh_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvilvh_w : ClangBuiltin<"__builtin_lasx_xvilvh_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvilvh_d : ClangBuiltin<"__builtin_lasx_xvilvh_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpickev_b : ClangBuiltin<"__builtin_lasx_xvpickev_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickev_h : ClangBuiltin<"__builtin_lasx_xvpickev_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickev_w : ClangBuiltin<"__builtin_lasx_xvpickev_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickev_d : 
ClangBuiltin<"__builtin_lasx_xvpickev_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvand_v : ClangBuiltin<"__builtin_lasx_xvand_v">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvor_v : ClangBuiltin<"__builtin_lasx_xvor_v">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvbitrev_b : ClangBuiltin<"__builtin_lasx_xvbitrev_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitrev_h : ClangBuiltin<"__builtin_lasx_xvbitrev_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitrev_w : ClangBuiltin<"__builtin_lasx_xvbitrev_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvbitrev_d : ClangBuiltin<"__builtin_lasx_xvbitrev_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmod_bu : ClangBuiltin<"__builtin_lasx_xvmod_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmod_hu : ClangBuiltin<"__builtin_lasx_xvmod_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmod_wu : ClangBuiltin<"__builtin_lasx_xvmod_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmod_du : ClangBuiltin<"__builtin_lasx_xvmod_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpickod_b : ClangBuiltin<"__builtin_lasx_xvpickod_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickod_h : ClangBuiltin<"__builtin_lasx_xvpickod_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickod_w : ClangBuiltin<"__builtin_lasx_xvpickod_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickod_d : ClangBuiltin<"__builtin_lasx_xvpickod_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvreplve_b : ClangBuiltin<"__builtin_lasx_xvreplve_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve_h : ClangBuiltin<"__builtin_lasx_xvreplve_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve_w : ClangBuiltin<"__builtin_lasx_xvreplve_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve_d : ClangBuiltin<"__builtin_lasx_xvreplve_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsra_b : ClangBuiltin<"__builtin_lasx_xvsra_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsra_h : ClangBuiltin<"__builtin_lasx_xvsra_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsra_w : ClangBuiltin<"__builtin_lasx_xvsra_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsra_d : ClangBuiltin<"__builtin_lasx_xvsra_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvxor_v : 
ClangBuiltin<"__builtin_lasx_xvxor_v">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvnor_v : ClangBuiltin<"__builtin_lasx_xvnor_v">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfadd_s : ClangBuiltin<"__builtin_lasx_xvfadd_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfadd_d : ClangBuiltin<"__builtin_lasx_xvfadd_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfsub_s : ClangBuiltin<"__builtin_lasx_xvfsub_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfsub_d : ClangBuiltin<"__builtin_lasx_xvfsub_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfmul_s : ClangBuiltin<"__builtin_lasx_xvfmul_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfmul_d : ClangBuiltin<"__builtin_lasx_xvfmul_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvshuf_h : ClangBuiltin<"__builtin_lasx_xvshuf_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvshuf_w : ClangBuiltin<"__builtin_lasx_xvshuf_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvshuf_d : ClangBuiltin<"__builtin_lasx_xvshuf_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvseqi_b : ClangBuiltin<"__builtin_lasx_xvseqi_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvseqi_h : ClangBuiltin<"__builtin_lasx_xvseqi_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvseqi_w : ClangBuiltin<"__builtin_lasx_xvseqi_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvseqi_d : ClangBuiltin<"__builtin_lasx_xvseqi_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvslei_b : ClangBuiltin<"__builtin_lasx_xvslei_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslei_h : ClangBuiltin<"__builtin_lasx_xvslei_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslei_w : ClangBuiltin<"__builtin_lasx_xvslei_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslei_d : ClangBuiltin<"__builtin_lasx_xvslei_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvslei_bu : ClangBuiltin<"__builtin_lasx_xvslei_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslei_hu : ClangBuiltin<"__builtin_lasx_xvslei_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslei_wu : ClangBuiltin<"__builtin_lasx_xvslei_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslei_du : ClangBuiltin<"__builtin_lasx_xvslei_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvslti_b : 
ClangBuiltin<"__builtin_lasx_xvslti_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslti_h : ClangBuiltin<"__builtin_lasx_xvslti_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslti_w : ClangBuiltin<"__builtin_lasx_xvslti_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslti_d : ClangBuiltin<"__builtin_lasx_xvslti_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvslti_bu : ClangBuiltin<"__builtin_lasx_xvslti_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslti_hu : ClangBuiltin<"__builtin_lasx_xvslti_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslti_wu : ClangBuiltin<"__builtin_lasx_xvslti_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslti_du : ClangBuiltin<"__builtin_lasx_xvslti_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvaddi_bu : ClangBuiltin<"__builtin_lasx_xvaddi_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lasx_xvaddi_hu : ClangBuiltin<"__builtin_lasx_xvaddi_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lasx_xvaddi_wu : ClangBuiltin<"__builtin_lasx_xvaddi_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], + [Commutative, IntrNoMem]>; +def int_loongarch_lasx_xvaddi_du : ClangBuiltin<"__builtin_lasx_xvaddi_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], + [Commutative, IntrNoMem]>; + +def int_loongarch_lasx_xvsubi_bu : ClangBuiltin<"__builtin_lasx_xvsubi_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsubi_hu : ClangBuiltin<"__builtin_lasx_xvsubi_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsubi_wu : ClangBuiltin<"__builtin_lasx_xvsubi_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsubi_du : ClangBuiltin<"__builtin_lasx_xvsubi_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmaxi_b : ClangBuiltin<"__builtin_lasx_xvmaxi_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaxi_h : ClangBuiltin<"__builtin_lasx_xvmaxi_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaxi_w : ClangBuiltin<"__builtin_lasx_xvmaxi_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaxi_d : ClangBuiltin<"__builtin_lasx_xvmaxi_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmini_b : ClangBuiltin<"__builtin_lasx_xvmini_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmini_h : ClangBuiltin<"__builtin_lasx_xvmini_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmini_w : ClangBuiltin<"__builtin_lasx_xvmini_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmini_d : 
ClangBuiltin<"__builtin_lasx_xvmini_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmaxi_bu : ClangBuiltin<"__builtin_lasx_xvmaxi_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaxi_hu : ClangBuiltin<"__builtin_lasx_xvmaxi_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaxi_wu : ClangBuiltin<"__builtin_lasx_xvmaxi_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmaxi_du : ClangBuiltin<"__builtin_lasx_xvmaxi_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvmini_bu : ClangBuiltin<"__builtin_lasx_xvmini_bu">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmini_hu : ClangBuiltin<"__builtin_lasx_xvmini_hu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmini_wu : ClangBuiltin<"__builtin_lasx_xvmini_wu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvmini_du : ClangBuiltin<"__builtin_lasx_xvmini_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvclz_b : ClangBuiltin<"__builtin_lasx_xvclz_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvclz_h : ClangBuiltin<"__builtin_lasx_xvclz_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvclz_w : ClangBuiltin<"__builtin_lasx_xvclz_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvclz_d : ClangBuiltin<"__builtin_lasx_xvclz_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpcnt_b : ClangBuiltin<"__builtin_lasx_xvpcnt_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpcnt_h : ClangBuiltin<"__builtin_lasx_xvpcnt_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpcnt_w : ClangBuiltin<"__builtin_lasx_xvpcnt_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpcnt_d : ClangBuiltin<"__builtin_lasx_xvpcnt_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfsqrt_s : ClangBuiltin<"__builtin_lasx_xvfsqrt_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfsqrt_d : ClangBuiltin<"__builtin_lasx_xvfsqrt_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvfrint_s : ClangBuiltin<"__builtin_lasx_xvfrint_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfrint_d : ClangBuiltin<"__builtin_lasx_xvfrint_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvffint_s_w : ClangBuiltin<"__builtin_lasx_xvffint_s_w">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvffint_d_l : ClangBuiltin<"__builtin_lasx_xvffint_d_l">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvffint_s_wu : ClangBuiltin<"__builtin_lasx_xvffint_s_wu">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvffint_d_lu : ClangBuiltin<"__builtin_lasx_xvffint_d_lu">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + 
+def int_loongarch_lasx_xvftintrz_wu_s : ClangBuiltin<"__builtin_lasx_xvftintrz_wu_s">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvftintrz_lu_d : ClangBuiltin<"__builtin_lasx_xvftintrz_lu_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvreplgr2vr_b : ClangBuiltin<"__builtin_lasx_xvreplgr2vr_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplgr2vr_h : ClangBuiltin<"__builtin_lasx_xvreplgr2vr_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplgr2vr_w : ClangBuiltin<"__builtin_lasx_xvreplgr2vr_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplgr2vr_d : ClangBuiltin<"__builtin_lasx_xvreplgr2vr_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvinsgr2vr_w : ClangBuiltin<"__builtin_lasx_xvinsgr2vr_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvinsgr2vr_d : ClangBuiltin<"__builtin_lasx_xvinsgr2vr_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvfdiv_s : ClangBuiltin<"__builtin_lasx_xvfdiv_s">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvfdiv_d : ClangBuiltin<"__builtin_lasx_xvfdiv_d">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvslli_b : ClangBuiltin<"__builtin_lasx_xvslli_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslli_h : ClangBuiltin<"__builtin_lasx_xvslli_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslli_w : ClangBuiltin<"__builtin_lasx_xvslli_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvslli_d : ClangBuiltin<"__builtin_lasx_xvslli_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsrli_b : ClangBuiltin<"__builtin_lasx_xvsrli_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrli_h : ClangBuiltin<"__builtin_lasx_xvsrli_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrli_w : ClangBuiltin<"__builtin_lasx_xvsrli_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrli_d : ClangBuiltin<"__builtin_lasx_xvsrli_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsrai_b : ClangBuiltin<"__builtin_lasx_xvsrai_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrai_h : ClangBuiltin<"__builtin_lasx_xvsrai_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrai_w : ClangBuiltin<"__builtin_lasx_xvsrai_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrai_d : ClangBuiltin<"__builtin_lasx_xvsrai_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvshuf4i_b : ClangBuiltin<"__builtin_lasx_xvshuf4i_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvshuf4i_h : 
ClangBuiltin<"__builtin_lasx_xvshuf4i_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvshuf4i_w : ClangBuiltin<"__builtin_lasx_xvshuf4i_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvshuf4i_d : ClangBuiltin<"__builtin_lasx_xvshuf4i_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvrotr_b : ClangBuiltin<"__builtin_lasx_xvrotr_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvrotr_h : ClangBuiltin<"__builtin_lasx_xvrotr_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvrotr_w : ClangBuiltin<"__builtin_lasx_xvrotr_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvrotr_d : ClangBuiltin<"__builtin_lasx_xvrotr_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvrotri_b : ClangBuiltin<"__builtin_lasx_xvrotri_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvrotri_h : ClangBuiltin<"__builtin_lasx_xvrotri_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvrotri_w : ClangBuiltin<"__builtin_lasx_xvrotri_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvrotri_d : ClangBuiltin<"__builtin_lasx_xvrotri_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvld : ClangBuiltin<"__builtin_lasx_xvld">, + Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly]>; + +def int_loongarch_lasx_xvst : ClangBuiltin<"__builtin_lasx_xvst">, + Intrinsic<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrArgMemOnly]>; + +def int_loongarch_lasx_xvrepl128vei_b : ClangBuiltin<"__builtin_lasx_xvrepl128vei_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvrepl128vei_h : ClangBuiltin<"__builtin_lasx_xvrepl128vei_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvrepl128vei_w : ClangBuiltin<"__builtin_lasx_xvrepl128vei_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvrepl128vei_d : ClangBuiltin<"__builtin_lasx_xvrepl128vei_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvinsve0_w : ClangBuiltin<"__builtin_lasx_xvinsve0_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvinsve0_d : ClangBuiltin<"__builtin_lasx_xvinsve0_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpickve_w : ClangBuiltin<"__builtin_lasx_xvpickve_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickve_d : ClangBuiltin<"__builtin_lasx_xvpickve_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpickve_w_f : ClangBuiltin<"__builtin_lasx_xvpickve_w_f">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpickve_d_f : ClangBuiltin<"__builtin_lasx_xvpickve_d_f">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, 
llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvreplve0_b : ClangBuiltin<"__builtin_lasx_xvreplve0_b">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve0_h : ClangBuiltin<"__builtin_lasx_xvreplve0_h">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve0_w : ClangBuiltin<"__builtin_lasx_xvreplve0_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve0_d : ClangBuiltin<"__builtin_lasx_xvreplve0_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve0_q : ClangBuiltin<"__builtin_lasx_xvreplve0_q">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_vext2xv_d_w : ClangBuiltin<"__builtin_lasx_vext2xv_d_w">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_vext2xv_w_h : ClangBuiltin<"__builtin_lasx_vext2xv_w_h">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_vext2xv_h_b : ClangBuiltin<"__builtin_lasx_vext2xv_h_b">, + Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_vext2xv_d_h : ClangBuiltin<"__builtin_lasx_vext2xv_d_h">, + Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_vext2xv_w_b : ClangBuiltin<"__builtin_lasx_vext2xv_w_b">, + Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_vext2xv_d_b : ClangBuiltin<"__builtin_lasx_vext2xv_d_b">, + Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_vext2xv_du_wu : ClangBuiltin<"__builtin_lasx_vext2xv_du_wu">, + Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_vext2xv_wu_hu : ClangBuiltin<"__builtin_lasx_vext2xv_wu_hu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_vext2xv_hu_bu : ClangBuiltin<"__builtin_lasx_vext2xv_hu_bu">, + Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_vext2xv_du_hu : ClangBuiltin<"__builtin_lasx_vext2xv_du_hu">, + Intrinsic<[llvm_v4i64_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_vext2xv_wu_bu : ClangBuiltin<"__builtin_lasx_vext2xv_wu_bu">, + Intrinsic<[llvm_v8i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_vext2xv_du_bu : ClangBuiltin<"__builtin_lasx_vext2xv_du_bu">, + Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvpermi_q : ClangBuiltin<"__builtin_lasx_xvpermi_q">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvpermi_d : ClangBuiltin<"__builtin_lasx_xvpermi_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvperm_w : ClangBuiltin<"__builtin_lasx_xvperm_w">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvsrlrni_b_h : ClangBuiltin<"__builtin_lasx_xvsrlrni_b_h">, + Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrlrni_h_w : ClangBuiltin<"__builtin_lasx_xvsrlrni_h_w">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrlrni_w_d : ClangBuiltin<"__builtin_lasx_xvsrlrni_w_d">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvsrlrni_d_q : 
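+// Editorial note (hedged): the vext2xv_* defs above widen the low half of
+// the source register: vext2xv_d_w sign-extends the low four i32 lanes to
+// i64, while the *u_*u forms (vext2xv_du_wu, ...) zero-extend instead.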
ClangBuiltin<"__builtin_lasx_xvsrlrni_d_q">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xbz_v : ClangBuiltin<"__builtin_lasx_xbz_v">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xbnz_v : ClangBuiltin<"__builtin_lasx_xbnz_v">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xbz_b : ClangBuiltin<"__builtin_lasx_xbz_b">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbz_h : ClangBuiltin<"__builtin_lasx_xbz_h">, + Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbz_w : ClangBuiltin<"__builtin_lasx_xbz_w">, + Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbz_d : ClangBuiltin<"__builtin_lasx_xbz_d">, + Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xbnz_b : ClangBuiltin<"__builtin_lasx_xbnz_b">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbnz_h : ClangBuiltin<"__builtin_lasx_xbnz_h">, + Intrinsic<[llvm_i32_ty], [llvm_v16i16_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbnz_w : ClangBuiltin<"__builtin_lasx_xbnz_w">, + Intrinsic<[llvm_i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xbnz_d : ClangBuiltin<"__builtin_lasx_xbnz_d">, + Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +def int_loongarch_lasx_xvextl_q_d : ClangBuiltin<"__builtin_lasx_xvextl_q_d">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvextl_qu_du : ClangBuiltin<"__builtin_lasx_xvextl_qu_du">, + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + +//===----------------------------------------------------------------------===// +// LoongArch BASE + +def int_loongarch_cpucfg : ClangBuiltin<"__builtin_loongarch_cpucfg">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_csrrd_w : ClangBuiltin<"__builtin_loongarch_csrrd_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_csrrd_d : ClangBuiltin<"__builtin_loongarch_csrrd_d">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>; + +def int_loongarch_csrwr_w : ClangBuiltin<"__builtin_loongarch_csrwr_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_csrwr_d : ClangBuiltin<"__builtin_loongarch_csrwr_d">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>; + +def int_loongarch_csrxchg_w : ClangBuiltin<"__builtin_loongarch_csrxchg_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_csrxchg_d : ClangBuiltin<"__builtin_loongarch_csrxchg_d">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], []>; + +def int_loongarch_iocsrrd_b : ClangBuiltin<"__builtin_loongarch_iocsrrd_b">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_iocsrrd_h : ClangBuiltin<"__builtin_loongarch_iocsrrd_h">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_iocsrrd_w : ClangBuiltin<"__builtin_loongarch_iocsrrd_w">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; + +def int_loongarch_iocsrrd_d : ClangBuiltin<"__builtin_loongarch_iocsrrd_d">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; + +def int_loongarch_iocsrwr_b : ClangBuiltin<"__builtin_loongarch_iocsrwr_b">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; + +def int_loongarch_iocsrwr_h : ClangBuiltin<"__builtin_loongarch_iocsrwr_h">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; + +def 
int_loongarch_iocsrwr_w : ClangBuiltin<"__builtin_loongarch_iocsrwr_w">,
+  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
+
+def int_loongarch_iocsrwr_d : ClangBuiltin<"__builtin_loongarch_iocsrwr_d">,
+  Intrinsic<[], [llvm_i64_ty, llvm_i32_ty], []>;
+
+def int_loongarch_cacop_w : ClangBuiltin<"__builtin_loongarch_cacop_w">,
+  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+
+def int_loongarch_cacop_d : ClangBuiltin<"__builtin_loongarch_cacop_d">,
+  Intrinsic<[], [llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>;
+
+def int_loongarch_crc_w_b_w : ClangBuiltin<"__builtin_loongarch_crc_w_b_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+
+def int_loongarch_crc_w_h_w : ClangBuiltin<"__builtin_loongarch_crc_w_h_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+
+def int_loongarch_crc_w_w_w : ClangBuiltin<"__builtin_loongarch_crc_w_w_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+
+def int_loongarch_crc_w_d_w : ClangBuiltin<"__builtin_loongarch_crc_w_d_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
+
+def int_loongarch_crcc_w_b_w : ClangBuiltin<"__builtin_loongarch_crcc_w_b_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+
+def int_loongarch_crcc_w_h_w : ClangBuiltin<"__builtin_loongarch_crcc_w_h_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+
+def int_loongarch_crcc_w_w_w : ClangBuiltin<"__builtin_loongarch_crcc_w_w_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+
+def int_loongarch_crcc_w_d_w : ClangBuiltin<"__builtin_loongarch_crcc_w_d_w">,
+  Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
+
+def int_loongarch_tlbclr : ClangBuiltin<"__builtin_loongarch_tlbclr">,
+  Intrinsic<[], [], []>;
+
+def int_loongarch_tlbflush : ClangBuiltin<"__builtin_loongarch_tlbflush">,
+  Intrinsic<[], [], []>;
+
+def int_loongarch_tlbfill : ClangBuiltin<"__builtin_loongarch_tlbfill">,
+  Intrinsic<[], [], []>;
+
+def int_loongarch_tlbrd : ClangBuiltin<"__builtin_loongarch_tlbrd">,
+  Intrinsic<[], [], []>;
+
+def int_loongarch_tlbwr : ClangBuiltin<"__builtin_loongarch_tlbwr">,
+  Intrinsic<[], [], []>;
+
+def int_loongarch_tlbsrch : ClangBuiltin<"__builtin_loongarch_tlbsrch">,
+  Intrinsic<[], [], []>;
+
+def int_loongarch_syscall : ClangBuiltin<"__builtin_loongarch_syscall">,
+  Intrinsic<[], [llvm_i64_ty], []>;
+
+def int_loongarch_break : ClangBuiltin<"__builtin_loongarch_break">,
+  Intrinsic<[], [llvm_i64_ty], []>;
+
+def int_loongarch_asrtle_d : ClangBuiltin<"__builtin_loongarch_asrtle_d">,
+  Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>;
+
+def int_loongarch_asrtgt_d : ClangBuiltin<"__builtin_loongarch_asrtgt_d">,
+  Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], []>;
+
+def int_loongarch_dbar : ClangBuiltin<"__builtin_loongarch_dbar">,
+  Intrinsic<[], [llvm_i64_ty], []>;
+
+def int_loongarch_ibar : ClangBuiltin<"__builtin_loongarch_ibar">,
+  Intrinsic<[], [llvm_i64_ty], []>;
+
+}
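Editorial usage sketch for the BASE builtins above (hedged: plain C, builtin
names exactly as declared; note that in this patch dbar/ibar take their hint
operand as i64, unlike upstream LLVM where it is a 32-bit immediate):

    #include <stdint.h>

    uint32_t crc32_step(uint32_t seed, uint64_t data) {
      // crc_w_d_w folds one 64-bit chunk into a 32-bit CRC accumulator.
      return (uint32_t)__builtin_loongarch_crc_w_d_w((int64_t)data, (int)seed);
    }

    void publish(volatile int *flag) {
      __builtin_loongarch_dbar(0); // full memory barrier before publishing
      *flag = 1;
    }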
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index ed2f70b0da25..e2247e8ffb6e 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -753,7 +753,8 @@ Expected<uint32_t> ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
     }
     if (ESym->getType() == ELF::STT_FUNC && (ESym->st_value & 1) == 1)
       Result |= SymbolRef::SF_Thumb;
-  } else if (EF.getHeader().e_machine == ELF::EM_RISCV) {
+  } else if (EF.getHeader().e_machine == ELF::EM_LOONGARCH ||
+             EF.getHeader().e_machine == ELF::EM_RISCV) {
     if (Expected<StringRef> NameOrErr = getSymbolName(Sym)) {
       // Mark empty name symbols used for label differences.
       if (NameOrErr->empty())
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 3d3b504c6abd..96cfe1fc0644 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -701,6 +701,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
   MIB.addMetadata(Var);
   MIB.addMetadata(Expr);
   AddDbgValueLocationOps(MIB, DbgValDesc, LocationOps, VRBaseMap);
+  // FIXME: Fix rustc build error with lto=thin option on loongarch.
+  if (MF->getTarget().getTargetTriple().isLoongArch())
+    return nullptr;
   return &*MIB;
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 195c0e6a836f..c8c219d4461d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9996,8 +9996,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
     case ISD::ADD:
       SDValue N0 = N.getOperand(0);
       SDValue N1 = N.getOperand(1);
-      if (!isConstantIntBuildVectorOrConstantInt(N0) &&
-          isConstantIntBuildVectorOrConstantInt(N1)) {
+      if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
         uint64_t Offset = N.getConstantOperandVal(1);
 
         // Rewrite an ADD constant node into a DIExpression. Since we are
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 56308d780862..986237cdeb0d 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -207,6 +207,16 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
     PersonalityEncoding = dwarf::DW_EH_PE_absptr;
     TTypeEncoding = dwarf::DW_EH_PE_absptr;
     break;
+  case Triple::loongarch32:
+  case Triple::loongarch64:
+    PersonalityEncoding = dwarf::DW_EH_PE_indirect;
+
+    // Note: gas does not support pc-relative LSDA references.
+    LSDAEncoding = dwarf::DW_EH_PE_absptr;
+
+    TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+                    dwarf::DW_EH_PE_sdata4;
+    break;
   case Triple::mips:
   case Triple::mipsel:
   case Triple::mips64:
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
index 48aaab96e71f..65944be4c1e6 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
@@ -263,6 +263,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
   case Triple::riscv64:
     return CreateWithABI<OrcRiscv64>(EPC);
 
+  case Triple::loongarch64:
+    return CreateWithABI<OrcLoongArch64>(EPC);
+
   case Triple::x86_64:
     if (TT.getOS() == Triple::OSType::Win32)
       return CreateWithABI<OrcX86_64_Win32>(EPC);
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 38cab526704f..ee2bd91e18af 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -157,6 +157,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
     return CCMgrT::Create(ES, ErrorHandlerAddress);
   }
 
+  case Triple::loongarch64: {
+    typedef orc::LocalJITCompileCallbackManager<orc::OrcLoongArch64> CCMgrT;
+    return CCMgrT::Create(ES, ErrorHandlerAddress);
+  }
+
   case Triple::x86_64: {
     if (T.getOS() == Triple::OSType::Win32) {
       typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
index 20b655bdf4b1..8b5fea92cf15 100644
--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -135,6 +135,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
     return LocalLazyCallThroughManager::Create<OrcRiscv64>(ES, ErrorHandlerAddr);
 
+  case Triple::loongarch64:
+    return LocalLazyCallThroughManager::Create<OrcLoongArch64>(
+        ES, ErrorHandlerAddr);
+
   case Triple::x86_64:
     if (T.getOS() == Triple::OSType::Win32)
       return LocalLazyCallThroughManager::Create<OrcX86_64_Win32>(
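Editorial sketch (hedged): with the dispatch above in place, a client can
name the new ABI class directly; this mirrors the patch's own call pattern
and assumes the LLVM-15-era ORC signatures used throughout this file:

    #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
    using namespace llvm;
    using namespace llvm::orc;

    Expected<std::unique_ptr<JITCompileCallbackManager>>
    makeLA64CallbackManager(ExecutionSession &ES,
                            JITTargetAddress ErrorHandlerAddr) {
      return LocalJITCompileCallbackManager<OrcLoongArch64>::Create(
          ES, ErrorHandlerAddr);
    }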
$r24,$r3,152(0x98) + 0x29c28079, // 0x54: st.d $r25,$r3,160(0xa0) + 0x29c2a07a, // 0x58: st.d $r26,$r3,168(0xa8) + 0x29c2c07b, // 0x5c: st.d $r27,$r3,176(0xb0) + 0x29c2e07c, // 0x60: st.d $r28,$r3,184(0xb8) + 0x29c3007d, // 0x64: st.d $r29,$r3,192(0xc0) + 0x29c3207e, // 0x68: st.d $r30,$r3,200(0xc8) + 0x29c3407f, // 0x6c: st.d $r31,$r3,208(0xd0) + 0x29c36061, // 0x70: st.d $r1,$r3,216(0xd8) + // JIT re-entry ctx addr. + 0x00000000, // 0x74: lu12i.w $a0,hi(ctx) + 0x00000000, // 0x78: ori $a0,$a0,lo(ctx) + 0x00000000, // 0x7c: lu32i.d $a0,higher(ctx) + 0x00000000, // 0x80: lu52i.d $a0,$a0,highest(ctx) + + 0x00150025, // 0x84: move $r5,$r1 + 0x02ffa0a5, // 0x88: addi.d $r5,$r5,-24(0xfe8) + + // JIT re-entry fn addr: + 0x00000000, // 0x8c: lu12i.w $t0,hi(reentry) + 0x00000000, // 0x90: ori $t0,$t0,lo(reentry) + 0x00000000, // 0x94: lu32i.d $t0,higher(reentry) + 0x00000000, // 0x98: lu52i.d $t0,$t0,highest(reentry) + 0x4c0002a1, // 0x9c: jirl $r1,$r21,0 + 0x00150095, // 0xa0: move $r21,$r4 + 0x28c36061, // 0xa4: ld.d $r1,$r3,216(0xd8) + 0x28c3407f, // 0xa8: ld.d $r31,$r3,208(0xd0) + 0x28c3207e, // 0xac: ld.d $r30,$r3,200(0xc8) + 0x28c3007d, // 0xb0: ld.d $r29,$r3,192(0xc0) + 0x28c2e07c, // 0xb4: ld.d $r28,$r3,184(0xb8) + 0x28c2c07b, // 0xb8: ld.d $r27,$r3,176(0xb0) + 0x28c2a07a, // 0xbc: ld.d $r26,$r3,168(0xa8) + 0x28c28079, // 0xc0: ld.d $r25,$r3,160(0xa0) + 0x28c26078, // 0xc4: ld.d $r24,$r3,152(0x98) + 0x28c24077, // 0xc8: ld.d $r23,$r3,144(0x90) + 0x28c22076, // 0xcc: ld.d $r22,$r3,136(0x88) + 0x28c20074, // 0xd0: ld.d $r20,$r3,128(0x80) + 0x28c1e073, // 0xd4: ld.d $r19,$r3,120(0x78) + 0x28c1c072, // 0xd8: ld.d $r18,$r3,112(0x70) + 0x28c1a071, // 0xdc: ld.d $r17,$r3,104(0x68) + 0x28c18070, // 0xe0: ld.d $r16,$r3,96(0x60) + 0x28c1606f, // 0xe4: ld.d $r15,$r3,88(0x58) + 0x28c1406e, // 0xe8: ld.d $r14,$r3,80(0x50) + 0x28c1206d, // 0xec: ld.d $r13,$r3,72(0x48) + 0x28c1006c, // 0xf0: ld.d $r12,$r3,64(0x40) + 0x28c0e06b, // 0xf4: ld.d $r11,$r3,56(0x38) + 0x28c0c06a, // 0xf8: ld.d $r10,$r3,48(0x30) + 0x28c0a069, // 0xfc: ld.d $r9,$r3,40(0x28) + 0x28c08068, // 0x100: ld.d $r8,$r3,32(0x20) + 0x28c06067, // 0x104: ld.d $r7,$r3,24(0x18) + 0x28c04066, // 0x108: ld.d $r6,$r3,16(0x10) + 0x28c02065, // 0x10c: ld.d $r5,$r3,8(0x8) + 0x28c00064, // 0x110: ld.d $r4,$r3,0 + 0x02c38063, // 0x114: addi.d $r3,$r3,224(0xe0) + 0x00150281, // 0x118: move $r1,$r20 + 0x4c0002a0, // 0x11c: jirl $r0,$r21,0 + }; + + const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lu12i.w + const unsigned ReentryCtxAddrOffset = 0x74; // JIT re-entry ctx addr lu12i.w + + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + + uint32_t ReentryCtxLU12i = 0x14000004 | ((ReentryCtxAddr << 32 >> 44) << 5); + uint32_t ReentryCtxORi = 0x03800084 | ((ReentryCtxAddr & 0xFFF) << 10); + uint32_t ReentryCtxLU32i = 0x16000004 | ((ReentryCtxAddr << 12 >> 44) << 5); + uint32_t ReentryCtxLU52i = 0x03000084 | ((ReentryCtxAddr >> 52) << 10); + + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLU12i, + sizeof(ReentryCtxLU12i)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxORi, + sizeof(ReentryCtxORi)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxLU32i, + sizeof(ReentryCtxLU32i)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxLU52i, + sizeof(ReentryCtxLU52i)); + + uint32_t ReentryLU12i = 0x14000015 | ((ReentryFnAddr << 32 >> 44) << 5); + uint32_t ReentryORi = 0x038002b5 | ((ReentryFnAddr & 0xFFF) << 10); + uint32_t ReentryLU32i = 0x16000015 | 
((ReentryFnAddr << 12 >> 44) << 5); + uint32_t ReentryLU52i = 0x030002b5 | ((ReentryFnAddr >> 52) << 10); + + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryLU12i, + sizeof(ReentryLU12i)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryORi, + sizeof(ReentryORi)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 8), &ReentryLU32i, + sizeof(ReentryLU32i)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryLU52i, + sizeof(ReentryLU52i)); +} + +void OrcLoongArch64::writeTrampolines( + char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverFnAddr, unsigned NumTrampolines) { + + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); + + uint64_t HiBits = ((ResolverFnAddr << 32 >> 44) << 5); + uint64_t LoBits = ((ResolverFnAddr & 0xFFF) << 10); + uint64_t HigherBits = ((ResolverFnAddr << 12 >> 44) << 5); + uint64_t HighestBits = ((ResolverFnAddr >> 52) << 10); + + for (unsigned I = 0; I < NumTrampolines; ++I) { + Trampolines[10 * I + 0] = 0x00150034; // move $t8,$ra + Trampolines[10 * I + 1] = + 0x14000015 | HiBits; // lu12i.w $r21,hi(ResolveAddr) + Trampolines[10 * I + 2] = + 0x038002b5 | LoBits; // ori $r21,$r21,lo(ResolveAddr) + Trampolines[10 * I + 3] = + 0x16000015 | HigherBits; // lu32i $r21,higher(ResolveAddr) + Trampolines[10 * I + 4] = + 0x030002b5 | HighestBits; // lu52i $r21,$r21,highest(ResolveAddr) + Trampolines[10 * I + 5] = 0x4c0002a1; // jirl $ra, $r21, 0 + } +} + +void OrcLoongArch64::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + // Stub format is: + // + // .section __orc_stubs + // stub1: + // lu12i.w $r21, %abs(ptr1)<<32>>44 + // ori $r21, $r21, %abs(ptr1)&0xfff + // lu32i.d $r21, %abs(ptr1)<<12>>44 + // lu52i.d $r21, $r21, %abs(ptr1)>>52 + // ld.d $r21, $r21, 0 + // jirl $r0, $r21, 0 + // stub2: + // lu12i.w $r21, %abs(ptr2)<<32>>44 + // ori $r21, $r21, %abs(ptr2)&0xfff + // lu32i.d $r21, %abs(ptr2)<<12>>44 + // lu52i.d $r21, $r21, %abs(ptr2)>>52 + // ld.d $r21, $r21, 0 + // jirl $r0, $r21, 0 + // + // ... + // + // .section __orc_ptrs + // ptr1: + // .dword 0x0 + // ptr2: + // .dword 0x0 + // + // ... + + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); + + // Populate the stubs page stubs and mark it executable. + uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrAddr = PointersBlockTargetAddress; + + for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { + uint64_t HiBits = ((PtrAddr << 32 >> 44) << 5); + uint64_t LoBits = ((PtrAddr & 0xFFF) << 10); + uint64_t HigherBits = ((PtrAddr << 12 >> 44) << 5); + uint64_t HighestBits = ((PtrAddr >> 52) << 10); + Stub[8 * I + 0] = 0x14000015 | HiBits; // lu12i.w $r21, hi(PtrAddr) + Stub[8 * I + 1] = 0x038002b5 | LoBits; // ori $r21, $r21, lo(PtrAddr) + Stub[8 * I + 2] = 0x16000015 | HigherBits; // lu32i.d $r21, higher(PtrAddr) + Stub[8 * I + 3] = + 0x030002b5 | HighestBits; // lu52i.d $r21, $r21, highest(PtrAddr) + Stub[8 * I + 4] = 0x28c002b5; // ld.d $r21, $r21, 0 + Stub[8 * I + 5] = 0x4c0002a0; // jirl $r0, $r21, 0 + } +} + } // End namespace orc. } // End namespace llvm. 
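The resolver, trampolines, and stubs above all materialize a 64-bit absolute address with the same four-instruction LoongArch sequence (lu12i.w, ori, lu32i.d, lu52i.d), OR-ing one bit slice of the address into each instruction word. A minimal standalone sketch of that encoding step (a hypothetical helper, not part of the patch; the rd/rj register fields are left zero here, whereas the patch carries them in the base opcodes such as 0x14000015 for $r21):

  #include <cstdint>

  // lu12i.w/lu32i.d take a 20-bit immediate at bit 5; ori/lu52i.d take a
  // 12-bit immediate at bit 10. The four slices together cover bits 63..0.
  static void encodeAbs64(uint64_t Addr, uint32_t Insn[4]) {
    Insn[0] = 0x14000000 | static_cast<uint32_t>((Addr << 32 >> 44) << 5); // bits 31..12
    Insn[1] = 0x03800000 | static_cast<uint32_t>((Addr & 0xFFF) << 10);    // bits 11..0
    Insn[2] = 0x16000000 | static_cast<uint32_t>((Addr << 12 >> 44) << 5); // bits 51..32
    Insn[3] = 0x03000000 | static_cast<uint32_t>((Addr >> 52) << 10);      // bits 63..52
  }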
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index c702584b7a33..2f1ec696ac83 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -641,6 +641,191 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
   }
 }
 
+void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section,
+                                                  uint64_t Offset,
+                                                  uint64_t Value, uint32_t Type,
+                                                  int64_t Addend) {
+  uint32_t *TargetPtr =
+      reinterpret_cast<uint32_t *>(Section.getAddressWithOffset(Offset));
+  uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+  uint64_t tmp1, tmp2, tmp3;
+
+  LLVM_DEBUG(dbgs() << "[XXX] resolveLoongArch64Relocation, LocalAddress: 0x"
+                    << format("%llx", Section.getAddressWithOffset(Offset))
+                    << " FinalAddress: 0x" << format("%llx", FinalAddress)
+                    << " Value: 0x" << format("%llx", Value) << " Type: 0x"
+                    << format("%x", Type) << " Addend: 0x"
+                    << format("%llx", Addend) << "\n");
+
+  switch (Type) {
+  case ELF::R_LARCH_SOP_PUSH_GPREL:
+  case ELF::R_LARCH_SOP_PUSH_TLS_TPREL:
+  case ELF::R_LARCH_SOP_PUSH_TLS_GOT:
+  case ELF::R_LARCH_SOP_PUSH_TLS_GD:
+  default:
+    llvm_unreachable("Relocation type not implemented yet!");
+    break;
+  case ELF::R_LARCH_MARK_LA:
+    // mark la
+    MarkLA = true;
+    break;
+  case ELF::R_LARCH_SOP_PUSH_ABSOLUTE:
+    if (MarkLA && !Addend)
+      // push(value)
+      ValuesStack.push_back(Value);
+    else
+      // push(addend)
+      ValuesStack.push_back(Addend);
+    break;
+  case ELF::R_LARCH_SOP_PUSH_PLT_PCREL:
+  case ELF::R_LARCH_SOP_PUSH_PCREL:
+    MarkLA = false;
+    // push(value - pc + addend)
+    ValuesStack.push_back(Value - FinalAddress + Addend);
+    break;
+  case ELF::R_LARCH_SOP_NOT:
+    // pop(tmp1)
+    // push(!tmp1)
+    tmp1 = ValuesStack.pop_back_val();
+    ValuesStack.push_back(!tmp1);
+    break;
+  case ELF::R_LARCH_SOP_AND:
+    // pop(tmp2)
+    // pop(tmp1)
+    // push(tmp1 & tmp2)
+    tmp2 = ValuesStack.pop_back_val();
+    tmp1 = ValuesStack.pop_back_val();
+    ValuesStack.push_back(tmp1 & tmp2);
+    break;
+  case ELF::R_LARCH_SOP_IF_ELSE:
+    // pop(tmp3)
+    // pop(tmp2)
+    // pop(tmp1)
+    // push(tmp1 ? tmp2 : tmp3)
+    tmp3 = ValuesStack.pop_back_val();
+    tmp2 = ValuesStack.pop_back_val();
+    tmp1 = ValuesStack.pop_back_val();
+    ValuesStack.push_back(tmp1 ? tmp2 : tmp3);
+    break;
+  case ELF::R_LARCH_SOP_ADD:
+    // pop(tmp2)
+    // pop(tmp1)
+    // push(tmp1 + tmp2)
+    tmp2 = ValuesStack.pop_back_val();
+    tmp1 = ValuesStack.pop_back_val();
+    ValuesStack.push_back(tmp1 + tmp2);
+    break;
+  case ELF::R_LARCH_SOP_SUB:
+    // pop(tmp2)
+    // pop(tmp1)
+    // push(tmp1 - tmp2)
+    tmp2 = ValuesStack.pop_back_val();
+    tmp1 = ValuesStack.pop_back_val();
+    ValuesStack.push_back(tmp1 - tmp2);
+    break;
+  case ELF::R_LARCH_SOP_SR:
+    // pop(tmp2)
+    // pop(tmp1)
+    // push(tmp1 >> tmp2)
+    tmp2 = ValuesStack.pop_back_val();
+    tmp1 = ValuesStack.pop_back_val();
+    ValuesStack.push_back(tmp1 >> tmp2);
+    break;
+  case ELF::R_LARCH_SOP_SL:
+    // pop(tmp2)
+    // pop(tmp1)
+    // push(tmp1 << tmp2)
+    tmp2 = ValuesStack.pop_back_val();
+    tmp1 = ValuesStack.pop_back_val();
+    ValuesStack.push_back(tmp1 << tmp2);
+    break;
+  case ELF::R_LARCH_32:
+    support::ulittle32_t::ref{TargetPtr} =
+        static_cast<uint32_t>(Value + Addend);
+    break;
+  case ELF::R_LARCH_64:
+    support::ulittle64_t::ref{TargetPtr} = Value + Addend;
+    break;
+  case ELF::R_LARCH_SOP_POP_32_U_10_12:
+  case ELF::R_LARCH_SOP_POP_32_S_10_12:
+    // pop(tmp1)
+    // get(inst)
+    // inst=(inst & 0xffc003ff)|((tmp1 & 0xfff) << 10)
+    // write(inst)
+    tmp1 = ValuesStack.pop_back_val();
+    support::ulittle32_t::ref{TargetPtr} =
+        (support::ulittle32_t::ref{TargetPtr} & 0xffc003ff) |
+        static_cast<uint32_t>((tmp1 & 0xfff) << 10);
+    break;
+  case ELF::R_LARCH_SOP_POP_32_S_5_20:
+    // pop(tmp1)
+    // get(inst)
+    // inst=(inst & 0xfe00001f)|((tmp1 & 0xfffff) << 5)
+    // write(inst)
+    tmp1 = ValuesStack.pop_back_val();
+    support::ulittle32_t::ref{TargetPtr} =
+        (support::ulittle32_t::ref{TargetPtr} & 0xfe00001f) |
+        static_cast<uint32_t>((tmp1 & 0xfffff) << 5);
+    break;
+  case ELF::R_LARCH_SOP_POP_32_S_10_16_S2:
+    // pop(tmp1)
+    // tmp1 >>= 2
+    // get(inst)
+    // inst=(inst & 0xfc0003ff)|((tmp1 & 0xffff) << 10)
+    // write(inst)
+    tmp1 = ValuesStack.pop_back_val();
+    tmp1 >>= 2;
+    support::ulittle32_t::ref{TargetPtr} =
+        (support::ulittle32_t::ref{TargetPtr} & 0xfc0003ff) |
+        static_cast<uint32_t>((tmp1 & 0xffff) << 10);
+    break;
+  case ELF::R_LARCH_SOP_POP_32_S_0_5_10_16_S2:
+    // pop(tmp1)
+    // tmp1 >>= 2
+    // get(inst)
+    // inst=(inst & 0xfc0003e0)|((tmp1 & 0xffff) << 10)|((tmp1 & 0x1f0000) >> 16)
+    // write(inst)
+    tmp1 = ValuesStack.pop_back_val();
+    tmp1 >>= 2;
+    support::ulittle32_t::ref{TargetPtr} =
+        (support::ulittle32_t::ref{TargetPtr} & 0xfc0003e0) |
+        static_cast<uint32_t>((tmp1 & 0xffff) << 10) |
+        static_cast<uint32_t>((tmp1 & 0x1f0000) >> 16);
+    break;
+  case ELF::R_LARCH_SOP_POP_32_S_0_10_10_16_S2:
+    // pop(tmp1)
+    // tmp1 >>= 2
+    // get(inst)
+    // inst=(inst & 0xfc000000)|((tmp1 & 0xffff) << 10)|((tmp1 & 0x3ff0000) >> 16)
+    // write(inst)
+    tmp1 = ValuesStack.pop_back_val();
+    tmp1 >>= 2;
+    support::ulittle32_t::ref{TargetPtr} =
+        (support::ulittle32_t::ref{TargetPtr} & 0xfc000000) |
+        static_cast<uint32_t>((tmp1 & 0xffff) << 10) |
+        static_cast<uint32_t>((tmp1 & 0x3ff0000) >> 16);
+    break;
+  case ELF::R_LARCH_ADD32:
+    support::ulittle32_t::ref{TargetPtr} =
+        (support::ulittle32_t::ref{TargetPtr} +
+         static_cast<uint32_t>(Value + Addend));
+    break;
+  case ELF::R_LARCH_SUB32:
+    support::ulittle32_t::ref{TargetPtr} =
+        (support::ulittle32_t::ref{TargetPtr} -
+         static_cast<uint32_t>(Value + Addend));
+    break;
+  case ELF::R_LARCH_ADD64:
+    support::ulittle64_t::ref{TargetPtr} =
+        (support::ulittle64_t::ref{TargetPtr} + Value + Addend);
+    break;
+  case ELF::R_LARCH_SUB64:
+    support::ulittle64_t::ref{TargetPtr} =
+        (support::ulittle64_t::ref{TargetPtr} - Value - Addend);
+    break;
+  }
+}
+
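The R_LARCH_SOP_* relocations implemented above form a small stack machine: the PUSH types push an operand (symbol value, pc-relative delta, or addend), the operator types pop and combine, and the POP_32_* types write a bit field of the final value back into the instruction word. A sketch of one such step (a standalone illustration, not part of the patch, mirroring the R_LARCH_SOP_POP_32_S_5_20 case, which masks with 0xfe00001f and inserts bits 19..0 of the popped value at bit 5):

  #include <cstdint>
  #include <vector>

  // Pop the evaluated value and patch the si20 field of an lu12i.w-class
  // instruction, as the POP_32_S_5_20 case does via ulittle32_t::ref.
  static uint32_t popS5_20(uint32_t Inst, std::vector<uint64_t> &Stack) {
    uint64_t V = Stack.back();
    Stack.pop_back();
    return (Inst & 0xfe00001f) | static_cast<uint32_t>((V & 0xfffff) << 5);
  }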
 void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) {
   if (Arch == Triple::UnknownArch ||
       !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) {
@@ -1057,6 +1242,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
     resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type,
                          (uint32_t)(Addend & 0xffffffffL));
     break;
+  case Triple::loongarch64:
+    resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend);
+    break;
   case Triple::ppc: // Fall through.
   case Triple::ppcle:
     resolvePPC32Relocation(Section, Offset, Value, Type, Addend);
@@ -1369,6 +1557,58 @@ RuntimeDyldELF::processRelocationRef(
     }
     processSimpleRelocation(SectionID, Offset, RelType, Value);
+  } else if (Arch == Triple::loongarch64) {
+    if (RelType == ELF::R_LARCH_32 || RelType == ELF::R_LARCH_64 ||
+        (RelType >= ELF::R_LARCH_ADD8 && RelType <= ELF::R_LARCH_SUB64)) {
+      if (TargetName.size() == 0 &&
+          Sections[SectionID].getAddress() != nullptr) {
+        uint64_t SymOffset = 0;
+        unsigned SID = 0;
+        auto SectionOrErr = Symbol->getSection();
+        if (!SectionOrErr) {
+          std::string Buf;
+          raw_string_ostream OS(Buf);
+          logAllUnhandledErrors(SectionOrErr.takeError(), OS);
+          report_fatal_error(Twine(OS.str()));
+        }
+        section_iterator si = *SectionOrErr;
+        if (si == Obj.section_end())
+          llvm_unreachable("Symbol section not found!");
+        bool isCode = si->isText();
+        if (auto SectionIDOrErr =
+                findOrEmitSection(Obj, (*si), isCode, ObjSectionToID)) {
+          SID = *SectionIDOrErr;
+        } else
+          return SectionIDOrErr.takeError();
+        auto OffsetOrErr = Symbol->getAddress();
+        if (OffsetOrErr)
+          SymOffset = *OffsetOrErr;
+        uint64_t Target = Sections[SID].getLoadAddress() + SymOffset;
+        resolveRelocation(Sections[SectionID], Offset, Target, RelType,
+                          Addend);
+      } else {
+        processSimpleRelocation(SectionID, Offset, RelType, Value);
+      }
+    } else {
+      RTDyldSymbolTable::const_iterator Loc =
+          GlobalSymbolTable.find(TargetName);
+      if (!TargetName.empty()) {
+        if (Loc == GlobalSymbolTable.end()) {
+          IsSaved = true;
+          SavedSymbol = TargetName;
+        } else {
+          IsSaved = false;
+        }
+      }
+      if (IsSaved == true) {
+        Value.SymbolName = SavedSymbol.data();
+        processSimpleRelocation(SectionID, Offset, RelType, Value);
+      } else {
+        uint8_t *TargetAddr = getSymbolLocalAddress(TargetName);
+        resolveRelocation(Sections[SectionID], Offset,
+                          reinterpret_cast<uint64_t>(TargetAddr), RelType,
+                          Addend);
+      }
+    }
   } else if (IsMipsO32ABI) {
     uint8_t *Placeholder = reinterpret_cast<uint8_t *>(
         computePlaceholderAddress(SectionID, Offset));
@@ -2218,6 +2458,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() {
   case Triple::x86_64:
   case Triple::aarch64:
   case Triple::aarch64_be:
+  case Triple::loongarch64:
   case Triple::ppc64:
   case Triple::ppc64le:
   case Triple::systemz:
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 1251036f4caa..ba898f654324 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -48,6 +48,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
   void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset,
                             uint32_t Value, uint32_t Type, int32_t Addend);
 
+  void resolveLoongArch64Relocation(const SectionEntry &Section,
+                                    uint64_t Offset, uint64_t Value,
+                                    uint32_t Type, int64_t Addend);
+
   void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset,
                               uint64_t Value, uint32_t Type, int64_t Addend);
 
@@ -155,6 +159,12 @@ private:
   // EH frame sections with the memory manager.
   SmallVector<SID, 2> UnregisteredEHFrameSections;
 
+  // For loongarch evaluateRelocation
+  SmallVector<uint64_t> ValuesStack;
+  bool IsSaved;
+  bool MarkLA;
+  StringRef SavedSymbol;
+
   // Map between GOT relocation value and corresponding GOT offset
   std::map<RelocationValueRef, uint64_t> GOTOffsetMap;
 
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 5a073632d385..377b4d28bbb9 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -38,6 +38,7 @@
 #include "llvm/IR/IntrinsicsBPF.h"
 #include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/IntrinsicsLoongArch.h"
 #include "llvm/IR/IntrinsicsMips.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index d6fe952c0c1d..254c2fa60131 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -332,6 +332,12 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
 void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
   switch (T.getArch()) {
+  case Triple::loongarch32:
+  case Triple::loongarch64:
+    FDECFIEncoding = Ctx->getAsmInfo()->getCodePointerSize() == 4
+                         ? dwarf::DW_EH_PE_sdata4
+                         : dwarf::DW_EH_PE_sdata8;
+    break;
   case Triple::mips:
   case Triple::mipsel:
   case Triple::mips64:
diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp
index e14301663df3..ec54d7b599b1 100644
--- a/llvm/lib/Object/RelocationResolver.cpp
+++ b/llvm/lib/Object/RelocationResolver.cpp
@@ -511,6 +511,28 @@ static uint64_t resolveCSKY(uint64_t Type, uint64_t Offset, uint64_t S,
   }
 }
 
+static bool supportsLoongArch(uint64_t Type) {
+  switch (Type) {
+  case ELF::R_LARCH_32:
+  case ELF::R_LARCH_64:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static uint64_t resolveLoongArch(uint64_t Type, uint64_t Offset, uint64_t S,
+                                 uint64_t LocData, int64_t Addend) {
+  switch (Type) {
+  case ELF::R_LARCH_32:
+    return (S + Addend) & 0xFFFFFFFF;
+  case ELF::R_LARCH_64:
+    return S + Addend;
+  default:
+    llvm_unreachable("Invalid relocation type");
+  }
+}
+
 static bool supportsCOFFX86(uint64_t Type) {
   switch (Type) {
   case COFF::IMAGE_REL_I386_SECREL:
@@ -725,6 +747,8 @@ getRelocationResolver(const ObjectFile &Obj) {
       return {supportsAmdgpu, resolveAmdgpu};
     case Triple::riscv64:
       return {supportsRISCV, resolveRISCV};
+    case Triple::loongarch64:
+      return {supportsLoongArch, resolveLoongArch};
     default:
       return {nullptr, nullptr};
     }
@@ -760,6 +784,8 @@ getRelocationResolver(const ObjectFile &Obj) {
     return {supportsRISCV, resolveRISCV};
   case Triple::csky:
     return {supportsCSKY, resolveCSKY};
+  case Triple::loongarch32:
+    return {supportsLoongArch, resolveLoongArch};
   default:
     return {nullptr, nullptr};
   }
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 9ad2c4135167..20600a334eca 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -518,14 +518,6 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
     BCaseMask(EF_AVR_ARCH_XMEGA7, EF_AVR_ARCH_MASK);
     BCase(EF_AVR_LINKRELAX_PREPARED);
     break;
-  case ELF::EM_LOONGARCH:
-    BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32S, EF_LOONGARCH_BASE_ABI_MASK);
-    BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32F, EF_LOONGARCH_BASE_ABI_MASK);
-    BCaseMask(EF_LOONGARCH_BASE_ABI_ILP32D, EF_LOONGARCH_BASE_ABI_MASK);
-    BCaseMask(EF_LOONGARCH_BASE_ABI_LP64S, EF_LOONGARCH_BASE_ABI_MASK);
-    BCaseMask(EF_LOONGARCH_BASE_ABI_LP64F, EF_LOONGARCH_BASE_ABI_MASK);
-    BCaseMask(EF_LOONGARCH_BASE_ABI_LP64D, EF_LOONGARCH_BASE_ABI_MASK);
-    break;
   case ELF::EM_RISCV:
     BCase(EF_RISCV_RVC);
     BCaseMask(EF_RISCV_FLOAT_ABI_SOFT, EF_RISCV_FLOAT_ABI);
@@ -620,6 +612,11 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
       break;
     }
     break;
+  case ELF::EM_LOONGARCH:
+    BCaseMask(EF_LARCH_ABI_LP32, EF_LARCH_ABI);
+    BCaseMask(EF_LARCH_ABI_LPX32, EF_LARCH_ABI);
+    BCaseMask(EF_LARCH_ABI_LP64, EF_LARCH_ABI);
+    break;
   default:
     break;
   }
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index d163bcff0d3f..f10a0be4ed0c 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -258,6 +258,7 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
   case GNU: return "gnu";
   case GNUABI64: return "gnuabi64";
   case GNUABIN32: return "gnuabin32";
+  case GNUABILPX32: return "gnuabilpx32";
   case GNUEABI: return "gnueabi";
   case GNUEABIHF: return "gnueabihf";
   case GNUX32: return "gnux32";
diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc
index 5e008069dd98..6735b74c5ad0 100644
--- a/llvm/lib/Support/Unix/Memory.inc
+++ b/llvm/lib/Support/Unix/Memory.inc
@@ -241,8 +241,9 @@ void Memory::InvalidateInstructionCache(const void *Addr,
   for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
     asm volatile("icbi 0, %0" : : "r"(Line));
   asm volatile("isync");
-# elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \
-    defined(__GNUC__)
+#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \
+       defined(__mips__)) && \
+    defined(__GNUC__)
   // FIXME: Can we safely always call this for __GNUC__ everywhere?
   const char *Start = static_cast<const char *>(Addr);
   const char *End = Start + Len;
diff --git a/llvm/lib/Target/LoongArch/AsmParser/CMakeLists.txt b/llvm/lib/Target/LoongArch/AsmParser/CMakeLists.txt
index 29616053118f..cb8b768d5acb 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/CMakeLists.txt
+++ b/llvm/lib/Target/LoongArch/AsmParser/CMakeLists.txt
@@ -2,10 +2,10 @@ add_llvm_component_library(LLVMLoongArchAsmParser
   LoongArchAsmParser.cpp
 
   LINK_COMPONENTS
-  LoongArchDesc
-  LoongArchInfo
   MC
   MCParser
+  LoongArchDesc
+  LoongArchInfo
   Support
 
   ADD_TO_COMPONENT
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 9793c7bc3532..61d4555187dc 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -1,4 +1,4 @@
-// LoongArchAsmParser.cpp - Parse LoongArch assembly to MCInst instructions -=//
+//===-- LoongArchAsmParser.cpp - Parse LoongArch assembly to MCInst instructions ----===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,551 +6,2279 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/LoongArchInstPrinter.h"
+#include "LoongArchTargetStreamer.h"
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "MCTargetDesc/LoongArchAnalyzeImmediate.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCExpr.h"
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "loongarch-asm-parser"
 
+namespace llvm {
+
+class MCInstrInfo;
+
+} // end namespace llvm
+
+namespace {
+
+class LoongArchAssemblerOptions {
+public:
+  LoongArchAssemblerOptions(const FeatureBitset &Features_)
+      : Features(Features_) {}
+
+  LoongArchAssemblerOptions(const LoongArchAssemblerOptions *Opts) {
+    Features = Opts->getFeatures();
+  }
+
+  const FeatureBitset &getFeatures() const { return Features; }
+  void setFeatures(const FeatureBitset &Features_) { Features = Features_; }
+
+private:
+  FeatureBitset Features;
+};
+
+} // end anonymous namespace
+
 namespace {
+
 class LoongArchAsmParser : public MCTargetAsmParser {
-  SMLoc getLoc() const { return getParser().getTok().getLoc(); }
+  LoongArchTargetStreamer &getTargetStreamer() {
+    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+    return static_cast<LoongArchTargetStreamer &>(TS);
+  }
+
+  LoongArchABIInfo ABI;
+  SmallVector<std::unique_ptr<LoongArchAssemblerOptions>, 2> AssemblerOptions;
+  MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a
+                       // nullptr, which indicates that no function is currently
+                       // selected. This usually happens after an '.end'
+                       // directive.
+  bool IsPicEnabled;
 
-  /// Parse a register as used in CFI directives.
+  // Map of register aliases created via the .set directive.
+  StringMap<AsmToken> RegisterSets;
+
+#define GET_ASSEMBLER_HEADER
+#include "LoongArchGenAsmMatcher.inc"
+
+  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               OperandVector &Operands, MCStreamer &Out,
+                               uint64_t &ErrorInfo,
+                               bool MatchingInlineAsm) override;
+
+  /// Parse a register as used in CFI directives
   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
 
   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                         SMLoc &EndLoc) override;
 
+  bool mnemonicIsValid(StringRef Mnemonic);
+
   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                         SMLoc NameLoc, OperandVector &Operands) override;
 
-  bool ParseDirective(AsmToken DirectiveID) override { return true; }
+  bool ParseDirective(AsmToken DirectiveID) override;
+
+  OperandMatchResultTy parseMemOperand(OperandVector &Operands);
+  OperandMatchResultTy parseAMemOperand(OperandVector &Operands);
+  OperandMatchResultTy
+  matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+                                    StringRef Identifier, SMLoc S);
+  OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands,
+                                                     const AsmToken &Token,
+                                                     SMLoc S);
+  OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands,
+                                                     SMLoc S);
+  OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
+  OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
+
+  bool searchSymbolAlias(OperandVector &Operands);
+
+  bool parseOperand(OperandVector &, StringRef Mnemonic);
+
+  enum MacroExpanderResultTy {
+    MER_NotAMacro,
+    MER_Success,
+    MER_Fail,
+  };
 
-  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
-                               OperandVector &Operands, MCStreamer &Out,
-                               uint64_t &ErrorInfo,
-                               bool MatchingInlineAsm) override;
+  // Expands assembly pseudo instructions.
+  MacroExpanderResultTy tryExpandInstruction(MCInst &Inst, SMLoc IDLoc,
+                                             MCStreamer &Out,
+                                             const MCSubtargetInfo *STI);
 
-  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+  bool expandLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                     const MCSubtargetInfo *STI);
 
-  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
-                                      unsigned Kind) override;
+  bool expandLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+                         const MCSubtargetInfo *STI);
 
-  bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
-                                  int64_t Lower, int64_t Upper, Twine Msg);
+  bool reportParseError(Twine ErrorMsg);
 
-  /// Helper for processing MC instructions that have been successfully matched
-  /// by MatchAndEmitInstruction.
-  bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
-                          MCStreamer &Out);
+  bool parseMemOffset(const MCExpr *&Res);
 
-// Auto-generated instruction matching functions.
-#define GET_ASSEMBLER_HEADER -#include "LoongArchGenAsmMatcher.inc" + bool isEvaluated(const MCExpr *Expr); + bool parseDirectiveSet(); + + bool parseSetAssignment(); + + bool parseInternalDirectiveReallowModule(); + + int matchCPURegisterName(StringRef Symbol); + + int matchFPURegisterName(StringRef Name); + + int matchFCFRRegisterName(StringRef Name); + int matchFCSRRegisterName(StringRef Name); + + int matchLSX128RegisterName(StringRef Name); + + int matchLASX256RegisterName(StringRef Name); + + bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + + // Helper function that checks if the value of a vector index is within the + // boundaries of accepted values for each RegisterKind + // Example: VINSGR2VR.B $v0[n], $1 => 16 > n >= 0 + bool validateLSXIndex(int Val, int RegKind); + + void setFeatureBits(uint64_t Feature, StringRef FeatureString) { + if (!(getSTI().getFeatureBits()[Feature])) { + MCSubtargetInfo &STI = copySTI(); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); + } + } - OperandMatchResultTy parseRegister(OperandVector &Operands); - OperandMatchResultTy parseImmediate(OperandVector &Operands); + void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { + if (getSTI().getFeatureBits()[Feature]) { + MCSubtargetInfo &STI = copySTI(); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); + AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); + } + } + + void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { + setFeatureBits(Feature, FeatureString); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); + } - bool parseOperand(OperandVector &Operands, StringRef Mnemonic); + void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { + clearFeatureBits(Feature, FeatureString); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); + } public: enum LoongArchMatchResultTy { - Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, - Match_RequiresMsbNotLessThanLsb, - Match_RequiresOpnd2NotR0R1, + Match_RequiresNoZeroRegister = FIRST_TARGET_MATCH_RESULT_TY, + Match_RequiresNoRaRegister, + Match_RequiresRange0_31, + Match_RequiresRange0_63, + Match_MsbHigherThanLsb, + Match_RequiresPosSizeUImm6, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "LoongArchGenAsmMatcher.inc" #undef GET_OPERAND_DIAGNOSTIC_TYPES }; - LoongArchAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, + LoongArchAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI, MII) { - Parser.addAliasForDirective(".half", ".2byte"); - Parser.addAliasForDirective(".hword", ".2byte"); - Parser.addAliasForDirective(".word", ".4byte"); - Parser.addAliasForDirective(".dword", ".8byte"); + : MCTargetAsmParser(Options, sti, MII), + ABI(LoongArchABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), + sti.getCPU(), Options)) { + MCAsmParserExtension::Initialize(parser); + + parser.addAliasForDirective(".asciiz", ".asciz"); + parser.addAliasForDirective(".half", ".2byte"); + parser.addAliasForDirective(".hword", ".2byte"); + parser.addAliasForDirective(".word", ".4byte"); + parser.addAliasForDirective(".dword", ".8byte"); // Initialize the set of available features. 
-    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+
+    // Remember the initial assembler options. The user can not modify these.
+    AssemblerOptions.push_back(
+        std::make_unique<LoongArchAssemblerOptions>(getSTI().getFeatureBits()));
+
+    // Create an assembler options environment for the user to modify.
+    AssemblerOptions.push_back(
+        std::make_unique<LoongArchAssemblerOptions>(getSTI().getFeatureBits()));
+
+    getTargetStreamer().updateABIInfo(*this);
+
+    CurrentFn = nullptr;
+
+    IsPicEnabled = getContext().getObjectFileInfo()->isPositionIndependent();
+  }
+
+  bool is64Bit() const {
+    return getSTI().getFeatureBits()[LoongArch::Feature64Bit];
+  }
+
+  bool isFP64bit() const {
+    return getSTI().getFeatureBits()[LoongArch::FeatureFP64Bit];
+  }
+
+  const LoongArchABIInfo &getABI() const { return ABI; }
+  bool isABI_LPX32() const { return ABI.IsLPX32(); }
+  bool isABI_LP64() const { return ABI.IsLP64(); }
+  bool isABI_LP32() const { return ABI.IsLP32(); }
+
+  bool hasLSX() const {
+    return getSTI().getFeatureBits()[LoongArch::FeatureLSX];
+  }
+
+  bool hasLASX() const {
+    return getSTI().getFeatureBits()[LoongArch::FeatureLASX];
+  }
+
+  bool inPicMode() {
+    return IsPicEnabled;
+  }
+
+  bool useSoftFloat() const {
+    return getSTI().getFeatureBits()[LoongArch::FeatureSoftFloat];
+  }
+
+  const MCExpr *createTargetUnaryExpr(const MCExpr *E,
+                                      AsmToken::TokenKind OperatorToken,
+                                      MCContext &Ctx) override {
+    switch (OperatorToken) {
+    default:
+      llvm_unreachable("Unknown token");
+      return nullptr;
+#if 0
+    case AsmToken::PercentPlt:
+      return LoongArchMCExpr::create(LoongArchMCExpr::MEK_PLT, E, Ctx);
+#endif
+    }
+  }
 };
 
-// Instances of this class represent a parsed LoongArch machine instruction.
+/// LoongArchOperand - Instances of this class represent a parsed LoongArch
+/// machine instruction.
 class LoongArchOperand : public MCParsedAsmOperand {
-  enum class KindTy {
-    Token,
-    Register,
-    Immediate,
+public:
+  /// Broad categories of register classes
+  /// The exact class is finalized by the render method.
+  enum RegKind {
+    RegKind_GPR = 1,      /// GPR32 and GPR64 (depending on is64Bit())
+    RegKind_FGR = 2,      /// FGR32, FGR64 (depending on isFP64bit())
+    RegKind_FCFR = 4,     /// FCFR
+    RegKind_FCSR = 8,     /// FCSR
+    RegKind_LSX128 = 16,  /// LSX128[BHWD] (makes no difference which)
+    RegKind_LASX256 = 32, /// LASX256[BHWD] (makes no difference which)
+    RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCFR | RegKind_FCSR |
+                      RegKind_LSX128 | RegKind_LASX256
+  };
+
+private:
+  enum KindTy {
+    k_Immediate,     /// An immediate (possibly involving symbol references)
+    k_Memory,        /// Base + Offset Memory Address
+    k_RegisterIndex, /// A register index in one or more RegKind.
+    k_Token,         /// A simple token
+    k_RegList,       /// A physical register list
   } Kind;
 
-  struct RegOp {
-    MCRegister RegNum;
+public:
+  LoongArchOperand(KindTy K, LoongArchAsmParser &Parser)
+      : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {}
+
+  ~LoongArchOperand() override {
+    switch (Kind) {
+    case k_Memory:
+      delete Mem.Base;
+      break;
+    case k_RegList:
+      delete RegList.List;
+      break;
+    case k_Immediate:
+    case k_RegisterIndex:
+    case k_Token:
+      break;
+    }
+  }
+
+private:
+  /// For diagnostics, and checking the assembler temporary
+  LoongArchAsmParser &AsmParser;
+
+  struct Token {
+    const char *Data;
+    unsigned Length;
+  };
+
+  struct RegIdxOp {
+    unsigned Index;   /// Index into the register class
+    RegKind Kind;     /// Bitfield of the kinds it could possibly be
+    struct Token Tok; /// The input token this operand originated from.
+    const MCRegisterInfo *RegInfo;
   };
 
   struct ImmOp {
     const MCExpr *Val;
   };
 
-  SMLoc StartLoc, EndLoc;
-  union {
-    StringRef Tok;
-    struct RegOp Reg;
-    struct ImmOp Imm;
+  struct MemOp {
+    LoongArchOperand *Base;
+    const MCExpr *Off;
   };
 
-public:
-  LoongArchOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+  struct RegListOp {
+    SmallVector<unsigned, 10> *List;
+  };
 
-  bool isToken() const override { return Kind == KindTy::Token; }
-  bool isReg() const override { return Kind == KindTy::Register; }
-  bool isImm() const override { return Kind == KindTy::Immediate; }
-  bool isMem() const override { return false; }
-  void setReg(MCRegister PhysReg) { Reg.RegNum = PhysReg; }
+  union {
+    struct Token Tok;
+    struct RegIdxOp RegIdx;
+    struct ImmOp Imm;
+    struct MemOp Mem;
+    struct RegListOp RegList;
+  };
 
-  static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm) {
-    if (auto CE = dyn_cast<MCConstantExpr>(Expr)) {
-      Imm = CE->getValue();
-      return true;
-    }
+  SMLoc StartLoc, EndLoc;
 
-    return false;
+  /// Internal constructor for register kinds
+  static std::unique_ptr<LoongArchOperand>
+  CreateReg(unsigned Index, StringRef Str, RegKind RegKind,
+            const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+            LoongArchAsmParser &Parser) {
+    auto Op = std::make_unique<LoongArchOperand>(k_RegisterIndex, Parser);
+    Op->RegIdx.Index = Index;
+    Op->RegIdx.RegInfo = RegInfo;
+    Op->RegIdx.Kind = RegKind;
+    Op->RegIdx.Tok.Data = Str.data();
+    Op->RegIdx.Tok.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
   }
 
-  template <unsigned N, unsigned P = 0> bool isUImm() const {
-    if (!isImm())
-      return false;
-
-    int64_t Imm;
-    bool IsConstantImm = evaluateConstantImm(getImm(), Imm);
-    return IsConstantImm && isUInt<N>(Imm - P);
+public:
+  /// Coerce the register to GPR32 and return the real register for the current
+  /// target.
+  unsigned getGPR32Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
+    unsigned ClassID = LoongArch::GPR32RegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
   }
 
-  template <unsigned N, unsigned S = 0> bool isSImm() const {
-    if (!isImm())
-      return false;
-
-    int64_t Imm;
-    bool IsConstantImm = evaluateConstantImm(getImm(), Imm);
-    return IsConstantImm && isShiftedInt<N, S>(Imm);
-  }
-
-  bool isUImm2() const { return isUImm<2>(); }
-  bool isUImm2plus1() const { return isUImm<2, 1>(); }
-  bool isUImm3() const { return isUImm<3>(); }
-  bool isUImm5() const { return isUImm<5>(); }
-  bool isUImm6() const { return isUImm<6>(); }
-  bool isUImm8() const { return isUImm<8>(); }
-  bool isUImm12() const { return isUImm<12>(); }
-  bool isUImm14() const { return isUImm<14>(); }
-  bool isUImm15() const { return isUImm<15>(); }
-  bool isSImm12() const { return isSImm<12>(); }
-  bool isSImm14lsl2() const { return isSImm<14, 2>(); }
-  bool isSImm16() const { return isSImm<16>(); }
-  bool isSImm16lsl2() const { return isSImm<16, 2>(); }
-  bool isSImm20() const { return isSImm<20>(); }
-  bool isSImm21lsl2() const { return isSImm<21, 2>(); }
-  bool isSImm26lsl2() const { return isSImm<26, 2>(); }
-
-  /// Gets location of the first token of this operand.
-  SMLoc getStartLoc() const override { return StartLoc; }
-  /// Gets location of the last token of this operand.
-  SMLoc getEndLoc() const override { return EndLoc; }
-
-  unsigned getReg() const override {
-    assert(Kind == KindTy::Register && "Invalid type access!");
-    return Reg.RegNum.id();
+  /// Coerce the register to GPR32 and return the real register for the current
+  /// target.
+  unsigned getGPRMM16Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
+    unsigned ClassID = LoongArch::GPR32RegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
   }
 
-  const MCExpr *getImm() const {
-    assert(Kind == KindTy::Immediate && "Invalid type access!");
-    return Imm.Val;
+  /// Coerce the register to GPR64 and return the real register for the current
+  /// target.
+  unsigned getGPR64Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_GPR) && "Invalid access!");
+    unsigned ClassID = LoongArch::GPR64RegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
   }
 
-  StringRef getToken() const {
-    assert(Kind == KindTy::Token && "Invalid type access!");
-    return Tok;
+private:
+  /// Coerce the register to FGR64 and return the real register for the current
+  /// target.
+  unsigned getFGR64Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!");
+    return RegIdx.RegInfo->getRegClass(LoongArch::FGR64RegClassID)
+        .getRegister(RegIdx.Index);
   }
 
-  void print(raw_ostream &OS) const override {
-    auto RegName = [](unsigned Reg) {
-      if (Reg)
-        return LoongArchInstPrinter::getRegisterName(Reg);
-      else
-        return "noreg";
-    };
+  /// Coerce the register to FGR32 and return the real register for the current
+  /// target.
+  unsigned getFGR32Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_FGR) && "Invalid access!");
+    return RegIdx.RegInfo->getRegClass(LoongArch::FGR32RegClassID)
+        .getRegister(RegIdx.Index);
+  }
 
-    switch (Kind) {
-    case KindTy::Immediate:
-      OS << *getImm();
-      break;
-    case KindTy::Register:
-      OS << "<register " << RegName(getReg()) << ">";
-      break;
-    case KindTy::Token:
-      OS << "'" << getToken() << "'";
-      break;
-    }
+  /// Coerce the register to FCFR and return the real register for the current
+  /// target.
+  unsigned getFCFRReg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_FCFR) && "Invalid access!");
+    return RegIdx.RegInfo->getRegClass(LoongArch::FCFRRegClassID)
+        .getRegister(RegIdx.Index);
   }
 
-  static std::unique_ptr<LoongArchOperand> createToken(StringRef Str, SMLoc S) {
-    auto Op = std::make_unique<LoongArchOperand>(KindTy::Token);
-    Op->Tok = Str;
-    Op->StartLoc = S;
-    Op->EndLoc = S;
-    return Op;
+  /// Coerce the register to LSX128 and return the real register for the
+  /// current target.
+  unsigned getLSX128Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_LSX128) && "Invalid access!");
+    // It doesn't matter which of the LSX128[BHWD] classes we use. They are all
+    // identical
+    unsigned ClassID = LoongArch::LSX128BRegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
   }
 
-  static std::unique_ptr<LoongArchOperand> createReg(unsigned RegNo, SMLoc S,
-                                                     SMLoc E) {
-    auto Op = std::make_unique<LoongArchOperand>(KindTy::Register);
-    Op->Reg.RegNum = RegNo;
-    Op->StartLoc = S;
-    Op->EndLoc = E;
-    return Op;
+  unsigned getLASX256Reg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_LASX256) && "Invalid access!");
+    unsigned ClassID = LoongArch::LASX256BRegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
   }
 
-  static std::unique_ptr<LoongArchOperand> createImm(const MCExpr *Val, SMLoc S,
-                                                     SMLoc E) {
-    auto Op = std::make_unique<LoongArchOperand>(KindTy::Immediate);
-    Op->Imm.Val = Val;
-    Op->StartLoc = S;
-    Op->EndLoc = E;
-    return Op;
+  /// Coerce the register to CCR and return the real register for the
+  /// current target.
+  unsigned getFCSRReg() const {
+    assert(isRegIdx() && (RegIdx.Kind & RegKind_FCSR) && "Invalid access!");
+    unsigned ClassID = LoongArch::FCSRRegClassID;
+    return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
   }
 
+public:
   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
-    if (auto CE = dyn_cast<MCConstantExpr>(Expr))
+    // Add as immediate when possible. Null MCExpr = 0.
+    if (!Expr)
+      Inst.addOperand(MCOperand::createImm(0));
+    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
       Inst.addOperand(MCOperand::createImm(CE->getValue()));
     else
       Inst.addOperand(MCOperand::createExpr(Expr));
   }
 
-  // Used by the TableGen Code.
void addRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::createReg(getReg())); + llvm_unreachable("Use a custom parser instead"); } - void addImmOperands(MCInst &Inst, unsigned N) const { + + /// Render the operand to an MCInst as a GPR32 + /// Asserts if the wrong number of operands are requested, or the operand + /// is not a k_RegisterIndex compatible with RegKind_GPR + void addGPR32ZeroAsmRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); + Inst.addOperand(MCOperand::createReg(getGPR32Reg())); } -}; -} // end namespace -#define GET_REGISTER_MATCHER -#define GET_SUBTARGET_FEATURE_NAME -#define GET_MATCHER_IMPLEMENTATION -#define GET_MNEMONIC_SPELL_CHECKER -#include "LoongArchGenAsmMatcher.inc" + void addGPR32NonZeroAsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getGPR32Reg())); + } -static MCRegister convertFPR32ToFPR64(MCRegister Reg) { - assert(Reg >= LoongArch::F0 && Reg <= LoongArch::F31 && "Invalid register"); - return Reg - LoongArch::F0 + LoongArch::F0_64; -} + void addGPR32AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getGPR32Reg())); + } -// Attempts to match Name as a register (either using the default name or -// alternative ABI names), setting RegNo to the matching register. Upon -// failure, returns true and sets RegNo to 0. -static bool matchRegisterNameHelper(MCRegister &RegNo, StringRef Name) { - RegNo = MatchRegisterName(Name); - // The 32-bit and 64-bit FPRs have the same asm name. Check that the initial - // match always matches the 32-bit variant, and not the 64-bit one. - assert(!(RegNo >= LoongArch::F0_64 && RegNo <= LoongArch::F31_64)); - // The default FPR register class is based on the tablegen enum ordering. 
- static_assert(LoongArch::F0 < LoongArch::F0_64, - "FPR matching must be updated"); - if (RegNo == LoongArch::NoRegister) - RegNo = MatchRegisterAltName(Name); + void addGPRMM16AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); + } - return RegNo == LoongArch::NoRegister; -} + void addGPRMM16AsmRegZeroOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); + } -bool LoongArchAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { - return Error(getLoc(), "invalid register number"); -} + void addGPRMM16AsmRegMovePOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); + } -OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo, - SMLoc &StartLoc, - SMLoc &EndLoc) { - llvm_unreachable("Unimplemented function."); -} + void addGPRMM16AsmRegMovePPairFirstOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); + } -OperandMatchResultTy -LoongArchAsmParser::parseRegister(OperandVector &Operands) { - if (getLexer().getTok().isNot(AsmToken::Dollar)) - return MatchOperand_NoMatch; + void addGPRMM16AsmRegMovePPairSecondOperands(MCInst &Inst, + unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getGPRMM16Reg())); + } - // Eat the $ prefix. - getLexer().Lex(); - if (getLexer().getKind() != AsmToken::Identifier) - return MatchOperand_NoMatch; + /// Render the operand to an MCInst as a GPR64 + /// Asserts if the wrong number of operands are requested, or the operand + /// is not a k_RegisterIndex compatible with RegKind_GPR + void addGPR64AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getGPR64Reg())); + } - StringRef Name = getLexer().getTok().getIdentifier(); - MCRegister RegNo; - matchRegisterNameHelper(RegNo, Name); - if (RegNo == LoongArch::NoRegister) - return MatchOperand_NoMatch; + void addStrictlyFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR64Reg())); + } - SMLoc S = getLoc(); - SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size()); - getLexer().Lex(); - Operands.push_back(LoongArchOperand::createReg(RegNo, S, E)); + void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR64Reg())); + } - return MatchOperand_Success; -} + void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR32Reg())); + } -OperandMatchResultTy -LoongArchAsmParser::parseImmediate(OperandVector &Operands) { - SMLoc S = getLoc(); - SMLoc E; - const MCExpr *Res; + void addStrictlyFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR32Reg())); + } - if (getParser().parseExpression(Res, E)) - return MatchOperand_ParseFail; + void addFCFRAsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + 
Inst.addOperand(MCOperand::createReg(getFCFRReg())); + } - Operands.push_back(LoongArchOperand::createImm(Res, S, E)); - return MatchOperand_Success; -} + void addLSX128AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getLSX128Reg())); + } -/// Looks at a token type and creates the relevant operand from this -/// information, adding to Operands. Return true upon an error. -bool LoongArchAsmParser::parseOperand(OperandVector &Operands, - StringRef Mnemonic) { - if (parseRegister(Operands) == MatchOperand_Success || - parseImmediate(Operands) == MatchOperand_Success) - return false; + void addLASX256AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getLASX256Reg())); + } - // Finally we have exhausted all options and must declare defeat. - Error(getLoc(), "unknown operand"); - return true; -} + void addFCSRAsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFCSRReg())); + } -bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - OperandVector &Operands) { - // First operand in MCInst is instruction mnemonic. - Operands.push_back(LoongArchOperand::createToken(Name, NameLoc)); + template + void addConstantUImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + uint64_t Imm = getConstantImm() - Offset; + Imm &= (1ULL << Bits) - 1; + Imm += Offset; + Imm += AdjustOffset; + Inst.addOperand(MCOperand::createImm(Imm)); + } - // If there are no more operands, then finish. - if (parseOptionalToken(AsmToken::EndOfStatement)) - return false; + template + void addSImmOperands(MCInst &Inst, unsigned N) const { + if (isImm() && !isConstantImm()) { + addExpr(Inst, getImm()); + return; + } + addConstantSImmOperands(Inst, N); + } - // Parse first operand. - if (parseOperand(Operands, Name)) - return true; + template + void addUImmOperands(MCInst &Inst, unsigned N) const { + if (isImm() && !isConstantImm()) { + addExpr(Inst, getImm()); + return; + } + addConstantUImmOperands(Inst, N); + } - // Parse until end of statement, consuming commas between operands. - while (parseOptionalToken(AsmToken::Comma)) - if (parseOperand(Operands, Name)) - return true; + template + void addConstantSImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + int64_t Imm = getConstantImm() - Offset; + Imm = SignExtend64(Imm); + Imm += Offset; + Imm += AdjustOffset; + Inst.addOperand(MCOperand::createImm(Imm)); + } - // Parse end of statement and return successfully. - if (parseOptionalToken(AsmToken::EndOfStatement)) - return false; + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCExpr *Expr = getImm(); + addExpr(Inst, Expr); + } - SMLoc Loc = getLexer().getLoc(); - getParser().eatToEndOfStatement(); - return Error(Loc, "unexpected token"); -} + void addMemOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); -bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, - OperandVector &Operands, - MCStreamer &Out) { - Inst.setLoc(IDLoc); - Out.emitInstruction(Inst, getSTI()); - return false; -} + Inst.addOperand(MCOperand::createReg(AsmParser.getABI().ArePtrs64bit() + ? 
getMemBase()->getGPR64Reg() + : getMemBase()->getGPR32Reg())); -unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) { - switch (Inst.getOpcode()) { - default: - break; - case LoongArch::CSRXCHG: { - unsigned Rj = Inst.getOperand(2).getReg(); - if (Rj == LoongArch::R0 || Rj == LoongArch::R1) - return Match_RequiresOpnd2NotR0R1; - return Match_Success; + const MCExpr *Expr = getMemOff(); + addExpr(Inst, Expr); } - case LoongArch::BSTRINS_W: - case LoongArch::BSTRINS_D: - case LoongArch::BSTRPICK_W: - case LoongArch::BSTRPICK_D: { - unsigned Opc = Inst.getOpcode(); - const signed Msb = - (Opc == LoongArch::BSTRINS_W || Opc == LoongArch::BSTRINS_D) - ? Inst.getOperand(3).getImm() - : Inst.getOperand(2).getImm(); - const signed Lsb = - (Opc == LoongArch::BSTRINS_W || Opc == LoongArch::BSTRINS_D) - ? Inst.getOperand(4).getImm() - : Inst.getOperand(3).getImm(); - if (Msb < Lsb) - return Match_RequiresMsbNotLessThanLsb; - return Match_Success; + + void addRegListOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + for (auto RegNo : getRegList()) + Inst.addOperand(MCOperand::createReg(RegNo)); } + + bool isReg() const override { + // As a special case until we sort out the definition of div/divu, accept + // $0/$zero here so that MCK_ZERO works correctly. + return isGPRAsmReg() && RegIdx.Index == 0; } - return Match_Success; -} + bool isRegIdx() const { return Kind == k_RegisterIndex; } + bool isImm() const override { return Kind == k_Immediate; } -unsigned -LoongArchAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, - unsigned Kind) { - LoongArchOperand &Op = static_cast(AsmOp); - if (!Op.isReg()) - return Match_InvalidOperand; + bool isConstantImm() const { + int64_t Res; + return isImm() && getImm()->evaluateAsAbsolute(Res); + } - MCRegister Reg = Op.getReg(); - // As the parser couldn't differentiate an FPR32 from an FPR64, coerce the - // register from FPR32 to FPR64 if necessary. - if (LoongArchMCRegisterClasses[LoongArch::FPR32RegClassID].contains(Reg) && - Kind == MCK_FPR64) { - Op.setReg(convertFPR32ToFPR64(Reg)); - return Match_Success; + bool isConstantImmz() const { + return isConstantImm() && getConstantImm() == 0; } - return Match_InvalidOperand; -} + template bool isConstantUImm() const { + return isConstantImm() && isUInt(getConstantImm() - Offset); + } -bool LoongArchAsmParser::generateImmOutOfRangeError( - OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper, - Twine Msg = "immediate must be an integer in the range") { - SMLoc ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc(); - return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]"); -} + template bool isSImm() const { + return isConstantImm() ? isInt(getConstantImm()) : isImm(); + } -bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, - MCStreamer &Out, - uint64_t &ErrorInfo, - bool MatchingInlineAsm) { - MCInst Inst; - FeatureBitset MissingFeatures; + template bool isUImm() const { + return isConstantImm() ? 
isUInt(getConstantImm()) : isImm(); + } - auto Result = MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures, - MatchingInlineAsm); - switch (Result) { - default: - break; - case Match_Success: - return processInstruction(Inst, IDLoc, Operands, Out); - case Match_MissingFeature: { - assert(MissingFeatures.any() && "Unknown missing features!"); - bool FirstFeature = true; - std::string Msg = "instruction requires the following:"; - for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) { - if (MissingFeatures[i]) { - Msg += FirstFeature ? " " : ", "; - Msg += getSubtargetFeatureName(i); - FirstFeature = false; - } - } - return Error(IDLoc, Msg); + template bool isAnyImm() const { + return isConstantImm() ? (isInt(getConstantImm()) || + isUInt(getConstantImm())) + : isImm(); } - case Match_MnemonicFail: { - FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); - std::string Suggestion = LoongArchMnemonicSpellCheck( - ((LoongArchOperand &)*Operands[0]).getToken(), FBS, 0); - return Error(IDLoc, "unrecognized instruction mnemonic" + Suggestion); + + template bool isConstantSImm() const { + return isConstantImm() && isInt(getConstantImm() - Offset); + } + + template bool isConstantUImmRange() const { + return isConstantImm() && getConstantImm() >= Bottom && + getConstantImm() <= Top; + } + + bool isToken() const override { + // Note: It's not possible to pretend that other operand kinds are tokens. + // The matcher emitter checks tokens first. + return Kind == k_Token; + } + + bool isMem() const override { return Kind == k_Memory; } + + bool isConstantMemOff() const { + return isMem() && isa(getMemOff()); + } + + bool isZeroMemOff() const { + return isMem() && isa(getMemOff()) && + getConstantMemOff() == 0; + } + + // Allow relocation operators. + // FIXME: This predicate and others need to look through binary expressions + // and determine whether a Value is a constant or not. + template + bool isMemWithSimmOffset() const { + if (!isMem()) + return false; + if (!getMemBase()->isGPRAsmReg()) + return false; + if (isa(getMemOff()) || + (isConstantMemOff() && + isShiftedInt(getConstantMemOff()))) + return true; + MCValue Res; + bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); + return IsReloc && isShiftedInt(Res.getConstant()); + } + + bool isMemWithPtrSizeOffset() const { + if (!isMem()) + return false; + if (!getMemBase()->isGPRAsmReg()) + return false; + const unsigned PtrBits = AsmParser.getABI().ArePtrs64bit() ? 
64 : 32; + if (isa(getMemOff()) || + (isConstantMemOff() && isIntN(PtrBits, getConstantMemOff()))) + return true; + MCValue Res; + bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr); + return IsReloc && isIntN(PtrBits, Res.getConstant()); + } + + bool isMemWithGRPMM16Base() const { + return isMem() && getMemBase()->isMM16AsmReg(); + } + + template bool isMemWithUimmOffsetSP() const { + return isMem() && isConstantMemOff() && isUInt(getConstantMemOff()) + && getMemBase()->isRegIdx() && (getMemBase()->getGPR32Reg() == LoongArch::SP); + } + + template bool isMemWithUimmWordAlignedOffsetSP() const { + return isMem() && isConstantMemOff() && isUInt(getConstantMemOff()) + && (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx() + && (getMemBase()->getGPR32Reg() == LoongArch::SP); + } + + template + bool isScaledUImm() const { + return isConstantImm() && + isShiftedUInt(getConstantImm()); + } + + template + bool isScaledSImm() const { + if (isConstantImm() && + isShiftedInt(getConstantImm())) + return true; + // Operand can also be a symbol or symbol plus + // offset in case of relocations. + if (Kind != k_Immediate) + return false; + MCValue Res; + bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr); + return Success && isShiftedInt(Res.getConstant()); + } + + bool isRegList16() const { + if (!isRegList()) + return false; + + int Size = RegList.List->size(); + if (Size < 2 || Size > 5) + return false; + + unsigned R0 = RegList.List->front(); + unsigned R1 = RegList.List->back(); + if (!((R0 == LoongArch::S0 && R1 == LoongArch::RA) || + (R0 == LoongArch::S0_64 && R1 == LoongArch::RA_64))) + return false; + + int PrevReg = *RegList.List->begin(); + for (int i = 1; i < Size - 1; i++) { + int Reg = (*(RegList.List))[i]; + if ( Reg != PrevReg + 1) + return false; + PrevReg = Reg; + } + + return true; + } + + bool isInvNum() const { return Kind == k_Immediate; } + + bool isLSAImm() const { + if (!isConstantImm()) + return false; + int64_t Val = getConstantImm(); + return 1 <= Val && Val <= 4; + } + + bool isRegList() const { return Kind == k_RegList; } + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const override { + // As a special case until we sort out the definition of div/divu, accept + // $0/$zero here so that MCK_ZERO works correctly. 
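+    // Editor's note (added commentary, not from the original patch): the
+    // isGPRAsmReg-style predicates later in this class test RegIdx.Kind as a
+    // bitmask, e.g. (RegIdx.Kind & RegKind_GPR) && RegIdx.Index <= 31, so a
+    // numeric register such as $4 can remain a candidate for several register
+    // classes until matching settles it. Here only index 0 ($zero/$r0) is
+    // accepted, mirroring the div/divu special case described above.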
+ if (Kind == k_RegisterIndex && RegIdx.Index == 0 && + RegIdx.Kind & RegKind_GPR) + return getGPR32Reg(); // FIXME: GPR64 too + + llvm_unreachable("Invalid access!"); + return 0; + } + + const MCExpr *getImm() const { + assert((Kind == k_Immediate) && "Invalid access!"); + return Imm.Val; + } + + int64_t getConstantImm() const { + const MCExpr *Val = getImm(); + int64_t Value = 0; + (void)Val->evaluateAsAbsolute(Value); + return Value; + } + + LoongArchOperand *getMemBase() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Base; + } + + const MCExpr *getMemOff() const { + assert((Kind == k_Memory) && "Invalid access!"); + return Mem.Off; + } + + int64_t getConstantMemOff() const { + return static_cast(getMemOff())->getValue(); + } + + const SmallVectorImpl &getRegList() const { + assert((Kind == k_RegList) && "Invalid access!"); + return *(RegList.List); + } + + static std::unique_ptr CreateToken(StringRef Str, SMLoc S, + LoongArchAsmParser &Parser) { + auto Op = std::make_unique(k_Token, Parser); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + /// Create a numeric register (e.g. $1). The exact register remains + /// unresolved until an instruction successfully matches + static std::unique_ptr + createNumericReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { + LLVM_DEBUG(dbgs() << "createNumericReg(" << Index << ", ...)\n"); + return CreateReg(Index, Str, RegKind_Numeric, RegInfo, S, E, Parser); + } + + /// Create a register that is definitely a GPR. + /// This is typically only used for named registers such as $gp. + static std::unique_ptr + createGPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { + return CreateReg(Index, Str, RegKind_GPR, RegInfo, S, E, Parser); + } + + /// Create a register that is definitely a FGR. + /// This is typically only used for named registers such as $f0. + static std::unique_ptr + createFGRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { + return CreateReg(Index, Str, RegKind_FGR, RegInfo, S, E, Parser); + } + + /// Create a register that is definitely an FCFR. + /// This is typically only used for named registers such as $fcc0. + static std::unique_ptr + createFCFRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { + return CreateReg(Index, Str, RegKind_FCFR, RegInfo, S, E, Parser); + } + + /// Create a register that is definitely an FCSR. + /// This is typically only used for named registers such as $fcsr0. + static std::unique_ptr + createFCSRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { + return CreateReg(Index, Str, RegKind_FCSR, RegInfo, S, E, Parser); + } + + /// Create a register that is definitely an LSX128. + /// This is typically only used for named registers such as $v0. 
+ static std::unique_ptr + createLSX128Reg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { + return CreateReg(Index, Str, RegKind_LSX128, RegInfo, S, E, Parser); + } + + static std::unique_ptr + createLASX256Reg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { + return CreateReg(Index, Str, RegKind_LASX256, RegInfo, S, E, Parser); + } + + static std::unique_ptr + CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, LoongArchAsmParser &Parser) { + auto Op = std::make_unique(k_Immediate, Parser); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr + CreateMem(std::unique_ptr Base, const MCExpr *Off, SMLoc S, + SMLoc E, LoongArchAsmParser &Parser) { + auto Op = std::make_unique(k_Memory, Parser); + Op->Mem.Base = Base.release(); + Op->Mem.Off = Off; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr + CreateRegList(SmallVectorImpl &Regs, SMLoc StartLoc, SMLoc EndLoc, + LoongArchAsmParser &Parser) { + assert(Regs.size() > 0 && "Empty list not allowed"); + + auto Op = std::make_unique(k_RegList, Parser); + Op->RegList.List = new SmallVector(Regs.begin(), Regs.end()); + Op->StartLoc = StartLoc; + Op->EndLoc = EndLoc; + return Op; + } + + bool isGPRZeroAsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0; + } + + bool isGPRNonZeroAsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index > 0 && + RegIdx.Index <= 31; + } + + bool isGPRAsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 31; + } + + bool isMM16AsmReg() const { + if (!(isRegIdx() && RegIdx.Kind)) + return false; + return ((RegIdx.Index >= 2 && RegIdx.Index <= 7) + || RegIdx.Index == 16 || RegIdx.Index == 17); + + } + bool isMM16AsmRegZero() const { + if (!(isRegIdx() && RegIdx.Kind)) + return false; + return (RegIdx.Index == 0 || + (RegIdx.Index >= 2 && RegIdx.Index <= 7) || + RegIdx.Index == 17); + } + + bool isMM16AsmRegMoveP() const { + if (!(isRegIdx() && RegIdx.Kind)) + return false; + return (RegIdx.Index == 0 || (RegIdx.Index >= 2 && RegIdx.Index <= 3) || + (RegIdx.Index >= 16 && RegIdx.Index <= 20)); + } + + bool isMM16AsmRegMovePPairFirst() const { + if (!(isRegIdx() && RegIdx.Kind)) + return false; + return RegIdx.Index >= 4 && RegIdx.Index <= 6; + } + + bool isMM16AsmRegMovePPairSecond() const { + if (!(isRegIdx() && RegIdx.Kind)) + return false; + return (RegIdx.Index == 21 || RegIdx.Index == 22 || + (RegIdx.Index >= 5 && RegIdx.Index <= 7)); + } + + bool isFGRAsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31; + } + + bool isStrictlyFGRAsmReg() const { + return isRegIdx() && RegIdx.Kind == RegKind_FGR && RegIdx.Index <= 31; + } + + bool isFCSRAsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_FCSR && RegIdx.Index <= 3; + } + + bool isFCFRAsmReg() const { + if (!(isRegIdx() && RegIdx.Kind & RegKind_FCFR)) + return false; + return RegIdx.Index <= 7; + } + + bool isLSX128AsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_LSX128 && RegIdx.Index <= 31; + } + + bool isLASX256AsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_LASX256 && RegIdx.Index <= 31; + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const override { return StartLoc; } + /// getEndLoc - Get the location of the last token of this operand. 
+  SMLoc getEndLoc() const override { return EndLoc; }
+
+  void print(raw_ostream &OS) const override {
+    switch (Kind) {
+    case k_Immediate:
+      OS << "Imm<";
+      OS << *Imm.Val;
+      OS << ">";
+      break;
+    case k_Memory:
+      OS << "Mem<";
+      Mem.Base->print(OS);
+      OS << ", ";
+      OS << *Mem.Off;
+      OS << ">";
+      break;
+    case k_RegisterIndex:
+      OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ", "
+         << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">";
+      break;
+    case k_Token:
+      OS << getToken();
+      break;
+    case k_RegList:
+      OS << "RegList< ";
+      for (auto Reg : (*RegList.List))
+        OS << Reg << " ";
+      OS << ">";
+      break;
+    }
+  }
+
+  bool isValidForTie(const LoongArchOperand &Other) const {
+    if (Kind != Other.Kind)
+      return false;
+
+    switch (Kind) {
+    default:
+      llvm_unreachable("Unexpected kind");
+      return false;
+    case k_RegisterIndex: {
+      StringRef Token(RegIdx.Tok.Data, RegIdx.Tok.Length);
+      StringRef OtherToken(Other.RegIdx.Tok.Data, Other.RegIdx.Tok.Length);
+      return Token == OtherToken;
+    }
+    }
+  }
+}; // class LoongArchOperand
+
+} // end anonymous namespace
+
+namespace llvm {
+
+extern const MCInstrDesc LoongArchInsts[];
+
+} // end namespace llvm
+
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+  return LoongArchInsts[Opcode];
+}
+
+static const MCSymbol *getSingleMCSymbol(const MCExpr *Expr) {
+  if (const MCSymbolRefExpr *SRExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+    return &SRExpr->getSymbol();
+  }
+
+  if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(Expr)) {
+    const MCSymbol *LHSSym = getSingleMCSymbol(BExpr->getLHS());
+    const MCSymbol *RHSSym = getSingleMCSymbol(BExpr->getRHS());
+
+    if (LHSSym)
+      return LHSSym;
+
+    if (RHSSym)
+      return RHSSym;
+
+    return nullptr;
+  }
+
+  if (const MCUnaryExpr *UExpr = dyn_cast<MCUnaryExpr>(Expr))
+    return getSingleMCSymbol(UExpr->getSubExpr());
+
+  return nullptr;
+}
+
+static unsigned countMCSymbolRefExpr(const MCExpr *Expr) {
+  if (isa<MCSymbolRefExpr>(Expr))
+    return 1;
+
+  if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(Expr))
+    return countMCSymbolRefExpr(BExpr->getLHS()) +
+           countMCSymbolRefExpr(BExpr->getRHS());
+
+  if (const MCUnaryExpr *UExpr = dyn_cast<MCUnaryExpr>(Expr))
+    return countMCSymbolRefExpr(UExpr->getSubExpr());
+
+  return 0;
+}
+
+bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+                                            MCStreamer &Out,
+                                            const MCSubtargetInfo *STI) {
+  const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+
+  Inst.setLoc(IDLoc);
+
+  // Check branch instructions.
+  if (MCID.isBranch() || MCID.isCall()) {
+    const unsigned Opcode = Inst.getOpcode();
+    MCOperand Offset;
+    bool check = true;
+    unsigned OffsetOpndIdx, OffsetOpndWidth;
+    switch (Opcode) {
+    default:
+      check = false;
+      break;
+    case LoongArch::BEQ:
+    case LoongArch::BNE:
+    case LoongArch::BLT:
+    case LoongArch::BGE:
+    case LoongArch::BLTU:
+    case LoongArch::BGEU:
+      OffsetOpndIdx = 2;
+      OffsetOpndWidth = 16;
+      break;
+    case LoongArch::BEQZ:
+    case LoongArch::BNEZ:
+    case LoongArch::BCEQZ:
+    case LoongArch::BCNEZ:
+      OffsetOpndIdx = 1;
+      OffsetOpndWidth = 21;
+      break;
+    case LoongArch::B:
+    case LoongArch::BL:
+      OffsetOpndIdx = 0;
+      OffsetOpndWidth = 26;
+      break;
+    }
+    if (check) {
+      assert(MCID.getNumOperands() == OffsetOpndIdx + 1 &&
+             "unexpected number of operands");
+      Offset = Inst.getOperand(OffsetOpndIdx);
+      // Non-Imm situations will be dealt with later on when applying fixups.
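+      // Editor's worked example (assuming the field widths set above): BEQ
+      // carries a 16-bit offset field scaled by 4, so the byte offset must
+      // fit in 16 + 2 = 18 signed bits and be 4-byte aligned:
+      //   isIntN(18, 131068) -> true   (0x1FFFC, largest forward target)
+      //   isIntN(18, 131072) -> false  -> "branch target out of range"
+      //   offsetToAlignment(6, Align(4)) != 0 -> "branch to misaligned address"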
+      if (Offset.isImm()) {
+        if (!isIntN(OffsetOpndWidth + 2, Offset.getImm()))
+          return Error(IDLoc, "branch target out of range");
+        if (offsetToAlignment(Offset.getImm(), Align(1LL << 2)))
+          return Error(IDLoc, "branch to misaligned address");
+      }
+    }
+  }
+
+  bool IsPCRelativeLoad = (MCID.TSFlags & LoongArchII::IsPCRelativeLoad) != 0;
+  if ((MCID.mayLoad() || MCID.mayStore()) && !IsPCRelativeLoad) {
+    // Check the offset of the memory operand; if it is a symbol reference or
+    // an immediate we may have to expand instructions.
+    for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
+      const MCOperandInfo &OpInfo = MCID.OpInfo[i];
+      if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+          (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+        MCOperand &Op = Inst.getOperand(i);
+        if (Op.isImm()) {
+          int64_t MemOffset = Op.getImm();
+          if (MemOffset < -32768 || MemOffset > 32767) {
+            return getParser().hasPendingError();
+          }
+        } else if (Op.isExpr()) {
+          const MCExpr *Expr = Op.getExpr();
+          if (Expr->getKind() == MCExpr::SymbolRef) {
+            const MCSymbolRefExpr *SR =
+                static_cast<const MCSymbolRefExpr *>(Expr);
+            if (SR->getKind() == MCSymbolRefExpr::VK_None) {
+              return getParser().hasPendingError();
+            }
+          } else if (!isEvaluated(Expr)) {
+            return getParser().hasPendingError();
+          }
+        }
+      }
+    } // for
+  } // if load/store
+
+  MacroExpanderResultTy ExpandResult =
+      tryExpandInstruction(Inst, IDLoc, Out, STI);
+  switch (ExpandResult) {
+  case MER_NotAMacro:
+    Out.emitInstruction(Inst, *STI);
+    break;
+  case MER_Success:
+    break;
+  case MER_Fail:
+    return true;
+  }
+
+  return false;
+}
+
+LoongArchAsmParser::MacroExpanderResultTy
+LoongArchAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc,
+                                         MCStreamer &Out,
+                                         const MCSubtargetInfo *STI) {
+  switch (Inst.getOpcode()) {
+  default:
+    return MER_NotAMacro;
+  case LoongArch::LoadImm32:            // li.w $rd, $imm32
+  case LoongArch::LoadImm64:            // li.d $rd, $imm64
+    return expandLoadImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+  case LoongArch::LoadAddrLocal:        // la.local $rd, symbol
+  case LoongArch::LoadAddrGlobal:       // la.global $rd, symbol
+  case LoongArch::LoadAddrGlobal_Alias: // la $rd, symbol
+  case LoongArch::LoadAddrTLS_LE:       // la.tls.le $rd, symbol
+  case LoongArch::LoadAddrTLS_IE:       // la.tls.ie $rd, symbol
+  case LoongArch::LoadAddrTLS_LD:       // la.tls.ld $rd, symbol
+  case LoongArch::LoadAddrTLS_GD:       // la.tls.gd $rd, symbol
+    return expandLoadAddress(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+  }
+}
+
+/// Can the value be represented by an unsigned N-bit value and a shift left?
+template <unsigned N> static bool isShiftedUIntAtAnyPosition(uint64_t x) {
+  unsigned BitNum = findFirstSet(x);
+
+  return (x == x >> BitNum << BitNum) && isUInt<N>(x >> BitNum);
+}
+
+bool LoongArchAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+                                       MCStreamer &Out,
+                                       const MCSubtargetInfo *STI) {
+  const int64_t Imm = Inst.getOperand(1).getImm();
+  const unsigned DstReg = Inst.getOperand(0).getReg();
+  LoongArchTargetStreamer &TOut = getTargetStreamer();
+  bool Is64Bit = Inst.getOpcode() == LoongArch::LoadImm64;
+  unsigned SrcReg = Is64Bit ? LoongArch::ZERO_64 : LoongArch::ZERO;
+  LoongArchAnalyzeImmediate::InstSeq Seq =
+      LoongArchAnalyzeImmediate::generateInstSeq(
+          Is64Bit ?
Imm : SignExtend64<32>(Imm), Is64Bit); + + for (auto &Inst : Seq) { + if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) + TOut.emitRI(Inst.Opc, DstReg, Inst.Imm, IDLoc, STI); + else + TOut.emitRRI(Inst.Opc, DstReg, SrcReg, Inst.Imm, IDLoc, STI); + SrcReg = DstReg; + } + + return false; +} + +bool LoongArchAsmParser::expandLoadAddress(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + LoongArchTargetStreamer &TOut = getTargetStreamer(); + const MCExpr *SymExpr = Inst.getOperand(1).getExpr(); + const LoongArchMCExpr *HiExpr = nullptr; + const LoongArchMCExpr *LoExpr = nullptr; + const LoongArchMCExpr *HigherExpr = nullptr; + const LoongArchMCExpr *HighestExpr = nullptr; + const MCExpr *GotExpr = MCSymbolRefExpr::create( + "_GLOBAL_OFFSET_TABLE_", MCSymbolRefExpr::VK_None, getContext()); + unsigned DstReg = Inst.getOperand(0).getReg(); + + MCValue Res; + if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { + Error(IDLoc, "expected relocatable expression"); + return true; + } + if (Res.getSymB() != nullptr) { + Error(IDLoc, "expected relocatable expression with only one symbol"); + return true; + } + + switch (Inst.getOpcode()) { + case LoongArch::LoadAddrLocal: + HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_PCREL_LO, SymExpr, + getContext()); + + TOut.emitRX(LoongArch::PCADDU12I_ri, DstReg, MCOperand::createExpr(HiExpr), + IDLoc, STI); + TOut.emitRRX(LoongArch::ADDI_D_rri, DstReg, DstReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + return false; + case LoongArch::LoadAddrGlobal: + case LoongArch::LoadAddrGlobal_Alias: + HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_GOT_LO, SymExpr, + getContext()); + TOut.emitRXX(LoongArch::PCADDU12I_rii, DstReg, + MCOperand::createExpr(HiExpr), MCOperand::createExpr(GotExpr), + IDLoc, STI); + TOut.emitRRXX(LoongArch::LD_D_rrii, DstReg, DstReg, + MCOperand::createExpr(LoExpr), MCOperand::createExpr(GotExpr), + IDLoc, STI); + return false; + case LoongArch::LoadAddrTLS_LE: + HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_LO, SymExpr, + getContext()); + HigherExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHER, + SymExpr, getContext()); + HighestExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSLE_HIGHEST, + SymExpr, getContext()); + TOut.emitRX(LoongArch::LU12I_W_ri, DstReg, MCOperand::createExpr(HiExpr), + IDLoc, STI); + TOut.emitRRX(LoongArch::ORI_rri, DstReg, DstReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + TOut.emitRX(LoongArch::LU32I_D_ri, DstReg, + MCOperand::createExpr(HigherExpr), IDLoc, STI); + TOut.emitRRX(LoongArch::LU52I_D_rri, DstReg, DstReg, + MCOperand::createExpr(HighestExpr), IDLoc, STI); + return false; + case LoongArch::LoadAddrTLS_IE: + HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSIE_LO, SymExpr, + getContext()); + TOut.emitRXX(LoongArch::PCADDU12I_rii, DstReg, + MCOperand::createExpr(HiExpr), MCOperand::createExpr(GotExpr), + IDLoc, STI); + TOut.emitRRXX(LoongArch::LD_D_rrii, DstReg, DstReg, + MCOperand::createExpr(LoExpr), MCOperand::createExpr(GotExpr), + IDLoc, STI); + return false; + case LoongArch::LoadAddrTLS_LD: + case LoongArch::LoadAddrTLS_GD: 
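+    // Editor's sketch (assembly syntax is illustrative, not taken from the
+    // patch): both TLS models expand to a GOT-relative pair, roughly
+    //   la.tls.gd $a0, sym
+    // becoming
+    //   pcaddu12i $a0, <tlsgd-hi part of sym, via _GLOBAL_OFFSET_TABLE_>
+    //   addi.d    $a0, $a0, <tlsgd-lo part>
+    // leaving in $a0 the GOT-slot address that the usual general-dynamic
+    // scheme then hands to __tls_get_addr.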
+ HiExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_HI, SymExpr, + getContext()); + LoExpr = LoongArchMCExpr::create(LoongArchMCExpr::MEK_TLSGD_LO, SymExpr, + getContext()); + TOut.emitRXX(LoongArch::PCADDU12I_rii, DstReg, + MCOperand::createExpr(HiExpr), MCOperand::createExpr(GotExpr), + IDLoc, STI); + TOut.emitRRXX(LoongArch::ADDI_D_rrii, DstReg, DstReg, + MCOperand::createExpr(LoExpr), MCOperand::createExpr(GotExpr), + IDLoc, STI); + return false; + default: + llvm_unreachable(""); + } +} + +unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) { + switch (Inst.getOpcode()) { + case LoongArch::BSTRINS_W: + case LoongArch::BSTRPICK_W: { + assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && + "Operands must be immediates for bstrins.w/bstrpick.w!"); + const signed Msbw = Inst.getOperand(2).getImm(); + const signed Lsbw = Inst.getOperand(3).getImm(); + if (Msbw < Lsbw) + return Match_MsbHigherThanLsb; + if ((Lsbw < 0) || (Msbw > 31)) + return Match_RequiresRange0_31; + return Match_Success; + } + case LoongArch::BSTRINS_D: + case LoongArch::BSTRPICK_D: { + assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && + "Operands must be immediates for bstrins.d/bstrpick.d!"); + const signed Msbd = Inst.getOperand(2).getImm(); + const signed Lsbd = Inst.getOperand(3).getImm(); + if (Msbd < Lsbd) + return Match_MsbHigherThanLsb; + if ((Lsbd < 0) || (Msbd > 63)) + return Match_RequiresRange0_63; + return Match_Success; + } + case LoongArch::CSRXCHG32: + case LoongArch::CSRXCHG: + if (Inst.getOperand(2).getReg() == LoongArch::ZERO || + Inst.getOperand(2).getReg() == LoongArch::ZERO_64) + return Match_RequiresNoZeroRegister; + if (Inst.getOperand(2).getReg() == LoongArch::RA || + Inst.getOperand(2).getReg() == LoongArch::RA_64) + return Match_RequiresNoRaRegister; + return Match_Success; + } + + return Match_Success; +} + +static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands, + uint64_t ErrorInfo) { + if (ErrorInfo != ~0ULL && ErrorInfo < Operands.size()) { + SMLoc ErrorLoc = Operands[ErrorInfo]->getStartLoc(); + if (ErrorLoc == SMLoc()) + return Loc; + return ErrorLoc; + } + return Loc; +} + +bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; + unsigned MatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + switch (MatchResult) { + case Match_Success: + if (processInstruction(Inst, IDLoc, Out, STI)) + return true; + return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0ULL) { if (ErrorInfo >= Operands.size()) - return Error(ErrorLoc, "too few operands for instruction"); + return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc(); + ErrorLoc = Operands[ErrorInfo]->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } + return Error(ErrorLoc, "invalid operand for instruction"); } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction"); + case Match_RequiresNoZeroRegister: + return Error(IDLoc, "invalid operand ($zero) for instruction"); + case Match_RequiresNoRaRegister: + return Error(IDLoc, "invalid operand ($r1) for instruction"); + case Match_InvalidImm0_3: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + 
"immediate must be an integer in range [0, 3]."); + case Match_InvalidImm0_7: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 7]."); + case Match_InvalidImm0_31: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 31]."); + case Match_InvalidImm0_63: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 63]."); + case Match_InvalidImm0_4095: + case Match_UImm12_Relaxed: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 4095]."); + case Match_InvalidImm0_32767: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "immediate must be an integer in range [0, 32767]."); + case Match_UImm16_Relaxed: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 16-bit unsigned immediate"); + case Match_UImm20_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 20-bit unsigned immediate"); + case Match_UImm26_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 26-bit unsigned immediate"); + case Match_UImm32_Coerced: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 32-bit immediate"); + case Match_InvalidSImm2: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 2-bit signed immediate"); + case Match_InvalidSImm3: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 3-bit signed immediate"); + case Match_InvalidSImm5: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 5-bit signed immediate"); + case Match_InvalidSImm8: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 8-bit signed immediate"); + case Match_InvalidSImm12: + case Match_SImm12_Relaxed: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 12-bit signed immediate"); + case Match_InvalidSImm14: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 14-bit signed immediate"); + case Match_InvalidSImm15: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 15-bit signed immediate"); + case Match_InvalidSImm16: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 16-bit signed immediate"); + case Match_InvalidSImm20: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 20-bit signed immediate"); + case Match_InvalidSImm21: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 21-bit signed immediate"); + case Match_InvalidSImm26: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 26-bit signed immediate"); + case Match_SImm32: + case Match_SImm32_Relaxed: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 32-bit signed immediate"); + case Match_MemSImm14: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected memory with 14-bit signed offset"); + case Match_MemSImmPtr: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected memory with 32-bit signed offset"); + case Match_UImm2_1: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 1 .. 
4"); + case Match_MemSImm14Lsl2: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected memory with 16-bit signed offset and multiple of 4"); + case Match_RequiresRange0_31: { + SMLoc ErrorStart = Operands[3]->getStartLoc(); + SMLoc ErrorEnd = Operands[4]->getEndLoc(); + return Error(ErrorStart, "from lsbw to msbw are not in the range 0 .. 31", + SMRange(ErrorStart, ErrorEnd)); + } + case Match_RequiresPosSizeUImm6: { + SMLoc ErrorStart = Operands[3]->getStartLoc(); + SMLoc ErrorEnd = Operands[4]->getEndLoc(); + return Error(ErrorStart, "size plus position are not in the range 1 .. 63", + SMRange(ErrorStart, ErrorEnd)); + } + case Match_RequiresRange0_63: { + SMLoc ErrorStart = Operands[3]->getStartLoc(); + SMLoc ErrorEnd = Operands[4]->getEndLoc(); + return Error(ErrorStart, "from lsbd to msbd are not in the range 0 .. 63", + SMRange(ErrorStart, ErrorEnd)); + } + case Match_MsbHigherThanLsb: { + SMLoc ErrorStart = Operands[3]->getStartLoc(); + SMLoc ErrorEnd = Operands[4]->getEndLoc(); + return Error(ErrorStart, "msb are not higher than lsb", SMRange(ErrorStart, ErrorEnd)); + } + case Match_MemZeroOff: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected memory with constant 0 offset"); } - // Handle the case when the error message is of specific type - // other than the generic Match_InvalidOperand, and the - // corresponding operand is missing. - if (Result > FIRST_TARGET_MATCH_RESULT_TY) { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0ULL && ErrorInfo >= Operands.size()) - return Error(ErrorLoc, "too few operands for instruction"); + llvm_unreachable("Implement any new match types added!"); +} + +/* + * Note: The implementation of this function must be sync with the definition + * of GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td + */ +int LoongArchAsmParser::matchCPURegisterName(StringRef Name) { + int CC; + + CC = StringSwitch(Name) + .Cases("zero", "r0", 0) + .Cases("a0", "v0", "r4", 1) + .Cases("a1", "v1", "r5", 2) + .Cases("a2", "r6", 3) + .Cases("a3", "r7", 4) + .Cases("a4", "r8", 5) + .Cases("a5", "r9", 6) + .Cases("a6", "r10", 7) + .Cases("a7", "r11", 8) + .Cases("t0", "r12", 9) + .Cases("t1", "r13", 10) + .Cases("t2", "r14", 11) + .Cases("t3", "r15", 12) + .Cases("t4", "r16", 13) + .Cases("t5", "r17", 14) + .Cases("t6", "r18", 15) + .Cases("t7", "r19", 16) + .Cases("t8", "r20", 17) + .Cases("s0", "r23", 18) + .Cases("s1", "r24", 19) + .Cases("s2", "r25", 20) + .Cases("s3", "r26", 21) + .Cases("s4", "r27", 22) + .Cases("s5", "r28", 23) + .Cases("s6", "r29", 24) + .Cases("s7", "r30", 25) + .Cases("s8", "r31", 26) + .Cases("ra", "r1", 27) + .Cases("tp", "r2", 28) + .Cases("sp", "r3", 29) + .Case("r21", 30) + .Cases("fp", "r22", 31) + .Default(-1); + + return CC; +} + +int LoongArchAsmParser::matchFPURegisterName(StringRef Name) { + if (Name[0] == 'f') { + int CC; + + CC = StringSwitch(Name) + .Cases("f0", "fa0", "fv0", 0) + .Cases("f1", "fa1", "fv1", 1) + .Cases("f2", "fa2", 2) + .Cases("f3", "fa3", 3) + .Cases("f4", "fa4", 4) + .Cases("f5", "fa5", 5) + .Cases("f6", "fa6", 6) + .Cases("f7", "fa7", 7) + .Cases("f8", "ft0", 8) + .Cases("f9", "ft1", 9) + .Cases("f10", "ft2", 10) + .Cases("f11", "ft3", 11) + .Cases("f12", "ft4", 12) + .Cases("f13", "ft5", 13) + .Cases("f14", "ft6", 14) + .Cases("f15", "ft7", 15) + .Cases("f16", "ft8", 16) + .Cases("f17", "ft9", 17) + .Cases("f18", "ft10", 18) + .Cases("f19", "ft11", 19) + .Cases("f20", "ft12", 20) + .Cases("f21", "ft13", 21) + .Cases("f22", "ft14", 22) + .Cases("f23", "ft15", 23) + 
.Cases("f24", "fs0", 24) + .Cases("f25", "fs1", 25) + .Cases("f26", "fs2", 26) + .Cases("f27", "fs3", 27) + .Cases("f28", "fs4", 28) + .Cases("f29", "fs5", 29) + .Cases("f30", "fs6", 30) + .Cases("f31", "fs7", 31) + .Default(-1); + + return CC; } + return -1; +} - switch (Result) { - default: - break; - case Match_RequiresMsbNotLessThanLsb: { - SMLoc ErrorStart = Operands[3]->getStartLoc(); - return Error(ErrorStart, "msb is less than lsb", - SMRange(ErrorStart, Operands[4]->getEndLoc())); - } - case Match_RequiresOpnd2NotR0R1: - return Error(Operands[2]->getStartLoc(), "must not be $r0 or $r1"); - case Match_InvalidUImm2: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, - /*Upper=*/(1 << 2) - 1); - case Match_InvalidUImm2plus1: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/1, - /*Upper=*/(1 << 2)); - case Match_InvalidUImm3: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, - /*Upper=*/(1 << 3) - 1); - case Match_InvalidUImm5: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, - /*Upper=*/(1 << 5) - 1); - case Match_InvalidUImm6: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, - /*Upper=*/(1 << 6) - 1); - case Match_InvalidUImm12: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, - /*Upper=*/(1 << 12) - 1); - case Match_InvalidUImm15: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, - /*Upper=*/(1 << 15) - 1); - case Match_InvalidSImm12: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 11), - /*Upper=*/(1 << 11) - 1); - case Match_InvalidSImm14lsl2: - return generateImmOutOfRangeError( - Operands, ErrorInfo, /*Lower=*/-(1 << 15), /*Upper=*/(1 << 15) - 4, - "immediate must be a multiple of 4 in the range"); - case Match_InvalidSImm16: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 15), - /*Upper=*/(1 << 15) - 1); - case Match_InvalidSImm16lsl2: - return generateImmOutOfRangeError( - Operands, ErrorInfo, /*Lower=*/-(1 << 17), /*Upper=*/(1 << 17) - 4, - "immediate must be a multiple of 4 in the range"); - case Match_InvalidSImm20: - return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 19), - /*Upper=*/(1 << 19) - 1); - case Match_InvalidSImm21lsl2: - return generateImmOutOfRangeError( - Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, - "immediate must be a multiple of 4 in the range"); - case Match_InvalidSImm26lsl2: - return generateImmOutOfRangeError( - Operands, ErrorInfo, /*Lower=*/-(1 << 27), /*Upper=*/(1 << 27) - 4, - "immediate must be a multiple of 4 in the range"); +int LoongArchAsmParser::matchFCFRRegisterName(StringRef Name) { + if (Name.startswith("fcc")) { + StringRef NumString = Name.substr(3); + unsigned IntVal; + if (NumString.getAsInteger(10, IntVal)) + return -1; // This is not an integer. + if (IntVal > 7) // There are only 8 fcc registers. + return -1; + return IntVal; + } + return -1; +} + +int LoongArchAsmParser::matchFCSRRegisterName(StringRef Name) { + if (Name.startswith("fcsr")) { + StringRef NumString = Name.substr(4); + unsigned IntVal; + if (NumString.getAsInteger(10, IntVal)) + return -1; // This is not an integer. + if (IntVal > 3) // There are only 4 fcsr registers. 
+      return -1;
+    return IntVal;
+  }
+  return -1;
+}
+
+int LoongArchAsmParser::matchLSX128RegisterName(StringRef Name) {
+  unsigned IntVal;
+
+  if (Name.front() != 'v' || Name.drop_front(2).getAsInteger(10, IntVal))
+    return -1;
+
+  if (IntVal > 31)
+    return -1;
+
+  return IntVal;
+}
+
+int LoongArchAsmParser::matchLASX256RegisterName(StringRef Name) {
+  unsigned IntVal;
+
+  if (Name.front() != 'x' || Name.drop_front(2).getAsInteger(10, IntVal))
+    return -1;
+
+  if (IntVal > 31)
+    return -1;
+
+  return IntVal;
+}
+
+bool LoongArchAsmParser::parseOperand(OperandVector &Operands,
+                                      StringRef Mnemonic) {
+  MCAsmParser &Parser = getParser();
+  LLVM_DEBUG(dbgs() << "parseOperand\n");
+
+  // Check if the current operand has a custom associated parser; if so, try
+  // to custom parse the operand, or fall back to the general approach.
+  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+  if (ResTy == MatchOperand_Success)
+    return false;
+  // If there wasn't a custom match, try the generic matcher below. Otherwise,
+  // there was a match, but an error occurred, in which case, just return that
+  // the operand parsing failed.
+  if (ResTy == MatchOperand_ParseFail)
+    return true;
+
+  LLVM_DEBUG(dbgs() << ".. Generic Parser\n");
+
+  switch (getLexer().getKind()) {
+  case AsmToken::Dollar: {
+    // Parse the register.
+    SMLoc S = Parser.getTok().getLoc();
+
+    // Almost all registers have been parsed by custom parsers. There is only
+    // one exception to this. $zero (and its alias $0) will reach this point
+    // for div, divu, and similar instructions because it is not an operand
+    // to the instruction definition but an explicit register. Special case
+    // this situation for now.
+    if (parseAnyRegister(Operands) != MatchOperand_NoMatch)
+      return false;
+
+    // Maybe it is a symbol reference.
+    StringRef Identifier;
+    if (Parser.parseIdentifier(Identifier))
+      return true;
+
+    SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+    MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier);
+    // Otherwise create a symbol reference.
+    const MCExpr *Res =
+        MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+
+    Operands.push_back(LoongArchOperand::CreateImm(Res, S, E, *this));
+    return false;
+  }
+  default: {
+    LLVM_DEBUG(dbgs() << ".. generic integer expression\n");
+
+    const MCExpr *Expr;
+    SMLoc S = Parser.getTok().getLoc(); // Start location of the operand.
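+    // Editor's note: anything that is not a '$'-prefixed register token falls
+    // through to this generic case and is parsed as a full MCExpr, so a
+    // hypothetical operand such as
+    //   addi.d $a0, $a0, (1 << 11) - 1
+    // is accepted here and folded to a constant when later evaluated.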
+ if (getParser().parseExpression(Expr)) + return true; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + Operands.push_back(LoongArchOperand::CreateImm(Expr, S, E, *this)); + return false; } - llvm_unreachable("Unknown match type detected!"); + } // switch(getLexer().getKind()) + return true; +} + +bool LoongArchAsmParser::isEvaluated(const MCExpr *Expr) { + switch (Expr->getKind()) { + case MCExpr::Constant: + return true; + case MCExpr::SymbolRef: + return (cast(Expr)->getKind() != MCSymbolRefExpr::VK_None); + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Expr); + if (!isEvaluated(BE->getLHS())) + return false; + return isEvaluated(BE->getRHS()); + } + case MCExpr::Unary: + return isEvaluated(cast(Expr)->getSubExpr()); + case MCExpr::Target: + return true; + } + return false; +} + +bool LoongArchAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + return tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success; +} + +OperandMatchResultTy LoongArchAsmParser::tryParseRegister(unsigned &RegNo, + SMLoc &StartLoc, + SMLoc &EndLoc) { + SmallVector, 1> Operands; + OperandMatchResultTy ResTy = parseAnyRegister(Operands); + if (ResTy == MatchOperand_Success) { + assert(Operands.size() == 1); + LoongArchOperand &Operand = static_cast(*Operands.front()); + StartLoc = Operand.getStartLoc(); + EndLoc = Operand.getEndLoc(); + + // AFAIK, we only support numeric registers and named GPR's in CFI + // directives. + // Don't worry about eating tokens before failing. Using an unrecognised + // register is a parse error. + if (Operand.isGPRAsmReg()) { + // Resolve to GPR32 or GPR64 appropriately. + RegNo = is64Bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg(); + } + + return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch + : MatchOperand_Success; + } + + assert(Operands.size() == 0); + return (RegNo == (unsigned)-1) ? MatchOperand_NoMatch : MatchOperand_Success; +} + +bool LoongArchAsmParser::parseMemOffset(const MCExpr *&Res) { + return getParser().parseExpression(Res); +} + +OperandMatchResultTy +LoongArchAsmParser::parseMemOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseMemOperand\n"); + const MCExpr *IdVal = nullptr; + SMLoc S; + OperandMatchResultTy Res = MatchOperand_NoMatch; + // First operand is the base. + S = Parser.getTok().getLoc(); + + Res = parseAnyRegister(Operands); + if (Res != MatchOperand_Success) + return Res; + + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "',' expected"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat the ',' token. + + if (parseMemOffset(IdVal)) + return MatchOperand_ParseFail; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + // Replace the register operand with the memory operand. + std::unique_ptr op( + static_cast(Operands.back().release())); + // Remove the register from the operands. + // "op" will be managed by k_Memory. + Operands.pop_back(); + + // when symbol not defined, error report. + if (dyn_cast(IdVal)) { + return MatchOperand_ParseFail; + } + + // Add the memory operand. 
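+  // Editor's note: a foldable offset expression is canonicalised to a bare
+  // MCConstantExpr before the memory operand is built, e.g. a hypothetical
+  //   ld.d $a0, $sp, 8 + 8
+  // stores offset 16, so predicates like isMemWithSimmOffset only ever see
+  // a plain constant or a relocatable expression.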
+ if (dyn_cast(IdVal)) { + int64_t Imm; + if (IdVal->evaluateAsAbsolute(Imm)) + IdVal = MCConstantExpr::create(Imm, getContext()); + else + return MatchOperand_ParseFail; + } + + Operands.push_back(LoongArchOperand::CreateMem(std::move(op), IdVal, S, E, *this)); + return MatchOperand_Success; +} + +OperandMatchResultTy +LoongArchAsmParser::parseAMemOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseAMemOperand\n"); + const MCExpr *IdVal = nullptr; + SMLoc S; + OperandMatchResultTy Res = MatchOperand_NoMatch; + // First operand is the base. + S = Parser.getTok().getLoc(); + + Res = parseAnyRegister(Operands); + if (Res != MatchOperand_Success) + return Res; + + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + // AM* instructions allow an optional '0' memory offset. + if (Parser.getTok().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the ',' token. + + if (parseMemOffset(IdVal)) + return MatchOperand_ParseFail; + + // when symbol not defined, error report. + if (dyn_cast(IdVal)) + return MatchOperand_ParseFail; + + if (dyn_cast(IdVal)) { + int64_t Imm; + if (IdVal->evaluateAsAbsolute(Imm)) { + assert(Imm == 0 && "imm must be 0"); + IdVal = MCConstantExpr::create(Imm, getContext()); + } else { + return MatchOperand_ParseFail; + } + } + } else { + // Offset defaults to 0. + IdVal = MCConstantExpr::create(0, getContext()); + } + + // Replace the register operand with the memory operand. + std::unique_ptr op( + static_cast(Operands.back().release())); + // Remove the register from the operands. + // "op" will be managed by k_Memory. + Operands.pop_back(); + // Add the memory operand. + Operands.push_back( + LoongArchOperand::CreateMem(std::move(op), IdVal, S, E, *this)); + return MatchOperand_Success; +} + +bool LoongArchAsmParser::searchSymbolAlias(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCSymbol *Sym = getContext().lookupSymbol(Parser.getTok().getIdentifier()); + if (!Sym) + return false; + + SMLoc S = Parser.getTok().getLoc(); + if (Sym->isVariable()) { + const MCExpr *Expr = Sym->getVariableValue(); + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast(Expr); + StringRef DefSymbol = Ref->getSymbol().getName(); + if (DefSymbol.startswith("$")) { + OperandMatchResultTy ResTy = + matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); + return true; + } + if (ResTy == MatchOperand_ParseFail) + llvm_unreachable("Should never ParseFail"); + } + } + } else if (Sym->isUnset()) { + // If symbol is unset, it might be created in the `parseSetAssignment` + // routine as an alias for a numeric register name. + // Lookup in the aliases list. 
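+    // Editor's sketch: this resolves aliases recorded by parseSetAssignment,
+    // e.g. after the hypothetical directive
+    //   .set mytmp, $t0
+    // an operand written as "mytmp" is looked up in RegisterSets and matched
+    // as if the register token itself had been written.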
+ auto Entry = RegisterSets.find(Sym->getName()); + if (Entry != RegisterSets.end()) { + OperandMatchResultTy ResTy = + matchAnyRegisterWithoutDollar(Operands, Entry->getValue(), S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); + return true; + } + } + } + + return false; +} + +OperandMatchResultTy +LoongArchAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands, + StringRef Identifier, + SMLoc S) { + int Index = matchCPURegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createGPRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchFPURegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createFGRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchFCFRRegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createFCFRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchFCSRRegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createFCSRReg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchLSX128RegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createLSX128Reg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + Index = matchLASX256RegisterName(Identifier); + if (Index != -1) { + Operands.push_back(LoongArchOperand::createLASX256Reg( + Index, Identifier, getContext().getRegisterInfo(), S, + getLexer().getLoc(), *this)); + return MatchOperand_Success; + } + + return MatchOperand_NoMatch; +} + +OperandMatchResultTy +LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, + const AsmToken &Token, SMLoc S) { + if (Token.is(AsmToken::Identifier)) { + LLVM_DEBUG(dbgs() << ".. identifier\n"); + StringRef Identifier = Token.getIdentifier(); + OperandMatchResultTy ResTy = + matchAnyRegisterNameWithoutDollar(Operands, Identifier, S); + return ResTy; + } else if (Token.is(AsmToken::Integer)) { + LLVM_DEBUG(dbgs() << ".. integer\n"); + int64_t RegNum = Token.getIntVal(); + if (RegNum < 0 || RegNum > 31) { + // Show the error, but treat invalid register + // number as a normal one to continue parsing + // and catch other possible errors. + Error(getLexer().getLoc(), "invalid register number"); + } + Operands.push_back(LoongArchOperand::createNumericReg( + RegNum, Token.getString(), getContext().getRegisterInfo(), S, + Token.getLoc(), *this)); + return MatchOperand_Success; + } + + LLVM_DEBUG(dbgs() << Token.getKind() << "\n"); + + return MatchOperand_NoMatch; +} + +OperandMatchResultTy +LoongArchAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) { + auto Token = getLexer().peekTok(false); + return matchAnyRegisterWithoutDollar(Operands, Token, S); +} + +OperandMatchResultTy +LoongArchAsmParser::parseAnyRegister(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseAnyRegister\n"); + + auto Token = Parser.getTok(); + + SMLoc S = Token.getLoc(); + + if (Token.isNot(AsmToken::Dollar)) { + LLVM_DEBUG(dbgs() << ".. 
!$ -> try sym aliasing\n"); + if (Token.is(AsmToken::Identifier)) { + if (searchSymbolAlias(Operands)) + return MatchOperand_Success; + } + LLVM_DEBUG(dbgs() << ".. !symalias -> NoMatch\n"); + return MatchOperand_NoMatch; + } + LLVM_DEBUG(dbgs() << ".. $\n"); + + OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S); + if (ResTy == MatchOperand_Success) { + Parser.Lex(); // $ + Parser.Lex(); // identifier + } + return ResTy; +} + +OperandMatchResultTy +LoongArchAsmParser::parseJumpTarget(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "parseJumpTarget\n"); + + SMLoc S = getLexer().getLoc(); + + // Registers are a valid target and have priority over symbols. + OperandMatchResultTy ResTy = parseAnyRegister(Operands); + if (ResTy != MatchOperand_NoMatch) + return ResTy; + + // Integers and expressions are acceptable + const MCExpr *Expr = nullptr; + if (Parser.parseExpression(Expr)) { + // We have no way of knowing if a symbol was consumed so we must ParseFail + return MatchOperand_ParseFail; + } + Operands.push_back( + LoongArchOperand::CreateImm(Expr, S, getLexer().getLoc(), *this)); + return MatchOperand_Success; +} + +static std::string LoongArchMnemonicSpellCheck(StringRef S, + const FeatureBitset &FBS, + unsigned VariantID = 0); + +bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + LLVM_DEBUG(dbgs() << "ParseInstruction\n"); + + // We have reached first instruction, module directive are now forbidden. + getTargetStreamer().forbidModuleDirective(); + + // Check if we have valid mnemonic + if (!mnemonicIsValid(Name)) { + FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = LoongArchMnemonicSpellCheck(Name, FBS); + return Error(NameLoc, "unknown instruction" + Suggestion); + } + + // First operand in MCInst is instruction mnemonic. + Operands.push_back(LoongArchOperand::CreateToken(Name, NameLoc, *this)); + + // Read the remaining operands. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + // Parse and remember the operand. + if (parseOperand(Operands, Name)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token in argument list"); + } + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +// FIXME: Given that these have the same name, these should both be +// consistent on affecting the Parser. 
+bool LoongArchAsmParser::reportParseError(Twine ErrorMsg) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, ErrorMsg); +} + +bool LoongArchAsmParser::parseSetAssignment() { + StringRef Name; + const MCExpr *Value; + MCAsmParser &Parser = getParser(); + + if (Parser.parseIdentifier(Name)) + return reportParseError("expected identifier after .set"); + + if (getLexer().isNot(AsmToken::Comma)) + return reportParseError("unexpected token, expected comma"); + Lex(); // Eat comma + + if (!Parser.parseExpression(Value)) { + // Parse assignment of an expression including + // symbolic registers: + // .set $tmp, $BB0-$BB1 + // .set r2, $f2 + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + Sym->setVariableValue(Value); + } else { + return reportParseError("expected valid expression after comma"); + } + + return false; +} + +bool LoongArchAsmParser::parseDirectiveSet() { + const AsmToken &Tok = getParser().getTok(); + StringRef IdVal = Tok.getString(); + SMLoc Loc = Tok.getLoc(); + + if (IdVal == "bopt") { + Warning(Loc, "'bopt' feature is unsupported"); + getParser().Lex(); + return false; + } + if (IdVal == "nobopt") { + // We're already running in nobopt mode, so nothing to do. + getParser().Lex(); + return false; + } + + // It is just an identifier, look for an assignment. + return parseSetAssignment(); +} + +bool LoongArchAsmParser::ParseDirective(AsmToken DirectiveID) { + // This returns false if this function recognizes the directive + // regardless of whether it is successfully handles or reports an + // error. Otherwise it returns true to give the generic parser a + // chance at recognizing it. + + MCAsmParser &Parser = getParser(); + StringRef IDVal = DirectiveID.getString(); + + if (IDVal == ".end") { + while (getLexer().isNot(AsmToken::Eof)) + Parser.Lex(); + return false; + } + + if (IDVal == ".set") { + parseDirectiveSet(); + return false; + } + + if (IDVal == ".llvm_internal_loongarch_reallow_module_directive") { + parseInternalDirectiveReallowModule(); + return false; + } + + return true; +} + +bool LoongArchAsmParser::parseInternalDirectiveReallowModule() { + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + getTargetStreamer().reallowModuleDirective(); + + getParser().Lex(); // Eat EndOfStatement token. + return false; } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmParser() { RegisterMCAsmParser X(getTheLoongArch32Target()); - RegisterMCAsmParser Y(getTheLoongArch64Target()); + RegisterMCAsmParser A(getTheLoongArch64Target()); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER +#include "LoongArchGenAsmMatcher.inc" + +bool LoongArchAsmParser::mnemonicIsValid(StringRef Mnemonic) { + // Find the appropriate table for this asm variant. + const MatchEntry *Start, *End; + Start = std::begin(MatchTable0); + End = std::end(MatchTable0); + + // Search the table. 
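+  // Editor's note: MatchTable0 is emitted sorted by mnemonic, so one binary
+  // search answers "is this mnemonic defined for any operand signature":
+  //   mnemonicIsValid("add.w")   -> non-empty range -> true
+  //   mnemonicIsValid("bogus.w") -> first == second -> false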
+ auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode()); + return MnemonicRange.first != MnemonicRange.second; } diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt index 4d8e81aea420..8540b97ff761 100644 --- a/llvm/lib/Target/LoongArch/CMakeLists.txt +++ b/llvm/lib/Target/LoongArch/CMakeLists.txt @@ -1,14 +1,15 @@ -add_llvm_component_group(LoongArch) +add_llvm_component_group(LoongArch HAS_JIT) set(LLVM_TARGET_DEFINITIONS LoongArch.td) tablegen(LLVM LoongArchGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM LoongArchGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM LoongArchGenCallingConv.inc -gen-callingconv) tablegen(LLVM LoongArchGenDAGISel.inc -gen-dag-isel) tablegen(LLVM LoongArchGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM LoongArchGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM LoongArchGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM LoongArchGenRegisterInfo.inc -gen-register-info) tablegen(LLVM LoongArchGenSubtargetInfo.inc -gen-subtarget) @@ -16,14 +17,20 @@ add_public_tablegen_target(LoongArchCommonTableGen) add_llvm_target(LoongArchCodeGen LoongArchAsmPrinter.cpp - LoongArchFrameLowering.cpp + LoongArchCCState.cpp + LoongArchExpandPseudo.cpp LoongArchInstrInfo.cpp LoongArchISelDAGToDAG.cpp LoongArchISelLowering.cpp + LoongArchFrameLowering.cpp LoongArchMCInstLower.cpp + LoongArchMachineFunction.cpp + LoongArchModuleISelDAGToDAG.cpp LoongArchRegisterInfo.cpp LoongArchSubtarget.cpp LoongArchTargetMachine.cpp + LoongArchTargetObjectFile.cpp + LoongArchTargetTransformInfo.cpp LINK_COMPONENTS Analysis diff --git a/llvm/lib/Target/LoongArch/Disassembler/CMakeLists.txt b/llvm/lib/Target/LoongArch/Disassembler/CMakeLists.txt index 1cce676cfab8..864be6313072 100644 --- a/llvm/lib/Target/LoongArch/Disassembler/CMakeLists.txt +++ b/llvm/lib/Target/LoongArch/Disassembler/CMakeLists.txt @@ -2,10 +2,8 @@ add_llvm_component_library(LLVMLoongArchDisassembler LoongArchDisassembler.cpp LINK_COMPONENTS - LoongArchDesc - LoongArchInfo - MC MCDisassembler + LoongArchInfo Support ADD_TO_COMPONENT diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp index beb757c78596..6468a0fc8800 100644 --- a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp @@ -1,4 +1,4 @@ -//===-- LoongArchDisassembler.cpp - Disassembler for LoongArch ------------===// +//===- LoongArchDisassembler.cpp - Disassembler for LoongArch -----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,140 +6,935 @@ // //===----------------------------------------------------------------------===// // -// This file implements the LoongArchDisassembler class. +// This file is part of the LoongArch Disassembler. 
// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/LoongArchBaseInfo.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" -#include "TargetInfo/LoongArchTargetInfo.h" +#include "LoongArch.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" +#include +#include using namespace llvm; #define DEBUG_TYPE "loongarch-disassembler" -typedef MCDisassembler::DecodeStatus DecodeStatus; +using DecodeStatus = MCDisassembler::DecodeStatus; namespace { + class LoongArchDisassembler : public MCDisassembler { + public: LoongArchDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} + bool isFP64() const { return STI.getFeatureBits()[LoongArch::FeatureFP64Bit]; } + + bool is64Bit() const { return STI.getFeatureBits()[LoongArch::Feature64Bit]; } + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &CStream) const override; }; -} // end namespace -static MCDisassembler *createLoongArchDisassembler(const Target &T, - const MCSubtargetInfo &STI, - MCContext &Ctx) { +} // end anonymous namespace + +// Forward declare these because the autogenerated code will reference them. +// Definitions are further down. +static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo, + 
uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeBranchTarget(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeJumpTarget(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeAMem(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMemSimm14(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + +static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +template +static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder); + +template +static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder) { + return DecodeUImmWithOffsetAndScale(Inst, Value, Address, + Decoder); +} + +template +static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder); + +/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't +/// handle. +template +static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +namespace llvm { + +Target &getTheLoongArch32Target(); +Target &getTheLoongArch64Target(); + +} // end namespace llvm + +static MCDisassembler *createLoongArchDisassembler( + const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { return new LoongArchDisassembler(STI, Ctx); } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchDisassembler() { - // Register the disassembler for each target. + // Register the disassembler. 
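+  // Both targets are registered with the same factory below: one
+  // disassembler class serves LA32 and LA64, and getInstruction() picks
+  // the decoder table to try from the subtarget feature bits at decode time.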
  TargetRegistry::RegisterMCDisassembler(getTheLoongArch32Target(),
                                         createLoongArchDisassembler);
  TargetRegistry::RegisterMCDisassembler(getTheLoongArch64Target(),
                                         createLoongArchDisassembler);
}

-static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                           uint64_t Address,
-                                           const MCDisassembler *Decoder) {
-  if (RegNo >= 32)
+#include "LoongArchGenDisassemblerTables.inc"
+
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+  const LoongArchDisassembler *Dis =
+      static_cast<const LoongArchDisassembler *>(D);
+  const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo();
+  if (RC == LoongArch::GPR64RegClassID || RC == LoongArch::GPR32RegClassID) {
+    // Keep this table in sync with the GPR32/GPR64 register class order in
+    // LoongArchRegisterInfo.td, just like LoongArchAsmParser.cpp and
+    // LoongArchISelLowering.cpp do.
+    unsigned char indexes[] = { 0, 27, 28, 29,  1,  2,  3,  4,
+                                5,  6,  7,  8,  9, 10, 11, 12,
+                               13, 14, 15, 16, 17, 30, 31, 18,
+                               19, 20, 21, 22, 23, 24, 25, 26
+                              };
+    assert(RegNo < sizeof(indexes));
+    return *(RegInfo->getRegClass(RC).begin() + indexes[RegNo]);
+  }
+  return *(RegInfo->getRegClass(RC).begin() + RegNo);
+}
+
+template <typename InsnType>
+static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
+                                   const void *Decoder) {
+  using DecodeFN = DecodeStatus (*)(MCInst &, unsigned, uint64_t, const void *);
+
+  // The size of the n field depends on the element size; the register class
+  // to decode into also depends on it.
+  InsnType tmp = fieldFromInstruction(insn, 17, 5);
+  unsigned NSize = 0;
+  DecodeFN RegDecoder = nullptr;
+  if ((tmp & 0x18) == 0x00) {
+    NSize = 4;
+    RegDecoder = DecodeLSX128BRegisterClass;
+  } else if ((tmp & 0x1c) == 0x10) {
+    NSize = 3;
+    RegDecoder = DecodeLSX128HRegisterClass;
+  } else if ((tmp & 0x1e) == 0x18) {
+    NSize = 2;
+    RegDecoder = DecodeLSX128WRegisterClass;
+  } else if ((tmp & 0x1f) == 0x1c) {
+    NSize = 1;
+    RegDecoder = DecodeLSX128DRegisterClass;
+  } else
+    llvm_unreachable("Invalid encoding");
+
+  assert(NSize != 0 && RegDecoder != nullptr);
+
+  // $vd
+  tmp = fieldFromInstruction(insn, 6, 5);
+  if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+  // $vd_in
+  if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+  // $n
+  tmp = fieldFromInstruction(insn, 16, NSize);
+  MI.addOperand(MCOperand::createImm(tmp));
+  // $vs
+  tmp = fieldFromInstruction(insn, 11, 5);
+  if (RegDecoder(MI, tmp, Address, Decoder) == MCDisassembler::Fail)
+    return MCDisassembler::Fail;
+  // $n2
+  MI.addOperand(MCOperand::createImm(0));
+
+  return MCDisassembler::Success;
+}
+
+/// Read four bytes from the ArrayRef and return the 32-bit word.
+static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
+                                      uint64_t &Size, uint32_t &Insn) {
+  // We want to read exactly 4 bytes of data.
+  if (Bytes.size() < 4) {
+    Size = 0;
     return MCDisassembler::Fail;
-  Inst.addOperand(MCOperand::createReg(LoongArch::R0 + RegNo));
+  }
+
+  Insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) |
+         (Bytes[3] << 24);
+
   return MCDisassembler::Success;
 }

-static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
+DecodeStatus LoongArchDisassembler::getInstruction(MCInst &Instr,
+                                                   uint64_t &Size,
+                                                   ArrayRef<uint8_t> Bytes,
+                                                   uint64_t Address,
+                                                   raw_ostream &CStream) const {
+  uint32_t Insn;
+  DecodeStatus Result;
+  Size = 0;
+
+  // Attempt to read the instruction so that we can attempt to decode it.
If + // the buffer is not 4 bytes long, let the higher level logic figure out + // what to do with a size of zero and MCDisassembler::Fail. + Result = readInstruction32(Bytes, Address, Size, Insn); + if (Result == MCDisassembler::Fail) + return MCDisassembler::Fail; + + // The only instruction size for standard encoded LoongArch. + Size = 4; + + if (is64Bit()) { + LLVM_DEBUG(dbgs() << "Trying LoongArch (GPR64) table (32-bit opcodes):\n"); + Result = decodeInstruction(DecoderTableLoongArch32, Instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) + return Result; + } + + LLVM_DEBUG(dbgs() << "Trying LoongArch32 (GPR32) table (32-bit opcodes):\n"); + Result = decodeInstruction(DecoderTableLoongArch3232, Instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) + return Result; + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, - const MCDisassembler *Decoder) { - if (RegNo >= 32) + const void *Decoder) { + if (RegNo > 31) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::createReg(LoongArch::F0 + RegNo)); + + unsigned Reg = getReg(Decoder, LoongArch::GPR64RegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } -static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, +static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, - const MCDisassembler *Decoder) { - if (RegNo >= 32) + const void *Decoder) { + if (RegNo > 31) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::createReg(LoongArch::F0_64 + RegNo)); + unsigned Reg = getReg(Decoder, LoongArch::GPR32RegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } -static DecodeStatus DecodeCFRRegisterClass(MCInst &Inst, uint64_t RegNo, +static DecodeStatus DecodePtrRegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, - const MCDisassembler *Decoder) { - if (RegNo >= 8) + const void *Decoder) { + if (static_cast(Decoder)->is64Bit()) + return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder); + + return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::createReg(LoongArch::FCC0 + RegNo)); + unsigned Reg = getReg(Decoder, LoongArch::FGR64RegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } -static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, uint64_t RegNo, +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, LoongArch::FGR32RegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, - const MCDisassembler *Decoder) { - if (RegNo >= 4) + const void *Decoder) { + if (RegNo > 31) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::createReg(LoongArch::FCSR0 + RegNo)); + + unsigned Reg = getReg(Decoder, LoongArch::FCSRRegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } -template -static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, - int64_t Address, 
- const MCDisassembler *Decoder) { - assert(isUInt(Imm) && "Invalid immediate"); - Inst.addOperand(MCOperand::createImm(Imm + P)); +static DecodeStatus DecodeFCFRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 7) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, LoongArch::FCFRRegClassID, RegNo); + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; } -template -static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, - int64_t Address, - const MCDisassembler *Decoder) { - assert(isUInt(Imm) && "Invalid immediate"); - // Sign-extend the number in the bottom bits of Imm, then shift left - // bits. - Inst.addOperand(MCOperand::createImm(SignExtend64(Imm) << S)); +static DecodeStatus DecodeMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<12>((Insn >> 10) & 0xfff); + unsigned Reg = fieldFromInstruction(Insn, 0, 5); + unsigned Base = fieldFromInstruction(Insn, 5, 5); + + Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + if (Inst.getOpcode() == LoongArch::SC_W || + Inst.getOpcode() == LoongArch::SC_D) + Inst.addOperand(MCOperand::createReg(Reg)); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + return MCDisassembler::Success; } -#include "LoongArchGenDisassemblerTables.inc" +static DecodeStatus DecodeAMem(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(Insn, 0, 5); + unsigned Rj = fieldFromInstruction(Insn, 5, 5); + unsigned Rk = fieldFromInstruction(Insn, 10, 5); -DecodeStatus LoongArchDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - ArrayRef Bytes, - uint64_t Address, - raw_ostream &CS) const { - uint32_t Insn; - DecodeStatus Result; + Rd = getReg(Decoder, LoongArch::GPR32RegClassID, Rd); + Rj = getReg(Decoder, LoongArch::GPR32RegClassID, Rj); + Rk = getReg(Decoder, LoongArch::GPR32RegClassID, Rk); - // We want to read exactly 4 bytes of data because all LoongArch instructions - // are fixed 32 bits. - if (Bytes.size() < 4) { - Size = 0; + // Note the operands sequence is "rd,rk,rj". 
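+  // Rough field layout assumed here: bits [4:0] = rd, bits [9:5] = rj (the
+  // base address) and bits [14:10] = rk, so e.g. an amadd-style op prints
+  // the data register rk before the base register rj.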
+  Inst.addOperand(MCOperand::createReg(Rd));
+  Inst.addOperand(MCOperand::createReg(Rk));
+  Inst.addOperand(MCOperand::createReg(Rj));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemSimm14(MCInst &Inst,
+                                    unsigned Insn,
+                                    uint64_t Address,
+                                    const void *Decoder) {
+  // The offset is a signed 14-bit field (mem_simm14), so sign-extend all
+  // 14 bits, not just the low 12.
+  int Offset = SignExtend32<14>((Insn >> 10) & 0x3fff);
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  unsigned Base = fieldFromInstruction(Insn, 5, 5);
+
+  Reg = getReg(Decoder, LoongArch::GPR32RegClassID, Reg);
+  Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+  if (Inst.getOpcode() == LoongArch::SC_W ||
+      Inst.getOpcode() == LoongArch::SC_D)
+    Inst.addOperand(MCOperand::createReg(Reg));
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+  Inst.addOperand(MCOperand::createReg(Base));
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128Mem(MCInst &Inst, unsigned Insn,
+                                    uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  unsigned Base = fieldFromInstruction(Insn, 5, 5);
+  Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg);
+  Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+  Inst.addOperand(MCOperand::createReg(Base));
+
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128Mem13(MCInst &Inst, unsigned Insn,
+                                      uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128Mem10(MCInst &Inst, unsigned Insn,
+                                      uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256Mem13(MCInst &Inst, unsigned Insn,
+                                       uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<13>(fieldFromInstruction(Insn, 5, 13));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256Mem10(MCInst &Inst, unsigned Insn,
+                                       uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 5, 10));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128memstl(MCInst &Inst, unsigned Insn,
+                                       uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8));
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  unsigned Base = fieldFromInstruction(Insn, 5, 5);
+  Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg);
+  Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+  unsigned idx;
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+  Inst.addOperand(MCOperand::createReg(Base));
+
+  switch (Inst.getOpcode()) {
+  default:
+    assert(false && "Unexpected instruction");
+    return MCDisassembler::Fail;
+  case LoongArch::VSTELM_B:
+    Inst.addOperand(MCOperand::createImm(Offset));
+    idx = fieldFromInstruction(Insn, 18, 4);
+    Inst.addOperand(MCOperand::createImm(idx));
+    break;
+  case LoongArch::VSTELM_H:
+    Inst.addOperand(MCOperand::createImm(Offset * 2));
+    idx = fieldFromInstruction(Insn, 18, 3);
+    Inst.addOperand(MCOperand::createImm(idx));
+    break;
+  case LoongArch::VSTELM_W:
+    Inst.addOperand(MCOperand::createImm(Offset * 4));
+    idx = fieldFromInstruction(Insn, 18, 2);
+    Inst.addOperand(MCOperand::createImm(idx));
+    break;
+  case LoongArch::VSTELM_D:
+    Inst.addOperand(MCOperand::createImm(Offset * 8));
+    idx = fieldFromInstruction(Insn, 18, 1);
+    Inst.addOperand(MCOperand::createImm(idx));
+    break;
+  }
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128memlsl(MCInst &Inst, unsigned Insn,
+                                       uint64_t Address, const void *Decoder) {
+  int Offset;
+  unsigned Reg, Base;
+  switch (Inst.getOpcode()) {
+  default:
+    assert(false && "Unexpected instruction");
+    return MCDisassembler::Fail;
+  case LoongArch::VLDREPL_B:
+    Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12));
+    Reg = fieldFromInstruction(Insn, 0, 5);
+    Base = fieldFromInstruction(Insn, 5, 5);
+    Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, Reg);
+    Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+    Inst.addOperand(MCOperand::createReg(Reg));
+    Inst.addOperand(MCOperand::createReg(Base));
+
+    Inst.addOperand(MCOperand::createImm(Offset));
+    break;
+  case LoongArch::VLDREPL_H:
+    Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11));
+    Reg = fieldFromInstruction(Insn, 0, 5);
+    Base = fieldFromInstruction(Insn, 5, 5);
+    Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, Reg);
+    Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+    Inst.addOperand(MCOperand::createReg(Reg));
+    Inst.addOperand(MCOperand::createReg(Base));
+    Inst.addOperand(MCOperand::createImm(Offset * 2));
+    break;
+  case LoongArch::VLDREPL_W:
+    Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10));
+    Reg = fieldFromInstruction(Insn, 0, 5);
+    Base = fieldFromInstruction(Insn, 5, 5);
+    Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, Reg);
+    Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+    Inst.addOperand(MCOperand::createReg(Reg));
+    Inst.addOperand(MCOperand::createReg(Base));
+    Inst.addOperand(MCOperand::createImm(Offset * 4));
+    break;
+  case LoongArch::VLDREPL_D:
+    Offset = SignExtend32<9>(fieldFromInstruction(Insn, 10, 9));
+    Reg = fieldFromInstruction(Insn, 0, 5);
+    Base = fieldFromInstruction(Insn, 5, 5);
+    // vldrepl.d replicates a doubleword; decode into the D register class.
+    Reg = getReg(Decoder, LoongArch::LSX128DRegClassID, Reg);
+    Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+    Inst.addOperand(MCOperand::createReg(Reg));
+    Inst.addOperand(MCOperand::createReg(Base));
+    Inst.addOperand(MCOperand::createImm(Offset * 8));
+    break;
+  }
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256Mem(MCInst &Inst, unsigned Insn,
+                                     uint64_t Address, const void *Decoder) {
+  int Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12));
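+  // The si12 field here is a plain (unscaled) byte offset, e.g. an encoded
+  // 0xfff decodes to -1; only the element-wise *stelm/*ldrepl forms scale
+  // the offset by the element size.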
+ unsigned Reg = fieldFromInstruction(Insn, 0, 5); + unsigned Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLASX256memstl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + int Offset = SignExtend32<8>(fieldFromInstruction(Insn, 10, 8)); + unsigned Reg = fieldFromInstruction(Insn, 0, 5); + unsigned Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + unsigned idx; + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + + switch (Inst.getOpcode()) { + default: + assert(false && "Unexpected instruction"); + return MCDisassembler::Fail; + break; + case LoongArch::XVSTELM_B: + Inst.addOperand(MCOperand::createImm(Offset)); + idx = fieldFromInstruction(Insn, 18, 5); + Inst.addOperand(MCOperand::createImm(idx)); + break; + case LoongArch::XVSTELM_H: + Inst.addOperand(MCOperand::createImm(Offset * 2)); + idx = fieldFromInstruction(Insn, 18, 4); + Inst.addOperand(MCOperand::createImm(idx)); + break; + case LoongArch::XVSTELM_W: + Inst.addOperand(MCOperand::createImm(Offset * 4)); + idx = fieldFromInstruction(Insn, 18, 3); + Inst.addOperand(MCOperand::createImm(idx)); + break; + case LoongArch::XVSTELM_D: + Inst.addOperand(MCOperand::createImm(Offset * 8)); + idx = fieldFromInstruction(Insn, 18, 2); + Inst.addOperand(MCOperand::createImm(idx)); + break; + } + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLASX256memlsl(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { - return Result; + int Offset; + unsigned Reg, Base; + switch (Inst.getOpcode()) { + default: + assert(false && "Unexpected instruction"); + return MCDisassembler::Fail; + break; + case LoongArch::XVLDREPL_B: + + Offset = SignExtend32<12>(fieldFromInstruction(Insn, 10, 12)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + + Inst.addOperand(MCOperand::createImm(Offset)); + break; + case LoongArch::XVLDREPL_H: + + Offset = SignExtend32<11>(fieldFromInstruction(Insn, 10, 11)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset * 2)); + break; + case LoongArch::XVLDREPL_W: + + Offset = SignExtend32<10>(fieldFromInstruction(Insn, 10, 10)); + Reg = fieldFromInstruction(Insn, 0, 5); + Base = fieldFromInstruction(Insn, 5, 5); + Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, Reg); + Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset * 4)); + break; + case LoongArch::XVLDREPL_D: + + Offset = 
SignExtend32<9>(fieldFromInstruction(Insn, 10, 9));
+    Reg = fieldFromInstruction(Insn, 0, 5);
+    Base = fieldFromInstruction(Insn, 5, 5);
+    // xvldrepl.d replicates a doubleword; decode into the D register class.
+    Reg = getReg(Decoder, LoongArch::LASX256DRegClassID, Reg);
+    Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+    Inst.addOperand(MCOperand::createReg(Reg));
+    Inst.addOperand(MCOperand::createReg(Base));
+    Inst.addOperand(MCOperand::createImm(Offset * 8));
+    break;
+  }
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeFMem(MCInst &Inst,
+                               unsigned Insn,
+                               uint64_t Address,
+                               const void *Decoder) {
+  // The offset is a signed 12-bit field in bits [21:10]; mask with 0xfff so
+  // no neighboring field leaks into the offset.
+  int Offset = SignExtend32<12>((Insn >> 10) & 0xfff);
+  unsigned Reg = fieldFromInstruction(Insn, 0, 5);
+  unsigned Base = fieldFromInstruction(Insn, 5, 5);
+  Reg = getReg(Decoder, LoongArch::FGR64RegClassID, Reg);
+  Base = getReg(Decoder, LoongArch::GPR32RegClassID, Base);
+
+  Inst.addOperand(MCOperand::createReg(Reg));
+  Inst.addOperand(MCOperand::createReg(Base));
+  Inst.addOperand(MCOperand::createImm(Offset));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128BRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LSX128BRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128HRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LSX128HRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128WRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LSX128WRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLSX128DRegisterClass(MCInst &Inst, unsigned RegNo,
+                                               uint64_t Address,
+                                               const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LSX128DRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256BRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                uint64_t Address,
+                                                const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LASX256BRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256HRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                uint64_t Address,
+                                                const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LASX256HRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256WRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                uint64_t Address,
+                                                const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LASX256WRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256DRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                uint64_t Address,
+                                                const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+  unsigned Reg = getReg(Decoder, LoongArch::LASX256DRegClassID, RegNo);
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBranchTarget(MCInst &Inst,
+                                       unsigned Offset,
+                                       uint64_t Address,
+                                       const void *Decoder) {
+  int32_t BranchOffset;
+  // Similar to LoongArchAsmParser::processInstruction, decode the branch
+  // target for the different branch formats.
+  switch (Inst.getOpcode()) {
+  default:
+    llvm_unreachable("Unexpected branch instruction");
+  case LoongArch::BEQ:
+  case LoongArch::BNE:
+  case LoongArch::BLT:
+  case LoongArch::BGE:
+  case LoongArch::BLTU:
+  case LoongArch::BGEU:
+    BranchOffset = (SignExtend32<16>(Offset) * 4);
+    break;
+  case LoongArch::BEQZ:
+  case LoongArch::BNEZ:
+  case LoongArch::BCEQZ:
+  case LoongArch::BCNEZ:
+    BranchOffset = (SignExtend32<21>(Offset) * 4);
+    break;
+  case LoongArch::B:
+  case LoongArch::BL:
+    BranchOffset = (SignExtend32<26>(Offset) * 4);
+    break;
+  }
+  Inst.addOperand(MCOperand::createImm(BranchOffset));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeJumpTarget(MCInst &Inst,
+                                     unsigned Insn,
+                                     uint64_t Address,
+                                     const void *Decoder) {
+  // Bits [9:0] hold the high 10 bits and bits [25:10] the low 16 bits of
+  // the 26-bit jump offset; reassemble it, scale by 4, then sign-extend
+  // the 28-bit result.
+  unsigned hi10 = fieldFromInstruction(Insn, 0, 10);
+  unsigned lo16 = fieldFromInstruction(Insn, 10, 16);
+  int32_t JumpOffset = SignExtend32<28>((hi10 << 16 | lo16) << 2);
+  Inst.addOperand(MCOperand::createImm(JumpOffset));
+  return MCDisassembler::Success;
+}
+
+template <unsigned Bits, int Offset, int Scale>
+static DecodeStatus DecodeUImmWithOffsetAndScale(MCInst &Inst, unsigned Value,
+                                                 uint64_t Address,
+                                                 const void *Decoder) {
+  Value &= ((1 << Bits) - 1);
+  Value *= Scale;
+  Inst.addOperand(MCOperand::createImm(Value + Offset));
+  return MCDisassembler::Success;
+}
+
+template <unsigned Bits, int Offset, int ScaleBy>
+static DecodeStatus DecodeSImmWithOffsetAndScale(MCInst &Inst, unsigned Value,
+                                                 uint64_t Address,
+                                                 const void *Decoder) {
+  int32_t Imm = SignExtend32<Bits>(Value) * ScaleBy;
+  Inst.addOperand(MCOperand::createImm(Imm + Offset));
+  return MCDisassembler::Success;
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h
index e6c9c24dd1b2..73fd4a628edb 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.h
+++ b/llvm/lib/Target/LoongArch/LoongArch.h
@@ -1,4 +1,4 @@
-//===-- LoongArch.h - Top-level interface for LoongArch ---------*- C++ -*-===//
+//===-- LoongArch.h - Top-level interface for LoongArch back-end -*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,33 +6,32 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the entry points for global functions defined in the LLVM
-// LoongArch back-end.
+// This file contains the entry points for global functions defined in
+// the LLVM LoongArch back-end.
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
 #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H

-#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"

 namespace llvm {
-class LoongArchTargetMachine;
-class AsmPrinter;
-class FunctionPass;
-class MCInst;
-class MCOperand;
-class MachineInstr;
-class MachineOperand;
+  class LoongArchTargetMachine;
+  class ModulePass;
+  class FunctionPass;
+  class LoongArchSubtarget;
+  class InstructionSelector;
+  class PassRegistry;

-bool lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
-                                        AsmPrinter &AP);
-bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
-                                             MCOperand &MCOp,
-                                             const AsmPrinter &AP);
+  FunctionPass *createLoongArchModuleISelDagPass();
+  FunctionPass *createLoongArchOptimizePICCallPass();
+  FunctionPass *createLoongArchBranchExpansion();
+  FunctionPass *createLoongArchExpandPseudoPass();

-FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
-} // end namespace llvm
+  void initializeLoongArchBranchExpansionPass(PassRegistry &);
+} // end namespace llvm

 #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index bf465c27ef99..703c1ba506e7 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -1,139 +1,107 @@
 //===-- LoongArch.td - Describe the LoongArch Target -------*- tablegen -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+// This is the top-level entry point for the LoongArch target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//

 include "llvm/Target/Target.td"

+// The overall idea of the PredicateControl class is to chop the Predicates
+// list into subsets that are usually overridden independently. This allows
+// subclasses to partially override the predicates of their superclasses
+// without having to re-add all the existing predicates.
+class PredicateControl {
+  // Predicates for the encoding scheme in use, such as HasStdEnc.
+  list<Predicate> EncodingPredicates = [];
+  // Predicates for the GPR size, such as is64Bit.
+  list<Predicate> GPRPredicates = [];
+  // Predicates for the FGR size and layout, such as IsFP64bit.
+  list<Predicate> FGRPredicates = [];
+  // Predicates for instruction group membership, such as ISAs.
+  list<Predicate> InsnPredicates = [];
+  // Predicate for the ISA extension that an instruction belongs to.
+  list<Predicate> ExtPredicate = [];
+  // Predicate for marking the instruction as usable in hard-float mode only.
+  list<Predicate> HardFloatPredicate = [];
+  // Predicates for anything else.
+  list<Predicate> AdditionalPredicates = [];
+  list<Predicate> Predicates = !listconcat(EncodingPredicates,
+                                           GPRPredicates,
+                                           FGRPredicates,
+                                           InsnPredicates,
+                                           HardFloatPredicate,
+                                           ExtPredicate,
+                                           AdditionalPredicates);
+}
+
+// Like Requires<> but for the AdditionalPredicates list.
+class AdditionalRequires<list<Predicate> preds> {
+  list<Predicate> AdditionalPredicates = preds;
+}
+
 //===----------------------------------------------------------------------===//
-// LoongArch subtarget features and instruction predicates.
+// LoongArch Subtarget features
 //
 //===----------------------------------------------------------------------===//

-// LoongArch is divided into two versions, the 32-bit version (LA32) and the
-// 64-bit version (LA64).
-def Feature64Bit
-    : SubtargetFeature<"64bit", "HasLA64", "true",
-                       "LA64 Basic Integer and Privilege Instruction Set">;
-def IsLA64
-    : Predicate<"Subtarget->is64Bit()">,
-      AssemblerPredicate<(all_of Feature64Bit),
-                         "LA64 Basic Integer and Privilege Instruction Set">;
-def IsLA32
-    : Predicate<"!Subtarget->is64Bit()">,
-      AssemblerPredicate<(all_of(not Feature64Bit)),
-                         "LA32 Basic Integer and Privilege Instruction Set">;
-
-defvar LA32 = DefaultMode;
-def LA64 : HwMode<"+64bit">;
-
-// Single Precision floating point
-def FeatureBasicF
-    : SubtargetFeature<"f", "HasBasicF", "true",
-                       "'F' (Single-Precision Floating-Point)">;
-def HasBasicF
-    : Predicate<"Subtarget->hasBasicF()">,
-      AssemblerPredicate<(all_of FeatureBasicF),
-                         "'F' (Single-Precision Floating-Point)">;
-
-// Double Precision floating point
-def FeatureBasicD
-    : SubtargetFeature<"d", "HasBasicD", "true",
-                       "'D' (Double-Precision Floating-Point)",
-                       [FeatureBasicF]>;
-def HasBasicD
-    : Predicate<"Subtarget->hasBasicD()">,
-      AssemblerPredicate<(all_of FeatureBasicD),
-                         "'D' (Double-Precision Floating-Point)">;
-
-// Loongson SIMD eXtension (LSX)
-def FeatureExtLSX
-    : SubtargetFeature<"lsx", "HasExtLSX", "true",
-                       "'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>;
-def HasExtLSX
-    : Predicate<"Subtarget->hasExtLSX()">,
-      AssemblerPredicate<(all_of FeatureExtLSX),
-                         "'LSX' (Loongson SIMD Extension)">;
-
-// Loongson Advanced SIMD eXtension (LASX)
-def FeatureExtLASX
-    : SubtargetFeature<"lasx", "HasExtLASX", "true",
-                       "'LASX' (Loongson Advanced SIMD Extension)",
-                       [FeatureExtLSX]>;
-def HasExtLASX
-    : Predicate<"Subtarget->hasExtLASX()">,
-      AssemblerPredicate<(all_of FeatureExtLASX),
-                         "'LASX' (Loongson Advanced SIMD Extension)">;
-
-// Loongson VirtualiZation (LVZ)
-def FeatureExtLVZ
-    : SubtargetFeature<"lvz", "HasExtLVZ", "true",
-                       "'LVZ' (Loongson Virtualization Extension)">;
-def HasExtLVZ
-    : Predicate<"Subtarget->hasExtLVZ()">,
-      AssemblerPredicate<(all_of FeatureExtLVZ),
-                         "'LVZ' (Loongson Virtualization Extension)">;
-
-// Loongson Binary Translation (LBT)
-def FeatureExtLBT
-    : SubtargetFeature<"lbt", "HasExtLBT", "true",
-                       "'LBT' (Loongson Binary Translation Extension)">;
-def HasExtLBT
-    : Predicate<"Subtarget->hasExtLBT()">,
-      AssemblerPredicate<(all_of FeatureExtLBT),
-                         "'LBT' (Loongson Binary Translation Extension)">;
-
+def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
+                                      "Support 64-bit FP registers">;
+def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
+                                          "true", "Only supports single precision float">;
+def FeatureSoftFloat : SubtargetFeature<"soft-float", "IsSoftFloat", "true",
+                                        "Does not support floating point instructions">;
+def Feature64Bit : SubtargetFeature<"64bit", "HasLA64", "true",
+                                    "Support LA64
ISA", + [FeatureFP64Bit]>; +def FeatureLSX : SubtargetFeature<"lsx", "HasLSX", "true", "Support LSX">; + +def FeatureLASX : SubtargetFeature<"lasx", "HasLASX", "true", "Support LASX", [FeatureLSX]>; + +def FeatureUnalignedAccess + : SubtargetFeature<"unaligned-access", "UnalignedAccess", "true", + "Allow all unaligned memory access">; //===----------------------------------------------------------------------===// -// Registers, instruction descriptions ... +// Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// include "LoongArchRegisterInfo.td" -include "LoongArchCallingConv.td" include "LoongArchInstrInfo.td" +include "LoongArchCallingConv.td" + +def LoongArchInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// // LoongArch processors supported. //===----------------------------------------------------------------------===// -def : ProcessorModel<"generic-la32", NoSchedModel, []>; -def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>; +def : ProcessorModel<"la264", NoSchedModel, [Feature64Bit]>; -def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, - FeatureExtLASX, - FeatureExtLVZ, - FeatureExtLBT]>; +def : ProcessorModel<"la364", NoSchedModel, [Feature64Bit]>; -//===----------------------------------------------------------------------===// -// Define the LoongArch target. -//===----------------------------------------------------------------------===// - -def LoongArchInstrInfo : InstrInfo { - // guess mayLoad, mayStore, and hasSideEffects - // This option is a temporary migration help. It will go away. - let guessInstructionProperties = 1; -} +def : ProcessorModel<"la464", NoSchedModel, + [Feature64Bit, FeatureUnalignedAccess]>; def LoongArchAsmParser : AsmParser { - let ShouldEmitMatchRegisterAltName = 1; - let AllowDuplicateRegisterNames = 1; + let ShouldEmitMatchRegisterName = 0; } def LoongArchAsmParserVariant : AsmParserVariant { int Variant = 0; + // Recognize hard coded registers. string RegisterPrefix = "$"; } -def LoongArchAsmWriter : AsmWriter { - int PassSubtarget = 1; -} - def LoongArch : Target { let InstructionSet = LoongArchInstrInfo; let AssemblyParsers = [LoongArchAsmParser]; let AssemblyParserVariants = [LoongArchAsmParserVariant]; - let AssemblyWriters = [LoongArchAsmWriter]; let AllowRegisterRenaming = 1; } diff --git a/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td new file mode 100644 index 000000000000..aa297c83748a --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td @@ -0,0 +1,741 @@ +//===- LoongArch32InstrInfo.td - Target Description for LoongArch Target -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes LoongArch32 instructions. +// +//===----------------------------------------------------------------------===// + +//===---------------------------------------------------------------------===/ +// Instruction Definitions. 
+//===---------------------------------------------------------------------===/ + +let DecoderNamespace = "LoongArch32" in { + /// + /// R2 + /// + def CLO_W : Count1<"clo.w", GPR32Opnd, ctlz>, R2I<0b00100>; + def CLZ_W : Int_Reg2<"clz.w", GPR32Opnd, ctlz>, R2I<0b00101>; + def CTO_W : Count1<"cto.w", GPR32Opnd, cttz>, R2I<0b00110>; + def CTZ_W : Int_Reg2<"ctz.w", GPR32Opnd, cttz>, R2I<0b00111>; + + def REVB_2H : Int_Reg2<"revb.2h", GPR32Opnd>, R2I<0b01100>;//see below bswap pattern + + def BITREV_4B : Int_Reg2<"bitrev.4b", GPR32Opnd>, R2I<0b10010>; + def BITREV_W : Int_Reg2<"bitrev.w", GPR32Opnd, bitreverse>, R2I<0b10100>; + + let isCodeGenOnly = 1 in { + def EXT_W_H32 : SignExtInReg<"ext.w.h", GPR32Opnd, i16>, R2I<0b10110>; + def EXT_W_B32 : SignExtInReg<"ext.w.b", GPR32Opnd, i8>, R2I<0b10111>; + + } + + def CPUCFG : Int_Reg2<"cpucfg", GPR32Opnd, int_loongarch_cpucfg>, R2I<0b11011>; + def RDTIMEL_W32 : Int_Reg2_Rdtime<"rdtimel.w", GPR32Opnd>, R2I<0b11000>; + def RDTIMEH_W32 : Int_Reg2_Rdtime<"rdtimeh.w", GPR32Opnd>, R2I<0b11001>; + + /// + /// R3 + /// + def ADD_W : Int_Reg3<"add.w", GPR32Opnd, add>, R3I<0b0100000>; + def SUB_W : Int_Reg3<"sub.w", GPR32Opnd, sub>, R3I<0b0100010>; + + let isCodeGenOnly = 1 in { + def SLT32 : SetCC_R<"slt", GPR32Opnd, setlt>, R3I<0b0100100>; + def SLTU32 : SetCC_R<"sltu", GPR32Opnd, setult>, R3I<0b0100101>; + def MASKEQZ32 : Int_Reg3<"maskeqz", GPR32Opnd>, R3I<0b0100110>;//see below patterns + def MASKNEZ32 : Int_Reg3<"masknez", GPR32Opnd>, R3I<0b0100111>;//see below patterns + + def NOR32 : Nor<"nor", GPR32Opnd>, R3I<0b0101000>; + def AND32 : Int_Reg3<"and", GPR32Opnd, and>, R3I<0b0101001>; + def OR32 : Int_Reg3<"or", GPR32Opnd, or>, R3I<0b0101010>; + def XOR32 : Int_Reg3<"xor", GPR32Opnd, xor>, R3I<0b0101011>; + def ANDN32 : Int_Reg3<"andn", GPR32Opnd>, R3I<0b0101101>; + def ORN32 : Int_Reg3<"orn", GPR32Opnd>, R3I<0b0101100>; + } + + def SLL_W : Shift_Var<"sll.w", GPR32Opnd, shl>, R3I<0b0101110>; + def SRL_W : Shift_Var<"srl.w", GPR32Opnd, srl>, R3I<0b0101111>; + def SRA_W : Shift_Var<"sra.w", GPR32Opnd, sra>, R3I<0b0110000>; + def ROTR_W: Shift_Var<"rotr.w", GPR32Opnd, rotr>, R3I<0b0110110>; + + def MUL_W : Int_Reg3<"mul.w", GPR32Opnd, mul>, R3I<0b0111000>; + def MULH_W : Int_Reg3<"mulh.w", GPR32Opnd, mulhs>, R3I<0b0111001>; + def MULH_WU : Int_Reg3<"mulh.wu", GPR32Opnd, mulhu>, R3I<0b0111010>; + +let usesCustomInserter = 1 in { + def DIV_W : Int_Reg3<"div.w", GPR32Opnd, sdiv>, R3I<0b1000000>; + def MOD_W : Int_Reg3<"mod.w", GPR32Opnd, srem>, R3I<0b1000001>; + def DIV_WU : Int_Reg3<"div.wu", GPR32Opnd, udiv>, R3I<0b1000010>; + def MOD_WU : Int_Reg3<"mod.wu", GPR32Opnd, urem>, R3I<0b1000011>; +} + + def CRC_W_B_W : Int_Reg3<"crc.w.b.w", GPR32Opnd, int_loongarch_crc_w_b_w>, R3I<0b1001000>; + def CRC_W_H_W : Int_Reg3<"crc.w.h.w", GPR32Opnd, int_loongarch_crc_w_h_w>, R3I<0b1001001>; + def CRC_W_W_W : Int_Reg3<"crc.w.w.w", GPR32Opnd, int_loongarch_crc_w_w_w>, R3I<0b1001010>; + def CRCC_W_B_W : Int_Reg3<"crcc.w.b.w", GPR32Opnd, int_loongarch_crcc_w_b_w>, R3I<0b1001100>; + def CRCC_W_H_W : Int_Reg3<"crcc.w.h.w", GPR32Opnd, int_loongarch_crcc_w_h_w>, R3I<0b1001101>; + def CRCC_W_W_W : Int_Reg3<"crcc.w.w.w", GPR32Opnd, int_loongarch_crcc_w_w_w>, R3I<0b1001110>; + /// + /// SLLI + /// + def SLLI_W : Shift_Imm32<"slli.w", GPR32Opnd, shl>, R2_IMM5<0b00>; + def SRLI_W : Shift_Imm32<"srli.w", GPR32Opnd, srl>, R2_IMM5<0b01>; + def SRAI_W : Shift_Imm32<"srai.w", GPR32Opnd, sra>, R2_IMM5<0b10>; + def ROTRI_W : Shift_Imm32<"rotri.w", GPR32Opnd, rotr>, R2_IMM5<0b11>; + /// 
+ /// Misc + /// + def ALSL_W : Reg3_Sa<"alsl.w", GPR32Opnd, uimm2_plus1>, R3_SA2<0b00010> { + let Pattern = [(set GPR32Opnd:$rd, + (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))]; + } + def BYTEPICK_W : Reg3_Sa<"bytepick.w", GPR32Opnd, uimm2>, R3_SA2<0b00100>;//pattern:[] + + def BREAK : Code15<"break", int_loongarch_break>, CODE15<0b1010100>; + def SYSCALL : Code15<"syscall", int_loongarch_syscall>, CODE15<0b1010110>; + def TRAP : TrapBase; + + def BSTRINS_W : InsBase_32<"bstrins.w", GPR32Opnd, uimm5, LoongArchBstrins>, + INSERT_BIT32<0>; + def BSTRPICK_W : PickBase_32<"bstrpick.w", GPR32Opnd, uimm5, LoongArchBstrpick>, + INSERT_BIT32<1>; + + /// + /// R2_IMM12 + /// + let isCodeGenOnly = 1 in { + def SLTI32 : SetCC_I<"slti", GPR32Opnd, simm12_32>, R2_IMM12<0b000>; //PatFrag + def SLTUI32 : SetCC_I<"sltui", GPR32Opnd, simm12_32>, R2_IMM12<0b001>; //PatFrag + } + def ADDI_W : Int_Reg2_Imm12<"addi.w", GPR32Opnd, simm12_32, add>, R2_IMM12<0b010>; + + let isCodeGenOnly = 1 in { + def ANDI32 : Int_Reg2_Imm12<"andi", GPR32Opnd, uimm12_32, and>, R2_IMM12<0b101>; + def ORI32 : Int_Reg2_Imm12<"ori", GPR32Opnd, uimm12_32, or>, R2_IMM12<0b110>; + def XORI32 : Int_Reg2_Imm12<"xori", GPR32Opnd, uimm12_32, xor>, R2_IMM12<0b111>; + } + + /// + /// Privilege Instructions + /// + def CSRRD32 : CSR<"csrrd", GPR32Opnd, uimm14_32, int_loongarch_csrrd_w>, R1_CSR<0b0000000000100>; + def CSRWR32 : CSRW<"csrwr", GPR32Opnd, uimm14_32, int_loongarch_csrwr_w>, R1_CSR<0b0000100000100>; + def CSRXCHG32 : CSRX<"csrxchg", GPR32Opnd, uimm14_32, int_loongarch_csrxchg_w>, R2_CSR<0b00000100>; + def IOCSRRD_B32 : Int_Reg2<"iocsrrd.b", GPR32Opnd, int_loongarch_iocsrrd_b>, R2P<0b000>; + def IOCSRRD_H32 : Int_Reg2<"iocsrrd.h", GPR32Opnd, int_loongarch_iocsrrd_h>, R2P<0b001>; + def IOCSRRD_W32 : Int_Reg2<"iocsrrd.w", GPR32Opnd, int_loongarch_iocsrrd_w>, R2P<0b010>; + def IOCSRWR_B32 : Int_Reg2_Iocsrwr<"iocsrwr.b", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_b>, R2P<0b100>; + def IOCSRWR_H32 : Int_Reg2_Iocsrwr<"iocsrwr.h", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_h>, R2P<0b101>; + def IOCSRWR_W32 : Int_Reg2_Iocsrwr<"iocsrwr.w", GPR32Opnd, GPR32Opnd, int_loongarch_iocsrwr_w>, R2P<0b110>; + def CACOP32 : CAC<"cacop", GPR32Opnd, simm12_32, int_loongarch_cacop_w>, R1_CACHE; + def LDDIR32 : LEVEL<"lddir", GPR32Opnd>, R2_LEVEL<0b00000110010000>; + def LDPTE32 : SEQ<"ldpte", GPR32Opnd>, R1_SEQ<0b00000110010001>; + + //def WAIT : Wait<"wait">; + // + //def IOCSRRD_D : R2P<0b011>, Int_Reg2<"iocsrrd.d", GPR32Opnd>; + //def IOCSRWR_D : R2P<0b111>, Int_Reg2<"iocsrwr.d", GPR32Opnd>; + // + //def TLBINV : IMM32<0b001000>, OP32<"tlbinv">; + //def TLBFLUSH : IMM32<0b001001>, OP32<"tlbflush">; + //def TLBP : IMM32<0b001010>, OP32<"tlbp">; + //def TLBR : IMM32<0b001011>, OP32<"tlbr">; + //def TLBWI : IMM32<0b001100>, OP32<"tlbwi">; + //def TLBWR : IMM32<0b001101>, OP32<"tlbwr">; + + /// + /// R1_IMM20 + /// + let isCodeGenOnly = 1 in { + def LU12I_W32 : SI20<"lu12i.w", GPR32Opnd, simm20_32>, R1_SI20<0b0001010>; + def PCADDI32 : SI20<"pcaddi", GPR32Opnd, simm20_32>, R1_SI20<0b0001100>; + def PCALAU12I32 : SI20<"pcalau12i", GPR32Opnd, simm20_32>, R1_SI20<0b0001101>; + def PCADDU12I32 : SI20<"pcaddu12i", GPR32Opnd, simm20_32>, R1_SI20<0b0001110>; + } + + let isCodeGenOnly = 1 in { + def BEQZ32 : Beqz<"beqz", brtarget, seteq, GPR32Opnd>, R1_IMM21BEQZ<0b010000>; + def BNEZ32 : Beqz<"bnez", brtarget, setne, GPR32Opnd>, R1_IMM21BEQZ<0b010001>; + + def JIRL32 : FJirl<"jirl", calltarget, GPR32Opnd>, R2_IMM16JIRL; + + def B32 : JumpFB, 
IMM26B<0b010100>; + + def BEQ32 : Beq<"beq", brtarget, seteq, GPR32Opnd>, R2_IMM16BEQ<0b010110>; + def BNE32 : Beq<"bne", brtarget, setne, GPR32Opnd>, R2_IMM16BEQ<0b010111>; + def BLT32 : Beq<"blt", brtarget, setlt, GPR32Opnd>, R2_IMM16BEQ<0b011000>; + def BGE32 : Beq<"bge", brtarget, setge, GPR32Opnd>, R2_IMM16BEQ<0b011001>; + def BLTU32 : Beq<"bltu", brtarget, setult, GPR32Opnd>, R2_IMM16BEQ<0b011010>; + def BGEU32 : Beq<"bgeu", brtarget, setuge, GPR32Opnd>, R2_IMM16BEQ<0b011011>; + } + + /// + /// Mem access + /// + def LL_W : LLBase<"ll.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b000>; + def SC_W : SCBase<"sc.w", GPR32Opnd, mem_simm14_lsl2>, LL_SC<0b001>; + + def PRELD_Raw32 : Preld_Raw<"preld", GPR32Opnd>, PRELD_FM; + + let isCodeGenOnly = 1 in { + def LD_B32 : Ld<"ld.b", GPR32Opnd, mem_simmptr, sextloadi8>, LOAD_STORE<0b0000>; + def LD_H32 : Ld<"ld.h", GPR32Opnd, mem_simmptr, sextloadi16, addrDefault>, LOAD_STORE<0b0001>; + def LD_W32 : Ld<"ld.w", GPR32Opnd, mem, load, addrDefault>, LOAD_STORE<0b0010>; + def ST_B32 : St<"st.b", GPR32Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>; + def ST_H32 : St<"st.h", GPR32Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>; + def ST_W32 : St<"st.w", GPR32Opnd, mem, store>, LOAD_STORE<0b0110>; + def LD_BU32 : Ld<"ld.bu", GPR32Opnd, mem_simmptr, zextloadi8, addrDefault>, LOAD_STORE<0b1000>; + def LD_HU32 : Ld<"ld.hu", GPR32Opnd, mem_simmptr, zextloadi16>, LOAD_STORE<0b1001>; + + def PRELD32 : Preld<"preld", mem, GPR32Opnd>, PRELD_FM; + + def LDPTR_W32 : LdPtr<"ldptr.w", GPR32Opnd>, LL_SC<0b100>; + def STPTR_W32 : StPtr<"stptr.w", GPR32Opnd>, LL_SC<0b101>; + } + + def IBAR : Bar<"ibar", int_loongarch_ibar>, BAR_FM<1>; + def DBAR : Bar<"dbar", int_loongarch_dbar>, BAR_FM<0>; + + def LONG_BRANCH_ADDIW : LoongArchPseudo<(outs GPR32Opnd:$dst), + (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>; + + def LONG_BRANCH_ADDIW2Op : LoongArchPseudo<(outs GPR32Opnd:$dst), + (ins GPR32Opnd:$src, brtarget:$tgt), []>; + + def PseudoReturn : PseudoReturnBase; + + let isCodeGenOnly = 1 in { + def LDX_W32 : LDX_FT_LA<"ldx.w", GPR32Opnd, load>, + R3MI<0b00010000>; + def LDX_HU32 : LDX_FT_LA<"ldx.hu", GPR32Opnd, extloadi16>, + R3MI<0b01001000>; + def LDX_BU32 : LDX_FT_LA<"ldx.bu", GPR32Opnd, extloadi8>, + R3MI<0b01000000>; + def STX_W32 : STX_FT_LA<"stx.w", GPR32Opnd, store>, + R3MI<0b00110000>; + def LDX_H32 : LDX_FT_LA<"ldx.h", GPR32Opnd, sextloadi16>, + R3MI<0b00001000>; + def LDX_B32 : LDX_FT_LA<"ldx.b", GPR32Opnd, sextloadi8>, + R3MI<0b00000000>; + def STX_B32 : STX_FT_LA<"stx.b", GPR32Opnd, truncstorei8>, + R3MI<0b00100000>; + def STX_H32 : STX_FT_LA<"stx.h", GPR32Opnd, truncstorei16>, + R3MI<0b00101000>; + } +} + +def LEA_ADDI_W: EffectiveAddress<"addi.w", GPR32Opnd>, LEA_ADDI_FM<0b010>; + +def : LoongArchPat<(LoongArchAddress (i32 tglobaladdr:$in)), + (ADDI_W (PCADDU12I32 tglobaladdr:$in) ,0)>,GPR_32; +def : LoongArchPat<(LoongArchAddress (i32 tblockaddress:$in)), + (ADDI_W (PCADDU12I32 tblockaddress:$in),0)>, GPR_32; +def : LoongArchPat<(LoongArchAddress (i32 tjumptable:$in)), + (ADDI_W (PCADDU12I32 tjumptable:$in),0)>, GPR_32; +def : LoongArchPat<(LoongArchAddress (i32 texternalsym:$in)), + (ADDI_W (PCADDU12I32 texternalsym:$in),0)>, GPR_32; + +//===----------------------------------------------------------------------===// +// Arbitrary patterns that map to one or more instructions +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in { + def REVB_2W_32 : Int_Reg2<"revb.2w", GPR32Opnd>, R2I<0b01110>; + 
def REVH_2W_32 : Int_Reg2<"revh.2w", GPR32Opnd>, R2I<0b10000>; +} + +// bswap pattern +def : LoongArchPat<(bswap GPR32:$rj), (ROTRI_W (REVB_2H GPR32:$rj), 16)>; +//def : LoongArchPat<(bswap GPR32:$rj), (REVB_2W_32 GPR32:$rj)>; +//def : LoongArchPat<(bswap GPR32:$rj), (REVH_2W_32 (REVB_2H GPR32:$rj))>; + +// i32 selects +multiclass SelectInt_Pats { + +// reg, immz +def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f), + (OROp (MASKNEZOp RC:$t, RC:$cond), (MASKEQZOp RC:$f, RC:$cond))>; +def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f), + (OROp (MASKEQZOp RC:$t, RC:$cond), (MASKNEZOp RC:$f, RC:$cond))>; + +//def : LoongArchPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f), +// (OROp (MASKNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), +// (MASKEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; +//def : LoongArchPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f), +// (OROp (MASKEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), +// (MASKNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; + +// reg, immSExt12Plus1 +//def : LoongArchPat<(select (Opg (setgt RC:$cond, immSExt12Plus1:$imm)), RC:$t, RC:$f), +// (OROp (MASKNEZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))), +// (MASKEQZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>; +//def : LoongArchPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), +// (OROp (MASKNEZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))), +// (MASKEQZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>; + +def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz), + (MASKNEZOp RC:$t, RC:$cond)>; +def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz), + (MASKEQZOp RC:$t, RC:$cond)>; +def : LoongArchPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f), + (MASKEQZOp RC:$f, RC:$cond)>; +def : LoongArchPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f), + (MASKNEZOp RC:$f, RC:$cond)>; +} + +defm : SelectInt_Pats; + +def : LoongArchPat<(select i32:$cond, i32:$t, i32:$f), + (OR32 (MASKEQZ32 i32:$t, i32:$cond), + (MASKNEZ32 i32:$f, i32:$cond))>; +def : LoongArchPat<(select i32:$cond, i32:$t, immz), + (MASKEQZ32 i32:$t, i32:$cond)>; +def : LoongArchPat<(select i32:$cond, immz, i32:$f), + (MASKNEZ32 i32:$f, i32:$cond)>; + +// truncate +def : LoongArchPat<(i32 (trunc (assertzext_lt_i32 GPR64:$src))), + (EXTRACT_SUBREG GPR64:$src, sub_32)>, GPR_64; +def : LoongArchPat<(i32 (trunc GPR64:$src)), + (SLLI_W (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>, GPR_64; + +// Patterns used for matching away redundant sign extensions. +// LA32 arithmetic instructions sign extend their result implicitly. 
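+// For example, add.w on a 64-bit core already writes its 32-bit sum
+// sign-extended to 64 bits, so (i64 (sext (i32 (add ...)))) below selects
+// to a plain ADD_W wrapped in INSERT_SUBREG, with no extra extension.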
+def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (ADD_W GPR32:$src, GPR32:$src2), sub_32)>; +def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SUB_W GPR32:$src, GPR32:$src2), sub_32)>; +def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MUL_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(store (i32 0), addr:$dst), (ST_W32 ZERO, addr:$dst)>; + +def : InstAlias<"break", (BREAK 0), 0>; +def : LoongArchInstAlias<"move $dst, $src", + (OR32 GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>, GPR_32; + +def immSExt12Plus1 : PatLeaf<(imm), [{ + return isInt<13>(N->getSExtValue()) && isInt<12>(N->getSExtValue() + 1); +}]>; + +def Plus1 : SDNodeXFormgetSExtValue() + 1); }]>; + +multiclass BrcondPats { + +def : LoongArchPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), + (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), + (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQOp1 (SLTOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQOp1 (SLTUOp RC:$lhs, RC:$rhs), ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond (i32 (setge RC:$lhs, immSExt12:$rhs)), bb:$dst), + (BEQOp1 (SLTIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond (i32 (setuge RC:$lhs, immSExt12:$rhs)), bb:$dst), + (BEQOp1 (SLTUIOp RC:$lhs, immSExt12:$rhs), ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond (i32 (setgt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst), + (BEQOp1 (SLTIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond (i32 (setugt RC:$lhs, immSExt12Plus1:$rhs)), bb:$dst), + (BEQOp1 (SLTUIOp RC:$lhs, (Plus1 imm:$rhs)), ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), + (BEQOp1 (SLTOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), + (BEQOp1 (SLTUOp RC:$rhs, RC:$lhs), ZEROReg, bb:$dst)>; +def : LoongArchPat<(brcond RC:$cond, bb:$dst), + (BNEOp RC:$cond, ZEROReg, bb:$dst)>; +} + +defm : BrcondPats, GPR_64; + +defm atomic_cmp_swap_8 : ternary_atomic_op_failure_ord; +defm atomic_cmp_swap_16 : ternary_atomic_op_failure_ord; +defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord; + +let usesCustomInserter = 1 in { + def ATOMIC_LOAD_ADD_I8 : Atomic2Ops; + def ATOMIC_LOAD_ADD_I16 : Atomic2Ops; + def ATOMIC_LOAD_ADD_I32 : Atomic2Ops; + def ATOMIC_LOAD_SUB_I8 : Atomic2Ops; + def ATOMIC_LOAD_SUB_I16 : Atomic2Ops; + def ATOMIC_LOAD_SUB_I32 : Atomic2Ops; + def ATOMIC_LOAD_AND_I8 : Atomic2Ops; + def ATOMIC_LOAD_AND_I16 : Atomic2Ops; + def ATOMIC_LOAD_AND_I32 : Atomic2Ops; + def ATOMIC_LOAD_OR_I8 : Atomic2Ops; + def ATOMIC_LOAD_OR_I16 : Atomic2Ops; + def ATOMIC_LOAD_OR_I32 : Atomic2Ops; + def ATOMIC_LOAD_XOR_I8 : Atomic2Ops; + def ATOMIC_LOAD_XOR_I16 : Atomic2Ops; + def ATOMIC_LOAD_XOR_I32 : Atomic2Ops; + def ATOMIC_LOAD_NAND_I8 : Atomic2Ops; + def ATOMIC_LOAD_NAND_I16 : Atomic2Ops; + def ATOMIC_LOAD_NAND_I32 : Atomic2Ops; + + def ATOMIC_SWAP_I8 : Atomic2Ops; + def ATOMIC_SWAP_I16 : Atomic2Ops; + def ATOMIC_SWAP_I32 : Atomic2Ops; + + defm I8_ : AtomicCmpSwapInstrs<"8", GPR32>; + defm I16_ : AtomicCmpSwapInstrs<"16", GPR32>; + defm I32_ : AtomicCmpSwapInstrs<"32", GPR32>; + + def ATOMIC_LOAD_MAX_I8 : Atomic2Ops; + def ATOMIC_LOAD_MAX_I16 : 
Atomic2Ops; + def ATOMIC_LOAD_MAX_I32 : Atomic2Ops; + + def ATOMIC_LOAD_MIN_I8 : Atomic2Ops; + def ATOMIC_LOAD_MIN_I16 : Atomic2Ops; + def ATOMIC_LOAD_MIN_I32 : Atomic2Ops; + + def ATOMIC_LOAD_UMAX_I8 : Atomic2Ops; + def ATOMIC_LOAD_UMAX_I16 : Atomic2Ops; + def ATOMIC_LOAD_UMAX_I32 : Atomic2Ops; + + def ATOMIC_LOAD_UMIN_I8 : Atomic2Ops; + def ATOMIC_LOAD_UMIN_I16 : Atomic2Ops; + def ATOMIC_LOAD_UMIN_I32 : Atomic2Ops; +} + +def ATOMIC_LOAD_ADD_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_ADD_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_ADD_I32_POSTRA : Atomic2OpsPostRA; +def ATOMIC_LOAD_SUB_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_SUB_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_SUB_I32_POSTRA : Atomic2OpsPostRA; +def ATOMIC_LOAD_AND_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_AND_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_AND_I32_POSTRA : Atomic2OpsPostRA; +def ATOMIC_LOAD_OR_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_OR_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_OR_I32_POSTRA : Atomic2OpsPostRA; +def ATOMIC_LOAD_XOR_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_XOR_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_XOR_I32_POSTRA : Atomic2OpsPostRA; +def ATOMIC_LOAD_NAND_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_NAND_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_NAND_I32_POSTRA : Atomic2OpsPostRA; + +def ATOMIC_SWAP_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_SWAP_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_SWAP_I32_POSTRA : Atomic2OpsPostRA; + +def ATOMIC_CMP_SWAP_I8_POSTRA : AtomicCmpSwapSubwordPostRA; +def ATOMIC_CMP_SWAP_I16_POSTRA : AtomicCmpSwapSubwordPostRA; +def ATOMIC_CMP_SWAP_I32_POSTRA : AtomicCmpSwapPostRA; + +def ATOMIC_LOAD_MAX_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_MAX_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_MAX_I32_POSTRA : Atomic2OpsPostRA; + +def ATOMIC_LOAD_MIN_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_MIN_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_MIN_I32_POSTRA : Atomic2OpsPostRA; + +def ATOMIC_LOAD_UMAX_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_UMAX_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_UMAX_I32_POSTRA : Atomic2OpsPostRA; + +def ATOMIC_LOAD_UMIN_I8_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_UMIN_I16_POSTRA : Atomic2OpsSubwordPostRA; +def ATOMIC_LOAD_UMIN_I32_POSTRA : Atomic2OpsPostRA; + +def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B32 addr:$a)>; +def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H32 addr:$a)>; +def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W32 addrimm14lsl2:$a)>; +def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W32 addr:$a)>; + +def : LoongArchPat<(atomic_store_8 addr:$a, GPR32:$v), + (ST_B32 GPR32:$v, addr:$a)>; +def : LoongArchPat<(atomic_store_16 addr:$a, GPR32:$v), + (ST_H32 GPR32:$v, addr:$a)>; +def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR32:$v), + (STPTR_W32 GPR32:$v, addrimm14lsl2:$a)>; +def : LoongArchPat<(atomic_store_32 addr:$a, GPR32:$v), + (ST_W32 GPR32:$v, addr:$a)>; + +// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from +// the Linux patch revealing it [1]: +// +// - Bit 4: kind of constraint (0: completion, 1: ordering) +// - Bit 3: barrier for previous read (0: true, 1: false) +// - Bit 2: barrier for previous write (0: true, 1: false) +// - Bit 1: barrier for succeeding read (0: true, 1: false) +// - Bit 0: barrier for succeeding write (0: true, 1: false) 
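+//
+// Worked example: the acquire mapping below emits hint 0b10100, i.e. an
+// ordering barrier (bit 4 set) that keeps the constraints on the preceding
+// read and on both succeeding accesses (bits 3, 1 and 0 clear) and waives
+// only the preceding-write constraint (bit 2 set).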
+//
+// Hint 0x700: barrier for "read after read" from the same address, which is
+// e.g. needed by LL-SC loops on older models. (On newer models that support
+// the hint, DBAR 0x700 behaves the same as a nop when such reordering is
+// already disabled.)
+//
+// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/
+//
+// Implementations without support for the finer-granularity hints simply treat
+// them all as the full barrier (DBAR 0), so we can unconditionally start
+// emitting the more precise hints right away.
+
+def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire
+def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release
+def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel
+def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst
+
+def : LoongArchPat<(i32 (extloadi1 addr:$src)), (LD_BU32 addr:$src)>;
+def : LoongArchPat<(i32 (extloadi8 addr:$src)), (LD_BU32 addr:$src)>;
+def : LoongArchPat<(i32 (extloadi16 addr:$src)), (LD_HU32 addr:$src)>;
+
+def : LoongArchPat<(store (i32 0), addr:$dst), (ST_W32 ZERO, addr:$dst)>;
+
+// Patterns for loads/stores with a reg+imm operand.
+let AddedComplexity = 40 in {
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : StoreRegImmPat;
+  def : StoreRegImmPat;
+  def : StoreRegImmPat;
+
+  def : LoadRegImm14Lsl2Pat;
+  def : StoreRegImm14Lsl2Pat;
+}
+
+let isCall=1, isCTI=1, Defs = [RA] in {
+
+  class JumpLinkRegPseudo<RegisterOperand RO, Instruction JIRLRInst,
+                          Register RetReg, RegisterOperand ResRO = RO>:
+    LoongArchPseudo<(outs), (ins RO:$rj), [(LoongArchJmpLink RO:$rj)]>,
+    PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> {
+    let hasPostISelHook = 1;
+  }
+
+  class JumpLinkReg<string opstr, RegisterOperand RO>:
+    InstForm<(outs RO:$rd), (ins RO:$rj), !strconcat(opstr, "\t$rd, $rj, 0"),
+             [], FrmR, opstr> {
+    let hasPostISelHook = 1;
+  }
+
+}
+
+def JIRLR : JumpLinkReg<"jirl", GPR32Opnd>, R2_IMM16JIRL {
+  let offs16 = 0;
+}
+def JIRLRPseudo : JumpLinkRegPseudo<GPR32Opnd, JIRLR, RA>;
+
+class BrindRegPseudo<RegisterOperand RO, Instruction JIRLRInst, Register RetReg,
+                     RegisterOperand ResRO = RO>:
+  LoongArchPseudo<(outs), (ins RO:$rj), [(brind RO:$rj)]>,
+  PseudoInstExpansion<(JIRLRInst RetReg, ResRO:$rj)> {
+  let isTerminator=1;
+  let isBarrier=1;
+  let isBranch = 1;
+  let isIndirectBranch = 1;
+  bit isCTI = 1;
+}
+
+def JIRLRBRIND : BrindRegPseudo<GPR32Opnd, JIRLR, ZERO>;
+
+def : LoongArchPat<(addc GPR32:$src, immSExt12:$imm),
+                   (ADDI_W GPR32:$src, imm:$imm)>;
+
+defm : SeteqPats;
+defm : SetlePats;
+defm : SetgtPats;
+defm : SetgePats;
+defm : SetgeImmPats;
+
+def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immZExt12:$imm12))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                    (XORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (immZExt12:$imm12)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                    (ADD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (add (i32 (trunc (i64 (assertsext GPR64:$rj)))), (immSExt12:$imm12))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                    (ADDI_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (immSExt12:$imm12)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (sra (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                    (SRA_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64;
+
+def : LoongArchPat<(i64 (sext (i32 (srl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SRL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (mul (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MUL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (XOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (uimm12_32:$imm12))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (ORI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (uimm12_32:$imm12)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 GPR32:$rk))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (OR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), GPR32:$rk), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (select i32:$cond, (i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), + (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond)), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (shl (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SLL_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (srem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MOD_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(atomic_store_32 addr:$a, (i32 (trunc (i64 (assertsext GPR64:$rj))))), + (ST_W32 (EXTRACT_SUBREG GPR64:$rj, sub_32), addr:$a)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (sub (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SUB_W (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (udiv (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (DIV_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(i64 (sext (i32 (urem (i32 (trunc (i64 (assertsext GPR64:$rj)))), (i32 (trunc (i64 (assertsext GPR64:$rk)))))))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MOD_WU (EXTRACT_SUBREG GPR64:$rj, sub_32), (EXTRACT_SUBREG GPR64:$rk, sub_32)), sub_32)>, GPR_64; + +def : LoongArchPat<(brcond (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0)), bb:$offs21), + (BEQZ32 (EXTRACT_SUBREG GPR64:$rj, sub_32), brtarget:$offs21)>; + +def : LoongArchPat<(setne (i32 (trunc (i64 (assertsext GPR64:$rj)))), 0), + (SLTU32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32))>; + +def : LoongArchPat<(select i32:$cond, 
(i32 (trunc (i64 (assertsext GPR64:$t)))), (i32 (trunc (i64 (assertsext GPR64:$f))))), + (OR32 (MASKEQZ32 (EXTRACT_SUBREG GPR64:$t, sub_32), i32:$cond), + (MASKNEZ32 (EXTRACT_SUBREG GPR64:$f, sub_32), i32:$cond))>; + +def : LoongArchPat<(select (i32 (setne (i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), + (MASKNEZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; + +def : LoongArchPat<(select (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$cond)))), immz)), immz, i32:$f), + (MASKEQZ32 i32:$f, (EXTRACT_SUBREG GPR64:$cond, sub_32))>; + + def : LoongArchPat<(store (i32 (trunc (i64 (assertsext GPR64:$v)))), addr:$a), + (ST_W32 (EXTRACT_SUBREG GPR64:$v, sub_32), addr:$a)>; + + +def : LoongArchPat<(i32 (xor GPR32:$rj, (i32 -1))), + (NOR32 ZERO, GPR32:$rj)>; + +def : LoongArchPat<(and GPR32:$rj, (i32 (xor GPR32:$rk, (i32 -1)))), + (ANDN32 GPR32:$rj, GPR32:$rk)>; + +def : LoongArchPat< + (i64 + (sext + (i32 (and (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), + (i32 -1)))) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (ANDN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), + (EXTRACT_SUBREG GPR64:$rk, sub_32)), + sub_32 + )>; + +def : LoongArchPat< + (i64 + (sext + (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rk)))), + (i32 -1)))) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (ORN32 (EXTRACT_SUBREG GPR64:$rj, sub_32), + (EXTRACT_SUBREG GPR64:$rk, sub_32)), + sub_32 + )>; + +def : LoongArchPat<(i64 + (sext + (i32 (xor (i32 (or (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 (trunc (i64 (assertsext GPR64:$rk)))))), + (i32 -1)) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (NOR32 (EXTRACT_SUBREG GPR64:$rj, sub_32), + (EXTRACT_SUBREG GPR64:$rk, sub_32)), + sub_32 + )>; + +def : LoongArchPat<(i64 + (sext + (i32 (xor (i32 (trunc (i64 (or (i64 (assertsext GPR64:$rj)), + (i64 (assertsext GPR64:$rk)))))), + (i32 -1)) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (NOR32 (EXTRACT_SUBREG GPR64:$rk, sub_32), + (EXTRACT_SUBREG GPR64:$rj, sub_32)), + sub_32 + )>; + +def : LoongArchPat<(i64 + (sext + (i32 (xor (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 -1)) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (NOR32 ZERO, (EXTRACT_SUBREG GPR64:$rj, sub_32)), + sub_32 + )>; + +def : LoongArchPat<(i64 + (zext + (i32 (seteq (i32 (trunc (i64 (assertsext GPR64:$rj)))), + (i32 0)) + ) + ) + ), + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (SLTUI32 (EXTRACT_SUBREG GPR64:$rj, sub_32), (i32 1)), + sub_32 + )>; diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp index 1467d1757ff0..afa38dbf25e1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -1,4 +1,4 @@ -//===- LoongArchAsmPrinter.cpp - LoongArch LLVM Assembly Printer -*- C++ -*--=// +//===- LoongArchAsmPrinter.cpp - LoongArch LLVM Assembly Printer --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
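The GPR_64 patterns above all rest on one invariant: when a 64-bit register is known, via assertsext, to hold a sign-extended i32, doing the arithmetic on the low 32 bits and sign-extending the 32-bit result (which is what the *.w instructions do in hardware) produces the same i64 value as the (sext (i32 ...)) node being matched. A minimal standalone C++ sketch of that invariant for ADD_W; this is our illustration for this write-up, not code from the patch:

// Sketch: why (i64 (sext (i32 (add x, y)))) can be selected as a single ADD_W
// when x and y are sign-extended i32 values living in 64-bit registers.
#include <cassert>
#include <cstdint>

// Model of ADD.W: operate on the low 32 bits of the 64-bit registers, then
// sign-extend the 32-bit result into the full destination register.
int64_t add_w(int64_t rj, int64_t rk) {
  return static_cast<int64_t>(static_cast<int32_t>(rj) +
                              static_cast<int32_t>(rk));
}

int main() {
  int64_t rj = INT32_MIN; // a sign-extended i32: the "assertsext" precondition
  int64_t rk = 41;
  // (i64 (sext (i32 (add x, y)))) computed step by step, the way the DAG
  // describes it...
  int32_t sum32 = static_cast<int32_t>(rj) + static_cast<int32_t>(rk);
  int64_t dag = static_cast<int64_t>(sum32);
  // ...matches one ADD_W on the full registers, so the pattern can emit
  // ADD_W plus INSERT_SUBREG instead of materializing a separate i32 value.
  assert(dag == add_w(rj, rk));
  return 0;
}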
@@ -12,40 +12,622 @@ //===----------------------------------------------------------------------===// #include "LoongArchAsmPrinter.h" +#include "MCTargetDesc/LoongArchInstPrinter.h" +#include "MCTargetDesc/LoongArchABIInfo.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "LoongArch.h" +#include "LoongArchMCInstLower.h" +#include "LoongArchMachineFunction.h" +#include "LoongArchSubtarget.h" #include "LoongArchTargetMachine.h" -#include "TargetInfo/LoongArchTargetInfo.h" -#include "llvm/CodeGen/AsmPrinter.h" +#include "LoongArchTargetStreamer.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include +#include +#include using namespace llvm; #define DEBUG_TYPE "loongarch-asm-printer" -// Simple pseudo-instructions have their lowering (with expansion to real -// instructions) auto-generated. +LoongArchTargetStreamer &LoongArchAsmPrinter::getTargetStreamer() const { + return static_cast(*OutStreamer->getTargetStreamer()); +} + +bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + Subtarget = &MF.getSubtarget(); + + LoongArchFI = MF.getInfo(); + MCP = MF.getConstantPool(); + + AsmPrinter::runOnMachineFunction(MF); + + emitXRayTable(); + + return true; +} + +bool LoongArchAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { + MCOp = MCInstLowering.LowerOperand(MO); + return MCOp.isValid(); +} + #include "LoongArchGenMCPseudoLowering.inc" +// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to +// JIRL as appropriate for the target. +void LoongArchAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer, + const MachineInstr *MI) { + bool HasLinkReg = false; + MCInst TmpInst0; + TmpInst0.setOpcode(LoongArch::JIRL); + HasLinkReg = true; + + MCOperand MCOp; + + if (HasLinkReg) { + unsigned ZeroReg = Subtarget->is64Bit() ? 
LoongArch::ZERO_64 : LoongArch::ZERO; + TmpInst0.addOperand(MCOperand::createReg(ZeroReg)); + } + + lowerOperand(MI->getOperand(0), MCOp); + TmpInst0.addOperand(MCOp); + + TmpInst0.addOperand(MCOperand::createImm(0)); + + EmitToStreamer(OutStreamer, TmpInst0); +} + +void LoongArchAsmPrinter::emitPseudoTailBranch(MCStreamer &OutStreamer, + const MachineInstr *MI) { + MCInst TmpInst; + TmpInst.setOpcode(LoongArch::B); + + MCOperand MCOp; + + lowerOperand(MI->getOperand(0), MCOp); + TmpInst.addOperand(MCOp); + + EmitToStreamer(OutStreamer, TmpInst); +} + void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { - LoongArch_MC::verifyInstructionPredicates( - MI->getOpcode(), getSubtargetInfo().getFeatureBits()); + LoongArchTargetStreamer &TS = getTargetStreamer(); + unsigned Opc = MI->getOpcode(); + TS.forbidModuleDirective(); - // Do any auto-generated pseudo lowerings. - if (emitPseudoExpansionLowering(*OutStreamer, MI)) + if (MI->isDebugValue()) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + + PrintDebugValueComment(MI, OS); + return; + } + if (MI->isDebugLabel()) return; + // If we just ended a constant pool, mark it as such. + OutStreamer->emitDataRegion(MCDR_DataRegionEnd); + InConstantPool = false; - MCInst TmpInst; - if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this)) - EmitToStreamer(*OutStreamer, TmpInst); + switch (Opc) { + case LoongArch::PATCHABLE_FUNCTION_ENTER: + LowerPATCHABLE_FUNCTION_ENTER(*MI); + return; + case LoongArch::PATCHABLE_FUNCTION_EXIT: + LowerPATCHABLE_FUNCTION_EXIT(*MI); + return; + case LoongArch::PATCHABLE_TAIL_CALL: + LowerPATCHABLE_TAIL_CALL(*MI); + return; + } + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + + do { + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(*OutStreamer, &*I)) + continue; + if (I->getOpcode() == LoongArch::PseudoReturn || + I->getOpcode() == LoongArch::PseudoReturn64){ + emitPseudoIndirectBranch(*OutStreamer, &*I); + continue; + } + if (I->getOpcode() == LoongArch::PseudoTailReturn){ + emitPseudoTailBranch(*OutStreamer, &*I); + continue; + } + + // Some instructions are marked as pseudo right now which + // would make the test fail for the wrong reason but + // that will be fixed soon. We need this here because we are + // removing another test for this situation downstream in the + // callchain. + // + if (I->isPseudo() + && !isLongBranchPseudo(I->getOpcode())) + llvm_unreachable("Pseudo opcode found in EmitInstruction()"); + + MCInst TmpInst0; + MCInstLowering.Lower(&*I, TmpInst0); + EmitToStreamer(*OutStreamer, TmpInst0); + } while ((++I != E) && I->isInsideBundle()); } -bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - AsmPrinter::runOnMachineFunction(MF); - return true; +//===----------------------------------------------------------------------===// +// +// LoongArch Asm Directives +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Set directives +//===----------------------------------------------------------------------===// + +/// Emit Set directives. 
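+/// getCurrentABIString - Return the directive suffix naming the ABI currently
+/// in effect (e.g. "abilp64" for LP64); the mapping is the switch below.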
+const char *LoongArchAsmPrinter::getCurrentABIString() const { + switch (static_cast(TM).getABI().GetEnumValue()) { + case LoongArchABIInfo::ABI::LP32: return "abilp32"; + case LoongArchABIInfo::ABI::LPX32: return "abilpx32"; + case LoongArchABIInfo::ABI::LP64: return "abilp64"; + default: llvm_unreachable("Unknown LoongArch ABI"); + } +} + +void LoongArchAsmPrinter::emitFunctionEntryLabel() { + + OutStreamer->emitLabel(CurrentFnSym); + +} + +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void LoongArchAsmPrinter::emitFunctionBodyStart() { + + MCInstLowering.Initialize(&MF->getContext()); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void LoongArchAsmPrinter::emitFunctionBodyEnd() { + + // Make sure to terminate any constant pools that were at the end + // of the function. + if (!InConstantPool) + return; + InConstantPool = false; + OutStreamer->emitDataRegion(MCDR_DataRegionEnd); +} + +void LoongArchAsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) { + AsmPrinter::emitBasicBlockEnd(MBB); +} + +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool LoongArchAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* + MBB) const { + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *MBB->pred_begin(); + + // If the predecessor is a switch statement, assume a jump table + // implementation, so it is not a fall through. + if (const BasicBlock *bb = Pred->getBasicBlock()) + if (isa(bb->getTerminator())) + return false; + + // Check default implementation + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + +// Print out an operand for an inline asm expression. +bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNum, const char *ExtraCode, raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + const MachineOperand &MO = MI->getOperand(OpNum); + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI,OpNum,ExtraCode,O); + case 'X': // hex const int + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + O << "0x" << Twine::utohexstr(MO.getImm()); + return false; + case 'x': // hex const int (low 16 bits) + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + O << "0x" << Twine::utohexstr(MO.getImm() & 0xffff); + return false; + case 'd': // decimal const int + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + O << MO.getImm(); + return false; + case 'm': // decimal const int minus 1 + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + O << MO.getImm() - 1; + return false; + case 'y': // exact log2 + if ((MO.getType()) != MachineOperand::MO_Immediate) + return true; + if (!isPowerOf2_64(MO.getImm())) + return true; + O << Log2_64(MO.getImm()); + return false; + case 'z': + // $r0 if zero, regular printing otherwise + if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) { + O << "$r0"; + return false; + } + // If not, call printOperand as normal. 
+ break; + case 'D': // Second part of a double word register operand + case 'L': // Low order register of a double word register operand + case 'M': // High order register of a double word register operand + { + if (OpNum == 0) + return true; + const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1); + if (!FlagsOP.isImm()) + return true; + unsigned Flags = FlagsOP.getImm(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + // Number of registers represented by this operand. We are looking + // for 2 for 32 bit mode and 1 for 64 bit mode. + if (NumVals != 2) { + if (Subtarget->is64Bit() && NumVals == 1 && MO.isReg()) { + unsigned Reg = MO.getReg(); + O << '$' << LoongArchInstPrinter::getRegisterName(Reg); + return false; + } + return true; + } + + unsigned RegOp = OpNum; + if (!Subtarget->is64Bit()){ + // Endianness reverses which register holds the high or low value + // between M and L. + switch(ExtraCode[0]) { + case 'M': + RegOp = OpNum + 1; + break; + case 'L': + RegOp = OpNum; + break; + case 'D': // Always the second part + RegOp = OpNum + 1; + } + if (RegOp >= MI->getNumOperands()) + return true; + const MachineOperand &MO = MI->getOperand(RegOp); + if (!MO.isReg()) + return true; + unsigned Reg = MO.getReg(); + O << '$' << LoongArchInstPrinter::getRegisterName(Reg); + return false; + } + break; + } + case 'w': + // Print LSX registers for the 'f' constraint + // In LLVM, the 'w' modifier doesn't need to do anything. + // We can just call printOperand as normal. + break; + case 'u': + // Print LASX registers for the 'f' constraint + // In LLVM, the 'u' modifier doesn't need to do anything. + // We can just call printOperand as normal. + break; + } + } + + printOperand(MI, OpNum, O); + return false; +} + +bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, + const char *ExtraCode, + raw_ostream &O) { + assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); + const MachineOperand &BaseMO = MI->getOperand(OpNum); + const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1); + assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand."); + assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand."); + int Offset = OffsetMO.getImm(); + + // Currently we are expecting either no ExtraCode or 'D','M','L'. + if (ExtraCode) { + switch (ExtraCode[0]) { + case 'D': + case 'M': + Offset += 4; + break; + case 'L': + break; + default: + return true; // Unknown modifier. 
+ } + } + + O << "$" << LoongArchInstPrinter::getRegisterName(BaseMO.getReg()) << ", " << Offset; + + return false; +} + +void LoongArchAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << '$' + << StringRef(LoongArchInstPrinter::getRegisterName(MO.getReg())).lower(); + break; + + case MachineOperand::MO_Immediate: + O << MO.getImm(); + break; + + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(O, MAI); + return; + + case MachineOperand::MO_GlobalAddress: + getSymbol(MO.getGlobal())->print(O, MAI); + break; + + case MachineOperand::MO_BlockAddress: { + MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress()); + O << BA->getName(); + break; + } + + case MachineOperand::MO_ConstantPoolIndex: + O << getDataLayout().getPrivateGlobalPrefix() << "CPI" + << getFunctionNumber() << "_" << MO.getIndex(); + if (MO.getOffset()) + O << "+" << MO.getOffset(); + break; + + default: + llvm_unreachable(""); + } +} + +void LoongArchAsmPrinter:: +printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { + // Load/Store memory operands -- imm($reg) + // If PIC target the target is loaded as the + // pattern lw $25,%call16($28) + + printOperand(MI, opNum+1, O); + O << "("; + printOperand(MI, opNum, O); + O << ")"; +} + +void LoongArchAsmPrinter:: +printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) { + // when using stack locations for not load/store instructions + // print the same way as all normal 3 operand instructions. + printOperand(MI, opNum, O); + O << ", "; + printOperand(MI, opNum+1, O); +} + +void LoongArchAsmPrinter:: +printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) { + for (int i = opNum, e = MI->getNumOperands(); i != e; ++i) { + if (i != opNum) O << ", "; + printOperand(MI, i, O); + } +} + +void LoongArchAsmPrinter::emitStartOfAsmFile(Module &M) { + LoongArchTargetStreamer &TS = getTargetStreamer(); + + // LoongArchTargetStreamer has an initialization order problem when emitting an + // object file directly (see LoongArchTargetELFStreamer for full details). Work + // around it by re-initializing the PIC state here. + TS.setPic(OutContext.getObjectFileInfo()->isPositionIndependent()); + + // Compute LoongArch architecture attributes based on the default subtarget + // that we'd have constructed. Module level directives aren't LTO + // clean anyhow. + // FIXME: For ifunc related functions we could iterate over and look + // for a feature string that doesn't match the default one. 
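+  // The subtarget constructed below is a throwaway: it exists only so that
+  // updateABIInfo() can seed the target streamer with the module-level
+  // CPU/feature configuration before any function is emitted.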
+ const Triple &TT = TM.getTargetTriple(); + StringRef CPU = LoongArch_MC::selectLoongArchCPU(TT, TM.getTargetCPU()); + StringRef FS = TM.getTargetFeatureString(); + const LoongArchTargetMachine &MTM = static_cast(TM); + const LoongArchSubtarget STI(TT, CPU, FS, MTM, None); + + TS.updateABIInfo(STI); +} + +void LoongArchAsmPrinter::emitInlineAsmStart() const { + + OutStreamer->addBlankLine(); +} + +void LoongArchAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const { + OutStreamer->addBlankLine(); +} + +void LoongArchAsmPrinter::EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, + unsigned Reg) { + MCInst I; + I.setOpcode(Opcode); + I.addOperand(MCOperand::createReg(Reg)); + OutStreamer->emitInstruction(I, STI); +} + +void LoongArchAsmPrinter::EmitInstrRegReg(const MCSubtargetInfo &STI, + unsigned Opcode, unsigned Reg1, + unsigned Reg2) { + MCInst I; + // + // Because of the current td files for LoongArch32, the operands for MTC1 + // appear backwards from their normal assembly order. It's not a trivial + // change to fix this in the td file so we adjust for it here. + // + if (Opcode == LoongArch::MOVGR2FR_W) { + unsigned Temp = Reg1; + Reg1 = Reg2; + Reg2 = Temp; + } + I.setOpcode(Opcode); + I.addOperand(MCOperand::createReg(Reg1)); + I.addOperand(MCOperand::createReg(Reg2)); + OutStreamer->emitInstruction(I, STI); +} + +void LoongArchAsmPrinter::EmitInstrRegRegReg(const MCSubtargetInfo &STI, + unsigned Opcode, unsigned Reg1, + unsigned Reg2, unsigned Reg3) { + MCInst I; + I.setOpcode(Opcode); + I.addOperand(MCOperand::createReg(Reg1)); + I.addOperand(MCOperand::createReg(Reg2)); + I.addOperand(MCOperand::createReg(Reg3)); + OutStreamer->emitInstruction(I, STI); +} + +void LoongArchAsmPrinter::EmitMovFPIntPair(const MCSubtargetInfo &STI, + unsigned MovOpc, unsigned Reg1, + unsigned Reg2, unsigned FPReg1, + unsigned FPReg2, bool LE) { + if (!LE) { + unsigned temp = Reg1; + Reg1 = Reg2; + Reg2 = temp; + } + EmitInstrRegReg(STI, MovOpc, Reg1, FPReg1); + EmitInstrRegReg(STI, MovOpc, Reg2, FPReg2); +} + +void LoongArchAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { + const uint8_t NoopsInSledCount = 11; + // For loongarch64 we want to emit the following pattern: + // + // .Lxray_sled_N: + // ALIGN + // B .tmpN + // 11 NOP instructions (44 bytes) + // .tmpN + // + // We need the 44 bytes (11 instructions) because at runtime, we'd + // be patching over the full 48 bytes (12 instructions) with the following + // pattern: + // + // addi.d sp,sp, -16 ;create stack frame + // st.d ra, sp, 8 ;save return address + // lu12i.w t0,%%abs_hi20(__xray_FunctionEntry/Exit) + // ori t0,t0,%%abs_lo12(__xray_FunctionEntry/Exit) + // lu32i.d t0,%%abs64_lo20(__xray_FunctionEntry/Exit) + // lu52i.d t0,t0,%%abs64_hi12(__xray_FunctionEntry/Exit) + // lu12i.w t1,%%abs_hi20(function_id) + // ori t1,t1,%%abs_lo12(function_id) ;pass function id + // jirl ra, t0, 0 ;call Tracing hook + // ld.d ra, sp, 8 ;restore return address + // addi.d sp, sp, 16 ;delete stack frame + + OutStreamer->emitCodeAlignment(4, &getSubtargetInfo()); + auto CurSled = OutContext.createTempSymbol("xray_sled_", true); + OutStreamer->emitLabel(CurSled); + auto Target = OutContext.createTempSymbol(); + + // Emit "B .tmpN" instruction, which jumps over the nop sled to the actual + // start of function + const MCExpr *TargetExpr = MCSymbolRefExpr::create( + Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext); + EmitToStreamer(*OutStreamer, MCInstBuilder(LoongArch::BEQ) + 
                                   .addReg(LoongArch::ZERO)
+                                   .addReg(LoongArch::ZERO)
+                                   .addExpr(TargetExpr));
+
+  for (int8_t I = 0; I < NoopsInSledCount; I++)
+    EmitToStreamer(*OutStreamer, MCInstBuilder(LoongArch::ANDI)
+                                     .addReg(LoongArch::ZERO)
+                                     .addReg(LoongArch::ZERO)
+                                     .addImm(0));
+
+  OutStreamer->emitLabel(Target);
+  recordSled(CurSled, MI, Kind, 2);
+}
+
+void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) {
+  EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) {
+  EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void LoongArchAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) {
+  EmitSled(MI, SledKind::TAIL_CALL);
+}
+
+void LoongArchAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+                                                 raw_ostream &OS) {
+  // TODO: implement
+}
+
+bool LoongArchAsmPrinter::isLongBranchPseudo(int Opcode) const {
+  return (Opcode == LoongArch::LONG_BRANCH_ADDIW
+          || Opcode == LoongArch::LONG_BRANCH_ADDIW2Op
+          || Opcode == LoongArch::LONG_BRANCH_ADDID
+          || Opcode == LoongArch::LONG_BRANCH_ADDID2Op
+          || Opcode == LoongArch::LONG_BRANCH_PCADDU12I);
 }
 
 // Force static initialization.
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchAsmPrinter() {
   RegisterAsmPrinter<LoongArchAsmPrinter> X(getTheLoongArch32Target());
-  RegisterAsmPrinter<LoongArchAsmPrinter> Y(getTheLoongArch64Target());
+  RegisterAsmPrinter<LoongArchAsmPrinter> A(getTheLoongArch64Target());
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
index b51c19188051..3e4ca8ed1b78 100644
--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
@@ -1,4 +1,4 @@
-//===- LoongArchAsmPrinter.h - LoongArch LLVM Assembly Printer -*- C++ -*--===//
+//===- LoongArchAsmPrinter.h - LoongArch LLVM Assembly Printer -----------*- C++ -*--===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -13,36 +13,123 @@
 #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H
 #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHASMPRINTER_H
 
+#include "LoongArchMCInstLower.h"
 #include "LoongArchSubtarget.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/Compiler.h"
+#include
+#include
+#include
 
 namespace llvm {
 
+class MCOperand;
+class MCSubtargetInfo;
+class MCSymbol;
+class MachineBasicBlock;
+class MachineConstantPool;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class LoongArchFunctionInfo;
+class LoongArchTargetStreamer;
+class Module;
+class raw_ostream;
+class TargetMachine;
+
 class LLVM_LIBRARY_VISIBILITY LoongArchAsmPrinter : public AsmPrinter {
-  const MCSubtargetInfo *STI;
+  LoongArchTargetStreamer &getTargetStreamer() const;
+
+  void EmitInstrWithMacroNoAT(const MachineInstr *MI);
+
+  //===------------------------------------------------------------------===//
+  // XRay implementation
+  //===------------------------------------------------------------------===//
 public:
-  explicit LoongArchAsmPrinter(TargetMachine &TM,
-                               std::unique_ptr<MCStreamer> Streamer)
-      : AsmPrinter(TM, std::move(Streamer)), STI(TM.getMCSubtargetInfo()) {}
+  // XRay-specific lowering for LoongArch.
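+  // Each PATCHABLE_* pseudo below is emitted as a sled of nops (see EmitSled
+  // in the .cpp file) that the XRay runtime can later patch into a call to
+  // the __xray_FunctionEntry/Exit hooks.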
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); + void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); + void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - StringRef getPassName() const override { - return "LoongArch Assembly Printer"; - } +private: + /// MCP - Keep a pointer to constantpool entries of the current + /// MachineFunction. + const MachineConstantPool *MCP = nullptr; - bool runOnMachineFunction(MachineFunction &MF) override; + /// InConstantPool - Maintain state when emitting a sequence of constant + /// pool entries so we can properly mark them as data regions. + bool InConstantPool = false; - void emitInstruction(const MachineInstr *MI) override; + void EmitSled(const MachineInstr &MI, SledKind Kind); // tblgen'erated function. bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); - // Wrapper needed for tblgenned pseudo lowering. - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { - return lowerLoongArchMachineOperandToMCOperand(MO, MCOp, *this); - } + + // Emit PseudoReturn, PseudoReturn64, PseudoIndirectBranch, + // and PseudoIndirectBranch64 as a JIRL as appropriate + // for the target. + void emitPseudoIndirectBranch(MCStreamer &OutStreamer, + const MachineInstr *MI); + + void emitPseudoTailBranch(MCStreamer &OutStreamer, + const MachineInstr *MI); + + // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + + void emitInlineAsmStart() const override; + + void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const override; + + void EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, unsigned Reg); + + void EmitInstrRegReg(const MCSubtargetInfo &STI, unsigned Opcode, + unsigned Reg1, unsigned Reg2); + + void EmitInstrRegRegReg(const MCSubtargetInfo &STI, unsigned Opcode, + unsigned Reg1, unsigned Reg2, unsigned Reg3); + + void EmitMovFPIntPair(const MCSubtargetInfo &STI, unsigned MovOpc, + unsigned Reg1, unsigned Reg2, unsigned FPReg1, + unsigned FPReg2, bool LE); + + bool isLongBranchPseudo(int Opcode) const; + +public: + const LoongArchSubtarget *Subtarget; + const LoongArchFunctionInfo *LoongArchFI; + LoongArchMCInstLower MCInstLowering; + + explicit LoongArchAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {} + + StringRef getPassName() const override { return "LoongArch Assembly Printer"; } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void emitInstruction(const MachineInstr *MI) override; + const char *getCurrentABIString() const; + void emitFunctionEntryLabel() override; + void emitFunctionBodyStart() override; + void emitFunctionBodyEnd() override; + void emitBasicBlockEnd(const MachineBasicBlock &MBB) override; + bool isBlockOnlyReachableByFallthrough( + const MachineBasicBlock* MBB) const override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + const char *ExtraCode, raw_ostream &O) override; + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O); + void printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O); + void emitStartOfAsmFile(Module &M) override; + void 
PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchCCState.cpp b/llvm/lib/Target/LoongArch/LoongArchCCState.cpp new file mode 100644 index 000000000000..6630ca7598ac --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchCCState.cpp @@ -0,0 +1,165 @@ +//===---- LoongArchCCState.cpp - CCState with LoongArch specific extensions ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoongArchCCState.h" +#include "LoongArchSubtarget.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +/// This function returns true if CallSym is a long double emulation routine. +static bool isF128SoftLibCall(const char *CallSym) { + const char *const LibCalls[] = { + "__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", + "__extendsftf2", "__fixtfdi", "__fixtfsi", "__fixtfti", + "__fixunstfdi", "__fixunstfsi", "__fixunstfti", "__floatditf", + "__floatsitf", "__floattitf", "__floatunditf", "__floatunsitf", + "__floatuntitf", "__getf2", "__gttf2", "__letf2", + "__lttf2", "__multf3", "__netf2", "__powitf2", + "__subtf3", "__trunctfdf2", "__trunctfsf2", "__unordtf2", + "ceill", "copysignl", "cosl", "exp2l", + "expl", "floorl", "fmal", "fmaxl", + "fmodl", "log10l", "log2l", "logl", + "nearbyintl", "powl", "rintl", "roundl", + "sinl", "sqrtl", "truncl"}; + + // Check that LibCalls is sorted alphabetically. + auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; + assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); + return std::binary_search(std::begin(LibCalls), std::end(LibCalls), + CallSym, Comp); +} + +/// This function returns true if Ty is fp128, {f128} or i128 which was +/// originally a fp128. +static bool originalTypeIsF128(const Type *Ty, const char *Func) { + if (Ty->isFP128Ty()) + return true; + + if (Ty->isStructTy() && Ty->getStructNumElements() == 1 && + Ty->getStructElementType(0)->isFP128Ty()) + return true; + + // If the Ty is i128 and the function being called is a long double emulation + // routine, then the original type is f128. + return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func)); +} + +/// Return true if the original type was vXfXX. +static bool originalEVTTypeIsVectorFloat(EVT Ty) { + if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint()) + return true; + + return false; +} + +/// Return true if the original type was vXfXX / vXfXX. 
+static bool originalTypeIsVectorFloat(const Type * Ty) { + if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy()) + return true; + + return false; +} + +LoongArchCCState::SpecialCallingConvType +LoongArchCCState::getSpecialCallingConvForCallee(const SDNode *Callee, + const LoongArchSubtarget &Subtarget) { + LoongArchCCState::SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv; + return SpecialCallingConv; +} + +void LoongArchCCState::PreAnalyzeCallResultForF128( + const SmallVectorImpl &Ins, + const Type *RetTy, const char *Call) { + for (unsigned i = 0; i < Ins.size(); ++i) { + OriginalArgWasF128.push_back( + originalTypeIsF128(RetTy, Call)); + OriginalArgWasFloat.push_back(RetTy->isFloatingPointTy()); + } +} + +/// Identify lowered values that originated from f128 or float arguments and +/// record this for use by RetCC_LoongArchLP64LPX32. +void LoongArchCCState::PreAnalyzeReturnForF128( + const SmallVectorImpl &Outs) { + const MachineFunction &MF = getMachineFunction(); + for (unsigned i = 0; i < Outs.size(); ++i) { + OriginalArgWasF128.push_back( + originalTypeIsF128(MF.getFunction().getReturnType(), nullptr)); + OriginalArgWasFloat.push_back( + MF.getFunction().getReturnType()->isFloatingPointTy()); + } +} + +/// Identify lower values that originated from vXfXX and record +/// this. +void LoongArchCCState::PreAnalyzeCallResultForVectorFloat( + const SmallVectorImpl &Ins, const Type *RetTy) { + for (unsigned i = 0; i < Ins.size(); ++i) { + OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy)); + } +} + +/// Identify lowered values that originated from vXfXX arguments and record +/// this. +void LoongArchCCState::PreAnalyzeReturnForVectorFloat( + const SmallVectorImpl &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) { + ISD::OutputArg Out = Outs[i]; + OriginalRetWasFloatVector.push_back( + originalEVTTypeIsVectorFloat(Out.ArgVT)); + } +} + +/// Identify lowered values that originated from f128, float and sret to vXfXX +/// arguments and record this. +void LoongArchCCState::PreAnalyzeCallOperands( + const SmallVectorImpl &Outs, + std::vector &FuncArgs, + const char *Func) { + for (unsigned i = 0; i < Outs.size(); ++i) { + TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; + + OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func)); + OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); + OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); + CallOperandIsFixed.push_back(Outs[i].IsFixed); + } +} + +/// Identify lowered values that originated from f128, float and vXfXX arguments +/// and record this. +void LoongArchCCState::PreAnalyzeFormalArgumentsForF128( + const SmallVectorImpl &Ins) { + const MachineFunction &MF = getMachineFunction(); + for (unsigned i = 0; i < Ins.size(); ++i) { + Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); + + // SRet arguments cannot originate from f128 or {f128} returns so we just + // push false. We have to handle this specially since SRet arguments + // aren't mapped to an original argument. 
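+    // (An sret argument is the implicit struct-return pointer inserted by the
+    // compiler, so it can never itself be an f128 or a float.)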
+ if (Ins[i].Flags.isSRet()) { + OriginalArgWasF128.push_back(false); + OriginalArgWasFloat.push_back(false); + OriginalArgWasFloatVector.push_back(false); + continue; + } + + assert(Ins[i].getOrigArgIndex() < MF.getFunction().arg_size()); + std::advance(FuncArg, Ins[i].getOrigArgIndex()); + + OriginalArgWasF128.push_back( + originalTypeIsF128(FuncArg->getType(), nullptr)); + OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); + + // The LoongArch vector ABI exhibits a corner case of sorts or quirk; if the + // first argument is actually an SRet pointer to a vector, then the next + // argument slot is $a2. + OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); + } +} diff --git a/llvm/lib/Target/LoongArch/LoongArchCCState.h b/llvm/lib/Target/LoongArch/LoongArchCCState.h new file mode 100644 index 000000000000..1c1a1446efba --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchCCState.h @@ -0,0 +1,165 @@ +//===---- LoongArchCCState.h - CCState with LoongArch specific extensions -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LoongArchCCSTATE_H +#define LoongArchCCSTATE_H + +#include "LoongArchISelLowering.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CallingConvLower.h" + +namespace llvm { +class SDNode; +class LoongArchSubtarget; + +class LoongArchCCState : public CCState { +public: + enum SpecialCallingConvType { NoSpecialCallingConv }; + + /// Determine the SpecialCallingConvType for the given callee + static SpecialCallingConvType + getSpecialCallingConvForCallee(const SDNode *Callee, + const LoongArchSubtarget &Subtarget); + +private: + /// Identify lowered values that originated from f128 arguments and record + /// this for use by RetCC_LoongArchLP64LPX32. + void PreAnalyzeCallResultForF128(const SmallVectorImpl &Ins, + const Type *RetTy, const char * Func); + + /// Identify lowered values that originated from f128 arguments and record + /// this for use by RetCC_LoongArchLP64LPX32. + void PreAnalyzeReturnForF128(const SmallVectorImpl &Outs); + + /// Identify lowered values that originated from f128 arguments and record + /// this. + void + PreAnalyzeCallOperands(const SmallVectorImpl &Outs, + std::vector &FuncArgs, + const char *Func); + + /// Identify lowered values that originated from f128 arguments and record + /// this for use by RetCC_LoongArchLP64LPX32. + void + PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl &Ins); + + void + PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl &Ins, + const Type *RetTy); + + void PreAnalyzeFormalArgumentsForVectorFloat( + const SmallVectorImpl &Ins); + + void + PreAnalyzeReturnForVectorFloat(const SmallVectorImpl &Outs); + + /// Records whether the value has been lowered from an f128. + SmallVector OriginalArgWasF128; + + /// Records whether the value has been lowered from float. + SmallVector OriginalArgWasFloat; + + /// Records whether the value has been lowered from a floating point vector. + SmallVector OriginalArgWasFloatVector; + + /// Records whether the return value has been lowered from a floating point + /// vector. + SmallVector OriginalRetWasFloatVector; + + /// Records whether the value was a fixed argument. 
+ /// See ISD::OutputArg::IsFixed, + SmallVector CallOperandIsFixed; + + // FIXME: This should probably be a fully fledged calling convention. + SpecialCallingConvType SpecialCallingConv; + +public: + LoongArchCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + SmallVectorImpl &locs, LLVMContext &C, + SpecialCallingConvType SpecialCC = NoSpecialCallingConv) + : CCState(CC, isVarArg, MF, locs, C), SpecialCallingConv(SpecialCC) {} + + void + AnalyzeCallOperands(const SmallVectorImpl &Outs, + CCAssignFn Fn, + std::vector &FuncArgs, + const char *Func) { + PreAnalyzeCallOperands(Outs, FuncArgs, Func); + CCState::AnalyzeCallOperands(Outs, Fn); + OriginalArgWasF128.clear(); + OriginalArgWasFloat.clear(); + OriginalArgWasFloatVector.clear(); + CallOperandIsFixed.clear(); + } + + // The AnalyzeCallOperands in the base class is not usable since we must + // provide a means of accessing ArgListEntry::IsFixed. Delete them from this + // class. This doesn't stop them being used via the base class though. + void AnalyzeCallOperands(const SmallVectorImpl &Outs, + CCAssignFn Fn) = delete; + void AnalyzeCallOperands(const SmallVectorImpl &Outs, + SmallVectorImpl &Flags, + CCAssignFn Fn) = delete; + + void AnalyzeFormalArguments(const SmallVectorImpl &Ins, + CCAssignFn Fn) { + PreAnalyzeFormalArgumentsForF128(Ins); + CCState::AnalyzeFormalArguments(Ins, Fn); + OriginalArgWasFloat.clear(); + OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); + } + + void AnalyzeCallResult(const SmallVectorImpl &Ins, + CCAssignFn Fn, const Type *RetTy, + const char *Func) { + PreAnalyzeCallResultForF128(Ins, RetTy, Func); + PreAnalyzeCallResultForVectorFloat(Ins, RetTy); + CCState::AnalyzeCallResult(Ins, Fn); + OriginalArgWasFloat.clear(); + OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); + } + + void AnalyzeReturn(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + PreAnalyzeReturnForF128(Outs); + PreAnalyzeReturnForVectorFloat(Outs); + CCState::AnalyzeReturn(Outs, Fn); + OriginalArgWasFloat.clear(); + OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); + } + + bool CheckReturn(const SmallVectorImpl &ArgsFlags, + CCAssignFn Fn) { + PreAnalyzeReturnForF128(ArgsFlags); + PreAnalyzeReturnForVectorFloat(ArgsFlags); + bool Return = CCState::CheckReturn(ArgsFlags, Fn); + OriginalArgWasFloat.clear(); + OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); + return Return; + } + + bool WasOriginalArgF128(unsigned ValNo) { return OriginalArgWasF128[ValNo]; } + bool WasOriginalArgFloat(unsigned ValNo) { + return OriginalArgWasFloat[ValNo]; + } + bool WasOriginalArgVectorFloat(unsigned ValNo) const { + return OriginalArgWasFloatVector[ValNo]; + } + bool WasOriginalRetVectorFloat(unsigned ValNo) const { + return OriginalRetWasFloatVector[ValNo]; + } + bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; } + SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; } +}; +} + +#endif diff --git a/llvm/lib/Target/LoongArch/LoongArchCallingConv.td b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td index 9844163163a5..e8564e85bb0a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchCallingConv.td +++ b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td @@ -1,23 +1,310 @@ -//=- LoongArchCallingConv.td - Calling Conventions LoongArch -*- tablegen -*-=// +//===-- LoongArchCallingConv.td - Calling Conventions for LoongArch --*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+// This describes the calling conventions for the LoongArch architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A, string Invert = "">
+    : CCIf<!strconcat(Invert,
+                      "static_cast<const LoongArchSubtarget&>"
+                      "(State.getMachineFunction().getSubtarget()).",
+                      F),
+           A>;
+
+// The inverse of CCIfSubtarget
+class CCIfSubtargetNot<string F, CCAction A> : CCIfSubtarget<F, A, "!">;
+
+/// Match if the original argument (before lowering) was a float.
+/// For example, this is true for i32's that were lowered from soft-float.
+class CCIfOrigArgWasNotFloat<CCAction A>
+    : CCIf<"!static_cast<LoongArchCCState *>(&State)->WasOriginalArgFloat(ValNo)",
+           A>;
+
+/// Match if the original argument (before lowering) was a 128-bit float (i.e.
+/// long double).
+class CCIfOrigArgWasF128<CCAction A>
+    : CCIf<"static_cast<LoongArchCCState *>(&State)->WasOriginalArgF128(ValNo)", A>;
+
+/// Match if this specific argument is a vararg.
+/// This is slightly different from CCIfIsVarArg, which matches if any argument
+/// is a vararg.
+class CCIfArgIsVarArg<CCAction A>
+    : CCIf<"!static_cast<LoongArchCCState *>(&State)->IsCallOperandFixed(ValNo)", A>;
+
+/// Match if the return was a floating point vector.
+class CCIfOrigArgWasNotVectorFloat<CCAction A>
+    : CCIf<"!static_cast<LoongArchCCState *>(&State)"
+           "->WasOriginalRetVectorFloat(ValNo)", A>;
+
+/// Match if the special calling conv is the specified value.
+class CCIfSpecialCallingConv<string CC, CCAction A>
+    : CCIf<"static_cast<LoongArchCCState *>(&State)->getSpecialCallingConv() == "
+           "LoongArchCCState::" # CC, A>;
+
+// For soft-float, f128 values are returned in A0_64 rather than V1_64.
+def RetCC_F128SoftFloat : CallingConv<[
+  CCAssignToReg<[A0_64, A1_64]>
+]>;
+
 //
-// This describes the calling conventions for the LoongArch architecture.
-//
+// For hard-float, f128 values are returned as a pair of f64's rather than a
+// pair of i64's.
+def RetCC_F128HardFloat : CallingConv<[
+  //CCBitConvertToType<f128>,
+
+  // Contrary to the ABI documentation, a struct containing a long double is
+  // returned in $f0, and $f1 instead of the usual $f0, and $f2. This is to
+  // match the de facto ABI as implemented by GCC.
+  CCIfInReg<CCAssignToReg<[F0_64, F1_64]>>,
+
+  CCAssignToReg<[A0_64, A1_64]>
+]>;
+
+// Handle F128 specially since we can't identify the original type in the
+// tablegen-erated code.
+def RetCC_F128 : CallingConv<[
+  CCIfSubtarget<"useSoftFloat()",
+                CCIfType<[i64], CCDelegateTo<RetCC_F128SoftFloat>>>,
+  CCIfSubtargetNot<"useSoftFloat()",
+                   CCIfType<[i64], CCDelegateTo<RetCC_F128HardFloat>>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// LoongArch LP32 Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_LoongArchLP32 : CallingConv<[
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // i32 and f32 values get stored in stack slots that are 4 bytes in
+  // size and 4-byte aligned.
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+  // f64 values get stored in stack slots that are 8 bytes in
+  // size and 8-byte aligned.
+  CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+// Only the return rules are defined here for LP32. The rules for argument
+// passing are defined in LoongArchISelLowering.cpp.
+def RetCC_LoongArchLP32 : CallingConv<[
+  // Promote i1/i8/i16 return values to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // i32 are returned in registers V0, V1, A0, A1, unless the original return
+  // type was a vector of floats.
+ CCIfOrigArgWasNotVectorFloat>>, + + // f32 are returned in registers F0, F2 + CCIfType<[f32], CCAssignToReg<[F0, F1]>>, + + // f64 arguments are returned in F0_64 and F2_64 in FP64bit mode or + // in F0 and F1 in FP32bit mode. + CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[F0_64, F1_64]>>> +]>; + +def CC_LoongArchLP32_FP32 : CustomCallingConv; +def CC_LoongArchLP32_FP64 : CustomCallingConv; +def CC_LoongArch_F128 : CustomCallingConv; + +def CC_LoongArchLP32_FP : CallingConv<[ + CCIfSubtargetNot<"isFP64bit()", CCDelegateTo>, + CCIfSubtarget<"isFP64bit()", CCDelegateTo> +]>; + +//===----------------------------------------------------------------------===// +// LoongArch LPX32/LP64 Calling Convention +//===----------------------------------------------------------------------===// + +def CC_LoongArchLP64LPX32_SoftFloat : CallingConv<[ + CCAssignToReg<[A0, A1, A2, A3, + A4, A5, A6, A7]>, + CCAssignToStack<4, 8> +]>; + +def CC_LoongArchLP64LPX32 : CallingConv<[ + + // All integers (except soft-float integers) are promoted to 64-bit. + CCIfType<[i8, i16, i32], CCIfOrigArgWasNotFloat>>, + + // The only i32's we have left are soft-float arguments. + CCIfSubtarget<"useSoftFloat()", CCIfType<[i32], CCDelegateTo>>, + + // Integer arguments are passed in integer registers. + //CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64, + // A4_64, A5_64, A6_64, A7_64], + // [F0_64, F1_64, F2_64, F3_64, + // F4_64, F5_64, F6_64, F7_64]>>, + CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>, + + // f32 arguments are passed in single precision FP registers. + CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, + F4, F5, F6, F7]>>, + + // f64 arguments are passed in double precision FP registers. + CCIfType<[f64], CCAssignToReg<[F0_64, F1_64, F2_64, F3_64, + F4_64, F5_64, F6_64, F7_64]>>, + + // others f32 arguments are passed in single precision FP registers. + CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>, + + // others f64 arguments are passed in double precision FP registers. + CCIfType<[f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>, + + CCIfSubtarget<"hasLSX()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToRegWithShadow<[VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7], + [A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>>, + CCIfSubtarget<"hasLASX()", + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCAssignToRegWithShadow<[XR0, XR1, XR2, XR3, XR4, XR5, XR6, XR7], + [A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>>, + + // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. + CCIfType<[f32], CCAssignToStack<4, 8>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>>, + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToStack<16, 16>>, + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCAssignToStack<32, 32>> +]>; + +// LPX32/LP64 variable arguments. +// All arguments are passed in integer registers. +def CC_LoongArchLP64LPX32_VarArg : CallingConv<[ + // All integers are promoted to 64-bit. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, A4, A5, A6, A7]>>, + + CCIfType<[i64], CCIfOrigArgWasF128>>, + + CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, + A4_64, A5_64, A6_64, A7_64]>>, + + // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. 
+ CCIfType<[f32], CCAssignToStack<4, 8>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +def RetCC_LoongArchLP64LPX32 : CallingConv<[ + // f128 needs to be handled similarly to f32 and f64. However, f128 is not + // legal and is lowered to i128 which is further lowered to a pair of i64's. + // This presents us with a problem for the calling convention since hard-float + // still needs to pass them in FPU registers, and soft-float needs to use $v0, + // and $a0 instead of the usual $v0, and $v1. We therefore resort to a + // pre-analyze (see PreAnalyzeReturnForF128()) step to pass information on + // whether the result was originally an f128 into the tablegen-erated code. + // + // f128 should only occur for the LP64 ABI where long double is 128-bit. On + // LPX32, long double is equivalent to double. + CCIfType<[i64], CCIfOrigArgWasF128>>, + + CCIfType<[i8, i16, i32, i64], CCIfInReg>>, + + // i64 are returned in registers V0_64, V1_64 + CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>, + + CCIfSubtarget<"hasLSX()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[VR0]>>>, + + CCIfSubtarget<"hasLASX()", + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCAssignToReg<[XR0]>>>, + + CCIfSubtarget<"hasLASX()", + CCIfType<[i64], CCAssignToReg<[A0_64, A1_64]>>>, + + // f32 are returned in registers F0, F2 + CCIfType<[f32], CCAssignToReg<[F0, F1]>>, + + // f64 are returned in registers D0, D2 + CCIfType<[f64], CCAssignToReg<[F0_64, F1_64]>> +]>; + //===----------------------------------------------------------------------===// +// LoongArch Calling Convention Dispatch +//===----------------------------------------------------------------------===// + +def RetCC_LoongArch : CallingConv<[ + CCIfSubtarget<"isABI_LPX32()", CCDelegateTo>, + CCIfSubtarget<"isABI_LP64()", CCDelegateTo>, + CCDelegateTo +]>; + +def CC_LoongArch_ByVal : CallingConv<[ + CCIfSubtarget<"isABI_LP32()", CCIfByVal>>, + CCIfByVal> +]>; + +def CC_LoongArch_FixedArg : CallingConv<[ + CCIfByVal>, + //CCIfByVal>>, + + // f128 needs to be handled similarly to f32 and f64 on hard-float. However, + // f128 is not legal and is lowered to i128 which is further lowered to a pair + // of i64's. + // This presents us with a problem for the calling convention since hard-float + // still needs to pass them in FPU registers. We therefore resort to a + // pre-analyze (see PreAnalyzeFormalArgsForF128()) step to pass information on + // whether the argument was originally an f128 into the tablegen-erated code. + // + // f128 should only occur for the LP64 ABI where long double is 128-bit. On + // LPX32, long double is equivalent to double. + CCIfType<[i64], + CCIfSubtargetNot<"useSoftFloat()", + CCIfOrigArgWasF128>>>, + + CCIfSubtarget<"isABI_LP32()", CCDelegateTo>, + CCDelegateTo +]>; + +def CC_LoongArch_VarArg : CallingConv<[ + CCIfByVal>, + + CCIfSubtarget<"isABI_LP32()", CCDelegateTo>, + CCDelegateTo +]>; + +def CC_LoongArch : CallingConv<[ + CCIfVarArg>>, + CCDelegateTo +]>; + +//===----------------------------------------------------------------------===// +// Callee-saved register lists. 
+//===----------------------------------------------------------------------===//
+
+def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 24), RA, FP,
+                                               (sequence "S%u", 8, 0))>;
+
+//def CSR_LP32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
+//                                         (sequence "S%u", 8, 0))> {
+//  let OtherPreserved = (add (decimate (sequence "F%u", 30, 20), 2));
+//}
-def CSR_ILP32S_LP64S
-    : CalleeSavedRegs<(add R1, (sequence "R%u", 22, 31))>;
+def CSR_LP32 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA, FP,
+                                    (sequence "S%u", 8, 0))>;
-def CSR_ILP32F_LP64F
-    : CalleeSavedRegs<(add CSR_ILP32S_LP64S, (sequence "F%u", 24, 31))>;
+//def CSR_LP32_FP64 :
+//  CalleeSavedRegs<(add (decimate (sequence "D%u_64", 30, 20), 2), RA, FP,
+//                       (sequence "S%u", 8, 0))>;
-def CSR_ILP32D_LP64D
-    : CalleeSavedRegs<(add CSR_ILP32S_LP64S, (sequence "F%u_64", 24, 31))>;
+def CSR_LPX32 : CalleeSavedRegs<(add F20_64, F22_64, F24_64, F26_64, F28_64,
+                                     F30_64, RA_64, FP_64,
+                                     (sequence "S%u_64", 8, 0))>;
-// Needed for implementation of LoongArchRegisterInfo::getNoPreservedMask()
-def CSR_NoRegs : CalleeSavedRegs<(add)>;
+//def CSR_LP64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, SP_64, FP_64,
+def CSR_LP64 : CalleeSavedRegs<(add (sequence "F%u_64", 31, 24), RA_64, FP_64,
+                                    (sequence "S%u_64", 8, 0))>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp
new file mode 100644
index 000000000000..2e13e5b832d4
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp
@@ -0,0 +1,2482 @@
+//===-- LoongArchExpandPseudo.cpp - Expand pseudo instructions -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+// This is currently only used for expanding atomic pseudos after register
+// allocation. We do this to avoid the fast register allocator introducing
+// spills between ll and sc. These spill stores cause some LoongArch
+// implementations to abort the atomic RMW sequence.
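+//
+// For example, an ATOMIC_CMP_SWAP_I32_POSTRA pseudo is expanded below (see
+// expandAtomicCmpSwap) into an ll.w/sc.w retry loop with no spill code in
+// between, followed by a dbar barrier in the exit block.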
+// +//===----------------------------------------------------------------------===// + +#include "LoongArch.h" +#include "LoongArchInstrInfo.h" +#include "LoongArchSubtarget.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-pseudo" + +namespace { + class LoongArchExpandPseudo : public MachineFunctionPass { + public: + static char ID; + LoongArchExpandPseudo() : MachineFunctionPass(ID) {} + + const LoongArchInstrInfo *TII; + const LoongArchSubtarget *STI; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + StringRef getPassName() const override { + return "LoongArch pseudo instruction expansion pass"; + } + + private: + bool expandAtomicCmpSwap(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + + bool expandAtomicBinOp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, unsigned Size); + bool expandXINSERT_BOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandINSERT_HOp(MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandXINSERT_FWOp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandAtomicBinOpSubword(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + + bool expandPseudoCall(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + bool expandPseudoTailCall(MachineBasicBlock &BB, + MachineBasicBlock::iterator I); + + bool expandPseudoTEQ(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + + bool expandLoadAddr(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI); + + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB); + bool expandMBB(MachineBasicBlock &MBB); + }; + char LoongArchExpandPseudo::ID = 0; +} + +bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC; + unsigned ZERO = LoongArch::ZERO; + unsigned BNE = LoongArch::BNE32; + unsigned BEQ = LoongArch::BEQ32; + unsigned SEOp = + I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ? 
LoongArch::EXT_W_B32 : LoongArch::EXT_W_H32;
+
+  LL = LoongArch::LL_W;
+  SC = LoongArch::SC_W;
+
+  unsigned Dest = I->getOperand(0).getReg();
+  unsigned Ptr = I->getOperand(1).getReg();
+  unsigned Mask = I->getOperand(2).getReg();
+  unsigned ShiftCmpVal = I->getOperand(3).getReg();
+  unsigned Mask2 = I->getOperand(4).getReg();
+  unsigned ShiftNewVal = I->getOperand(5).getReg();
+  unsigned ShiftAmnt = I->getOperand(6).getReg();
+  unsigned Scratch = I->getOperand(8).getReg();
+  unsigned Scratch2 = I->getOperand(9).getReg();
+
+  // Insert new blocks after the current block.
+  const BasicBlock *LLVM_BB = BB.getBasicBlock();
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++BB.getIterator();
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, sinkMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), &BB,
+                  std::next(MachineBasicBlock::iterator(I)), BB.end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
+
+  // thisMBB:
+  //   ...
+  //   fallthrough --> loop1MBB
+  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
+  loop1MBB->addSuccessor(sinkMBB);
+  loop1MBB->addSuccessor(loop2MBB);
+  loop1MBB->normalizeSuccProbs();
+  loop2MBB->addSuccessor(loop1MBB);
+  loop2MBB->addSuccessor(sinkMBB);
+  loop2MBB->normalizeSuccProbs();
+  sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne());
+
+  // loop1MBB:
+  //   ll dest, 0(ptr)
+  //   and Mask', dest, Mask
+  //   bne Mask', ShiftCmpVal, sinkMBB
+  BuildMI(loop1MBB, DL, TII->get(LL), Scratch).addReg(Ptr).addImm(0);
+  BuildMI(loop1MBB, DL, TII->get(LoongArch::AND32), Scratch2)
+      .addReg(Scratch)
+      .addReg(Mask);
+  BuildMI(loop1MBB, DL, TII->get(BNE))
+      .addReg(Scratch2).addReg(ShiftCmpVal).addMBB(sinkMBB);
+
+  // loop2MBB:
+  //   and dest, dest, mask2
+  //   or dest, dest, ShiftNewVal
+  //   sc dest, dest, 0(ptr)
+  //   beq dest, $0, loop1MBB
+  BuildMI(loop2MBB, DL, TII->get(LoongArch::AND32), Scratch)
+      .addReg(Scratch, RegState::Kill)
+      .addReg(Mask2);
+  BuildMI(loop2MBB, DL, TII->get(LoongArch::OR32), Scratch)
+      .addReg(Scratch, RegState::Kill)
+      .addReg(ShiftNewVal);
+  BuildMI(loop2MBB, DL, TII->get(SC), Scratch)
+      .addReg(Scratch, RegState::Kill)
+      .addReg(Ptr)
+      .addImm(0);
+  BuildMI(loop2MBB, DL, TII->get(BEQ))
+      .addReg(Scratch, RegState::Kill)
+      .addReg(ZERO)
+      .addMBB(loop1MBB);
+
+  // sinkMBB:
+  //   srl srlres, Mask', shiftamt
+  //   sign_extend dest, srlres
+  BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest)
+      .addReg(Scratch2)
+      .addReg(ShiftAmnt);
+
+  BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest);
+
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(I->getOperand(7).getImm());
+  int hint;
+  switch (Ordering) {
+  case AtomicOrdering::Acquire:
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    // acquire
+    hint = 0b10100;
+    break;
+  default:
+    hint = 0x700;
+  }
+  MachineBasicBlock::iterator Pos = sinkMBB->begin();
+  BuildMI(*sinkMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(hint);
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *loop1MBB);
+  computeAndAddLiveIns(LiveRegs, *loop2MBB);
+  computeAndAddLiveIns(LiveRegs, *sinkMBB);
+  computeAndAddLiveIns(LiveRegs, *exitMBB);
+
+  NMBBI = BB.end();
+  I->eraseFromParent();
+  return true;
+}
+
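+// Expand ATOMIC_CMP_SWAP_I32/I64_POSTRA into an ll/sc retry loop. A sketch of
+// the sequence this emits (word-sized case):
+//
+//   loop1: ll.w  dest, 0(ptr)
+//          bne   dest, oldval, exit
+//   loop2: move  scratch, newval
+//          sc.w  scratch, 0(ptr)
+//          beq   scratch, $zero, loop1
+//   exit:  dbar  hint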
+bool LoongArchExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB,
+                                                MachineBasicBlock::iterator I,
+                                                MachineBasicBlock::iterator &NMBBI) {
+
+  const unsigned Size =
+      I->getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA ? 4 : 8;
+  MachineFunction *MF = BB.getParent();
+
+  DebugLoc DL = I->getDebugLoc();
+
+  unsigned LL, SC, ZERO, BNE, BEQ, MOVE;
+
+  if (Size == 4) {
+    LL = LoongArch::LL_W;
+    SC = LoongArch::SC_W;
+    BNE = LoongArch::BNE32;
+    BEQ = LoongArch::BEQ32;
+
+    ZERO = LoongArch::ZERO;
+    MOVE = LoongArch::OR32;
+  } else {
+    LL = LoongArch::LL_D;
+    SC = LoongArch::SC_D;
+    ZERO = LoongArch::ZERO_64;
+    BNE = LoongArch::BNE;
+    BEQ = LoongArch::BEQ;
+    MOVE = LoongArch::OR;
+  }
+
+  unsigned Dest = I->getOperand(0).getReg();
+  unsigned Ptr = I->getOperand(1).getReg();
+  unsigned OldVal = I->getOperand(2).getReg();
+  unsigned NewVal = I->getOperand(3).getReg();
+  unsigned Scratch = I->getOperand(5).getReg();
+
+  // Insert new blocks after the current block.
+  const BasicBlock *LLVM_BB = BB.getBasicBlock();
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++BB.getIterator();
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), &BB,
+                  std::next(MachineBasicBlock::iterator(I)), BB.end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
+
+  // thisMBB:
+  //   ...
+  //   fallthrough --> loop1MBB
+  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
+  loop1MBB->addSuccessor(exitMBB);
+  loop1MBB->addSuccessor(loop2MBB);
+  loop1MBB->normalizeSuccProbs();
+  loop2MBB->addSuccessor(loop1MBB);
+  loop2MBB->addSuccessor(exitMBB);
+  loop2MBB->normalizeSuccProbs();
+
+  // loop1MBB:
+  //   ll dest, 0(ptr)
+  //   bne dest, oldval, exitMBB
+  BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+  BuildMI(loop1MBB, DL, TII->get(BNE))
+      .addReg(Dest, RegState::Kill).addReg(OldVal).addMBB(exitMBB);
+
+  // loop2MBB:
+  //   move scratch, NewVal
+  //   sc Scratch, Scratch, 0(ptr)
+  //   beq Scratch, $0, loop1MBB
+  BuildMI(loop2MBB, DL, TII->get(MOVE), Scratch).addReg(NewVal).addReg(ZERO);
+  BuildMI(loop2MBB, DL, TII->get(SC), Scratch)
+      .addReg(Scratch).addReg(Ptr).addImm(0);
+  BuildMI(loop2MBB, DL, TII->get(BEQ))
+      .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB);
+
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(I->getOperand(4).getImm());
+  int hint;
+  switch (Ordering) {
+  case AtomicOrdering::Acquire:
+  case AtomicOrdering::AcquireRelease:
+  case AtomicOrdering::SequentiallyConsistent:
+    // TODO: acquire
+    hint = 0;
+    break;
+  default:
+    hint = 0x700;
+  }
+  MachineBasicBlock::iterator Pos = exitMBB->begin();
+  BuildMI(*exitMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(hint);
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *loop1MBB);
+  computeAndAddLiveIns(LiveRegs, *loop2MBB);
+  computeAndAddLiveIns(LiveRegs, *exitMBB);
+
+  NMBBI = BB.end();
+  I->eraseFromParent();
+  return true;
+}
+
+bool LoongArchExpandPseudo::expandXINSERT_FWOp(
+    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
+    MachineBasicBlock::iterator &NMBBI) {
+
+  MachineFunction *MF = BB.getParent();
+
+  DebugLoc DL = I->getDebugLoc();
+
+  unsigned isGP64 = 0;
+  switch (I->getOpcode()) {
+  case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA:
+    isGP64 = 1;
+    break;
+  case
LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA: + break; + default: + llvm_unreachable("Unknown subword vector pseudo for expansion!"); + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned SrcVecReg = I->getOperand(1).getReg(); + unsigned LaneReg = I->getOperand(2).getReg(); + unsigned SrcValReg = I->getOperand(3).getReg(); + + unsigned Dsttmp = I->getOperand(4).getReg(); + unsigned RI = I->getOperand(5).getReg(); + unsigned RJ = I->getOperand(6).getReg(); + Dsttmp = SrcVecReg; + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *blocks[11]; + MachineFunction::iterator It = ++BB.getIterator(); + for (int i = 0; i < 11; i++) { + blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, blocks[i]); + } + + MachineBasicBlock *mainMBB = blocks[0]; + MachineBasicBlock *FirstMBB = blocks[1]; + MachineBasicBlock *sinkMBB = blocks[9]; + MachineBasicBlock *exitMBB = blocks[10]; + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(mainMBB, BranchProbability::getOne()); + for (int i = 1; i < 9; i++) { + mainMBB->addSuccessor(blocks[i]); + blocks[i]->addSuccessor(sinkMBB); + } + + unsigned ADDI, BLT, ZERO; + ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; + BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; + ZERO = isGP64 ? LoongArch::ZERO_64 : LoongArch::ZERO; + + for (int i = 1; i < 8; i++) { + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); + BuildMI(mainMBB, DL, TII->get(BLT)) + .addReg(LaneReg) + .addReg(RI) + .addMBB(blocks[i + 1]); + } + + BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); + + BuildMI(FirstMBB, DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) + .addReg(SrcVecReg) + .addReg(RJ) + .addImm(7); + BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + for (int i = 0; i < 7; i++) { + BuildMI(blocks[i + 2], DL, TII->get(LoongArch::XVINSGR2VR_W), Dsttmp) + .addReg(SrcVecReg) + .addReg(RJ) + .addImm(i); + BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + } + + sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) + .addReg(Dsttmp) + .addImm(0); + + LivePhysRegs LiveRegs; + for (int i = 0; i < 11; i++) { + computeAndAddLiveIns(LiveRegs, *blocks[i]); + } + + NMBBI = BB.end(); + I->eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandINSERT_HOp( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + + unsigned isGP64 = 0; + switch (I->getOpcode()) { + case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA: + isGP64 = 1; + break; + default: + llvm_unreachable("Unknown subword vector pseudo for expansion!"); + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned SrcVecReg = I->getOperand(1).getReg(); + unsigned LaneReg = I->getOperand(2).getReg(); + unsigned SrcValReg = I->getOperand(3).getReg(); + + unsigned Dsttmp = I->getOperand(4).getReg(); + unsigned RI = I->getOperand(5).getReg(); + Dsttmp = SrcVecReg; + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *blocks[11]; + MachineFunction::iterator It = ++BB.getIterator(); + for (int i = 0; i < 11; i++) { + blocks[i] = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, blocks[i]); + } + + MachineBasicBlock *mainMBB = blocks[0]; + MachineBasicBlock *FirstMBB = blocks[1]; + MachineBasicBlock *sinkMBB = blocks[9]; + 
MachineBasicBlock *exitMBB = blocks[10]; + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(mainMBB, BranchProbability::getOne()); + for (int i = 1; i < 9; i++) { + mainMBB->addSuccessor(blocks[i]); + blocks[i]->addSuccessor(sinkMBB); + } + + unsigned ADDI, BLT, ZERO; + ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; + BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; + ZERO = isGP64 ? LoongArch::ZERO_64 : LoongArch::ZERO; + + for (int i = 1; i < 8; i++) { + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(i); + BuildMI(mainMBB, DL, TII->get(BLT)) + .addReg(LaneReg) + .addReg(RI) + .addMBB(blocks[i + 1]); + } + + BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(FirstMBB); + + BuildMI(FirstMBB, DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) + .addReg(SrcVecReg) + .addReg(SrcValReg) + .addImm(7); + BuildMI(FirstMBB, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + for (int i = 0; i < 7; i++) { + BuildMI(blocks[i + 2], DL, TII->get(LoongArch::VINSGR2VR_H), Dsttmp) + .addReg(SrcVecReg) + .addReg(SrcValReg) + .addImm(i); + BuildMI(blocks[i + 2], DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + } + + sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + BuildMI(sinkMBB, DL, TII->get(LoongArch::VORI_B), Dest) + .addReg(Dsttmp) + .addImm(0); + + LivePhysRegs LiveRegs; + for (int i = 0; i < 11; i++) { + computeAndAddLiveIns(LiveRegs, *blocks[i]); + } + + NMBBI = BB.end(); + I->eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandXINSERT_BOp( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + + unsigned isGP64 = 0; + switch (I->getOpcode()) { + case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA: + isGP64 = 1; + break; + case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA: + break; + default: + llvm_unreachable("Unknown subword vector pseudo for expansion!"); + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned SrcVecReg = I->getOperand(1).getReg(); + unsigned LaneReg = I->getOperand(2).getReg(); + unsigned SrcValReg = I->getOperand(3).getReg(); + + unsigned R4r = I->getOperand(5).getReg(); + unsigned Rib = I->getOperand(6).getReg(); + unsigned Ris = I->getOperand(7).getReg(); + unsigned R7b1 = I->getOperand(8).getReg(); + unsigned R7b2 = I->getOperand(9).getReg(); + unsigned R7b3 = I->getOperand(10).getReg(); + unsigned R7r80_3 = I->getOperand(11).getReg(); + unsigned R7r80l_3 = I->getOperand(12).getReg(); + unsigned R7r81_3 = I->getOperand(13).getReg(); + unsigned R7r81l_3 = I->getOperand(14).getReg(); + unsigned R7r82_3 = I->getOperand(15).getReg(); + unsigned R7r82l_3 = I->getOperand(16).getReg(); + unsigned RI = I->getOperand(17).getReg(); + unsigned tmp_Dst73 = I->getOperand(18).getReg(); + unsigned Rimm = I->getOperand(19).getReg(); + unsigned R70 = I->getOperand(20).getReg(); + tmp_Dst73 = SrcVecReg; + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SevenMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB = 
MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ZeroMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *OneMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TwoMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ThreeMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FourMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *FiveMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB0 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB1 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB2 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SixMBB3 = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, mainMBB); + MF->insert(It, SevenMBB); + MF->insert(It, SevenMBB3); + MF->insert(It, SevenMBB0); + MF->insert(It, SevenMBB1); + MF->insert(It, SevenMBB2); + MF->insert(It, ZeroMBB); + MF->insert(It, ZeroMBB3); + MF->insert(It, ZeroMBB0); + MF->insert(It, ZeroMBB1); + MF->insert(It, ZeroMBB2); + MF->insert(It, OneMBB); + MF->insert(It, OneMBB3); + MF->insert(It, OneMBB0); + MF->insert(It, OneMBB1); + MF->insert(It, OneMBB2); + MF->insert(It, TwoMBB); + MF->insert(It, TwoMBB3); + MF->insert(It, TwoMBB0); + MF->insert(It, TwoMBB1); + MF->insert(It, TwoMBB2); + MF->insert(It, ThreeMBB); + MF->insert(It, ThreeMBB3); + MF->insert(It, ThreeMBB0); + MF->insert(It, ThreeMBB1); + MF->insert(It, ThreeMBB2); + MF->insert(It, FourMBB); + MF->insert(It, FourMBB3); + MF->insert(It, FourMBB0); + MF->insert(It, FourMBB1); + MF->insert(It, FourMBB2); + MF->insert(It, FiveMBB); + MF->insert(It, FiveMBB3); + MF->insert(It, FiveMBB0); + MF->insert(It, 
FiveMBB1); + MF->insert(It, FiveMBB2); + MF->insert(It, SixMBB); + MF->insert(It, SixMBB3); + MF->insert(It, SixMBB0); + MF->insert(It, SixMBB1); + MF->insert(It, SixMBB2); + MF->insert(It, sinkMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(mainMBB, BranchProbability::getOne()); + mainMBB->addSuccessor(SevenMBB); + mainMBB->addSuccessor(ZeroMBB); + mainMBB->addSuccessor(OneMBB); + mainMBB->addSuccessor(TwoMBB); + mainMBB->addSuccessor(ThreeMBB); + mainMBB->addSuccessor(FourMBB); + mainMBB->addSuccessor(FiveMBB); + mainMBB->addSuccessor(SixMBB); + SevenMBB->addSuccessor(SevenMBB0); + SevenMBB->addSuccessor(SevenMBB1); + SevenMBB->addSuccessor(SevenMBB2); + SevenMBB->addSuccessor(SevenMBB3); + SevenMBB0->addSuccessor(sinkMBB); + SevenMBB1->addSuccessor(sinkMBB); + SevenMBB2->addSuccessor(sinkMBB); + SevenMBB3->addSuccessor(sinkMBB); + ZeroMBB->addSuccessor(ZeroMBB0); + ZeroMBB->addSuccessor(ZeroMBB1); + ZeroMBB->addSuccessor(ZeroMBB2); + ZeroMBB->addSuccessor(ZeroMBB3); + ZeroMBB0->addSuccessor(sinkMBB); + ZeroMBB1->addSuccessor(sinkMBB); + ZeroMBB2->addSuccessor(sinkMBB); + ZeroMBB3->addSuccessor(sinkMBB); + OneMBB->addSuccessor(OneMBB0); + OneMBB->addSuccessor(OneMBB1); + OneMBB->addSuccessor(OneMBB2); + OneMBB->addSuccessor(OneMBB3); + OneMBB0->addSuccessor(sinkMBB); + OneMBB1->addSuccessor(sinkMBB); + OneMBB2->addSuccessor(sinkMBB); + OneMBB3->addSuccessor(sinkMBB); + TwoMBB->addSuccessor(TwoMBB0); + TwoMBB->addSuccessor(TwoMBB1); + TwoMBB->addSuccessor(TwoMBB2); + TwoMBB->addSuccessor(TwoMBB3); + TwoMBB0->addSuccessor(sinkMBB); + TwoMBB1->addSuccessor(sinkMBB); + TwoMBB2->addSuccessor(sinkMBB); + TwoMBB3->addSuccessor(sinkMBB); + ThreeMBB->addSuccessor(ThreeMBB0); + ThreeMBB->addSuccessor(ThreeMBB1); + ThreeMBB->addSuccessor(ThreeMBB2); + ThreeMBB->addSuccessor(ThreeMBB3); + ThreeMBB0->addSuccessor(sinkMBB); + ThreeMBB1->addSuccessor(sinkMBB); + ThreeMBB2->addSuccessor(sinkMBB); + ThreeMBB3->addSuccessor(sinkMBB); + FourMBB->addSuccessor(FourMBB0); + FourMBB->addSuccessor(FourMBB1); + FourMBB->addSuccessor(FourMBB2); + FourMBB->addSuccessor(FourMBB3); + FourMBB0->addSuccessor(sinkMBB); + FourMBB1->addSuccessor(sinkMBB); + FourMBB2->addSuccessor(sinkMBB); + FourMBB3->addSuccessor(sinkMBB); + FiveMBB->addSuccessor(FiveMBB0); + FiveMBB->addSuccessor(FiveMBB1); + FiveMBB->addSuccessor(FiveMBB2); + FiveMBB->addSuccessor(FiveMBB3); + FiveMBB0->addSuccessor(sinkMBB); + FiveMBB1->addSuccessor(sinkMBB); + FiveMBB2->addSuccessor(sinkMBB); + FiveMBB3->addSuccessor(sinkMBB); + SixMBB->addSuccessor(SixMBB0); + SixMBB->addSuccessor(SixMBB1); + SixMBB->addSuccessor(SixMBB2); + SixMBB->addSuccessor(SixMBB3); + SixMBB0->addSuccessor(sinkMBB); + SixMBB1->addSuccessor(sinkMBB); + SixMBB2->addSuccessor(sinkMBB); + SixMBB3->addSuccessor(sinkMBB); + + unsigned SRLI, ADDI, OR, MOD, BLT, ZERO; + SRLI = isGP64 ? LoongArch::SRLI_D : LoongArch::SRLI_W; + ADDI = isGP64 ? LoongArch::ADDI_D : LoongArch::ADDI_W; + OR = isGP64 ? LoongArch::OR : LoongArch::OR32; + MOD = isGP64 ? LoongArch::MOD_DU : LoongArch::MOD_WU; + BLT = isGP64 ? LoongArch::BLT : LoongArch::BLT32; + ZERO = isGP64 ? 
LoongArch::ZERO_64 : LoongArch::ZERO; + + BuildMI(mainMBB, DL, TII->get(SRLI), Rimm).addReg(LaneReg).addImm(2); + BuildMI(mainMBB, DL, TII->get(ADDI), R4r).addReg(ZERO).addImm(4); + BuildMI(mainMBB, DL, TII->get(OR), Rib).addReg(Rimm).addReg(ZERO); + BuildMI(mainMBB, DL, TII->get(MOD), Ris).addReg(Rib).addReg(R4r); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(1); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ZeroMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(2); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(OneMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(3); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(TwoMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(4); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(ThreeMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(5); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FourMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(6); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(FiveMBB); + BuildMI(mainMBB, DL, TII->get(ADDI), RI).addReg(ZERO).addImm(7); + BuildMI(mainMBB, DL, TII->get(BLT)).addReg(Rib).addReg(RI).addMBB(SixMBB); + BuildMI(mainMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB); + + BuildMI(SevenMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(7); + BuildMI(SevenMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(SevenMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b1) + .addMBB(SevenMBB0); + BuildMI(SevenMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(SevenMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b2) + .addMBB(SevenMBB1); + BuildMI(SevenMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(SevenMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b3) + .addMBB(SevenMBB2); + BuildMI(SevenMBB, DL, TII->get(LoongArch::B32)).addMBB(SevenMBB3); + + BuildMI(SevenMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0x00fff); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(7); + BuildMI(SevenMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SevenMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xff00f); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(7); + BuildMI(SevenMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SevenMBB1, DL, 
TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xffff0); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(7); + BuildMI(SevenMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SevenMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xfffff); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(7); + BuildMI(SevenMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ZeroMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(0); + BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(ZeroMBB0); + BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(ZeroMBB1); + BuildMI(ZeroMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(ZeroMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(ZeroMBB2); + BuildMI(ZeroMBB, DL, TII->get(LoongArch::B32)).addMBB(ZeroMBB3); + + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(0); + BuildMI(ZeroMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ZeroMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(0); + BuildMI(ZeroMBB0, DL, 
TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(0); + BuildMI(ZeroMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(0); + BuildMI(ZeroMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(OneMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(1); + BuildMI(OneMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(OneMBB0); + BuildMI(OneMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(OneMBB1); + BuildMI(OneMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(OneMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(OneMBB2); + BuildMI(OneMBB, DL, TII->get(LoongArch::B32)).addMBB(OneMBB3); + + BuildMI(OneMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(OneMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(OneMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(OneMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(OneMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(OneMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(1); + BuildMI(OneMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(OneMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(OneMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(OneMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(OneMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(OneMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(OneMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(OneMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(1); + BuildMI(OneMBB0, 
DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(OneMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(OneMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(OneMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(OneMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(OneMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(OneMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(OneMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(1); + BuildMI(OneMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(OneMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(OneMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(OneMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(OneMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(OneMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(OneMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(OneMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(1); + BuildMI(OneMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(TwoMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(2); + BuildMI(TwoMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(TwoMBB0); + BuildMI(TwoMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(TwoMBB1); + BuildMI(TwoMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(TwoMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(TwoMBB2); + BuildMI(TwoMBB, DL, TII->get(LoongArch::B32)).addMBB(TwoMBB3); + + BuildMI(TwoMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(2); + BuildMI(TwoMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(TwoMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(TwoMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(2); + BuildMI(TwoMBB0, DL, 
TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(TwoMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(2); + BuildMI(TwoMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(TwoMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(2); + BuildMI(TwoMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ThreeMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(3); + BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(ThreeMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b1) + .addMBB(ThreeMBB0); + BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(ThreeMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b2) + .addMBB(ThreeMBB1); + BuildMI(ThreeMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(ThreeMBB, DL, TII->get(BLT)) + .addReg(Ris) + .addReg(R7b3) + .addMBB(ThreeMBB2); + BuildMI(ThreeMBB, DL, TII->get(LoongArch::B32)).addMBB(ThreeMBB3); + + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0x00fff); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(3); + BuildMI(ThreeMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xff00f); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + 
.addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(3); + BuildMI(ThreeMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xffff0); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(3); + BuildMI(ThreeMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3) + .addImm(0xfffff); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(3); + BuildMI(ThreeMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FourMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(4); + BuildMI(FourMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FourMBB0); + BuildMI(FourMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FourMBB1); + BuildMI(FourMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(FourMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FourMBB2); + BuildMI(FourMBB, DL, TII->get(LoongArch::B32)).addMBB(FourMBB3); + + BuildMI(FourMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FourMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(FourMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(FourMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FourMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(FourMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(4); + BuildMI(FourMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FourMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FourMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(FourMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(FourMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(FourMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FourMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FourMBB0, 
DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(4); + BuildMI(FourMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FourMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FourMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(FourMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(FourMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(FourMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FourMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FourMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(4); + BuildMI(FourMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FourMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FourMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(FourMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(FourMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(FourMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FourMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FourMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(4); + BuildMI(FourMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FiveMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(5); + BuildMI(FiveMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(FiveMBB0); + BuildMI(FiveMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(FiveMBB1); + BuildMI(FiveMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(FiveMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(FiveMBB2); + BuildMI(FiveMBB, DL, TII->get(LoongArch::B32)).addMBB(FiveMBB3); + + BuildMI(FiveMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(5); + BuildMI(FiveMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FiveMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + 
.addReg(R7r80l_3); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(5); + BuildMI(FiveMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FiveMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(5); + BuildMI(FiveMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(FiveMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(5); + BuildMI(FiveMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SixMBB, DL, TII->get(LoongArch::XVPICKVE2GR_W), R70) + .addReg(SrcVecReg) + .addImm(6); + BuildMI(SixMBB, DL, TII->get(ADDI), R7b1).addReg(ZERO).addImm(1); + BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b1).addMBB(SixMBB0); + BuildMI(SixMBB, DL, TII->get(ADDI), R7b2).addReg(ZERO).addImm(2); + BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b2).addMBB(SixMBB1); + BuildMI(SixMBB, DL, TII->get(ADDI), R7b3).addReg(ZERO).addImm(3); + BuildMI(SixMBB, DL, TII->get(BLT)).addReg(Ris).addReg(R7b3).addMBB(SixMBB2); + BuildMI(SixMBB, DL, TII->get(LoongArch::B32)).addMBB(SixMBB3); + + BuildMI(SixMBB3, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SixMBB3, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0x00fff); + BuildMI(SixMBB3, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(SixMBB3, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SixMBB3, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80_3); + BuildMI(SixMBB3, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(6); + BuildMI(SixMBB3, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SixMBB0, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SixMBB0, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(8); + BuildMI(SixMBB0, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xff00f); + BuildMI(SixMBB0, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xfff); + BuildMI(SixMBB0, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SixMBB0, DL, TII->get(LoongArch::OR32), R7r82_3) + 
.addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SixMBB0, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(6); + BuildMI(SixMBB0, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SixMBB1, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SixMBB1, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(16); + BuildMI(SixMBB1, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xffff0); + BuildMI(SixMBB1, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0x0ff); + BuildMI(SixMBB1, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SixMBB1, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SixMBB1, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(6); + BuildMI(SixMBB1, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + BuildMI(SixMBB2, DL, TII->get(LoongArch::SLLI_W), R7r80_3) + .addReg(SrcValReg) + .addImm(24); + BuildMI(SixMBB2, DL, TII->get(LoongArch::SRLI_W), R7r80l_3) + .addReg(R7r80_3) + .addImm(24); + BuildMI(SixMBB2, DL, TII->get(LoongArch::LU12I_W), R7r81l_3).addImm(0xfffff); + BuildMI(SixMBB2, DL, TII->get(LoongArch::ORI32), R7r81_3) + .addReg(R7r81l_3) + .addImm(0xf00); + BuildMI(SixMBB2, DL, TII->get(LoongArch::AND32), R7r82l_3) + .addReg(R70) + .addReg(R7r81_3); + BuildMI(SixMBB2, DL, TII->get(LoongArch::OR32), R7r82_3) + .addReg(R7r82l_3) + .addReg(R7r80l_3); + BuildMI(SixMBB2, DL, TII->get(LoongArch::XVINSGR2VR_W), tmp_Dst73) + .addReg(SrcVecReg) + .addReg(R7r82_3) + .addImm(6); + BuildMI(SixMBB2, DL, TII->get(LoongArch::B32)).addMBB(sinkMBB); + + sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + + BuildMI(sinkMBB, DL, TII->get(LoongArch::XVORI_B), Dest) + .addReg(tmp_Dst73) + .addImm(0); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *mainMBB); + computeAndAddLiveIns(LiveRegs, *SevenMBB); + computeAndAddLiveIns(LiveRegs, *SevenMBB0); + computeAndAddLiveIns(LiveRegs, *SevenMBB1); + computeAndAddLiveIns(LiveRegs, *SevenMBB2); + computeAndAddLiveIns(LiveRegs, *SevenMBB3); + computeAndAddLiveIns(LiveRegs, *ZeroMBB); + computeAndAddLiveIns(LiveRegs, *ZeroMBB0); + computeAndAddLiveIns(LiveRegs, *ZeroMBB1); + computeAndAddLiveIns(LiveRegs, *ZeroMBB2); + computeAndAddLiveIns(LiveRegs, *ZeroMBB3); + computeAndAddLiveIns(LiveRegs, *OneMBB); + computeAndAddLiveIns(LiveRegs, *OneMBB0); + computeAndAddLiveIns(LiveRegs, *OneMBB1); + computeAndAddLiveIns(LiveRegs, *OneMBB2); + computeAndAddLiveIns(LiveRegs, *OneMBB3); + computeAndAddLiveIns(LiveRegs, *TwoMBB); + computeAndAddLiveIns(LiveRegs, *TwoMBB0); + computeAndAddLiveIns(LiveRegs, *TwoMBB1); + computeAndAddLiveIns(LiveRegs, *TwoMBB2); + computeAndAddLiveIns(LiveRegs, *TwoMBB3); + computeAndAddLiveIns(LiveRegs, *ThreeMBB); + computeAndAddLiveIns(LiveRegs, *ThreeMBB0); + computeAndAddLiveIns(LiveRegs, *ThreeMBB1); + computeAndAddLiveIns(LiveRegs, *ThreeMBB2); + computeAndAddLiveIns(LiveRegs, *ThreeMBB3); + computeAndAddLiveIns(LiveRegs, *FourMBB); + computeAndAddLiveIns(LiveRegs, *FourMBB0); + computeAndAddLiveIns(LiveRegs, *FourMBB1); + computeAndAddLiveIns(LiveRegs, *FourMBB2); + computeAndAddLiveIns(LiveRegs, *FourMBB3); + computeAndAddLiveIns(LiveRegs, *FiveMBB); + computeAndAddLiveIns(LiveRegs, *FiveMBB0); + computeAndAddLiveIns(LiveRegs, *FiveMBB1); + computeAndAddLiveIns(LiveRegs, *FiveMBB2); + computeAndAddLiveIns(LiveRegs, *FiveMBB3); + 
computeAndAddLiveIns(LiveRegs, *SixMBB); + computeAndAddLiveIns(LiveRegs, *SixMBB0); + computeAndAddLiveIns(LiveRegs, *SixMBB1); + computeAndAddLiveIns(LiveRegs, *SixMBB2); + computeAndAddLiveIns(LiveRegs, *SixMBB3); + computeAndAddLiveIns(LiveRegs, *sinkMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandAtomicBinOpSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + unsigned LL, SC; + unsigned BEQ = LoongArch::BEQ32; + unsigned SEOp = LoongArch::EXT_W_H32; + + LL = LoongArch::LL_W; + SC = LoongArch::SC_W; + + bool IsSwap = false; + bool IsNand = false; + bool IsMAX = false; + bool IsMIN = false; + bool IsUnsigned = false; + + unsigned Opcode = 0; + switch (I->getOpcode()) { + case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA: + IsNand = true; + break; + case LoongArch::ATOMIC_SWAP_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_SWAP_I16_POSTRA: + IsSwap = true; + break; + case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA: + Opcode = LoongArch::ADD_W; + break; + case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA: + Opcode = LoongArch::AMMAX_DB_W; + IsMAX = true; + break; + case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA: + Opcode = LoongArch::AMMIN_DB_W; + IsMIN = true; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA: + Opcode = LoongArch::AMMAX_DB_WU; + IsMAX = true; + IsUnsigned = true; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA: + Opcode = LoongArch::AMMIN_DB_WU; + IsMIN = true; + IsUnsigned = true; + break; + case LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA: + Opcode = LoongArch::SUB_W; + break; + case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA: + Opcode = LoongArch::AND32; + break; + case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA: + Opcode = LoongArch::OR32; + break; + case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA: + SEOp = LoongArch::EXT_W_B32; + LLVM_FALLTHROUGH; + case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA: + Opcode = LoongArch::XOR32; + break; + default: + llvm_unreachable("Unknown subword atomic pseudo for expansion!"); + } + + unsigned Dest = I->getOperand(0).getReg(); + unsigned Ptr = I->getOperand(1).getReg(); + unsigned Incr = I->getOperand(2).getReg(); + unsigned Mask = I->getOperand(3).getReg(); + unsigned Mask2 = I->getOperand(4).getReg(); + unsigned ShiftAmnt = I->getOperand(5).getReg(); + unsigned OldVal = I->getOperand(6).getReg(); + unsigned BinOpRes = I->getOperand(7).getReg(); + unsigned StoreVal = I->getOperand(8).getReg(); + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); 
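+  // The pseudo's operands carry a precomputed field Mask, its complement
+  // Mask2, and the field's bit offset ShiftAmnt: the loop below operates on
+  // the naturally aligned word containing the byte/halfword, merging the new
+  // field into the untouched bits before the store-conditional.
+  // CFG built next: BB -> loopMBB; loopMBB -> loopMBB (sc.w failed) or
+  // sinkMBB; sinkMBB -> exitMBB, which receives the remainder of BB.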
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loopMBB); + MF->insert(It, sinkMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(sinkMBB); + loopMBB->addSuccessor(loopMBB); + loopMBB->normalizeSuccProbs(); + + BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); + if (IsNand) { + // and andres, oldval, incr2 + // nor binopres, $0, andres + // and newval, binopres, mask + BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(LoongArch::NOR32), BinOpRes) + .addReg(LoongArch::ZERO) + .addReg(BinOpRes); + BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) + .addReg(BinOpRes) + .addReg(Mask); + } else if (IsMAX || IsMIN) { + + unsigned SLTScratch4 = IsUnsigned ? LoongArch::SLTU32 : LoongArch::SLT32; + unsigned CMPIncr = IsMAX ? LoongArch::MASKEQZ32 : LoongArch::MASKNEZ32; + unsigned CMPOldVal = IsMAX ? LoongArch::MASKNEZ32 : LoongArch::MASKEQZ32; + + unsigned Scratch4 = I->getOperand(9).getReg(); + unsigned Scratch5 = I->getOperand(10).getReg(); + + BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Scratch5) + .addReg(OldVal) + .addReg(Mask); + BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), Incr) + .addReg(Incr) + .addReg(Mask); + BuildMI(loopMBB, DL, TII->get(SLTScratch4), Scratch4) + .addReg(Scratch5) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(CMPOldVal), BinOpRes) + .addReg(Scratch5) + .addReg(Scratch4); + BuildMI(loopMBB, DL, TII->get(CMPIncr), Scratch4) + .addReg(Incr) + .addReg(Scratch4); + BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), BinOpRes) + .addReg(BinOpRes) + .addReg(Scratch4); + + } else if (!IsSwap) { + // <binop> binopres, oldval, incr2 + // and newval, binopres, mask + BuildMI(loopMBB, DL, TII->get(Opcode), BinOpRes) + .addReg(OldVal) + .addReg(Incr); + BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) + .addReg(BinOpRes) + .addReg(Mask); + } else { // atomic.swap + // and newval, incr2, mask + BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), BinOpRes) + .addReg(Incr) + .addReg(Mask); + } + + // and StoreVal, OldVal, Mask2 + // or StoreVal, StoreVal, BinOpRes + // StoreVal = sc StoreVal, 0(Ptr) + // beq StoreVal, zero, loopMBB + BuildMI(loopMBB, DL, TII->get(LoongArch::AND32), StoreVal) + .addReg(OldVal) + .addReg(Mask2); + BuildMI(loopMBB, DL, TII->get(LoongArch::OR32), StoreVal) + .addReg(StoreVal) + .addReg(BinOpRes); + BuildMI(loopMBB, DL, TII->get(SC), StoreVal) + .addReg(StoreVal) + .addReg(Ptr) + .addImm(0); + BuildMI(loopMBB, DL, TII->get(BEQ)) + .addReg(StoreVal) + .addReg(LoongArch::ZERO) + .addMBB(loopMBB); + + // sinkMBB: + // and maskedoldval1,oldval,mask + // srl srlres,maskedoldval1,shiftamt + // sign_extend dest,srlres + + sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + + BuildMI(sinkMBB, DL, TII->get(LoongArch::AND32), Dest) + .addReg(OldVal) + .addReg(Mask); + BuildMI(sinkMBB, DL, TII->get(LoongArch::SRL_W), Dest) + .addReg(Dest) + .addReg(ShiftAmnt); + + BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loopMBB); + computeAndAddLiveIns(LiveRegs, *sinkMBB); + 
computeAndAddLiveIns(LiveRegs, *exitMBB); + + NMBBI = BB.end(); + I->eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI, + unsigned Size) { + MachineFunction *MF = BB.getParent(); + + DebugLoc DL = I->getDebugLoc(); + + unsigned LL, SC, ZERO, BEQ, SUB; + if (Size == 4) { + LL = LoongArch::LL_W; + SC = LoongArch::SC_W; + BEQ = LoongArch::BEQ32; + ZERO = LoongArch::ZERO; + SUB = LoongArch::SUB_W; + } else { + LL = LoongArch::LL_D; + SC = LoongArch::SC_D; + ZERO = LoongArch::ZERO_64; + BEQ = LoongArch::BEQ; + SUB = LoongArch::SUB_D; + } + + unsigned OldVal = I->getOperand(0).getReg(); + unsigned Ptr = I->getOperand(1).getReg(); + unsigned Incr = I->getOperand(2).getReg(); + unsigned Scratch = I->getOperand(3).getReg(); + + unsigned Opcode = 0; + unsigned OR = 0; + unsigned AND = 0; + unsigned NOR = 0; + bool IsNand = false; + bool IsSub = false; + switch (I->getOpcode()) { + case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA: + Opcode = LoongArch::AMADD_DB_W; + break; + case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA: + IsSub = true; + Opcode = LoongArch::AMADD_DB_W; + break; + case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA: + Opcode = LoongArch::AMAND_DB_W; + break; + case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA: + Opcode = LoongArch::AMOR_DB_W; + break; + case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA: + Opcode = LoongArch::AMXOR_DB_W; + break; + case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA: + IsNand = true; + AND = LoongArch::AND32; + NOR = LoongArch::NOR32; + break; + case LoongArch::ATOMIC_SWAP_I32_POSTRA: + OR = LoongArch::AMSWAP_DB_W; + break; + case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA: + Opcode = LoongArch::AMMAX_DB_W; + break; + case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA: + Opcode = LoongArch::AMMIN_DB_W; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA: + Opcode = LoongArch::AMMAX_DB_WU; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA: + Opcode = LoongArch::AMMIN_DB_WU; + break; + case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA: + Opcode = LoongArch::AMADD_DB_D; + break; + case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA: + IsSub = true; + Opcode = LoongArch::AMADD_DB_D; + break; + case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA: + Opcode = LoongArch::AMAND_DB_D; + break; + case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA: + Opcode = LoongArch::AMOR_DB_D; + break; + case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA: + Opcode = LoongArch::AMXOR_DB_D; + break; + case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA: + IsNand = true; + AND = LoongArch::AND; + NOR = LoongArch::NOR; + break; + case LoongArch::ATOMIC_SWAP_I64_POSTRA: + OR = LoongArch::AMSWAP_DB_D; + break; + case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA: + Opcode = LoongArch::AMMAX_DB_D; + break; + case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA: + Opcode = LoongArch::AMMIN_DB_D; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA: + Opcode = LoongArch::AMMAX_DB_DU; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA: + Opcode = LoongArch::AMMIN_DB_DU; + break; + default: + llvm_unreachable("Unknown pseudo atomic!"); + } + + const BasicBlock *LLVM_BB = BB.getBasicBlock(); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB.getIterator(); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), &BB, std::next(I), BB.end()); + exitMBB->transferSuccessorsAndUpdatePHIs(&BB); + + 
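+  // Word- and doubleword-sized operations map onto LoongArch's single
+  // am*.db atomic read-modify-write instructions, so no retry loop is
+  // required; only NAND, which has no am* form, falls back to an ll/sc
+  // sequence, which is why loopMBB is given itself as a successor below
+  // only in that case.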
BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(exitMBB); + if (!Opcode && IsNand) + loopMBB->addSuccessor(loopMBB); + loopMBB->normalizeSuccProbs(); + + assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!"); + assert((OldVal != Incr) && "Clobbered the wrong reg!"); + if (Opcode) { + if (IsSub) { + BuildMI(loopMBB, DL, TII->get(SUB), Scratch).addReg(ZERO).addReg(Incr); + BuildMI(loopMBB, DL, TII->get(Opcode), OldVal).addReg(Scratch).addReg(Ptr).addImm(0); + } else { + BuildMI(loopMBB, DL, TII->get(Opcode), OldVal).addReg(Incr).addReg(Ptr).addImm(0); + } + } else if (IsNand) { + assert(AND && NOR && + "Unknown nand instruction for atomic pseudo expansion"); + BuildMI(loopMBB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); + BuildMI(loopMBB, DL, TII->get(AND), Scratch).addReg(OldVal).addReg(Incr); + BuildMI(loopMBB, DL, TII->get(NOR), Scratch).addReg(ZERO).addReg(Scratch); + BuildMI(loopMBB, DL, TII->get(SC), Scratch).addReg(Scratch).addReg(Ptr).addImm(0); + BuildMI(loopMBB, DL, TII->get(BEQ)).addReg(Scratch).addReg(ZERO).addMBB(loopMBB); + } else { + assert(OR && "Unknown instruction for atomic pseudo expansion!"); + BuildMI(loopMBB, DL, TII->get(OR), OldVal).addReg(Incr).addReg(Ptr).addImm(0); + } + + NMBBI = BB.end(); + I->eraseFromParent(); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loopMBB); + computeAndAddLiveIns(LiveRegs, *exitMBB); + + return true; +} + +bool LoongArchExpandPseudo::expandLoadAddr(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + MachineFunction *MF = BB.getParent(); + MachineInstr &MI = *I; + DebugLoc DL = MI.getDebugLoc(); + + unsigned Op = MI.getOpcode(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned TmpReg; + const MachineOperand &MO = MI.getOperand(1); + Reloc::Model RM = MF->getTarget().getRelocationModel(); + + MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5; + unsigned HiFlag, LoFlag, HigherFlag, HighestFlag; + unsigned HiOp, LoOp, HigherOp, HighestOp, LastOp; + bool UseGot = false; + + HiOp = LoongArch::PCADDU12I_ri; + LoOp = LoongArch::ORI_rri; + HigherOp = LoongArch::LU32I_D_ri; + HighestOp = LoongArch::LU52I_D_rri; + + switch (Op) { + case LoongArch::LoadAddrLocal: + if (RM == Reloc::Static) { // for jit + HiFlag = LoongArchII::MO_ABS_HI; + LoFlag = LoongArchII::MO_ABS_LO; + HigherFlag = LoongArchII::MO_ABS_HIGHER; + HighestFlag = LoongArchII::MO_ABS_HIGHEST; + // lu12i.w + ori + lu32i.d + lu52i.d + HiOp = LoongArch::LU12I_W; + LoOp = LoongArch::ORI; + HigherOp = LoongArch::LU32I_D; + HighestOp = LoongArch::LU52I_D; + } else { + // pcaddu12i + addi.d + LoFlag = LoongArchII::MO_PCREL_LO; + HiFlag = LoongArchII::MO_PCREL_HI; + LoOp = LoongArch::ADDI_D_rri; + } + break; + case LoongArch::LoadAddrLocalRR: + // pcaddu12i + ori + lu32i.d + lu52i.d + add.d + LoFlag = LoongArchII::MO_PCREL_RRLO; + HiFlag = LoongArchII::MO_PCREL_RRHI; + HigherFlag = LoongArchII::MO_PCREL_RRHIGHER; + HighestFlag = LoongArchII::MO_PCREL_RRHIGHEST; + LastOp = LoongArch::ADD_D_rrr; + break; + case LoongArch::LoadAddrGlobal: + case LoongArch::LoadAddrGlobal_Alias: + // pcaddu12i + ld.d + LoFlag = LoongArchII::MO_GOT_LO; + HiFlag = LoongArchII::MO_GOT_HI; + HiOp = LoongArch::PCADDU12I_rii; + LoOp = LoongArch::LD_D_rrii; + UseGot = true; + break; + case LoongArch::LoadAddrGlobalRR: + // pcaddu12i + ori + lu32i.d + lu52i.d + ldx.d + LoFlag = LoongArchII::MO_GOT_RRLO; + HiFlag = LoongArchII::MO_GOT_RRHI; + HigherFlag = LoongArchII::MO_GOT_RRHIGHER; + HighestFlag = 
LoongArchII::MO_GOT_RRHIGHEST; + HiOp = LoongArch::PCADDU12I_rii; + LoOp = LoongArch::ORI_rrii; + HigherOp = LoongArch::LU32I_D_rii; + HighestOp = LoongArch::LU52I_D_rrii; + LastOp = LoongArch::LDX_D_rrr; + UseGot = true; + break; + case LoongArch::LoadAddrTLS_LE: + // lu12i.w + ori + lu32i.d + lu52i.d + LoFlag = LoongArchII::MO_TLSLE_LO; + HiFlag = LoongArchII::MO_TLSLE_HI; + HigherFlag = LoongArchII::MO_TLSLE_HIGHER; + HighestFlag = LoongArchII::MO_TLSLE_HIGHEST; + HiOp = LoongArch::LU12I_W_ri; + break; + case LoongArch::LoadAddrTLS_IE: + // pcaddu12i + ld.d + LoFlag = LoongArchII::MO_TLSIE_LO; + HiFlag = LoongArchII::MO_TLSIE_HI; + HiOp = LoongArch::PCADDU12I_rii; + LoOp = LoongArch::LD_D_rrii; + UseGot = true; + break; + case LoongArch::LoadAddrTLS_IE_RR: + // pcaddu12i + ori + lu32i.d + lu52i.d +ldx.d + LoFlag = LoongArchII::MO_TLSIE_RRLO; + HiFlag = LoongArchII::MO_TLSIE_RRHI; + HigherFlag = LoongArchII::MO_TLSIE_RRHIGHER; + HighestFlag = LoongArchII::MO_TLSIE_RRHIGHEST; + HiOp = LoongArch::PCADDU12I_rii; + LoOp = LoongArch::ORI_rrii; + HigherOp = LoongArch::LU32I_D_rii; + HighestOp = LoongArch::LU52I_D_rrii; + LastOp = LoongArch::LDX_D_rrr; + UseGot = true; + break; + case LoongArch::LoadAddrTLS_LD: + case LoongArch::LoadAddrTLS_GD: + // pcaddu12i + addi.d + LoFlag = LoongArchII::MO_TLSGD_LO; + HiFlag = LoongArchII::MO_TLSGD_HI; + HiOp = LoongArch::PCADDU12I_rii; + LoOp = LoongArch::ADDI_D_rrii; + UseGot = true; + break; + case LoongArch::LoadAddrTLS_LD_RR: + case LoongArch::LoadAddrTLS_GD_RR: + // pcaddu12i + ori + lu32i.d + lu52i.d + add.d + LoFlag = LoongArchII::MO_TLSGD_RRLO; + HiFlag = LoongArchII::MO_TLSGD_RRHI; + HigherFlag = LoongArchII::MO_TLSGD_RRHIGHER; + HighestFlag = LoongArchII::MO_TLSGD_RRHIGHEST; + HiOp = LoongArch::PCADDU12I_rii; + LoOp = LoongArch::ORI_rrii; + HigherOp = LoongArch::LU32I_D_rii; + HighestOp = LoongArch::LU52I_D_rrii; + LastOp = LoongArch::ADD_D_rrr; + UseGot = true; + break; + default: + break; + } + + MIB1 = BuildMI(BB, I, DL, TII->get(HiOp), DestReg); + + switch (Op) { + case LoongArch::LoadAddrLocal: + if (RM == Reloc::Static) { // for jit + // la.abs rd, symbol + MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg).addReg(DestReg); + MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); + MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg).addReg(DestReg); + if (MO.isJTI()) { + MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); + MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); + MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); + MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); + } else if (MO.isBlockAddress()) { + MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); + MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); + MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); + MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + MIB3.addDisp(MO, 0, HigherFlag); + MIB4.addDisp(MO, 0, HighestFlag); + } + break; + } + LLVM_FALLTHROUGH; + case LoongArch::LoadAddrGlobal: // la.global rd, symbol + case LoongArch::LoadAddrGlobal_Alias: // la rd, symbol + case LoongArch::LoadAddrTLS_IE: // la.tls.ie rd, symbol + case LoongArch::LoadAddrTLS_LD: // la.tls.ld rd, symbol + case LoongArch::LoadAddrTLS_GD: // la.tls.gd rd, symbol + MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) + .addReg(DestReg); + if (MO.isJTI()) { + MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); + MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); + } else if (MO.isBlockAddress()) { + 
MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); + MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + } + if (UseGot == true) { + MIB1.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); + MIB2.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); + } + break; + + case LoongArch::LoadAddrLocalRR: //la.local rd, rs, symbol + case LoongArch::LoadAddrGlobalRR: // la.global rd, rs, symbol + case LoongArch::LoadAddrTLS_IE_RR: // la.tls.ie rd, rs, symbol + case LoongArch::LoadAddrTLS_LD_RR: // la.tls.ld rd, rs, symbol + case LoongArch::LoadAddrTLS_GD_RR: // la.tls.gd rd, rs, symbol + TmpReg = MI.getOperand(MI.getNumOperands()-1).getReg(); + MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), TmpReg) + .addReg(TmpReg); + MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), TmpReg); + MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), TmpReg) + .addReg(TmpReg); + MIB5 = BuildMI(BB, I, DL, TII->get(LastOp), DestReg) + .addReg(DestReg) + .addReg(TmpReg); + if (MO.isJTI()) { + MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); + MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); + MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); + MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); + } else if (MO.isBlockAddress()) { + MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); + MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); + MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); + MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + MIB3.addDisp(MO, 0, HigherFlag); + MIB4.addDisp(MO, 0, HighestFlag); + } + if (UseGot == true) { + MIB1.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); + MIB2.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); + MIB3.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); + MIB4.addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); + } + break; + case LoongArch::LoadAddrTLS_LE: // la.tls.le rd, symbol + MIB2 = BuildMI(BB, I, DL, TII->get(LoOp), DestReg) + .addReg(DestReg); + MIB3 = BuildMI(BB, I, DL, TII->get(HigherOp), DestReg); + MIB4 = BuildMI(BB, I, DL, TII->get(HighestOp), DestReg) + .addReg(DestReg); + if (MO.isJTI()) { + MIB1.addJumpTableIndex(MO.getIndex(), HiFlag); + MIB2.addJumpTableIndex(MO.getIndex(), LoFlag); + MIB3.addJumpTableIndex(MO.getIndex(), HigherFlag); + MIB4.addJumpTableIndex(MO.getIndex(), HighestFlag); + } else if (MO.isBlockAddress()) { + MIB1.addBlockAddress(MO.getBlockAddress(), 0, HiFlag); + MIB2.addBlockAddress(MO.getBlockAddress(), 0, LoFlag); + MIB3.addBlockAddress(MO.getBlockAddress(), 0, HigherFlag); + MIB4.addBlockAddress(MO.getBlockAddress(), 0, HighestFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + MIB3.addDisp(MO, 0, HigherFlag); + MIB4.addDisp(MO, 0, HighestFlag); + } + break; + default: + break; + } + + MI.eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandPseudoTailCall( + MachineBasicBlock &BB, MachineBasicBlock::iterator I) { + + MachineInstr &MI = *I; + DebugLoc DL = MI.getDebugLoc(); + + const MachineOperand &MO = MI.getOperand(0); + + unsigned NoFlag = LoongArchII::MO_NO_FLAG; + + MachineInstrBuilder MIB = + BuildMI(BB, I, DL, TII->get(LoongArch::PseudoTailReturn)); + + if (MO.isSymbol()) { + MIB.addExternalSymbol(MO.getSymbolName(), NoFlag); + } else { + MIB.addDisp(MO, 0, NoFlag); + } + + MI.eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandPseudoCall(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) 
{ + MachineFunction *MF = BB.getParent(); + MachineInstr &MI = *I; + DebugLoc DL = MI.getDebugLoc(); + CodeModel::Model M = MF->getTarget().getCodeModel(); + Reloc::Model RM = MF->getTarget().getRelocationModel(); + + unsigned Ra = LoongArch::RA_64; + const MachineOperand &MO = MI.getOperand(0); + unsigned HiFlag, LoFlag, HigherFlag, HighestFlag, NoFlag; + + HiFlag = LoongArchII::MO_CALL_HI; + LoFlag = LoongArchII::MO_CALL_LO; + NoFlag = LoongArchII::MO_NO_FLAG; + + if (RM == Reloc::Static) { // for jit + MachineInstrBuilder MIB1, MIB2, MIB3, MIB4, MIB5; + + HiFlag = LoongArchII::MO_ABS_HI; + LoFlag = LoongArchII::MO_ABS_LO; + HigherFlag = LoongArchII::MO_ABS_HIGHER; + HighestFlag = LoongArchII::MO_ABS_HIGHEST; + // lu12i.w + ori + lu32i.d + lu52i.d + jirl + + MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::LU12I_W), Ra); + MIB2 = BuildMI(BB, I, DL, TII->get(LoongArch::ORI), Ra) + .addReg(Ra); + MIB3 = BuildMI(BB, I, DL, TII->get(LoongArch::LU32I_D), Ra); + MIB4 = BuildMI(BB, I, DL, TII->get(LoongArch::LU52I_D), Ra) + .addReg(Ra); + MIB5 = + BuildMI(BB, I, DL, TII->get(LoongArch::JIRL), Ra).addReg(Ra).addImm(0); + if (MO.isSymbol()) { + MIB1.addExternalSymbol(MO.getSymbolName(), HiFlag); + MIB2.addExternalSymbol(MO.getSymbolName(), LoFlag); + MIB3.addExternalSymbol(MO.getSymbolName(), HigherFlag); + MIB4.addExternalSymbol(MO.getSymbolName(), HighestFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + MIB3.addDisp(MO, 0, HigherFlag); + MIB4.addDisp(MO, 0, HighestFlag); + } + } else if (M == CodeModel::Large) { + // pcaddu18i + jirl + MachineInstrBuilder MIB1; + MachineInstrBuilder MIB2; + + MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::PCADDU18I), Ra); + MIB2 = BuildMI(BB, I, DL, TII->get(LoongArch::JIRL_CALL), Ra).addReg(Ra); + if (MO.isSymbol()) { + MIB1.addExternalSymbol(MO.getSymbolName(), HiFlag); + MIB2.addExternalSymbol(MO.getSymbolName(), LoFlag); + } else { + MIB1.addDisp(MO, 0, HiFlag); + MIB2.addDisp(MO, 0, LoFlag); + } + } else { + // bl + MachineInstrBuilder MIB1; + MIB1 = BuildMI(BB, I, DL, TII->get(LoongArch::BL)); + if (MO.isSymbol()) { + MIB1.addExternalSymbol(MO.getSymbolName(), NoFlag); + } else { + MIB1.addDisp(MO, 0, NoFlag); + } + } + + MI.eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandPseudoTEQ(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { + MachineInstr &MI = *I; + DebugLoc DL = MI.getDebugLoc(); + + unsigned Divisor = MI.getOperand(0).getReg(); + unsigned BneOp = LoongArch::BNE; + unsigned Zero = LoongArch::ZERO_64; + + // bne $Divisor, $zero, 8 (skip the trap when the divisor is nonzero) + BuildMI(BB, I, DL, TII->get(BneOp), Divisor) + .addReg(Zero) + .addImm(8); + // break 7 + BuildMI(BB, I, DL, TII->get(LoongArch::BREAK)) + .addImm(7); + + MI.eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NMBB) { + + bool Modified = false; + + switch (MBBI->getOpcode()) { + case LoongArch::PseudoTEQ: + return expandPseudoTEQ(MBB, MBBI, NMBB); + case LoongArch::PseudoCall: + return expandPseudoCall(MBB, MBBI, NMBB); + case LoongArch::PseudoTailCall: + return expandPseudoTailCall(MBB, MBBI); + case LoongArch::LoadAddrLocal: + case LoongArch::LoadAddrLocalRR: + case LoongArch::LoadAddrGlobal: + case LoongArch::LoadAddrGlobalRR: + case LoongArch::LoadAddrGlobal_Alias: + case LoongArch::LoadAddrTLS_LD: + case LoongArch::LoadAddrTLS_LD_RR: + case LoongArch::LoadAddrTLS_GD: + case 
LoongArch::LoadAddrTLS_GD_RR: + case LoongArch::LoadAddrTLS_IE: + case LoongArch::LoadAddrTLS_IE_RR: + case LoongArch::LoadAddrTLS_LE: + return expandLoadAddr(MBB, MBBI, NMBB); + case LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA: + case LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA: + return expandAtomicCmpSwap(MBB, MBBI, NMBB); + case LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA: + case LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA: + return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB); + case LoongArch::ATOMIC_SWAP_I8_POSTRA: + case LoongArch::ATOMIC_SWAP_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_AND_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_AND_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_OR_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_OR_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA: + case LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA: + case LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA: + return expandAtomicBinOpSubword(MBB, MBBI, NMBB); + case LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA: + case LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA: + return expandXINSERT_BOp(MBB, MBBI, NMBB); + case LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA: + return expandINSERT_HOp(MBB, MBBI, NMBB); + case LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA: + case LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA: + return expandXINSERT_FWOp(MBB, MBBI, NMBB); + case LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA: + case LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA: + case LoongArch::ATOMIC_LOAD_AND_I32_POSTRA: + case LoongArch::ATOMIC_LOAD_OR_I32_POSTRA: + case LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA: + case LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA: + case LoongArch::ATOMIC_SWAP_I32_POSTRA: + case LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA: + case LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA: + case LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA: + case LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA: + return expandAtomicBinOp(MBB, MBBI, NMBB, 4); + case LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA: + case LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA: + case LoongArch::ATOMIC_LOAD_AND_I64_POSTRA: + case LoongArch::ATOMIC_LOAD_OR_I64_POSTRA: + case LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA: + case LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA: + case LoongArch::ATOMIC_SWAP_I64_POSTRA: + case LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA: + case LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA: + case LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA: + case LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA: + return expandAtomicBinOp(MBB, MBBI, NMBB, 8); + default: + return Modified; + } +} + +bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + STI = &static_cast<const LoongArchSubtarget &>(MF.getSubtarget()); + TII = STI->getInstrInfo(); + + bool Modified = false; + for 
(MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; + ++MFI) + Modified |= expandMBB(*MFI); + + if (Modified) + MF.RenumberBlocks(); + + return Modified; +} + +/// createLoongArchExpandPseudoPass - returns an instance of the pseudo instruction +/// expansion pass. +FunctionPass *llvm::createLoongArchExpandPseudoPass() { + return new LoongArchExpandPseudo(); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td deleted file mode 100644 index 20448492a558..000000000000 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ /dev/null @@ -1,229 +0,0 @@ -//=-- LoongArchInstrInfoF.td - Single-Precision Float instr --*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file describes the baisc single-precision floating-point instructions. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// LoongArch specific DAG Nodes. -//===----------------------------------------------------------------------===// - -def SDT_LoongArchMOVGR2FR_W_LA64 - : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; -def SDT_LoongArchMOVFR2GR_S_LA64 - : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; -def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; - -def loongarch_movgr2fr_w_la64 - : SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>; -def loongarch_movfr2gr_s_la64 - : SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>; -def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>; - -//===----------------------------------------------------------------------===// -// Instructions -//===----------------------------------------------------------------------===// - -let Predicates = [HasBasicF] in { - -// Arithmetic Operation Instructions -def FADD_S : FP_ALU_3R<0b00000001000000001, "fadd.s", FPR32>; -def FSUB_S : FP_ALU_3R<0b00000001000000101, "fsub.s", FPR32>; -def FMUL_S : FP_ALU_3R<0b00000001000001001, "fmul.s", FPR32>; -def FDIV_S : FP_ALU_3R<0b00000001000001101, "fdiv.s", FPR32>; -def FMADD_S : FP_ALU_4R<0b000010000001, "fmadd.s", FPR32>; -def FMSUB_S : FP_ALU_4R<0b000010000101, "fmsub.s", FPR32>; -def FNMADD_S : FP_ALU_4R<0b000010001001, "fnmadd.s", FPR32>; -def FNMSUB_S : FP_ALU_4R<0b000010001101, "fnmsub.s", FPR32>; -def FMAX_S : FP_ALU_3R<0b00000001000010001, "fmax.s", FPR32>; -def FMIN_S : FP_ALU_3R<0b00000001000010101, "fmin.s", FPR32>; -def FMAXA_S : FP_ALU_3R<0b00000001000011001, "fmaxa.s", FPR32>; -def FMINA_S : FP_ALU_3R<0b00000001000011101, "fmina.s", FPR32>; -def FABS_S : FP_ALU_2R<0b0000000100010100000001, "fabs.s", FPR32>; -def FNEG_S : FP_ALU_2R<0b0000000100010100000101, "fneg.s", FPR32>; -def FSQRT_S : FP_ALU_2R<0b0000000100010100010001, "fsqrt.s", FPR32>; -def FRECIP_S : FP_ALU_2R<0b0000000100010100010101, "frecip.s", FPR32>; -def FRSQRT_S : FP_ALU_2R<0b0000000100010100011001, "frsqrt.s", FPR32>; -def FSCALEB_S : FP_ALU_3R<0b00000001000100001, "fscaleb.s", FPR32>; -def FLOGB_S : FP_ALU_2R<0b0000000100010100001001, "flogb.s", FPR32>; -def FCOPYSIGN_S : FP_ALU_3R<0b00000001000100101, "fcopysign.s", FPR32>; -def FCLASS_S : 
FP_ALU_2R<0b0000000100010100001101, "fclass.s", FPR32>; - - -// Comparison Instructions -def FCMP_CAF_S : FP_CMP; -def FCMP_CUN_S : FP_CMP; -def FCMP_CEQ_S : FP_CMP; -def FCMP_CUEQ_S : FP_CMP; -def FCMP_CLT_S : FP_CMP; -def FCMP_CULT_S : FP_CMP; -def FCMP_CLE_S : FP_CMP; -def FCMP_CULE_S : FP_CMP; -def FCMP_CNE_S : FP_CMP; -def FCMP_COR_S : FP_CMP; -def FCMP_CUNE_S : FP_CMP; -def FCMP_SAF_S : FP_CMP; -def FCMP_SUN_S : FP_CMP; -def FCMP_SEQ_S : FP_CMP; -def FCMP_SUEQ_S : FP_CMP; -def FCMP_SLT_S : FP_CMP; -def FCMP_SULT_S : FP_CMP; -def FCMP_SLE_S : FP_CMP; -def FCMP_SULE_S : FP_CMP; -def FCMP_SNE_S : FP_CMP; -def FCMP_SOR_S : FP_CMP; -def FCMP_SUNE_S : FP_CMP; - -// Conversion Instructions -def FFINT_S_W : FP_CONV<0b0000000100011101000100, "ffint.s.w", FPR32, FPR32>; -def FTINT_W_S : FP_CONV<0b0000000100011011000001, "ftint.w.s", FPR32, FPR32>; -def FTINTRM_W_S : FP_CONV<0b0000000100011010000001, "ftintrm.w.s", FPR32, - FPR32>; -def FTINTRP_W_S : FP_CONV<0b0000000100011010010001, "ftintrp.w.s", FPR32, - FPR32>; -def FTINTRZ_W_S : FP_CONV<0b0000000100011010100001, "ftintrz.w.s", FPR32, - FPR32>; -def FTINTRNE_W_S : FP_CONV<0b0000000100011010110001, "ftintrne.w.s", FPR32, - FPR32>; -def FRINT_S : FP_CONV<0b0000000100011110010001, "frint.s", FPR32, FPR32>; - -// Move Instructions -def FSEL_S : FP_SEL<0b00001101000000, "fsel", FPR32>; -def FMOV_S : FP_MOV<0b0000000100010100100101, "fmov.s", FPR32, FPR32>; -def MOVGR2FR_W : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR32, GPR>; -def MOVFR2GR_S : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR32>; -def MOVGR2FCSR : FP_MOV<0b0000000100010100110000, "movgr2fcsr", FCSR, GPR>; -def MOVFCSR2GR : FP_MOV<0b0000000100010100110010, "movfcsr2gr", GPR, FCSR>; -def MOVFR2CF_S : FP_MOV<0b0000000100010100110100, "movfr2cf", CFR, FPR32>; -def MOVCF2FR_S : FP_MOV<0b0000000100010100110101, "movcf2fr", FPR32, CFR>; -def MOVGR2CF : FP_MOV<0b0000000100010100110110, "movgr2cf", CFR, GPR>; -def MOVCF2GR : FP_MOV<0b0000000100010100110111, "movcf2gr", GPR, CFR>; - -// Branch Instructions -def BCEQZ : FP_BRANCH<0b01001000, "bceqz">; -def BCNEZ : FP_BRANCH<0b01001001, "bcnez">; - -// Common Memory Access Instructions -def FLD_S : FP_LOAD_2RI12<0b0010101100, "fld.s", FPR32>; -def FST_S : FP_STORE_2RI12<0b0010101101, "fst.s", FPR32>; -def FLDX_S : FP_LOAD_3R<0b00111000001100000, "fldx.s", FPR32>; -def FSTX_S : FP_STORE_3R<0b00111000001110000, "fstx.s", FPR32>; - -// Bound Check Memory Access Instructions -def FLDGT_S : FP_LOAD_3R<0b00111000011101000, "fldgt.s", FPR32>; -def FLDLE_S : FP_LOAD_3R<0b00111000011101010, "fldle.s", FPR32>; -def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>; -def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>; - -} // Predicates = [HasBasicF] - -//===----------------------------------------------------------------------===// -// Pseudo-instructions and codegen patterns -//===----------------------------------------------------------------------===// - -/// Generic pattern classes - -class PatFpr - : Pat<(OpNode RegTy:$fj), (Inst $fj)>; -class PatFprFpr - : Pat<(OpNode RegTy:$fj, RegTy:$fk), (Inst $fj, $fk)>; - -let Predicates = [HasBasicF] in { - -/// Float arithmetic operations - -def : PatFprFpr; -def : PatFprFpr; -def : PatFprFpr; -def : PatFprFpr; -def : PatFpr; - -/// Setcc - -// Match non-signaling comparison - -// TODO: change setcc to any_fsetcc after call is supported because -// we need to call llvm.experimental.constrained.fcmp.f32 in testcase. -// See RISCV float-fcmp-strict.ll for reference. 
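For orientation, the PatFPSetcc helper deleted just below selected an ordinary f32 comparison into an fcmp into a condition-flag register followed by a flag-to-GPR move. A rough sketch of the net effect (hypothetical register assignment, inferred from the pattern rather than taken from the patch):

    // C source exercising the pattern:
    int lt(float a, float b) { return a < b; }
    // For SETOLT, (MOVCF2GR (CmpInst $fj, $fk)) yields roughly:
    //   fcmp.clt.s $fcc0, $fa0, $fa1   # compare into a condition flag
    //   movcf2gr   $a0, $fcc0          # copy the flag into a GPR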
-class PatFPSetcc - : Pat<(setcc RegTy:$fj, RegTy:$fk, cc), - (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>; -// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE. -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; - -// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions. - -/// Select - -def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj), - (FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>; - -/// Selectcc - -class PatFPSelectcc - : Pat<(select (GRLenVT (setcc RegTy:$a, RegTy:$b, cc)), RegTy:$t, RegTy:$f), - (SelInst RegTy:$f, RegTy:$t, (CmpInst RegTy:$a, RegTy:$b))>; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; - -/// Loads - -defm : LdPat; - -/// Stores - -defm : StPat; - -/// Floating point constants - -def : Pat<(f32 fpimm0), (MOVGR2FR_W R0)>; -def : Pat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W R0))>; -def : Pat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1)))>; - -// FP Conversion -def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>; -} // Predicates = [HasBasicF] - -let Predicates = [HasBasicF, IsLA64] in { -// GPR -> FPR -def : Pat<(loongarch_movgr2fr_w_la64 GPR:$src), (MOVGR2FR_W GPR:$src)>; -// FPR -> GPR -def : Pat<(loongarch_movfr2gr_s_la64 FPR32:$src), - (MOVFR2GR_S FPR32:$src)>; -// int -> f32 -def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; -} // Predicates = [HasBasicF, IsLA64] - -let Predicates = [HasBasicF, IsLA32] in { -// GPR -> FPR -def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>; -// FPR -> GPR -def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>; -// int -> f32 -def : Pat<(f32 (sint_to_fp (i32 GPR:$src))), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; -} // Predicates = [HasBasicF, IsLA64] diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td deleted file mode 100644 index bb50cec9f4c0..000000000000 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ /dev/null @@ -1,242 +0,0 @@ -//=-- LoongArchInstrInfoD.td - Double-Precision Float instr -*- tablegen -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file describes the basic double-precision floating-point instructions. 
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Instructions -//===----------------------------------------------------------------------===// - -let Predicates = [HasBasicD] in { - -// Arithmetic Operation Instructions -def FADD_D : FP_ALU_3R<0b00000001000000010, "fadd.d", FPR64>; -def FSUB_D : FP_ALU_3R<0b00000001000000110, "fsub.d", FPR64>; -def FMUL_D : FP_ALU_3R<0b00000001000001010, "fmul.d", FPR64>; -def FDIV_D : FP_ALU_3R<0b00000001000001110, "fdiv.d", FPR64>; -def FMADD_D : FP_ALU_4R<0b000010000010, "fmadd.d", FPR64>; -def FMSUB_D : FP_ALU_4R<0b000010000110, "fmsub.d", FPR64>; -def FNMADD_D : FP_ALU_4R<0b000010001010, "fnmadd.d", FPR64>; -def FNMSUB_D : FP_ALU_4R<0b000010001110, "fnmsub.d", FPR64>; -def FMAX_D : FP_ALU_3R<0b00000001000010010, "fmax.d", FPR64>; -def FMIN_D : FP_ALU_3R<0b00000001000010110, "fmin.d", FPR64>; -def FMAXA_D : FP_ALU_3R<0b00000001000011010, "fmaxa.d", FPR64>; -def FMINA_D : FP_ALU_3R<0b00000001000011110, "fmina.d", FPR64>; -def FABS_D : FP_ALU_2R<0b0000000100010100000010, "fabs.d", FPR64>; -def FNEG_D : FP_ALU_2R<0b0000000100010100000110, "fneg.d", FPR64>; -def FSQRT_D : FP_ALU_2R<0b0000000100010100010010, "fsqrt.d", FPR64>; -def FRECIP_D : FP_ALU_2R<0b0000000100010100010110, "frecip.d", FPR64>; -def FRSQRT_D : FP_ALU_2R<0b0000000100010100011010, "frsqrt.d", FPR64>; -def FSCALEB_D : FP_ALU_3R<0b00000001000100010, "fscaleb.d", FPR64>; -def FLOGB_D : FP_ALU_2R<0b0000000100010100001010, "flogb.d", FPR64>; -def FCOPYSIGN_D : FP_ALU_3R<0b00000001000100110, "fcopysign.d", FPR64>; -def FCLASS_D : FP_ALU_2R<0b0000000100010100001110, "fclass.d", FPR64>; - -// Comparison Instructions -def FCMP_CAF_D : FP_CMP; -def FCMP_CUN_D : FP_CMP; -def FCMP_CEQ_D : FP_CMP; -def FCMP_CUEQ_D : FP_CMP; -def FCMP_CLT_D : FP_CMP; -def FCMP_CULT_D : FP_CMP; -def FCMP_CLE_D : FP_CMP; -def FCMP_CULE_D : FP_CMP; -def FCMP_CNE_D : FP_CMP; -def FCMP_COR_D : FP_CMP; -def FCMP_CUNE_D : FP_CMP; -def FCMP_SAF_D : FP_CMP; -def FCMP_SUN_D : FP_CMP; -def FCMP_SEQ_D : FP_CMP; -def FCMP_SUEQ_D : FP_CMP; -def FCMP_SLT_D : FP_CMP; -def FCMP_SULT_D : FP_CMP; -def FCMP_SLE_D : FP_CMP; -def FCMP_SULE_D : FP_CMP; -def FCMP_SNE_D : FP_CMP; -def FCMP_SOR_D : FP_CMP; -def FCMP_SUNE_D : FP_CMP; - -// Conversion Instructions -def FFINT_S_L : FP_CONV<0b0000000100011101000110, "ffint.s.l", FPR32, FPR64>; -def FTINT_L_S : FP_CONV<0b0000000100011011001001, "ftint.l.s", FPR64, FPR32>; -def FTINTRM_L_S : FP_CONV<0b0000000100011010001001, "ftintrm.l.s", FPR64, - FPR32>; -def FTINTRP_L_S : FP_CONV<0b0000000100011010011001, "ftintrp.l.s", FPR64, - FPR32>; -def FTINTRZ_L_S : FP_CONV<0b0000000100011010101001, "ftintrz.l.s", FPR64, - FPR32>; -def FTINTRNE_L_S : FP_CONV<0b0000000100011010111001, "ftintrne.l.s", FPR64, - FPR32>; -def FCVT_S_D : FP_CONV<0b0000000100011001000110, "fcvt.s.d", FPR32, FPR64>; -def FCVT_D_S : FP_CONV<0b0000000100011001001001, "fcvt.d.s", FPR64, FPR32>; -def FFINT_D_W : FP_CONV<0b0000000100011101001000, "ffint.d.w", FPR64, FPR32>; -def FFINT_D_L : FP_CONV<0b0000000100011101001010, "ffint.d.l", FPR64, FPR64>; -def FTINT_W_D : FP_CONV<0b0000000100011011000010, "ftint.w.d", FPR32, FPR64>; -def FTINT_L_D : FP_CONV<0b0000000100011011001010, "ftint.l.d", FPR64, FPR64>; -def FTINTRM_W_D : FP_CONV<0b0000000100011010000010, "ftintrm.w.d", FPR32, - FPR64>; -def FTINTRM_L_D : FP_CONV<0b0000000100011010001010, "ftintrm.l.d", FPR64, - FPR64>; -def FTINTRP_W_D : 
FP_CONV<0b0000000100011010010010, "ftintrp.w.d", FPR32, - FPR64>; -def FTINTRP_L_D : FP_CONV<0b0000000100011010011010, "ftintrp.l.d", FPR64, - FPR64>; -def FTINTRZ_W_D : FP_CONV<0b0000000100011010100010, "ftintrz.w.d", FPR32, - FPR64>; -def FTINTRZ_L_D : FP_CONV<0b0000000100011010101010, "ftintrz.l.d", FPR64, - FPR64>; -def FTINTRNE_W_D : FP_CONV<0b0000000100011010110010, "ftintrne.w.d", FPR32, - FPR64>; -def FTINTRNE_L_D : FP_CONV<0b0000000100011010111010, "ftintrne.l.d", FPR64, - FPR64>; -def FRINT_D : FP_CONV<0b0000000100011110010010, "frint.d", FPR64, FPR64>; - -// Move Instructions -def FMOV_D : FP_MOV<0b0000000100010100100110, "fmov.d", FPR64, FPR64>; -def MOVFRH2GR_S : FP_MOV<0b0000000100010100101111, "movfrh2gr.s", GPR, FPR64>; -let isCodeGenOnly = 1 in { -def MOVFR2GR_S_64 : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR64>; -def FSEL_D : FP_SEL<0b00001101000000, "fsel", FPR64>; -} // isCodeGenOnly = 1 -let Constraints = "$dst = $out" in { -def MOVGR2FRH_W : FPFmtMOV<0b0000000100010100101011, (outs FPR64:$out), - (ins FPR64:$dst, GPR:$src), "movgr2frh.w", - "$dst, $src">; -} // Constraints = "$dst = $out" - -// Common Memory Access Instructions -def FLD_D : FP_LOAD_2RI12<0b0010101110, "fld.d", FPR64>; -def FST_D : FP_STORE_2RI12<0b0010101111, "fst.d", FPR64>; -def FLDX_D : FP_LOAD_3R<0b00111000001101000, "fldx.d", FPR64>; -def FSTX_D : FP_STORE_3R<0b00111000001111000, "fstx.d", FPR64>; - -// Bound Check Memory Access Instructions -def FLDGT_D : FP_LOAD_3R<0b00111000011101001, "fldgt.d", FPR64>; -def FLDLE_D : FP_LOAD_3R<0b00111000011101011, "fldle.d", FPR64>; -def FSTGT_D : FP_STORE_3R<0b00111000011101101, "fstgt.d", FPR64>; -def FSTLE_D : FP_STORE_3R<0b00111000011101111, "fstle.d", FPR64>; - -} // Predicates = [HasBasicD] - -// Instructions only available on LA64 -let Predicates = [HasBasicD, IsLA64] in { -def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, "movgr2fr.d", FPR64, GPR>; -def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, "movfr2gr.d", GPR, FPR64>; -} // Predicates = [HasBasicD, IsLA64] - -// Instructions only available on LA32 -let Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 in { -def MOVGR2FR_W_64 : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR64, GPR>; -} // Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 - -//===----------------------------------------------------------------------===// -// Pseudo-instructions and codegen patterns -//===----------------------------------------------------------------------===// - -let Predicates = [HasBasicD] in { - -/// Float arithmetic operations - -def : PatFprFpr; -def : PatFprFpr; -def : PatFprFpr; -def : PatFprFpr; -def : PatFpr; - -/// Setcc - -// Match non-signaling comparison - -// TODO: Change setcc to any_fsetcc after call is supported because -// we need to call llvm.experimental.constrained.fcmp.f64 in testcase. -// See RISCV float-fcmp-strict.ll for reference. - -// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE. -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; -def : PatFPSetcc; - -// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions. 
- -/// Select - -def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj), - (FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>; - -/// Selectcc - -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; -def : PatFPSelectcc; - -/// Loads - -defm : LdPat; - -/// Stores - -defm : StPat; - -/// FP conversion operations - -def : Pat<(loongarch_ftint FPR64:$src), (FTINTRZ_W_D FPR64:$src)>; -def : Pat<(f64 (loongarch_ftint FPR64:$src)), (FTINTRZ_L_D FPR64:$src)>; -def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_L_S FPR32:$src)>; - -// f64 -> f32 -def : Pat<(f32 (fpround FPR64:$src)), (FCVT_S_D FPR64:$src)>; -// f32 -> f64 -def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>; -} // Predicates = [HasBasicD] - -/// Floating point constants - -let Predicates = [HasBasicD, IsLA64] in { -def : Pat<(f64 fpimm0), (MOVGR2FR_D R0)>; -def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FR_D R0))>; -def : Pat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D R0, 1)))>; - -// Convert int to FP -def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), - (FFINT_D_W (MOVGR2FR_W GPR:$src))>; -def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>; - -def : Pat<(f64 (uint_to_fp (i64 (zexti32 (i64 GPR:$src))))), - (FFINT_D_W (MOVGR2FR_W GPR:$src))>; - -def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>; - -// Convert FP to int -def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>; -} // Predicates = [HasBasicD, IsLA64] - -let Predicates = [HasBasicD, IsLA32] in { -def : Pat<(f64 fpimm0), (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0)>; -def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0))>; -def : Pat<(f64 fpimm1), (FCVT_D_S (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1))))>; - -// Convert int to FP -def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; -} // Predicates = [HasBasicD, IsLA32] diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td deleted file mode 100644 index d2ba1fdfffe4..000000000000 --- a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td +++ /dev/null @@ -1,241 +0,0 @@ -//==- LoongArchInstrFormatsF.td - LoongArch FP Instr Formats -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Describe LoongArch floating-point instructions format -// -// opcode - operation code. -// fd - destination register operand. -// {c/f}{j/k/a} - source register operand. -// immN - immediate data operand. 
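To make the field layout above concrete, here is a minimal, hypothetical encoder (not part of the patch) for the 3R format declared by FPFmt3R below, checked against the FADD_S opcode from the deleted Float32 file earlier:

    #include <cassert>
    #include <cstdint>

    // FPFmt3R packing: Inst{31-15} = op, {14-10} = fk, {9-5} = fj, {4-0} = fd.
    constexpr uint32_t encodeFP3R(uint32_t Op, unsigned Fk, unsigned Fj,
                                  unsigned Fd) {
      return (Op << 15) | (Fk << 10) | (Fj << 5) | Fd;
    }

    int main() {
      // fadd.s $f0, $f1, $f2 with FADD_S's opcode 0b00000001000000001.
      assert(encodeFP3R(0b00000001000000001, /*Fk=*/2, /*Fj=*/1, /*Fd=*/0) ==
             0x01008820);
      return 0;
    }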
-// -//===----------------------------------------------------------------------===// - -// 2R-type -// -class FPFmt2R op, dag outs, dag ins, string opcstr, string opnstr, - list pattern = []> - : LAInst { - bits<5> fj; - bits<5> fd; - - let Inst{31-10} = op; - let Inst{9-5} = fj; - let Inst{4-0} = fd; -} - -// 3R-type -// -class FPFmt3R op, dag outs, dag ins, string opcstr, string opnstr, - list pattern = []> - : LAInst { - bits<5> fk; - bits<5> fj; - bits<5> fd; - - let Inst{31-15} = op; - let Inst{14-10} = fk; - let Inst{9-5} = fj; - let Inst{4-0} = fd; -} - -// 4R-type -// -class FPFmt4R op, dag outs, dag ins, string opcstr, string opnstr, - list pattern = []> - : LAInst { - bits<5> fa; - bits<5> fk; - bits<5> fj; - bits<5> fd; - - let Inst{31-20} = op; - let Inst{19-15} = fa; - let Inst{14-10} = fk; - let Inst{9-5} = fj; - let Inst{4-0} = fd; -} - -// 2RI12-type -// -class FPFmt2RI12 op, dag outs, dag ins, string opcstr, string opnstr, - list pattern = []> - : LAInst { - bits<12> imm12; - bits<5> rj; - bits<5> fd; - - let Inst{31-22} = op; - let Inst{21-10} = imm12; - let Inst{9-5} = rj; - let Inst{4-0} = fd; -} - -// FmtFCMP -// -class FPFmtFCMP op, bits<5> cond, dag outs, dag ins, string opcstr, - string opnstr, list pattern = []> - : LAInst { - bits<5> fk; - bits<5> fj; - bits<3> cd; - - let Inst{31-20} = op; - let Inst{19-15} = cond; - let Inst{14-10} = fk; - let Inst{9-5} = fj; - let Inst{4-3} = 0b00; - let Inst{2-0} = cd; -} - -// FPFmtBR -// -class FPFmtBR opcode, dag outs, dag ins, string opcstr, - string opnstr, list pattern = []> - : LAInst { - bits<21> imm21; - bits<3> cj; - - let Inst{31-26} = opcode{7-2}; - let Inst{25-10} = imm21{15-0}; - let Inst{9-8} = opcode{1-0}; - let Inst{7-5} = cj; - let Inst{4-0} = imm21{20-16}; -} - -// FmtFSEL -// -class FPFmtFSEL op, dag outs, dag ins, string opcstr, string opnstr, - list pattern = []> - : LAInst { - bits<3> ca; - bits<5> fk; - bits<5> fj; - bits<5> fd; - - let Inst{31-18} = op; - let Inst{17-15} = ca; - let Inst{14-10} = fk; - let Inst{9-5} = fj; - let Inst{4-0} = fd; -} - -// FPFmtMOV -// -class FPFmtMOV op, dag outs, dag ins, string opcstr, string opnstr, - list pattern = []> - : LAInst { - bits<5> src; - bits<5> dst; - - let Inst{31-10} = op; - let Inst{9-5} = src; - let Inst{4-0} = dst; -} - -// FPFmtMEM -// -class FPFmtMEM op, dag outs, dag ins, string opcstr, string opnstr, - list pattern = []> - : LAInst { - bits<5> rk; - bits<5> rj; - bits<5> fd; - - let Inst{31-15} = op; - let Inst{14-10} = rk; - let Inst{9-5} = rj; - let Inst{4-0} = fd; -} - -//===----------------------------------------------------------------------===// -// Instruction class templates -//===----------------------------------------------------------------------===// - -class FP_ALU_2R op, string opstr, RegisterClass rc> - : FPFmt2R; - -class FP_ALU_3R op, string opstr, RegisterClass rc> - : FPFmt3R; - -class FP_ALU_4R op, string opstr, RegisterClass rc> - : FPFmt4R; - -class FPCMPOpc value> { - bits<12> val = value; -} - -class FPCMPCond value> { - bits<5> val = value; -} - -class FP_CMP - : FPFmtFCMP; - -class FP_CONV op, string opstr, RegisterClass rcd, RegisterClass rcs> - : FPFmt2R; - -class FP_MOV op, string opstr, RegisterClass rcd, RegisterClass rcs> - : FPFmtMOV; - -class FP_SEL op, string opstr, RegisterClass rc> - : FPFmtFSEL; - -class FP_BRANCH opcode, string opstr> - : FPFmtBR { - let isBranch = 1; - let isTerminator = 1; -} - -let mayLoad = 1 in { -class FP_LOAD_3R op, string opstr, RegisterClass rc> - : FPFmtMEM; -class FP_LOAD_2RI12 
op, string opstr, RegisterClass rc> - : FPFmt2RI12; -} // mayLoad = 1 - -let mayStore = 1 in { -class FP_STORE_3R op, string opstr, RegisterClass rc> - : FPFmtMEM; -class FP_STORE_2RI12 op, string opstr, RegisterClass rc> - : FPFmt2RI12; -} // mayStore = 1 - -def FPCMP_OPC_S : FPCMPOpc<0b000011000001>; -def FPCMP_OPC_D : FPCMPOpc<0b000011000010>; - -def FPCMP_COND_CAF : FPCMPCond<0x0>; -def FPCMP_COND_CUN : FPCMPCond<0x8>; -def FPCMP_COND_CEQ : FPCMPCond<0x4>; -def FPCMP_COND_CUEQ : FPCMPCond<0xC>; -def FPCMP_COND_CLT : FPCMPCond<0x2>; -def FPCMP_COND_CULT : FPCMPCond<0xA>; -def FPCMP_COND_CLE : FPCMPCond<0x6>; -def FPCMP_COND_CULE : FPCMPCond<0xE>; -def FPCMP_COND_CNE : FPCMPCond<0x10>; -def FPCMP_COND_COR : FPCMPCond<0x14>; -def FPCMP_COND_CUNE : FPCMPCond<0x18>; -def FPCMP_COND_SAF : FPCMPCond<0x1>; -def FPCMP_COND_SUN : FPCMPCond<0x9>; -def FPCMP_COND_SEQ : FPCMPCond<0x5>; -def FPCMP_COND_SUEQ : FPCMPCond<0xD>; -def FPCMP_COND_SLT : FPCMPCond<0x3>; -def FPCMP_COND_SULT : FPCMPCond<0xB>; -def FPCMP_COND_SLE : FPCMPCond<0x7>; -def FPCMP_COND_SULE : FPCMPCond<0xF>; -def FPCMP_COND_SNE : FPCMPCond<0x11>; -def FPCMP_COND_SOR : FPCMPCond<0x15>; -def FPCMP_COND_SUNE : FPCMPCond<0x19>; diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp index 0d9ec9e2eaaa..7c4c141e178d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -1,4 +1,4 @@ -//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information -*- C++ -*-==// +//===-- LoongArchFrameLowering.cpp - LoongArch Frame Information --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,192 +11,365 @@ //===----------------------------------------------------------------------===// #include "LoongArchFrameLowering.h" -#include "LoongArchMachineFunctionInfo.h" -#include "LoongArchSubtarget.h" #include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchABIInfo.h" +#include "LoongArchInstrInfo.h" +#include "LoongArchMachineFunction.h" +#include "LoongArchTargetMachine.h" +#include "LoongArchRegisterInfo.h" +#include "LoongArchSubtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetOptions.h" +#include +#include +#include +#include using namespace llvm; -#define DEBUG_TYPE "loongarch-frame-lowering" - -// Return true if the specified function should have a dedicated frame -// pointer register. 
This is true if frame pointer elimination is -// disabled, if it needs dynamic stack realignment, if the function has -// variable sized allocas, or if the frame address is taken. -bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const { - const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - - const MachineFrameInfo &MFI = MF.getFrameInfo(); - return MF.getTarget().Options.DisableFramePointerElim(MF) || - RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || - MFI.isFrameAddressTaken(); -} - -bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { +// We would like to split the SP adjustment to reduce the number of +// prologue/epilogue instructions. In this way, the offset of a callee saved +// register spill can fit in a single store. +uint64_t +LoongArchFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF, + bool IsPrologue) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - - return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); -} - -void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, Register DestReg, - Register SrcReg, int64_t Val, - MachineInstr::MIFlag Flag) const { - const LoongArchInstrInfo *TII = STI.getInstrInfo(); - bool IsLA64 = STI.is64Bit(); - - if (DestReg == SrcReg && Val == 0) - return; + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); + uint64_t StackSize = MFI.getStackSize(); - if (isInt<12>(Val)) { - // addi.w/d $DstReg, $SrcReg, Val - BuildMI(MBB, MBBI, DL, - TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), DestReg) - .addReg(SrcReg) - .addImm(Val) - .setMIFlag(Flag); - return; + // Return the FirstSPAdjustAmount if the StackSize cannot fit in a signed + // 12-bit immediate and there exists a callee saved register that needs + // to be pushed. + if (!isInt<12>(StackSize)) { + // FirstSPAdjustAmount is chosen as (2048 - StackAlign) + // because 2048 would cause sp = sp + 2048 in the epilogue to be split + // into multiple instructions. An offset smaller than 2048 can fit in a + // single load/store instruction, and we have to stick with the stack + // alignment. + return CSI.size() > 0 ? 2048 - getStackAlign().value() + : (IsPrologue ? 2048 : 0); } - - report_fatal_error("adjustReg cannot yet handle adjustments >12 bits"); + return 0; } -// Determine the size of the frame and maximum call frame size. -void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const { - MachineFrameInfo &MFI = MF.getFrameInfo(); - - // Get the number of bytes to allocate from the FrameInfo. - uint64_t FrameSize = MFI.getStackSize(); - - // Make sure the frame is aligned. - FrameSize = alignTo(FrameSize, getStackAlign()); - - // Update frame info. - MFI.setStackSize(FrameSize); -} +//===----------------------------------------------------------------------===// +// +// Stack Frame Processing methods +// +----------------------------+ +// +// The stack is allocated by decrementing the stack pointer on +// the first instruction of a function prologue. Once decremented, +// all stack references are done through a positive offset +// from the stack/frame pointer, so the stack is considered +// to grow upward! Otherwise terrible hacks would have to be made +// to get this stack ABI compliant :) +// +// The stack frame required by the ABI (after call): +// Offset +// +// 0 ---------- +// 4 Args to pass +// . Alloca allocations +// . Local Area +// . CPU "Callee Saved" Registers +// . saved FP +// . 
saved RA +// . FPU "Callee Saved" Registers +// StackSize ----------- +// +// Offset - offset from sp after stack allocation on function prologue +// +// The sp is the stack pointer subtracted/added from the stack size +// at the Prologue/Epilogue +// +// References to the previous stack (to obtain arguments) are done +// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1)) +// +// Examples: +// - reference to the actual stack frame +// for any local area var there is smt like : FI >= 0, StackOffset: 4 +// st.w REGX, SP, 4 +// +// - reference to previous stack frame +// suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16. +// The emitted instruction will be something like: +// ld.w REGX, SP, 16+StackSize +// +// Since the total stack size is unknown on LowerFormalArguments, all +// stack references (ObjectOffset) created to reference the function +// arguments, are negative numbers. This way, on eliminateFrameIndex it's +// possible to detect those references and the offsets are adjusted to +// their real location. +// +//===----------------------------------------------------------------------===// +// +LoongArchFrameLowering::LoongArchFrameLowering(const LoongArchSubtarget &STI) + : TargetFrameLowering(StackGrowsDown, STI.getStackAlignment(), 0, + STI.getStackAlignment()), STI(STI) {} void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineFrameInfo &MFI = MF.getFrameInfo(); - const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); - const LoongArchInstrInfo *TII = STI.getInstrInfo(); - MachineBasicBlock::iterator MBBI = MBB.begin(); + LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); - Register SPReg = LoongArch::R3; - Register FPReg = LoongArch::R22; - - // Debug location must be unknown since the first debug location is used - // to determine the end of the prologue. - DebugLoc DL; - - // Determine the correct frame layout - determineFrameLayout(MF); + const LoongArchInstrInfo &TII = + *static_cast(STI.getInstrInfo()); + const LoongArchRegisterInfo &RegInfo = + *static_cast(STI.getRegisterInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl; + LoongArchABIInfo ABI = STI.getABI(); + unsigned SP = ABI.GetStackPtr(); + unsigned FP = ABI.GetFramePtr(); + unsigned ZERO = ABI.GetNullPtr(); + unsigned MOVE = ABI.GetGPRMoveOp(); + unsigned ADDI = ABI.GetPtrAddiOp(); + unsigned AND = ABI.IsLP64() ? LoongArch::AND : LoongArch::AND32; + unsigned SLLI = ABI.IsLP64() ? LoongArch::SLLI_D : LoongArch::SLLI_W; + + const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? + &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; // First, compute final stack size. uint64_t StackSize = MFI.getStackSize(); + uint64_t RealStackSize = StackSize; - // Early exit if there is no need to allocate space in the stack. + // No need to allocate space on the stack. if (StackSize == 0 && !MFI.adjustsStack()) return; + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF, true); + uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount; + // Split the SP adjustment to reduce the offsets of callee saved spill. + if (FirstSPAdjustAmount) + StackSize = FirstSPAdjustAmount; + // Adjust stack. - adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); - // Emit ".cfi_def_cfa_offset StackSize". 
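// A worked example of the split computed above (editor's sketch, assuming a
// 16-byte stack alignment and a hypothetical 2464-byte frame): 2464 does not
// fit a signed 12-bit immediate, so with callee-saved registers present
//   FirstSPAdjustAmount  = 2048 - 16   = 2032
//   SecondSPAdjustAmount = 2464 - 2032 = 432
// and both adjustments, as well as every callee-saved spill offset in
// [0, 2032), fit a single addi.d/st.d. A minimal standalone sketch of the
// same decision, mirroring getFirstSPAdjustAmount:

#include <cstdint>

uint64_t firstSPAdjustAmount(uint64_t StackSize, uint64_t StackAlign,
                             bool HasCSRs, bool IsPrologue) {
  // Split only when the whole frame cannot be reached with one
  // 12-bit-immediate addi; otherwise a single adjustment suffices.
  if (StackSize > 2047) // i.e. !isInt<12>(StackSize) for non-negative sizes
    return HasCSRs ? 2048 - StackAlign : (IsPrologue ? 2048 : 0);
  return 0;
}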
- unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlag(MachineInstr::FrameSetup); + TII.adjustReg(SP, SP, -StackSize, MBB, MBBI, MachineInstr::FrameSetup); + if (FirstSPAdjustAmount != 2048 || SecondSPAdjustAmount == 0) { + // Emit ".cfi_def_cfa_offset StackSize". + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } - const auto &CSI = MFI.getCalleeSavedInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); + + const std::vector &CSI = MFI.getCalleeSavedInfo(); + + if (!CSI.empty()) { + // Find the instruction past the last instruction that saves a callee-saved + // register to the stack. + for (unsigned i = 0; i < CSI.size(); ++i) + ++MBBI; + + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. + for (std::vector::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } - // The frame pointer is callee-saved, and code has been generated for us to - // save it to the stack. We need to skip over the storing of callee-saved - // registers as the frame pointer must be modified after it has been saved - // to the stack, not before. - std::advance(MBBI, CSI.size()); - - // Iterate over list of callee-saved registers and emit .cfi_offset - // directives. - for (const auto &Entry : CSI) { - int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx()); - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, RI->getDwarfRegNum(Entry.getReg(), true), Offset)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlag(MachineInstr::FrameSetup); + if (LoongArchFI->callsEhReturn()) { + // Insert instructions that spill eh data registers. + for (int I = 0; I < 4; ++I) { + if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) + MBB.addLiveIn(ABI.GetEhDataReg(I)); + TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, + LoongArchFI->getEhDataRegFI(I), RC, &RegInfo); + } + + // Emit .cfi_offset directives for eh data registers. + for (int I = 0; I < 4; ++I) { + int64_t Offset = MFI.getObjectOffset(LoongArchFI->getEhDataRegFI(I)); + unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, Reg, Offset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } } - // Generate new FP. + // If framepointer enabled, set it to point to the stack pointer on entry. if (hasFP(MF)) { - adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup); - - // Emit ".cfi_def_cfa $fp, 0" - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, RI->getDwarfRegNum(FPReg, true), 0)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + // Insert instruction "addi.w/d $fp, $sp, StackSize" at this location. 
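// Continuing the hypothetical 2464-byte example from above (editor's sketch,
// varargs save size 0), the LP64 prologue emitted around this point looks
// roughly like:
//   addi.d $sp, $sp, -2032   # first SP adjustment
//   st.d   $ra, $sp, 2024    # callee-saved spills, 12-bit offsets
//   st.d   $fp, $sp, 2016
//   addi.d $fp, $sp, 2032    # the addi.w/d mentioned above
//   addi.d $sp, $sp, -432    # second SP adjustment (emitted further below)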
+    TII.adjustReg(FP, SP, StackSize - LoongArchFI->getVarArgsSaveSize(), MBB,
+                  MBBI, MachineInstr::FrameSetup);
+    // Emit ".cfi_def_cfa $fp, $varargs_size".
+    unsigned CFIIndex = MF.addFrameInst(
+        MCCFIInstruction::cfiDefCfa(nullptr, MRI->getDwarfRegNum(FP, true),
+                                    LoongArchFI->getVarArgsSaveSize()));
+    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
         .setMIFlag(MachineInstr::FrameSetup);
   }
+
+  // Emit the second SP adjustment after saving callee-saved registers.
+  if (FirstSPAdjustAmount && SecondSPAdjustAmount) {
+    if (hasFP(MF)) {
+      assert(SecondSPAdjustAmount > 0 &&
+             "SecondSPAdjustAmount should be greater than zero");
+      TII.adjustReg(SP, SP, -SecondSPAdjustAmount, MBB, MBBI,
+                    MachineInstr::FrameSetup);
+    } else {
+      // FIXME: RegScavenger will place the spill instruction before the
+      // prologue if a VReg is created in the prologue. This will pollute the
+      // caller's stack data. Therefore, until there is a better way, we just
+      // use the `addi.w/d` instruction for stack adjustment to ensure that
+      // no VReg is created.
+      for (int Val = SecondSPAdjustAmount; Val > 0; Val -= 2048)
+        BuildMI(MBB, MBBI, dl, TII.get(ADDI), SP)
+            .addReg(SP)
+            .addImm(Val < 2048 ? -Val : -2048)
+            .setMIFlag(MachineInstr::FrameSetup);
+      // If we are using a frame pointer, and thus emitted ".cfi_def_cfa fp, 0",
+      // don't emit an sp-based .cfi_def_cfa_offset.
+      // Emit ".cfi_def_cfa_offset StackSize".
+      unsigned CFIIndex = MF.addFrameInst(
+          MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+  }
+
+  // Realign stack.
+  if (hasFP(MF)) {
+    if (RegInfo.hasStackRealignment(MF)) {
+      // addi.w/d $Reg, $zero, -MaxAlignment
+      // and      $sp, $sp, $Reg
+      unsigned VR = MF.getRegInfo().createVirtualRegister(RC);
+      assert((Log2(MFI.getMaxAlign()) < 16) &&
+             "Function's alignment size requirement is not supported.");
+      int MaxAlign = -(int)MFI.getMaxAlign().value();
+      int Alignment = (int)MFI.getMaxAlign().value();
+
+      if (Alignment <= 2048) {
+        BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(MaxAlign);
+        BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);
+      } else {
+        const unsigned NrBitsToZero = countTrailingZeros((unsigned)Alignment);
+        BuildMI(MBB, MBBI, dl, TII.get(ADDI), VR).addReg(ZERO).addImm(-1);
+        BuildMI(MBB, MBBI, dl, TII.get(SLLI), VR)
+            .addReg(VR)
+            .addImm(NrBitsToZero);
+        BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);
+      }
+
+      if (hasBP(MF)) {
+        // move $s7, $sp
+        unsigned BP = STI.isABI_LP64() ? LoongArch::S7_64 : LoongArch::S7;
+        BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP).addReg(SP).addReg(ZERO);
+      }
+    }
+  }
 }
 
 void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
-  const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
-  MachineFrameInfo &MFI = MF.getFrameInfo();
-  Register SPReg = LoongArch::R3;
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
-  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  LoongArchFunctionInfo *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>();
 
-  const auto &CSI = MFI.getCalleeSavedInfo();
-  // Skip to before the restores of callee-saved registers.
- auto LastFrameDestroy = MBBI; - if (!CSI.empty()) - LastFrameDestroy = std::prev(MBBI, CSI.size()); + const LoongArchInstrInfo &TII = + *static_cast(STI.getInstrInfo()); + const LoongArchRegisterInfo &RegInfo = + *static_cast(STI.getRegisterInfo()); + + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + LoongArchABIInfo ABI = STI.getABI(); + unsigned SP = ABI.GetStackPtr(); + unsigned FP = ABI.GetFramePtr(); // Get the number of bytes from FrameInfo. uint64_t StackSize = MFI.getStackSize(); // Restore the stack pointer. - if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) { - assert(hasFP(MF) && "frame pointer should not have been eliminated"); - adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, -StackSize, - MachineInstr::FrameDestroy); + if (hasFP(MF) && + (RegInfo.hasStackRealignment(MF) || MFI.hasVarSizedObjects())) { + // Find the first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) + --I; + TII.adjustReg(SP, FP, -(StackSize - LoongArchFI->getVarArgsSaveSize()), MBB, + I); } - // Deallocate stack - adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); -} + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); + if (FirstSPAdjustAmount) { + uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount; + assert(SecondSPAdjustAmount > 0 && + "SecondSPAdjustAmount should be greater than zero"); + // Find the first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) + --I; + + TII.adjustReg(SP, SP, SecondSPAdjustAmount, MBB, I); + } -void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, - BitVector &SavedRegs, - RegScavenger *RS) const { - TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - // Unconditionally spill RA and FP only if the function uses a frame - // pointer. - if (hasFP(MF)) { - SavedRegs.set(LoongArch::R1); - SavedRegs.set(LoongArch::R22); + if (LoongArchFI->callsEhReturn()) { + const TargetRegisterClass *RC = + ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; + + // Find first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) + --I; + + // Insert instructions that restore eh data registers. + for (int J = 0; J < 4; ++J) + TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J), + LoongArchFI->getEhDataRegFI(J), RC, &RegInfo); } - // Mark BP as used if function has dedicated base pointer. - if (hasBP(MF)) - SavedRegs.set(LoongArchABI::getBPReg()); + + if (FirstSPAdjustAmount) + StackSize = FirstSPAdjustAmount; + + if (!StackSize) + return; + + // Final adjust stack. 
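// For the hypothetical 2464-byte frame above, the epilogue thus unwinds as
// (editor's sketch):
//   addi.d $sp, $sp, 432     # undo the second adjustment, before the reloads
//   ld.d   $fp, $sp, 2016    # callee-saved reloads
//   ld.d   $ra, $sp, 2024
//   addi.d $sp, $sp, 2032    # the final adjustment emitted just below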
+ TII.adjustReg(SP, SP, StackSize, MBB, MBBI); } -StackOffset LoongArchFrameLowering::getFrameIndexReference( - const MachineFunction &MF, int FI, Register &FrameReg) const { +StackOffset +LoongArchFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + Register &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + LoongArchABIInfo ABI = STI.getABI(); + const auto *LoongArchFI = MF.getInfo(); // Callee-saved registers should be referenced relative to the stack // pointer (positive offset), otherwise use the frame pointer (negative @@ -207,17 +380,182 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference( StackOffset Offset = StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + MFI.getOffsetAdjustment()); + uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); if (CSI.size()) { MinCSFI = CSI[0].getFrameIdx(); MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); } - FrameReg = RI->getFrameRegister(MF); - if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) { - FrameReg = LoongArch::R3; + bool EhDataRegFI = LoongArchFI->isEhDataRegFI(FI); + if ((FI >= MinCSFI && FI <= MaxCSFI) || EhDataRegFI) { + FrameReg = ABI.GetStackPtr(); + + if (FirstSPAdjustAmount) + Offset += StackOffset::getFixed(FirstSPAdjustAmount); + else + Offset += StackOffset::getFixed(MFI.getStackSize()); + } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { + // If the stack was realigned, the frame pointer is set in order to allow + // SP to be restored, so we need another base register to record the stack + // after realignment. + FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr(); Offset += StackOffset::getFixed(MFI.getStackSize()); + } else { + FrameReg = RI->getFrameRegister(MF); + if (hasFP(MF)) + Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize()); + else + Offset += StackOffset::getFixed(MFI.getStackSize()); } - return Offset; } + +bool LoongArchFrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef CSI, const TargetRegisterInfo *TRI) const { + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. Do not add if the register is + // RA and return address is taken, because it has already been added in + // method LoongArchTargetLowering::lowerRETURNADDR. + // It's killed at the spill, unless the register is RA and return address + // is taken. + unsigned Reg = CSI[i].getReg(); + bool IsRAAndRetAddrIsTaken = (Reg == LoongArch::RA || Reg == LoongArch::RA_64) + && MF->getFrameInfo().isReturnAddressTaken(); + if (!IsRAAndRetAddrIsTaken) + MBB.addLiveIn(Reg); + + // Insert the spill to the stack frame. + bool IsKill = !IsRAAndRetAddrIsTaken; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, + CSI[i].getFrameIdx(), RC, TRI); + } + + return true; +} + +bool +LoongArchFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + // Reserve call frame if the size of the maximum call frame fits into 12-bit + // immediate field and there are no variable sized objects on the stack. + // Make sure the second register scavenger spill slot can be accessed with one + // instruction. 
+ return isInt<12>(MFI.getMaxCallFrameSize() + getStackAlignment()) && + !MFI.hasVarSizedObjects(); +} + +/// Mark \p Reg and all registers aliasing it in the bitset. +static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, + unsigned Reg) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + SavedRegs.set(*AI); +} + +void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); + LoongArchABIInfo ABI = STI.getABI(); + unsigned FP = ABI.GetFramePtr(); + unsigned BP = ABI.IsLP64() ? LoongArch::S7_64 : LoongArch::S7; + + // Mark $fp as used if function has dedicated frame pointer. + if (hasFP(MF)) + setAliasRegs(MF, SavedRegs, FP); + // Mark $s7 as used if function has dedicated base pointer. + if (hasBP(MF)) + setAliasRegs(MF, SavedRegs, BP); + + // Create spill slots for eh data registers if function calls eh_return. + if (LoongArchFI->callsEhReturn()) + LoongArchFI->createEhDataRegsFI(); + + // Set scavenging frame index if necessary. + uint64_t MaxSPOffset = estimateStackSize(MF); + + // If there is a variable + // sized object on the stack, the estimation cannot account for it. + if (isIntN(12, MaxSPOffset) && + !MF.getFrameInfo().hasVarSizedObjects()) + return; + + const TargetRegisterClass &RC = + ABI.ArePtrs64bit() ? LoongArch::GPR64RegClass : LoongArch::GPR32RegClass; + int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), + TRI->getSpillAlign(RC), false); + RS->addScavengingFrameIndex(FI); +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas, +// if it needs dynamic stack realignment, if frame pointer elimination is +// disabled, or if the frame address is taken. +bool LoongArchFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || + TRI->hasStackRealignment(MF); +} + +bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); +} + +// Estimate the size of the stack, including the incoming arguments. We need to +// account for register spills, local objects, reserved call frame and incoming +// arguments. This is required to determine the largest possible positive offset +// from $sp so that it can be determined if an emergency spill slot for stack +// addresses is required. +uint64_t LoongArchFrameLowering:: +estimateStackSize(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + + int64_t Size = 0; + + // Iterate over fixed sized objects which are incoming arguments. + for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) + if (MFI.getObjectOffset(I) > 0) + Size += MFI.getObjectSize(I); + + // Conservatively assume all callee-saved registers will be saved. 
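// A worked instance of the accumulation below (editor's sketch, hypothetical
// numbers): if incoming arguments leave Size = 20 and three 8-byte registers
// are pending, the running total goes
//   alignTo(20 + 8, 8) = 32,  alignTo(32 + 8, 8) = 40,  alignTo(40 + 8, 8) = 48
// so each spill slot stays aligned to its own spill size and the estimate
// never under-counts. The loop below performs exactly this bookkeeping: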
+  for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
+    unsigned RegSize = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
+    Size = alignTo(Size + RegSize, RegSize);
+  }
+
+  // Get the size of the rest of the frame objects and any possible reserved
+  // call frame, accounting for alignment.
+  return Size + MFI.estimateStackSize(MF);
+}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
+MachineBasicBlock::iterator LoongArchFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  unsigned SP = STI.getABI().IsLP64() ? LoongArch::SP_64 : LoongArch::SP;
+
+  if (!hasReservedCallFrame(MF)) {
+    int64_t Amount = I->getOperand(0).getImm();
+    if (I->getOpcode() == LoongArch::ADJCALLSTACKDOWN)
+      Amount = -Amount;
+
+    STI.getInstrInfo()->adjustReg(SP, SP, Amount, MBB, I);
+  }
+
+  return MBB.erase(I);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
index 72d8e006a0bb..74aabaeb426c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -1,4 +1,4 @@
-//=- LoongArchFrameLowering.h - TargetFrameLowering for LoongArch -*- C++ -*--//
+//===- LoongArchFrameLowering.h - Frame lowering for LoongArch -*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,51 +6,66 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This class implements LoongArch-specific bits of TargetFrameLowering class.
+// This file contains the LoongArch implementation of TargetFrameLowering.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
 #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
 
+#include "LoongArch.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 
 namespace llvm {
-class LoongArchSubtarget;
+  class LoongArchSubtarget;
 
 class LoongArchFrameLowering : public TargetFrameLowering {
   const LoongArchSubtarget &STI;
 
 public:
-  explicit LoongArchFrameLowering(const LoongArchSubtarget &STI)
-      : TargetFrameLowering(StackGrowsDown,
-                            /*StackAlignment=*/Align(16),
-                            /*LocalAreaOffset=*/0),
-        STI(STI) {}
+  explicit LoongArchFrameLowering(const LoongArchSubtarget &STI);
 
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
+  StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
+                                     Register &FrameReg) const override;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 ArrayRef<CalleeSavedInfo> CSI,
+                                 const TargetRegisterInfo *TRI) const override;
+
+  bool hasReservedCallFrame(const MachineFunction &MF) const override;
+
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS) const override;
 
-  MachineBasicBlock::iterator
-  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MI) const override {
-    return MBB.erase(MI);
+  bool hasFP(const MachineFunction &MF) const override;
+
+  bool hasBP(const MachineFunction &MF) const;
+
+  bool enableShrinkWrapping(const MachineFunction &MF) const override {
+    return true;
   }
 
-  StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
-                                     Register &FrameReg) const override;
+  MachineBasicBlock::iterator
+  eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I) const override;
 
-  bool hasFP(const MachineFunction &MF) const override;
-  bool hasBP(const MachineFunction &MF) const;
+  // Get the first stack adjustment amount for splitting the SP adjustment.
+  // Returns 0 if we don't want to split the SP adjustment in the prologue
+  // and epilogue.
+  uint64_t getFirstSPAdjustAmount(const MachineFunction &MF,
+                                  bool IsPrologue = false) const;
 
-private:
-  void determineFrameLayout(MachineFunction &MF) const;
-  void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-                 const DebugLoc &DL, Register DestReg, Register SrcReg,
-                 int64_t Val, MachineInstr::MIFlag Flag) const;
+protected:
+  uint64_t estimateStackSize(const MachineFunction &MF) const;
 };
-} // end namespace llvm
 
-#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
+
+} // End llvm namespace
+
+#endif
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index bb40ff817574..0efb739e0818 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -1,4 +1,4 @@
-//=- LoongArchISelDAGToDAG.cpp - A dag to dag inst selector for LoongArch -===//
+//===- LoongArchISelDAGToDAG.cpp - A DAG to DAG Inst Selector for LoongArch ==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -11,176 +11,868 @@ //===----------------------------------------------------------------------===// #include "LoongArchISelDAGToDAG.h" -#include "LoongArchISelLowering.h" +#include "LoongArch.h" +#include "LoongArchMachineFunction.h" +#include "LoongArchRegisterInfo.h" +#include "MCTargetDesc/LoongArchAnalyzeImmediate.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" #include "MCTargetDesc/LoongArchMCTargetDesc.h" -#include "MCTargetDesc/LoongArchMatInt.h" -#include "llvm/Support/KnownBits.h" - +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsLoongArch.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "loongarch-isel" -void LoongArchDAGToDAGISel::Select(SDNode *Node) { - // If we have a custom node, we have already selected. - if (Node->isMachineOpcode()) { - LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); - Node->setNodeId(-1); - return; - } +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// - // Instruction Selection not handled by the auto-generated tablegen selection - // should be handled here. - unsigned Opcode = Node->getOpcode(); - MVT GRLenVT = Subtarget->getGRLenVT(); - SDLoc DL(Node); - MVT VT = Node->getSimpleValueType(0); +//===----------------------------------------------------------------------===// +// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// - switch (Opcode) { - default: - break; - case ISD::Constant: { - int64_t Imm = cast(Node)->getSExtValue(); - if (Imm == 0 && VT == GRLenVT) { - SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, - LoongArch::R0, GRLenVT); - ReplaceNode(Node, New.getNode()); - return; - } - SDNode *Result = nullptr; - SDValue SrcReg = CurDAG->getRegister(LoongArch::R0, GRLenVT); - // The instructions in the sequence are handled here. 
- for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) { - SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, GRLenVT); - if (Inst.Opc == LoongArch::LU12I_W) - Result = CurDAG->getMachineNode(LoongArch::LU12I_W, DL, GRLenVT, SDImm); - else - Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SrcReg, SDImm); - SrcReg = SDValue(Result, 0); - } +void LoongArchDAGToDAGISel::PostprocessISelDAG() { doPeepholeLoadStoreADDI(); } - ReplaceNode(Node, Result); - return; +void LoongArchDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + SelectionDAGISel::getAnalysisUsage(AU); +} + +bool LoongArchDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + Subtarget = &static_cast(MF.getSubtarget()); + bool Ret = SelectionDAGISel::runOnMachineFunction(MF); + + return Ret; +} + +/// Match frameindex +bool LoongArchDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + EVT ValTy = Addr.getValueType(); + + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); + return true; } - case ISD::FrameIndex: { - SDValue Imm = CurDAG->getTargetConstant(0, DL, GRLenVT); - int FI = cast(Node)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); - unsigned ADDIOp = - Subtarget->is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; - ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm)); - return; + return false; +} + +/// Match frameindex+offset and frameindex|offset +bool LoongArchDAGToDAGISel::selectAddrFrameIndexOffset( + SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits, + unsigned ShiftAmount = 0) const { + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) { + EVT ValTy = Addr.getValueType(); + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = + dyn_cast(Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else { + Base = Addr.getOperand(0); + // If base is a FI, additional offset calculation is done in + // eliminateFrameIndex, otherwise we need to check the alignment + const Align Alignment(1ULL << ShiftAmount); + if (!isAligned(Alignment, CN->getZExtValue())) + return false; + } + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), + ValTy); + return true; + } } - // TODO: Add selection nodes needed later. + return false; +} + +/// ComplexPattern used on LoongArchInstrInfo +/// Used on LoongArch Load/Store instructions +bool LoongArchDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + // if Address is FI, get the TargetFrameIndex. + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (!TM.isPositionIndependent()) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; } - // Select the default instruction. - SelectCode(Node); + // Addresses of the form FI+const or FI|const + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) + return true; + + return false; } -bool LoongArchDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { - // If this is FrameIndex, select it directly. Otherwise just let it get - // selected to a register independently. 
- if (auto *FIN = dyn_cast(Addr)) - Base = - CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getGRLenVT()); - else - Base = Addr; +/// ComplexPattern used on LoongArchInstrInfo +/// Used on LoongArch Load/Store instructions +bool LoongArchDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); return true; } -bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, - SDValue &ShAmt) { - // Shift instructions on LoongArch only read the lower 5 or 6 bits of the - // shift amount. If there is an AND on the shift amount, we can bypass it if - // it doesn't affect any of those bits. - if (N.getOpcode() == ISD::AND && isa(N.getOperand(1))) { - const APInt &AndMask = N->getConstantOperandAPInt(1); +bool LoongArchDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) + return true; + + return false; +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 12)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; - // Since the max shift amount is a power of 2 we can subtract 1 to make a - // mask that covers the bits needed to represent all shift amounts. 
- assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); - APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 1)) + return true; - if (ShMask.isSubsetOf(AndMask)) { - ShAmt = N.getOperand(0); + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 11, 1)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9, 3)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 14, 2)) + return true; + + return false; +} + +bool LoongArchDAGToDAGISel::selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3)) + return true; + + return selectAddrDefault(Addr, Base, Offset); +} + +// Select constant vector splats. +// +// Returns true and sets Imm if: +// * LSX is enabled +// * N is a ISD::BUILD_VECTOR representing a constant splat +bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, + unsigned MinSizeInBits) const { + if (!(Subtarget->hasLSX() || Subtarget->hasLASX())) + return false; + + BuildVectorSDNode *Node = dyn_cast(N); + + if (!Node) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + MinSizeInBits)) + return false; + + Imm = SplatValue; + + return true; +} + +// Select constant vector splats. +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value fits in an integer with the specified signed-ness and +// width. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian LSX since BITCAST is +// sometimes a shuffle in big-endian mode. +// +// It's worth noting that this function is not used as part of the selection +// of [v/xv]ldi.[bhwd] since it does not permit using the wrong-typed +// [v/xv]ldi.[bhwd] instruction to achieve the desired bit pattern. +// [v/xv]ldi.[bhwd] is selected in LoongArchDAGToDAGISel::selectNode. 
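// By way of illustration (editor's sketch, hypothetical values): a v8i16
// BUILD_VECTOR splatting the constant 7 satisfies selectVSplat() with a
// 16-bit splat value, so selectVSplatCommon(N, Imm, /*Signed=*/false,
// /*ImmBitSize=*/5) matches and sets Imm to the target constant 7, while a
// splat of -3 is matched only by the signed variants such as
// selectVSplatSimm5. The implementation follows: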
+bool LoongArchDAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm, + bool Signed, + unsigned ImmBitSize) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + + if ((Signed && ImmValue.isSignedIntN(ImmBitSize)) || + (!Signed && ImmValue.isIntN(ImmBitSize))) { + Imm = CurDAG->getTargetConstant(ImmValue, SDLoc(N), EltTy); return true; } + } + + return false; +} + +// Select constant vector splats. +bool LoongArchDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 1); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 2); +} - // SimplifyDemandedBits may have optimized the mask so try restoring any - // bits that are known zero. - KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); - if (ShMask.isSubsetOf(AndMask | Known.Zero)) { - ShAmt = N.getOperand(0); +bool LoongArchDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 3); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 4); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 5); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 6); +} + +bool LoongArchDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, false, 8); +} + +bool LoongArchDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const { + return selectVSplatCommon(N, Imm, true, 5); +} + +// Select constant vector splats whose value is a power of 2. +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value is a power of two. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian LSX since BITCAST is +// sometimes a shuffle in big-endian mode. +bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N, + SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + int32_t Log2 = ImmValue.exactLogBase2(); + + if (Log2 != -1) { + Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); return true; } - } else if (N.getOpcode() == LoongArchISD::BSTRPICK) { - // Similar to the above AND, if there is a BSTRPICK on the shift amount, we - // can bypass it. 
- assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); - assert(isa(N.getOperand(1)) && "Illegal msb operand!"); - assert(isa(N.getOperand(2)) && "Illegal lsb operand!"); - uint64_t msb = N.getConstantOperandVal(1), lsb = N.getConstantOperandVal(2); - if (lsb == 0 && Log2_32(ShiftWidth) <= msb + 1) { - ShAmt = N.getOperand(0); + } + + return false; +} + +bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, + SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + int32_t Log2 = (~ImmValue).exactLogBase2(); + + if (Log2 != -1) { + Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); return true; } - } else if (N.getOpcode() == ISD::SUB && - isa(N.getOperand(0))) { - uint64_t Imm = N.getConstantOperandVal(0); - // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to - // generate a NEG instead of a SUB of a constant. - if (Imm != 0 && Imm % ShiftWidth == 0) { - SDLoc DL(N); - EVT VT = N.getValueType(); - SDValue Zero = - CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, LoongArch::R0, VT); - unsigned NegOpc = VT == MVT::i64 ? LoongArch::SUB_D : LoongArch::SUB_W; - MachineSDNode *Neg = - CurDAG->getMachineNode(NegOpc, DL, VT, Zero, N.getOperand(1)); - ShAmt = SDValue(Neg, 0); + } + + return false; +} + +// Select constant vector splats whose value only has a consecutive sequence +// of left-most bits set (e.g. 0b11...1100...00). +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value is a consecutive sequence of left-most bits. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian LSX since BITCAST is +// sometimes a shuffle in big-endian mode. +bool LoongArchDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + // Extract the run of set bits starting with bit zero from the bitwise + // inverse of ImmValue, and test that the inverse of this is the same + // as the original value. + if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) { + + Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N), + EltTy); return true; } } - ShAmt = N; - return true; + return false; } -bool LoongArchDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { - if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && - cast(N.getOperand(1))->getVT() == MVT::i32) { - Val = N.getOperand(0); - return true; - } - MVT VT = N.getSimpleValueType(); - if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { - Val = N; - return true; +// Select constant vector splats whose value only has a consecutive sequence +// of right-most bits set (e.g. 0b00...0011...11). +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value is a consecutive sequence of right-most bits. +// +// This function looks through ISD::BITCAST nodes. 
+// TODO: This might not be appropriate for big-endian LSX since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool LoongArchDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    // Extract the run of set bits starting with bit zero, and test that the
+    // result is the same as the original value.
+    if (ImmValue == (ImmValue & ~(ImmValue + 1))) {
+      Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N),
+                                      EltTy);
+      return true;
+    }
   }
 
   return false;
 }
 
-bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
-  if (N.getOpcode() == ISD::AND) {
-    auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
-    if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) {
-      Val = N.getOperand(0);
+bool LoongArchDAGToDAGISel::trySelect(SDNode *Node) {
+  unsigned Opcode = Node->getOpcode();
+  SDLoc DL(Node);
+
+  ///
+  // Instruction Selection not handled by the auto-generated
+  // tablegen selection should be handled here.
+  ///
+  switch(Opcode) {
+  default: break;
+  case ISD::ConstantFP: {
+    ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+    if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+      if (Subtarget->is64Bit()) {
+        SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+                                              LoongArch::ZERO_64, MVT::i64);
+        ReplaceNode(Node,
+                    CurDAG->getMachineNode(LoongArch::MOVGR2FR_D, DL,
+                                           MVT::f64, Zero));
+      }
       return true;
     }
+    break;
   }
-  }
-  MVT VT = N.getSimpleValueType();
-  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32);
-  if (CurDAG->MaskedValueIsZero(N, Mask)) {
-    Val = N;
+
+  case ISD::Constant: {
+    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+    MVT VT = CN->getSimpleValueType(0);
+    int64_t Imm = CN->getSExtValue();
+    LoongArchAnalyzeImmediate::InstSeq Seq =
+        LoongArchAnalyzeImmediate::generateInstSeq(Imm, VT == MVT::i64);
+    SDLoc DL(CN);
+    SDNode *Result = nullptr;
+    SDValue SrcReg = CurDAG->getRegister(
+        VT == MVT::i64 ? LoongArch::ZERO_64 : LoongArch::ZERO, VT);
+
+    // The instructions in the sequence are handled here.
+    for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) {
+      SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT);
+      if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32)
+        Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm);
+      else
+        Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm);
+      SrcReg = SDValue(Result, 0);
+    }
+    ReplaceNode(Node, Result);
     return true;
   }
+  case ISD::BUILD_VECTOR: {
+    // Select an appropriate vldi.[bhwd] instruction for 128-bit constant
+    // splats when LSX is enabled, and an appropriate xvldi.[bhwd] instruction
+    // for 256-bit constant splats when LASX is enabled. Fix up any register
+    // class mismatches that occur as a result.
+    //
+    // This allows the compiler to use a wider range of immediates than would
+    // otherwise be allowed. If, for example, v4i32 could only use [v/xv]ldi.h
+    // then it would not be possible to load { 0x01010101, 0x01010101,
+    // 0x01010101, 0x01010101 } without using a constant pool. This would be
+    // sub-optimal when '[v/xv]ldi.b vd, 1' is capable of producing that
+    // bit-pattern in the same set of registers. Similarly, [v/xv]ldi.h isn't
+    // capable of producing { 0x00000000, 0x00000001, 0x00000000, 0x00000001 }
+    // but '[v/xv]ldi.d vd, 1' can.
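    // As a concrete illustration (editor's sketch, hypothetical values): the
    // splat { 0x01010101 x4 } is the byte-splat of 1, so '[v/xv]ldi.b vd, 1'
    // reproduces it even though 0x01010101 does not fit the 10-bit signed
    // immediate of a direct vldi.w splat; likewise the pattern
    // { 0x00000000, 0x00000001, ... } is the 64-bit-element splat of 1, so
    // '[v/xv]ldi.d vd, 1' reproduces it without a constant pool.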
+
+    const LoongArchABIInfo &ABI =
+        static_cast<const LoongArchTargetMachine &>(TM).getABI();
+
+    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+    APInt SplatValue, SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    unsigned LdiOp;
+    EVT ResVecTy = BVN->getValueType(0);
+    EVT ViaVecTy;
+
+    if ((!Subtarget->hasLSX() || !BVN->getValueType(0).is128BitVector()) &&
+        (!Subtarget->hasLASX() || !BVN->getValueType(0).is256BitVector()))
+      return false;
+
+    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                              HasAnyUndefs, 8))
+      return false;
+
+    bool IsLASX256 = BVN->getValueType(0).is256BitVector();
+
+    switch (SplatBitSize) {
+    default:
+      return false;
+    case 8:
+      LdiOp = IsLASX256 ? LoongArch::XVLDI_B : LoongArch::VLDI_B;
+      ViaVecTy = IsLASX256 ? MVT::v32i8 : MVT::v16i8;
+      break;
+    case 16:
+      LdiOp = IsLASX256 ? LoongArch::XVLDI_H : LoongArch::VLDI_H;
+      ViaVecTy = IsLASX256 ? MVT::v16i16 : MVT::v8i16;
+      break;
+    case 32:
+      LdiOp = IsLASX256 ? LoongArch::XVLDI_W : LoongArch::VLDI_W;
+      ViaVecTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32;
+      break;
+    case 64:
+      LdiOp = IsLASX256 ? LoongArch::XVLDI_D : LoongArch::VLDI_D;
+      ViaVecTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64;
+      break;
+    }
+
+    SDNode *Res;
+
+    // If we have a signed 10-bit integer, we can splat it directly.
+    //
+    // If we have something bigger, we can synthesize the value into a GPR and
+    // splat from there.
+    if (SplatValue.isSignedIntN(10)) {
+      SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+                                              ViaVecTy.getVectorElementType());
+
+      Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm);
+    } else if (SplatValue.isSignedIntN(12)) {
+      bool Is32BitSplat = SplatBitSize < 64;
+      const unsigned ADDIOp =
+          Is32BitSplat ? LoongArch::ADDI_W : LoongArch::ADDI_D;
+      const MVT SplatMVT = Is32BitSplat ? MVT::i32 : MVT::i64;
+      SDValue ZeroVal = CurDAG->getRegister(
+          Is32BitSplat ? LoongArch::ZERO : LoongArch::ZERO_64, SplatMVT);
+
+      const unsigned FILLOp =
+          (SplatBitSize == 16)
+              ? (IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H)
+              : (SplatBitSize == 32
+                     ? (IsLASX256 ? LoongArch::XVREPLGR2VR_W
+                                  : LoongArch::VREPLGR2VR_W)
+                     : (SplatBitSize == 64
+                            ? (IsLASX256 ? LoongArch::XVREPLGR2VR_D
+                                         : LoongArch::VREPLGR2VR_D)
+                            : 0));
+
+      assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!");
+
+      short Lo = SplatValue.getLoBits(12).getSExtValue();
+      SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT);
+
+      Res = CurDAG->getMachineNode(ADDIOp, DL, SplatMVT, ZeroVal, LoVal);
+      Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0));
+    } else if (SplatValue.isSignedIntN(16) && SplatBitSize == 16) {
+      const unsigned Lo = SplatValue.getLoBits(12).getZExtValue();
+      const unsigned Hi = SplatValue.lshr(12).getLoBits(4).getZExtValue();
+      SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32);
+
+      SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+      SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+      if (Hi)
+        Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal);
+
+      if (Lo)
+        Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32,
+                                     Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+      assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!");
+      const unsigned FILLOp =
+          IsLASX256 ? LoongArch::XVREPLGR2VR_H : LoongArch::VREPLGR2VR_H;
+      EVT FILLTy = IsLASX256 ?
MVT::v16i16 : MVT::v8i16; + Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); + } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) { + // Only handle the cases where the splat size agrees with the size + // of the SplatValue here. + const unsigned Lo = SplatValue.getLoBits(12).getZExtValue(); + const unsigned Hi = SplatValue.lshr(12).getLoBits(20).getZExtValue(); + SDValue ZeroVal = CurDAG->getRegister(LoongArch::ZERO, MVT::i32); + + SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32); + SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32); + if (Hi) + Res = CurDAG->getMachineNode(LoongArch::LU12I_W32, DL, MVT::i32, HiVal); + + if (Lo) + Res = CurDAG->getMachineNode(LoongArch::ORI32, DL, MVT::i32, + Hi ? SDValue(Res, 0) : ZeroVal, LoVal); + + assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!"); + const unsigned FILLOp = + IsLASX256 ? LoongArch::XVREPLGR2VR_W : LoongArch::VREPLGR2VR_W; + EVT FILLTy = IsLASX256 ? MVT::v8i32 : MVT::v4i32; + Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); + + } else if ((SplatValue.isSignedIntN(32) && SplatBitSize == 64 && + ABI.IsLP64()) || + (SplatValue.isSignedIntN(64))) { + + int64_t Imm = SplatValue.getSExtValue(); + LoongArchAnalyzeImmediate::InstSeq Seq = + LoongArchAnalyzeImmediate::generateInstSeq(Imm, true); + SDValue SrcReg = CurDAG->getRegister(LoongArch::ZERO_64, MVT::i64); + + for (LoongArchAnalyzeImmediate::Inst &Inst : Seq) { + SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, MVT::i64); + if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32) + Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SDImm); + else + Res = CurDAG->getMachineNode(Inst.Opc, DL, MVT::i64, SrcReg, SDImm); + SrcReg = SDValue(Res, 0); + } + + const unsigned FILLOp = + IsLASX256 ? LoongArch::XVREPLGR2VR_D : LoongArch::VREPLGR2VR_D; + EVT FILLTy = IsLASX256 ? MVT::v4i64 : MVT::v2i64; + Res = CurDAG->getMachineNode(FILLOp, DL, FILLTy, SDValue(Res, 0)); + + } else + return false; + + if (ResVecTy != ViaVecTy) { + // If LdiOp is writing to a different register class to ResVecTy, then + // fix it up here. This COPY_TO_REGCLASS should never cause a move.v + // since the source and destination register sets contain the same + // registers. + const TargetLowering *TLI = getTargetLowering(); + MVT ResVecTySimple = ResVecTy.getSimpleVT(); + const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple); + Res = CurDAG->getMachineNode( + LoongArch::COPY_TO_REGCLASS, DL, ResVecTy, SDValue(Res, 0), + CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32)); + } + + ReplaceNode(Node, Res); + return true; + } + } + return false; } -// This pass converts a legalized DAG into a LoongArch-specific DAG, ready -// for instruction scheduling. -FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { - return new LoongArchDAGToDAGISel(TM); +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +void LoongArchDAGToDAGISel::Select(SDNode *Node) { + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); + return; + } + + // See if subclasses can handle this node. 
+ if (trySelect(Node)) + return; + + // Select the default instruction + SelectCode(Node); +} + +bool LoongArchDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector &OutOps) { + SDValue Base, Offset; + + switch(ConstraintID) { + default: + llvm_unreachable("Unexpected asm memory constraint"); + // All memory constraints can at least accept raw pointers. + case InlineAsm::Constraint_i: + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + case InlineAsm::Constraint_m: + if (selectAddrRegImm12(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + case InlineAsm::Constraint_R: + if (selectAddrRegImm12(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + case InlineAsm::Constraint_ZC: + if (selectIntAddrSImm14Lsl2(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + case InlineAsm::Constraint_ZB: + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); + return false; + } + return true; +} + +// This optimisation is ported from RISCV. +// Merge an ADDI into the offset of a load/store instruction where possible. +// (load (addi base, off1), off2) -> (load base, off1+off2) +// (store val, (addi base, off1), off2) -> (store val, base, off1+off2) +// This is possible when off1+off2 fits a 12-bit immediate. +void LoongArchDAGToDAGISel::doPeepholeLoadStoreADDI() { + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = &*--Position; + // Skip dead nodes and any non-machine opcodes. + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + int OffsetOpIdx; + int BaseOpIdx; + + // TODO: handle more instructions. + switch (N->getMachineOpcode()) { + default: + continue; + case LoongArch::LD_B: + case LoongArch::LD_B32: + case LoongArch::LD_BU: + case LoongArch::LD_BU32: + case LoongArch::LD_H: + case LoongArch::LD_H32: + case LoongArch::LD_HU: + case LoongArch::LD_HU32: + case LoongArch::LD_W: + case LoongArch::LD_W32: + case LoongArch::LD_WU: + case LoongArch::LD_D: + BaseOpIdx = 0; + OffsetOpIdx = 1; + break; + case LoongArch::ST_B: + case LoongArch::ST_B32: + case LoongArch::ST_H: + case LoongArch::ST_H32: + case LoongArch::ST_W: + case LoongArch::ST_W32: + case LoongArch::ST_D: + BaseOpIdx = 1; + OffsetOpIdx = 2; + break; + } + + if (!isa(N->getOperand(OffsetOpIdx))) + continue; + + SDValue Base = N->getOperand(BaseOpIdx); + + // If the base is an ADDI, we can merge it in to the load/store. + // TODO: handle more instructions, i.e. ADDI_W. 
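// A before/after sketch of the fold performed below (editor's illustration,
// hypothetical machine nodes):
//   before:  %base = ADDI_D %x, 16
//            %val  = LD_D %base, 8
//   after:   %val  = LD_D %x, 24     ; folded since 16 + 8 fits isInt<12>
// The now-unused ADDI_D is removed at the end of the loop iteration.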
+ if (!Base.isMachineOpcode() || Base.getMachineOpcode() != LoongArch::ADDI_D) + continue; + + SDValue ImmOperand = Base.getOperand(1); + uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); + + if (auto *Const = dyn_cast(ImmOperand)) { + int64_t Offset1 = Const->getSExtValue(); + int64_t CombinedOffset = Offset1 + Offset2; + if (!isInt<12>(CombinedOffset)) + continue; + ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand), + ImmOperand.getValueType()); + // TODO: handle below cases. +#if 0 + } else if (auto *GA = dyn_cast(ImmOperand)) { + // If the off1 in (addi base, off1) is a global variable's address (its + // low part, really), then we can rely on the alignment of that variable + // to provide a margin of safety before off1 can overflow the 12 bits. + // Check if off2 falls within that margin; if so off1+off2 can't overflow. + const DataLayout &DL = CurDAG->getDataLayout(); + Align Alignment = GA->getGlobal()->getPointerAlignment(DL); + if (Offset2 != 0 && Alignment <= Offset2) + continue; + int64_t Offset1 = GA->getOffset(); + int64_t CombinedOffset = Offset1 + Offset2; + ImmOperand = CurDAG->getTargetGlobalAddress( + GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), + CombinedOffset, GA->getTargetFlags()); + } else if (auto *CP = dyn_cast(ImmOperand)) { + // Ditto. + Align Alignment = CP->getAlign(); + if (Offset2 != 0 && Alignment <= Offset2) + continue; + int64_t Offset1 = CP->getOffset(); + int64_t CombinedOffset = Offset1 + Offset2; + ImmOperand = CurDAG->getTargetConstantPool( + CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(), + CombinedOffset, CP->getTargetFlags()); +#endif + } else { + continue; + } + + LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + LLVM_DEBUG(Base->dump(CurDAG)); + LLVM_DEBUG(dbgs() << "\nN: "); + LLVM_DEBUG(N->dump(CurDAG)); + LLVM_DEBUG(dbgs() << "\n"); + + // Modify the offset operand of the load/store. + if (BaseOpIdx == 0) // Load + CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, + N->getOperand(2)); + else // Store + CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), + ImmOperand, N->getOperand(3)); + + // The add-immediate may now be dead, in which case remove it. + if (Base.getNode()->use_empty()) + CurDAG->RemoveDeadNode(Base.getNode()); + } +} + +FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new LoongArchDAGToDAGISel(TM, OptLevel); } diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h index 8c9357d75979..76549731868b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -1,4 +1,4 @@ -//=- LoongArchISelDAGToDAG.h - A dag to dag inst selector for LoongArch ---===// +//===---- LoongArchISelDAGToDAG.h - A Dag to Dag Inst Selector for LoongArch --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -14,47 +14,138 @@ #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H #include "LoongArch.h" +#include "LoongArchSubtarget.h" #include "LoongArchTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" -// LoongArch-specific code to select LoongArch machine instructions for -// SelectionDAG operations. 
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LoongArchDAGToDAGISel - LoongArch specific code to select LoongArch machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
 namespace llvm {
 
-class LoongArchDAGToDAGISel : public SelectionDAGISel {
-  const LoongArchSubtarget *Subtarget = nullptr;
 
+class LoongArchDAGToDAGISel : public SelectionDAGISel {
 public:
-  explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM)
-      : SelectionDAGISel(TM) {}
+  explicit LoongArchDAGToDAGISel(LoongArchTargetMachine &TM,
+                                 CodeGenOpt::Level OL)
+      : SelectionDAGISel(TM, OL), Subtarget(nullptr) {}
 
+  // Pass Name
   StringRef getPassName() const override {
     return "LoongArch DAG->DAG Pattern Instruction Selection";
   }
 
-  bool runOnMachineFunction(MachineFunction &MF) override {
-    Subtarget = &MF.getSubtarget<LoongArchSubtarget>();
-    return SelectionDAGISel::runOnMachineFunction(MF);
-  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
 
-  void Select(SDNode *Node) override;
+  void PostprocessISelDAG() override;
 
-  bool SelectBaseAddr(SDValue Addr, SDValue &Base);
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-  bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
-  bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) {
-    return selectShiftMask(N, Subtarget->getGRLen(), ShAmt);
-  }
-  bool selectShiftMask32(SDValue N, SDValue &ShAmt) {
-    return selectShiftMask(N, 32, ShAmt);
-  }
+private:
+  /// Keep a pointer to the LoongArchSubtarget around so that we can make the
+  /// right decision when generating code for different targets.
+  const LoongArchSubtarget *Subtarget;
 
+  // Include the pieces autogenerated from the target description.
+  #include "LoongArchGenDAGISel.inc"
+
+  void doPeepholeLoadStoreADDI();
+
+  bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const;
+
+  bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset,
+                                  unsigned OffsetBits,
+                                  unsigned ShiftAmount) const;
+
+  // Complex Pattern.
+  /// (reg + imm).
+  bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const;
+
+  /// Fall back on this function if all else fails.
+  bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const;
 
-  bool selectSExti32(SDValue N, SDValue &Val);
-  bool selectZExti32(SDValue N, SDValue &Val);
+  /// Match integer address pattern.
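+  /// Naming convention for the simmN/LslM selectors declared below (inferred
+  /// from their use): "SImm14Lsl2", for example, accepts offsets that are
+  /// 4-byte aligned and whose value divided by 4 fits in 14 signed bits,
+  /// i.e. off = 4*k with -8192 <= k <= 8191, a byte range of
+  /// [-32768, 32764] in steps of 4 (so 2048 matches, 2049 does not).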
+ bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + bool selectAddrRegImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Match addr+simm12 and addr + bool selectIntAddrSImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm10(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm9Lsl3(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm11Lsl1(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm14Lsl2(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + bool selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Select constant vector splats. + bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const; + /// Select constant vector splats whose value fits in a given integer. + bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const; + /// Select constant vector splats whose value fits in a uimm1. + bool selectVSplatUimm1(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm2. + bool selectVSplatUimm2(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm3. + bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm4. + bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm5. + bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm6. + bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a uimm8. + bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value fits in a simm5. + bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is a power of 2. + bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is the inverse of a + /// power of 2. + bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is a run of set bits + /// ending at the most significant bit + bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is a run of set bits + /// starting at bit zero. + bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; + + void Select(SDNode *N) override; + + bool trySelect(SDNode *Node); + + // getImm - Return a target constant with the specified value. + inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0)); + } -// Include the pieces autogenerated from the target description. 
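An aside on the selectVSplat* helpers declared above: they all reduce to range checks on the splat constant against the width of the immediate field an instruction encodes. A standalone sketch of those checks (illustrative only, not the patch's implementation):

    #include <cstdint>
    #include <cstdio>

    // Unsigned field of the given width: [0, 2^Bits - 1].
    static bool fitsUimm(uint64_t V, unsigned Bits) { return V < (1ull << Bits); }
    // Signed field of the given width: [-2^(Bits-1), 2^(Bits-1) - 1].
    static bool fitsSimm(int64_t V, unsigned Bits) {
      return V >= -(1ll << (Bits - 1)) && V < (1ll << (Bits - 1));
    }

    int main() {
      // A splat of 31 can be encoded in a uimm5 field; 32 cannot, and would
      // have to be materialized in a register instead.
      std::printf("uimm5(31)=%d uimm5(32)=%d\n", fitsUimm(31, 5), fitsUimm(32, 5));
      // simm5 covers [-16, 15], the range behind selectVSplatSimm5.
      std::printf("simm5(-16)=%d simm5(16)=%d\n", fitsSimm(-16, 5), fitsSimm(16, 5));
      return 0;
    }
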
-#include "LoongArchGenDAGISel.inc"
+  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+                                    std::vector<SDValue> &OutOps) override;
 };
 
-} // end namespace llvm
+FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM,
+                                     CodeGenOpt::Level OptLevel);
+}
 
-#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELDAGTODAG_H
+#endif
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 93c8864347bb..4dcac7a0924a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1,4 +1,4 @@
-//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
+//===- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation --===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,1094 +6,8324 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the interfaces that LoongArch uses to lower LLVM code into
-// a selection DAG.
+// This file defines the interfaces that LoongArch uses to lower LLVM code
+// into a selection DAG.
 //
 //===----------------------------------------------------------------------===//
 
 #include "LoongArchISelLowering.h"
-#include "LoongArch.h"
-#include "LoongArchMachineFunctionInfo.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchInstPrinter.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "LoongArchCCState.h"
+#include "LoongArchInstrInfo.h"
+#include "LoongArchMachineFunction.h"
 #include "LoongArchRegisterInfo.h"
 #include "LoongArchSubtarget.h"
 #include "LoongArchTargetMachine.h"
-#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "LoongArchTargetObjectFile.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsLoongArch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdint>
+#include <deque>
+#include <iterator>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
-#define DEBUG_TYPE "loongarch-isel-lowering"
+#define DEBUG_TYPE "loongarch-lower"
 
-static cl::opt<bool> ZeroDivCheck(
-    "loongarch-check-zero-division", cl::Hidden,
-    cl::desc("Trap on integer division by zero."),
-    cl::init(false));
+STATISTIC(NumTailCalls, "Number of tail calls");
 
-LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
-                                                 const LoongArchSubtarget &STI)
-    : TargetLowering(TM), Subtarget(STI) {
+static cl::opt<bool>
+NoZeroDivCheck("mnocheck-zero-division", cl::Hidden,
+               cl::desc("LoongArch: Don't trap on integer division by zero."),
+               cl::init(false));
 
-  MVT GRLenVT = Subtarget.getGRLenVT();
-  // Set up the register classes.
-  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
-  if (Subtarget.hasBasicF())
-    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
-  if (Subtarget.hasBasicD())
-    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
+static const MCPhysReg LoongArch64DPRegs[8] = {
+    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
+    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
 
-  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
-                   MVT::i1, Promote);
+// If I is a shifted mask, set the size (SMSize) and the first bit of the
+// mask (SMLsb), and return true.
+// For example, if I is 0x003ff800, (SMLsb, SMSize) = (11, 11).
+static bool isShiftedMask(uint64_t I, uint64_t &SMLsb, uint64_t &SMSize) {
+  if (!isShiftedMask_64(I))
+    return false;
 
-  // TODO: add necessary setOperationAction calls later.
- setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); - setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); - setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); - setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); + SMSize = countPopulation(I); + SMLsb = countTrailingZeros(I); + return true; +} - setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom); +SDValue LoongArchTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} - if (Subtarget.is64Bit()) { - setOperationAction(ISD::SHL, MVT::i32, Custom); - setOperationAction(ISD::SRA, MVT::i32, Custom); - setOperationAction(ISD::SRL, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::BITCAST, MVT::i32, Custom); - if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); +SDValue LoongArchTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); +} + +SDValue LoongArchTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), Flag); +} + +SDValue LoongArchTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +SDValue LoongArchTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), + N->getOffset(), Flag); +} + +const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((LoongArchISD::NodeType)Opcode) { + case LoongArchISD::FIRST_NUMBER: break; + case LoongArchISD::JmpLink: return "LoongArchISD::JmpLink"; + case LoongArchISD::TailCall: return "LoongArchISD::TailCall"; + case LoongArchISD::GlobalAddress: return "LoongArchISD::GlobalAddress"; + case LoongArchISD::Ret: return "LoongArchISD::Ret"; + case LoongArchISD::ERet: return "LoongArchISD::ERet"; + case LoongArchISD::EH_RETURN: return "LoongArchISD::EH_RETURN"; + case LoongArchISD::FPBrcond: return "LoongArchISD::FPBrcond"; + case LoongArchISD::FPCmp: return "LoongArchISD::FPCmp"; + case LoongArchISD::CMovFP_T: return "LoongArchISD::CMovFP_T"; + case LoongArchISD::CMovFP_F: return "LoongArchISD::CMovFP_F"; + case LoongArchISD::TruncIntFP: return "LoongArchISD::TruncIntFP"; + case LoongArchISD::BSTRPICK: return "LoongArchISD::BSTRPICK"; + case LoongArchISD::BSTRINS: return "LoongArchISD::BSTRINS"; + case LoongArchISD::VALL_ZERO: + return "LoongArchISD::VALL_ZERO"; + case LoongArchISD::VANY_ZERO: + return "LoongArchISD::VANY_ZERO"; + case LoongArchISD::VALL_NONZERO: + return "LoongArchISD::VALL_NONZERO"; + case LoongArchISD::VANY_NONZERO: + return "LoongArchISD::VANY_NONZERO"; + case LoongArchISD::VEXTRACT_SEXT_ELT: + return "LoongArchISD::VEXTRACT_SEXT_ELT"; + case LoongArchISD::VEXTRACT_ZEXT_ELT: + return "LoongArchISD::VEXTRACT_ZEXT_ELT"; + case LoongArchISD::VNOR: + return "LoongArchISD::VNOR"; + case LoongArchISD::VSHF: + return "LoongArchISD::VSHF"; + case LoongArchISD::SHF: + return "LoongArchISD::SHF"; + case LoongArchISD::VPACKEV: + return "LoongArchISD::VPACKEV"; + case LoongArchISD::VPACKOD: + return "LoongArchISD::VPACKOD"; + 
case LoongArchISD::VILVH:
+    return "LoongArchISD::VILVH";
+  case LoongArchISD::VILVL:
+    return "LoongArchISD::VILVL";
+  case LoongArchISD::VPICKEV:
+    return "LoongArchISD::VPICKEV";
+  case LoongArchISD::VPICKOD:
+    return "LoongArchISD::VPICKOD";
+  case LoongArchISD::INSVE:
+    return "LoongArchISD::INSVE";
+  case LoongArchISD::VROR:
+    return "LoongArchISD::VROR";
+  case LoongArchISD::VRORI:
+    return "LoongArchISD::VRORI";
+  case LoongArchISD::XVBROADCAST:
+    return "LoongArchISD::XVBROADCAST";
+  case LoongArchISD::VBROADCAST:
+    return "LoongArchISD::VBROADCAST";
+  case LoongArchISD::VABSD:
+    return "LoongArchISD::VABSD";
+  case LoongArchISD::UVABSD:
+    return "LoongArchISD::UVABSD";
+  case LoongArchISD::XVPICKVE:
+    return "LoongArchISD::XVPICKVE";
+  case LoongArchISD::XVPERMI:
+    return "LoongArchISD::XVPERMI";
+  case LoongArchISD::XVSHUF4I:
+    return "LoongArchISD::XVSHUF4I";
+  case LoongArchISD::REVBD:
+    return "LoongArchISD::REVBD";
+  case LoongArchISD::FSEL:
+    return "LoongArchISD::FSEL";
   }
+  return nullptr;
+}
+
+LoongArchTargetLowering::LoongArchTargetLowering(
+    const LoongArchTargetMachine &TM, const LoongArchSubtarget &STI)
+    : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) {
+  // Set up the register classes.
+  addRegisterClass(MVT::i32, &LoongArch::GPR32RegClass);
 
-  static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
-                                               ISD::SETUGT, ISD::SETUGE};
+  if (Subtarget.is64Bit())
+    addRegisterClass(MVT::i64, &LoongArch::GPR64RegClass);
 
-  if (Subtarget.hasBasicF()) {
-    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
-    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+  // LoongArch does not have an i1 type, so use i32 for
+  // setcc operation results (slt, sgt, ...).
+  setBooleanContents(ZeroOrOneBooleanContent);
+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+  // Load extended operations for i1 types must be promoted.
+  for (MVT VT : MVT::integer_valuetypes()) {
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
   }
-  if (Subtarget.hasBasicD()) {
-    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
-    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
-    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
-    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
+  // LoongArch doesn't have extending float->double load/store. Set the
+  // LoadExtAction for f32 and f16.
+  for (MVT VT : MVT::fp_valuetypes()) {
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
   }
-  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
-  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
-  if (!Subtarget.is64Bit())
-    setLibcallName(RTLIB::MUL_I128, nullptr);
+  // Set LoadExtAction for f16 vectors to Expand.
+  for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
+    MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements());
+    if (F16VT.isValid())
+      setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand);
+  }
 
-  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
-  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
+  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+  // Used by legalize types to correctly generate the setcc result.
+  // Without this, every float setcc would come with an AND/OR of the result;
+  // we don't want that, since the fpcmp result goes to a flag register,
+  // which is used implicitly by brcond and select operations.
+  AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+
+  // LoongArch Custom Operations
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+  setOperationAction(ISD::SELECT, MVT::f32, Custom);
+  setOperationAction(ISD::SELECT, MVT::f64, Custom);
+  setOperationAction(ISD::SELECT, MVT::i32, Custom);
+  setOperationAction(ISD::SETCC, MVT::f32, Custom);
+  setOperationAction(ISD::SETCC, MVT::f64, Custom);
+  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 
-  // Compute derived properties from the register classes.
-  computeRegisterProperties(STI.getRegisterInfo());
+  if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+    setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+    setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+    setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+    setOperationAction(ISD::SELECT, MVT::i64, Custom);
+    setOperationAction(ISD::LOAD, MVT::i64, Legal);
+    setOperationAction(ISD::STORE, MVT::i64, Legal);
+    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+  }
 
-  setStackPointerRegisterToSaveRestore(LoongArch::R3);
+  if (!Subtarget.is64Bit()) {
+    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+  }
 
-  setBooleanContents(ZeroOrOneBooleanContent);
+  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
+  if (Subtarget.is64Bit())
+    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
+
+  setOperationAction(ISD::SDIV, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+  setOperationAction(ISD::UDIV, MVT::i32, Expand);
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+  setOperationAction(ISD::SDIV, MVT::i64, Expand);
+  setOperationAction(ISD::SREM, MVT::i64, Expand);
+  setOperationAction(ISD::UDIV, MVT::i64, Expand);
+  setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+  // Operations not directly supported by LoongArch.
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMA, MVT::f64, Legal); + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); + + // Lower f16 conversion operations into library calls + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + + // Use the default for now + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + + if (!Subtarget.is64Bit()) { + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); + } - setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); + if (Subtarget.is64Bit()) { + setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i32, Custom); + setTruncStoreAction(MVT::i64, MVT::i32, Custom); + } - // Function alignments. 
- const Align FunctionAlignment(4); - setMinFunctionAlignment(FunctionAlignment); + setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::AssertZext); + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SRL); -} + setTargetDAGCombine(ISD::SRA); -SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: - report_fatal_error("unimplemented operand"); - case ISD::GlobalAddress: - return lowerGlobalAddress(Op, DAG); - case ISD::SHL_PARTS: - return lowerShiftLeftParts(Op, DAG); - case ISD::SRA_PARTS: - return lowerShiftRightParts(Op, DAG, true); - case ISD::SRL_PARTS: - return lowerShiftRightParts(Op, DAG, false); - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - // This can be called for an i32 shift amount that needs to be promoted. - assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && - "Unexpected custom legalisation"); - return SDValue(); - case ISD::ConstantPool: - return lowerConstantPool(Op, DAG); - case ISD::FP_TO_SINT: - return lowerFP_TO_SINT(Op, DAG); - case ISD::BITCAST: - return lowerBITCAST(Op, DAG); - case ISD::FP_TO_UINT: - return SDValue(); - case ISD::UINT_TO_FP: - return lowerUINT_TO_FP(Op, DAG); + if (ABI.IsLP32()) { + // These libcalls are not available in 32-bit. + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); } -} -SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, - SelectionDAG &DAG) const { + if (Subtarget.hasLSX() || Subtarget.hasLASX()) { + // Expand all truncating stores and extending loads. + for (MVT VT0 : MVT::vector_valuetypes()) { + for (MVT VT1 : MVT::vector_valuetypes()) { + setTruncStoreAction(VT0, VT1, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); + } + } + } - SDLoc DL(Op); - auto &TLI = DAG.getTargetLoweringInfo(); - SDValue Tmp1, Tmp2; - SDValue Op1 = Op.getOperand(0); - if (Op1->getOpcode() == ISD::AssertZext || - Op1->getOpcode() == ISD::AssertSext) - return Op; - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op.getOperand(0)); - SDValue Res = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Trunc); - SDNode *N = Res.getNode(); - TLI.expandUINT_TO_FP(N, Tmp1, Tmp2, DAG); - return Tmp1; -} + if (Subtarget.hasLSX()) { + addLSXIntType(MVT::v16i8, &LoongArch::LSX128BRegClass); + addLSXIntType(MVT::v8i16, &LoongArch::LSX128HRegClass); + addLSXIntType(MVT::v4i32, &LoongArch::LSX128WRegClass); + addLSXIntType(MVT::v2i64, &LoongArch::LSX128DRegClass); + addLSXFloatType(MVT::v4f32, &LoongArch::LSX128WRegClass); + addLSXFloatType(MVT::v2f64, &LoongArch::LSX128DRegClass); + + // f16 is a storage-only type, always promote it to f32. 
+ setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine(ISD::XOR); + } -SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, - SelectionDAG &DAG) const { + if (Subtarget.hasLASX()) { + addLASXIntType(MVT::v32i8, &LoongArch::LASX256BRegClass); + addLASXIntType(MVT::v16i16, &LoongArch::LASX256HRegClass); + addLASXIntType(MVT::v8i32, &LoongArch::LASX256WRegClass); + addLASXIntType(MVT::v4i64, &LoongArch::LASX256DRegClass); + addLASXFloatType(MVT::v8f32, &LoongArch::LASX256WRegClass); + addLASXFloatType(MVT::v4f64, &LoongArch::LASX256DRegClass); + + // f16 is a storage-only type, always promote it to f32. 
+ setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote); + + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::VSELECT); + setTargetDAGCombine(ISD::XOR); + } - SDLoc DL(Op); - SDValue Op0 = Op.getOperand(0); + if (!Subtarget.useSoftFloat()) { + addRegisterClass(MVT::f32, &LoongArch::FGR32RegClass); - if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 && - Subtarget.is64Bit() && Subtarget.hasBasicF()) { - SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); - return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0); + // When dealing with single precision only, use libcalls + if (!Subtarget.isSingleFloat()) { + if (Subtarget.isFP64bit()) + addRegisterClass(MVT::f64, &LoongArch::FGR64RegClass); + } } - return Op; -} -SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, - SelectionDAG &DAG) const { + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); - SDLoc DL(Op); + if (Subtarget.is64Bit()) + setOperationAction(ISD::MUL, MVT::i64, Custom); - if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && - !Subtarget.hasBasicD()) { - SDValue Dst = - DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0)); - return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst); + if (Subtarget.is64Bit()) { + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); + setOperationAction(ISD::SDIVREM, MVT::i64, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); } - EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); - SDValue Trunc = 
DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0)); - return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + + setOperationAction(ISD::SDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Legal); + setOperationAction(ISD::STORE, MVT::i32, Legal); + + setTargetDAGCombine(ISD::MUL); + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + + // Replace the accumulator-based multiplies with a + // three register instruction. + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MUL, MVT::i32, Legal); + setOperationAction(ISD::MULHS, MVT::i32, Legal); + setOperationAction(ISD::MULHU, MVT::i32, Legal); + + // Replace the accumulator-based division/remainder with separate + // three register division and remainder instructions. + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Legal); + setOperationAction(ISD::UDIV, MVT::i32, Legal); + setOperationAction(ISD::SREM, MVT::i32, Legal); + setOperationAction(ISD::UREM, MVT::i32, Legal); + + // Replace the accumulator-based multiplies with a + // three register instruction. + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::MUL, MVT::i64, Legal); + setOperationAction(ISD::MULHS, MVT::i64, Legal); + setOperationAction(ISD::MULHU, MVT::i64, Legal); + + // Replace the accumulator-based division/remainder with separate + // three register division and remainder instructions. + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Legal); + setOperationAction(ISD::UDIV, MVT::i64, Legal); + setOperationAction(ISD::SREM, MVT::i64, Legal); + setOperationAction(ISD::UREM, MVT::i64, Legal); + + MaxGluedStoresPerMemcpy = 4; + + setMinFunctionAlignment(Align(4)); + + // The arguments on the stack are defined in terms of 4-byte slots on LP32 + // and 8-byte slots on LPX32/LP64. + setMinStackArgumentAlignment((ABI.IsLPX32() || ABI.IsLP64()) ? Align(8) + : Align(4)); + + setStackPointerRegisterToSaveRestore(ABI.IsLP64() ? LoongArch::SP_64 : LoongArch::SP); + + if (Subtarget.hasLASX()) { + // = 16*32/2; the smallest memcpy; + MaxStoresPerMemcpy = 16; + } else if (Subtarget.hasLSX()) { + MaxStoresPerMemcpy = 65535; + } else { + MaxStoresPerMemcpy = 16; + } + + computeRegisterProperties(Subtarget.getRegisterInfo()); } -SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT Ty = Op.getValueType(); - ConstantPoolSDNode *N = cast(Op); +// Enable LSX support for the given integer type and Register class. +void LoongArchTargetLowering::addLSXIntType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::ABS, Ty, Legal); + setOperationAction(ISD::UNDEF, Ty, Legal); + setOperationAction(ISD::EXTRACT_SUBVECTOR, Ty, Legal); + setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); + + if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { + setOperationAction(ISD::FP_TO_SINT, Ty, Custom); + setOperationAction(ISD::FP_TO_UINT, Ty, Custom); + } - // FIXME: Only support PC-relative addressing to access the symbol. - // Target flags will be added later. - if (!isPositionIndependent()) { - SDValue ConstantN = DAG.getTargetConstantPool( - N->getConstVal(), Ty, N->getAlign(), N->getOffset()); - SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, ConstantN), - 0); - SDValue Addr(DAG.getMachineNode(Subtarget.is64Bit() ? LoongArch::ADDI_D - : LoongArch::ADDI_W, - DL, Ty, AddrHi, ConstantN), - 0); - return Addr; + setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::AND, Ty, Legal); + setOperationAction(ISD::CTLZ, Ty, Legal); + setOperationAction(ISD::CTPOP, Ty, Legal); + setOperationAction(ISD::MUL, Ty, Legal); + setOperationAction(ISD::OR, Ty, Legal); + setOperationAction(ISD::SDIV, Ty, Legal); + setOperationAction(ISD::SREM, Ty, Legal); + setOperationAction(ISD::SHL, Ty, Legal); + setOperationAction(ISD::SRA, Ty, Legal); + setOperationAction(ISD::SRL, Ty, Legal); + setOperationAction(ISD::SUB, Ty, Legal); + setOperationAction(ISD::SMAX, Ty, Legal); + setOperationAction(ISD::SMIN, Ty, Legal); + setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::UREM, Ty, Legal); + setOperationAction(ISD::UMAX, Ty, Legal); + setOperationAction(ISD::UMIN, Ty, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); + setOperationAction(ISD::VSELECT, Ty, Legal); + setOperationAction(ISD::XOR, Ty, Legal); + setOperationAction(ISD::MULHS, Ty, Legal); + setOperationAction(ISD::MULHU, Ty, Legal); + + if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { + setOperationAction(ISD::SINT_TO_FP, Ty, Custom); + setOperationAction(ISD::UINT_TO_FP, Ty, Custom); } - report_fatal_error("Unable to lower ConstantPool"); -} -SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT Ty = getPointerTy(DAG.getDataLayout()); - const GlobalValue *GV = cast(Op)->getGlobal(); - unsigned ADDIOp = Subtarget.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETNE, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); +} - // TODO: Support dso_preemptable and target flags. - if (GV->isDSOLocal()) { - SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty); - SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0); - SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0); - return Addr; +// Enable LASX support for the given integer type and Register class. 
+void LoongArchTargetLowering::addLASXIntType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + // FIXME + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Custom); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); + setOperationAction(ISD::UNDEF, Ty, Legal); + setOperationAction(ISD::UADDSAT, Ty, Legal); + setOperationAction(ISD::SADDSAT, Ty, Legal); + setOperationAction(ISD::USUBSAT, Ty, Legal); + setOperationAction(ISD::SSUBSAT, Ty, Legal); + setOperationAction(ISD::ABS, Ty, Legal); + + setOperationAction(ISD::ADD, Ty, Legal); + setOperationAction(ISD::AND, Ty, Legal); + setOperationAction(ISD::CTLZ, Ty, Legal); + setOperationAction(ISD::CTPOP, Ty, Legal); + setOperationAction(ISD::MUL, Ty, Legal); + setOperationAction(ISD::OR, Ty, Legal); + setOperationAction(ISD::SDIV, Ty, Legal); + setOperationAction(ISD::SREM, Ty, Legal); + setOperationAction(ISD::SHL, Ty, Legal); + setOperationAction(ISD::SRA, Ty, Legal); + setOperationAction(ISD::SRL, Ty, Legal); + setOperationAction(ISD::SUB, Ty, Legal); + setOperationAction(ISD::SMAX, Ty, Legal); + setOperationAction(ISD::SMIN, Ty, Legal); + setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::UREM, Ty, Legal); + setOperationAction(ISD::UMAX, Ty, Legal); + setOperationAction(ISD::UMIN, Ty, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); + setOperationAction(ISD::VSELECT, Ty, Legal); + setOperationAction(ISD::XOR, Ty, Legal); + setOperationAction(ISD::INSERT_SUBVECTOR, Ty, Legal); + setOperationAction(ISD::MULHS, Ty, Legal); + setOperationAction(ISD::MULHU, Ty, Legal); + + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, Ty, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, Ty, Legal); + + setOperationAction(ISD::SIGN_EXTEND, Ty, Legal); + setOperationAction(ISD::ZERO_EXTEND, Ty, Legal); + + if (Ty == MVT::v8i32 || Ty == MVT::v4i64) { + setOperationAction(ISD::SINT_TO_FP, Ty, Custom); + setOperationAction(ISD::UINT_TO_FP, Ty, Custom); } - report_fatal_error("Unable to lowerGlobalAddress"); + + setTargetDAGCombine(ISD::CONCAT_VECTORS); + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETNE, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); } -SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - SDValue Lo = Op.getOperand(0); - SDValue Hi = Op.getOperand(1); - SDValue Shamt = Op.getOperand(2); - EVT VT = Lo.getValueType(); +// Enable LSX support for the given floating-point type and Register class. +void LoongArchTargetLowering::addLSXFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::UNDEF, Ty, Legal); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); + + if (Ty == MVT::v4f32 || Ty == MVT::v2f64) { + setOperationAction(ISD::FP_TO_SINT, Ty, Custom); + setOperationAction(ISD::FP_TO_UINT, Ty, Custom); + } - // if Shamt-GRLen < 0: // Shamt < GRLen - // Lo = Lo << Shamt - // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) - // else: - // Lo = 0 - // Hi = Lo << (Shamt-GRLen) - - SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue One = DAG.getConstant(1, DL, VT); - SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); - SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); - SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); - SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); - - SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); - SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); - SDValue ShiftRightLo = - DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt); - SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); - SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); - SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen); + setOperationAction(ISD::FADD, Ty, Legal); + setOperationAction(ISD::FDIV, Ty, Legal); + setOperationAction(ISD::FMA, Ty, Legal); + setOperationAction(ISD::FMUL, Ty, Legal); + setOperationAction(ISD::FSQRT, Ty, Legal); + setOperationAction(ISD::FSUB, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); + setOperationAction(ISD::FNEG, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETOGE, Ty, Expand); + setCondCodeAction(ISD::SETOGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); +} - SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); +// Enable LASX support for the given floating-point type and Register class. +void LoongArchTargetLowering::addLASXFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC) { + addRegisterClass(Ty, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, Ty, Expand); + + setOperationAction(ISD::LOAD, Ty, Legal); + setOperationAction(ISD::STORE, Ty, Legal); + setOperationAction(ISD::BITCAST, Ty, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); + setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); + setOperationAction(ISD::UNDEF, Ty, Legal); + setOperationAction(ISD::CONCAT_VECTORS, Ty, Legal); + + setOperationAction(ISD::FADD, Ty, Legal); + setOperationAction(ISD::FDIV, Ty, Legal); + setOperationAction(ISD::FMA, Ty, Legal); + setOperationAction(ISD::FMUL, Ty, Legal); + setOperationAction(ISD::FSQRT, Ty, Legal); + setOperationAction(ISD::FSUB, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); + setOperationAction(ISD::FNEG, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + + if (Ty == MVT::v8f32 || Ty == MVT::v4f64) { + setOperationAction(ISD::FP_TO_SINT, Ty, Custom); + setOperationAction(ISD::FP_TO_UINT, Ty, Custom); + } - Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); - Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + setOperationAction(ISD::SETCC, Ty, Legal); + setCondCodeAction(ISD::SETOGE, Ty, Expand); + setCondCodeAction(ISD::SETOGT, Ty, Expand); + setCondCodeAction(ISD::SETUGE, Ty, Expand); + setCondCodeAction(ISD::SETUGT, Ty, Expand); + setCondCodeAction(ISD::SETGE, Ty, Expand); + setCondCodeAction(ISD::SETGT, Ty, Expand); +} - SDValue Parts[2] = {Lo, Hi}; - return DAG.getMergeValues(Parts, DL); +bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, + bool *Fast) const { + if (!Subtarget.allowUnalignedAccess()) + return false; + if (Fast) + *Fast = true; + return true; } -SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, - SelectionDAG &DAG, - bool IsSRA) const { - SDLoc DL(Op); - SDValue Lo = Op.getOperand(0); - SDValue Hi = Op.getOperand(1); - SDValue Shamt = Op.getOperand(2); - EVT VT = Lo.getValueType(); - - // SRA expansion: - // if Shamt-GRLen < 0: // Shamt < GRLen - // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) - // Hi = Hi >>s Shamt - // else: - // Lo = Hi >>s (Shamt-GRLen); - // Hi = Hi >>s (GRLen-1) - // - // SRL expansion: - // if Shamt-GRLen < 0: // Shamt < GRLen - // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1)) - // Hi = Hi >>u Shamt - // else: - // Lo = Hi >>u (Shamt-GRLen); - // Hi = 0; - - unsigned ShiftRightOp = IsSRA ? 
ISD::SRA : ISD::SRL; - - SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue One = DAG.getConstant(1, DL, VT); - SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT); - SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT); - SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen); - SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1); +EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} - SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); - SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); - SDValue ShiftLeftHi = - DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt); - SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); - SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); - SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen); - SDValue HiFalse = - IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero; +static LoongArch::CondCode condCodeToFCC(ISD::CondCode CC) { + switch (CC) { + default: llvm_unreachable("Unknown fp condition code!"); + case ISD::SETEQ: + case ISD::SETOEQ: return LoongArch::FCOND_OEQ; + case ISD::SETUNE: return LoongArch::FCOND_UNE; + case ISD::SETLT: + case ISD::SETOLT: return LoongArch::FCOND_OLT; + case ISD::SETGT: + case ISD::SETOGT: return LoongArch::FCOND_OGT; + case ISD::SETLE: + case ISD::SETOLE: return LoongArch::FCOND_OLE; + case ISD::SETGE: + case ISD::SETOGE: return LoongArch::FCOND_OGE; + case ISD::SETULT: return LoongArch::FCOND_ULT; + case ISD::SETULE: return LoongArch::FCOND_ULE; + case ISD::SETUGT: return LoongArch::FCOND_UGT; + case ISD::SETUGE: return LoongArch::FCOND_UGE; + case ISD::SETUO: return LoongArch::FCOND_UN; + case ISD::SETO: return LoongArch::FCOND_OR; + case ISD::SETNE: + case ISD::SETONE: return LoongArch::FCOND_ONE; + case ISD::SETUEQ: return LoongArch::FCOND_UEQ; + } +} - SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT); +/// This function returns true if the floating point conditional branches and +/// conditional moves which use condition code CC should be inverted. +static bool invertFPCondCodeUser(LoongArch::CondCode CC) { + if (CC >= LoongArch::FCOND_F && CC <= LoongArch::FCOND_SUNE) + return false; - Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); - Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + assert((CC >= LoongArch::FCOND_T && CC <= LoongArch::FCOND_GT) && + "Illegal Condition Code"); - SDValue Parts[2] = {Lo, Hi}; - return DAG.getMergeValues(Parts, DL); + return true; } -// Returns the opcode of the target-specific SDNode that implements the 32-bit -// form of the given Opcode. -static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) { - switch (Opcode) { - default: - llvm_unreachable("Unexpected opcode"); - case ISD::SHL: - return LoongArchISD::SLL_W; - case ISD::SRA: - return LoongArchISD::SRA_W; - case ISD::SRL: - return LoongArchISD::SRL_W; - } +// Creates and returns an FPCmp node from a setcc node. +// Returns Op if setcc is not a floating point comparison. 
+static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { + // must be a SETCC node + if (Op.getOpcode() != ISD::SETCC) + return Op; + + SDValue LHS = Op.getOperand(0); + + if (!LHS.getValueType().isFloatingPoint()) + return Op; + + SDValue RHS = Op.getOperand(1); + SDLoc DL(Op); + + // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of + // node if necessary. + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + + return DAG.getNode(LoongArchISD::FPCmp, DL, MVT::Glue, LHS, RHS, + DAG.getConstant(condCodeToFCC(CC), DL, MVT::i32)); } -// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG -// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would -// otherwise be promoted to i64, making it difficult to select the -// SLL_W/.../*W later one because the fact the operation was originally of -// type i8/i16/i32 is lost. -static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, - unsigned ExtOpc = ISD::ANY_EXTEND) { - SDLoc DL(N); - LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode()); - SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0)); - SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1)); - SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); - // ReplaceNodeResults requires we maintain the same type for the return value. - return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); +// Creates and returns a CMovFPT/F node. +static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, + SDValue False, const SDLoc &DL) { + ConstantSDNode *CC = cast(Cond.getOperand(2)); + bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue()); + SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32); + + return DAG.getNode((invert ? 
LoongArchISD::CMovFP_F : LoongArchISD::CMovFP_T), DL, + True.getValueType(), True, FCC0, False, Cond); + } -void LoongArchTargetLowering::ReplaceNodeResults( - SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - SDLoc DL(N); - switch (N->getOpcode()) { - default: - llvm_unreachable("Don't know how to legalize this operation"); - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && - "Unexpected custom legalisation"); - if (N->getOperand(1).getOpcode() != ISD::Constant) { - Results.push_back(customLegalizeToWOp(N, DAG)); - break; - } - break; - case ISD::FP_TO_SINT: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && - "Unexpected custom legalisation"); - SDValue Src = N->getOperand(0); - EVT VT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); - SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, VT, Src); - Results.push_back(DAG.getNode(ISD::BITCAST, DL, N->getValueType(0), Dst)); - break; - } - case ISD::BITCAST: { - EVT VT = N->getValueType(0); - SDValue Src = N->getOperand(0); - EVT SrcVT = Src.getValueType(); - if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && - Subtarget.hasBasicF()) { - SDValue Dst = - DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src); - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst)); - } - break; - } - case ISD::FP_TO_UINT: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && - "Unexpected custom legalisation"); - auto &TLI = DAG.getTargetLoweringInfo(); - SDValue Tmp1, Tmp2; - TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG); - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1)); - break; +static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue SetCC = N->getOperand(0); + + if ((SetCC.getOpcode() != ISD::SETCC) || + !SetCC.getOperand(0).getValueType().isInteger()) + return SDValue(); + + SDValue False = N->getOperand(2); + EVT FalseTy = False.getValueType(); + + if (!FalseTy.isInteger()) + return SDValue(); + + ConstantSDNode *FalseC = dyn_cast(False); + + // If the RHS (False) is 0, we swap the order of the operands + // of ISD::SELECT (obviously also inverting the condition) so that we can + // take advantage of conditional moves using the $0 register. + // Example: + // return (a != 0) ? x : 0; + // load $reg, x + // movz $reg, $0, a + if (!FalseC) + return SDValue(); + + const SDLoc DL(N); + + if (!FalseC->getZExtValue()) { + ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); + SDValue True = N->getOperand(1); + + SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), + SetCC.getOperand(1), + ISD::getSetCCInverse(CC, SetCC.getValueType())); + + return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); } + + // If both operands are integer constants there's a possibility that we + // can do some interesting optimizations. + SDValue True = N->getOperand(1); + ConstantSDNode *TrueC = dyn_cast(True); + + if (!TrueC || !True.getValueType().isInteger()) + return SDValue(); + + // We'll also ignore MVT::i64 operands as this optimizations proves + // to be ineffective because of the required sign extensions as the result + // of a SETCC operator is always MVT::i32 for non-vector types. + if (True.getValueType() == MVT::i64) + return SDValue(); + + int64_t Diff = TrueC->getSExtValue() - FalseC->getSExtValue(); + + // 1) (a < x) ? 
y : y-1 + // slti $reg1, a, x + // addiu $reg2, $reg1, y-1 + if (Diff == 1) + return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, False); + + // 2) (a < x) ? y-1 : y + // slti $reg1, a, x + // xor $reg1, $reg1, 1 + // addiu $reg2, $reg1, y-1 + if (Diff == -1) { + ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); + SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), + SetCC.getOperand(1), + ISD::getSetCCInverse(CC, SetCC.getValueType())); + return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, True); } + + // Could not optimize. + return SDValue(); } static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget) { + + if (Subtarget.hasLSX()) { + + // Fold zero extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT + // + // Performs the following transformations: + // - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its + // sign/zero-extension is completely overwritten by the new one performed + // by the ISD::AND. + // - Removes redundant zero extensions performed by an ISD::AND. + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + unsigned Op0Opcode = Op0->getOpcode(); + + // (and (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d) + // where $d + 1 == 2^n and n == 32 + // or $d + 1 == 2^n and n <= 32 and ZExt + // -> (LoongArchVExtractZExt $a, $b, $c) + if (Op0Opcode == LoongArchISD::VEXTRACT_SEXT_ELT || + Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT) { + ConstantSDNode *Mask = dyn_cast(Op1); + + if (Mask) { + + int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); + + if (Log2IfPositive > 0) { + SDValue Op0Op2 = Op0->getOperand(2); + EVT ExtendTy = cast(Op0Op2)->getVT(); + unsigned ExtendTySize = ExtendTy.getSizeInBits(); + unsigned Log2 = Log2IfPositive; + + if ((Op0Opcode == LoongArchISD::VEXTRACT_ZEXT_ELT && + Log2 >= ExtendTySize) || + Log2 == ExtendTySize) { + SDValue Ops[] = {Op0->getOperand(0), Op0->getOperand(1), Op0Op2}; + return DAG.getNode(LoongArchISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), + Op0->getVTList(), + makeArrayRef(Ops, Op0->getNumOperands())); + } + } + } + } + } + if (DCI.isBeforeLegalizeOps()) return SDValue(); SDValue FirstOperand = N->getOperand(0); - SDValue SecondOperand = N->getOperand(1); unsigned FirstOperandOpc = FirstOperand.getOpcode(); + SDValue Mask = N->getOperand(1); EVT ValTy = N->getValueType(0); SDLoc DL(N); - uint64_t lsb, msb; - unsigned SMIdx, SMLen; + + uint64_t Lsb = 0, SMLsb, SMSize; ConstantSDNode *CN; SDValue NewOperand; - MVT GRLenVT = Subtarget.getGRLenVT(); + unsigned Opc; // Op's second operand must be a shifted mask. - if (!(CN = dyn_cast(SecondOperand)) || - !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen)) + if (!(CN = dyn_cast(Mask)) || + !isShiftedMask(CN->getZExtValue(), SMLsb, SMSize)) return SDValue(); if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { // Pattern match BSTRPICK. - // $dst = and ((sra or srl) $src , lsb), (2**len - 1) - // => BSTRPICK $dst, $src, msb, lsb - // where msb = lsb + len - 1 + // $dst = and ((sra or srl) $src , lsb), (2**size - 1) + // => bstrpick $dst, $src, lsb+size-1, lsb // The second operand of the shift must be an immediate. if (!(CN = dyn_cast(FirstOperand.getOperand(1)))) return SDValue(); - lsb = CN->getZExtValue(); + Lsb = CN->getZExtValue(); - // Return if the shifted mask does not start at bit 0 or the sum of its - // length and lsb exceeds the word's size. 
- if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits()) + // Return if the shifted mask does not start at bit 0 or the sum of its size + // and Lsb exceeds the word's size. + if (SMLsb != 0 || Lsb + SMSize > ValTy.getSizeInBits()) return SDValue(); + Opc = LoongArchISD::BSTRPICK; NewOperand = FirstOperand.getOperand(0); } else { // Pattern match BSTRPICK. - // $dst = and $src, (2**len- 1) , if len > 12 - // => BSTRPICK $dst, $src, msb, lsb - // where lsb = 0 and msb = len - 1 + // $dst = and $src, (2**size - 1) , if size > 12 + // => bstrpick $dst, $src, lsb+size-1, lsb , lsb = 0 // If the mask is <= 0xfff, andi can be used instead. if (CN->getZExtValue() <= 0xfff) return SDValue(); - // Return if the mask doesn't start at position 0. - if (SMIdx) + if (SMLsb) return SDValue(); - lsb = 0; + Opc = LoongArchISD::BSTRPICK; NewOperand = FirstOperand; } - msb = lsb + SMLen - 1; - return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand, - DAG.getConstant(msb, DL, GRLenVT), - DAG.getConstant(lsb, DL, GRLenVT)); + return DAG.getNode(Opc, DL, ValTy, NewOperand, + DAG.getConstant((Lsb + SMSize - 1), DL, MVT::i32), + DAG.getConstant(Lsb, DL, MVT::i32)); } -static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const LoongArchSubtarget &Subtarget) { - if (DCI.isBeforeLegalizeOps()) - return SDValue(); +// Determine if the specified node is a constant vector splat. +// +// Returns true and sets Imm if: +// * N is a ISD::BUILD_VECTOR representing a constant splat +static bool isVSplat(SDValue N, APInt &Imm) { + BuildVectorSDNode *Node = dyn_cast(N.getNode()); - // $dst = srl (and $src, Mask), Shamt - // => - // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt - // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1 - // + if (!Node) + return false; - SDValue FirstOperand = N->getOperand(0); - ConstantSDNode *CN; - EVT ValTy = N->getValueType(0); - SDLoc DL(N); - MVT GRLenVT = Subtarget.getGRLenVT(); - unsigned MaskIdx, MaskLen; - uint64_t Shamt; + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + 8)) + return false; + + Imm = SplatValue; - // The first operand must be an AND and the second operand of the AND must be - // a shifted mask. - if (FirstOperand.getOpcode() != ISD::AND || - !(CN = dyn_cast(FirstOperand.getOperand(1))) || - !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen)) + return true; +} + +// Test whether the given node is an all-ones build_vector. +static bool isVectorAllOnes(SDValue N) { + // Look through bitcasts. Endianness doesn't matter because we are looking + // for an all-ones value. + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + BuildVectorSDNode *BVN = dyn_cast(N); + + if (!BVN) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + // Endianness doesn't matter in this context because we are looking for + // an all-ones value. + if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) + return SplatValue.isAllOnesValue(); + + return false; +} + +// Test whether N is the bitwise inverse of OfNode. 
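+// That is, whether N is (xor OfNode, AllOnes) with the all-ones build_vector
+// appearing as either operand of the XOR.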
+static bool isBitwiseInverse(SDValue N, SDValue OfNode) { + if (N->getOpcode() != ISD::XOR) + return false; + + if (isVectorAllOnes(N->getOperand(0))) + return N->getOperand(1) == OfNode; + + if (isVectorAllOnes(N->getOperand(1))) + return N->getOperand(0) == OfNode; + + return false; +} + +static SDValue performSet(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue N1, N2; + if (Op0->getOpcode() == ISD::BUILD_VECTOR && + (Op1->getValueType(0).is128BitVector() || + Op1->getValueType(0).is256BitVector())) { + N1 = Op0; + N2 = Op1; + } else if (Op1->getOpcode() == ISD::BUILD_VECTOR && + (Op0->getValueType(0).is128BitVector() || + Op0->getValueType(0).is256BitVector())) { + N1 = Op1; + N2 = Op0; + } else return SDValue(); - // The second operand (shift amount) must be an immediate. - if (!(CN = dyn_cast(N->getOperand(1)))) + APInt Mask1, Mask2; + if (!isVSplat(N1, Mask1)) return SDValue(); - Shamt = CN->getZExtValue(); - if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1) - return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, - FirstOperand->getOperand(0), - DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), - DAG.getConstant(Shamt, DL, GRLenVT)); + if (!N1->getValueType(0).isSimple()) + return SDValue(); - return SDValue(); + ConstantSDNode *C1; + uint64_t Imm; + unsigned ImmL; + if (!(C1 = dyn_cast(N1.getOperand(0))) || + !isPowerOf2_64(C1->getZExtValue())) + return SDValue(); + + Imm = C1->getZExtValue(); + ImmL = Log2_64(Imm); + MVT VT = N1->getSimpleValueType(0).SimpleTy; + + SDNode *Res; + + if (Subtarget.hasLASX() && N->getValueType(0).is256BitVector()) { + if (VT == MVT::v32i8 && ImmL < 8) + Res = DAG.getMachineNode(LoongArch::XVBITSETI_B, SDLoc(N), VT, N2, + DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); + else if (VT == MVT::v16i16 && ImmL < 16) + Res = DAG.getMachineNode(LoongArch::XVBITSETI_H, SDLoc(N), VT, N2, + DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); + else if (VT == MVT::v8i32 && ImmL < 32) + Res = DAG.getMachineNode(LoongArch::XVBITSETI_W, SDLoc(N), VT, N2, + DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); + else if (VT == MVT::v4i64 && ImmL < 64) + Res = DAG.getMachineNode(LoongArch::XVBITSETI_D, SDLoc(N), VT, N2, + DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); + else + return SDValue(); + } else if (N->getValueType(0).is128BitVector()) { + if (VT == MVT::v16i8 && ImmL < 8) + Res = DAG.getMachineNode(LoongArch::VBITSETI_B, SDLoc(N), VT, N2, + DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); + else if (VT == MVT::v8i16 && ImmL < 16) + Res = DAG.getMachineNode(LoongArch::VBITSETI_H, SDLoc(N), VT, N2, + DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); + else if (VT == MVT::v4i32 && ImmL < 32) + Res = DAG.getMachineNode(LoongArch::VBITSETI_W, SDLoc(N), VT, N2, + DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); + else if (VT == MVT::v2i64 && ImmL < 64) + Res = DAG.getMachineNode(LoongArch::VBITSETI_D, SDLoc(N), VT, N2, + DAG.getTargetConstant(ImmL, SDLoc(N), MVT::i32)); + else + return SDValue(); + + } else + return SDValue(); + + return SDValue(Res, 0); } static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget) { - MVT GRLenVT = Subtarget.getGRLenVT(); - EVT ValTy = N->getValueType(0); - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - ConstantSDNode *CN0, *CN1; - SDLoc DL(N); - unsigned ValBits = ValTy.getSizeInBits(); - 
unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; - unsigned Shamt; - bool SwapAndRetried = false; + SDValue Res; + if (Subtarget.hasLSX() && (N->getValueType(0).is128BitVector() || + N->getValueType(0).is256BitVector())) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { + SDValue Op0Op0 = Op0->getOperand(0); + SDValue Op0Op1 = Op0->getOperand(1); + SDValue Op1Op0 = Op1->getOperand(0); + SDValue Op1Op1 = Op1->getOperand(1); + + SDValue IfSet, IfClr, Cond; + bool IsConstantMask = false; + APInt Mask, InvMask; + + // If Op0Op0 is an appropriate mask, try to find it's inverse in either + // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, + // while looking. IfClr will be set if we find a valid match. + if (isVSplat(Op0Op0, Mask)) { + Cond = Op0Op0; + IfSet = Op0Op1; + + if (isVSplat(Op1Op0, InvMask) && + Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) + IfClr = Op1Op1; + else if (isVSplat(Op1Op1, InvMask) && + Mask.getBitWidth() == InvMask.getBitWidth() && + Mask == ~InvMask) + IfClr = Op1Op0; + + IsConstantMask = true; + } + + // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the + // same thing again using this mask. IfClr will be set if we find a valid + // match. + if (!IfClr.getNode() && isVSplat(Op0Op1, Mask)) { + Cond = Op0Op1; + IfSet = Op0Op0; + + if (isVSplat(Op1Op0, InvMask) && + Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) + IfClr = Op1Op1; + else if (isVSplat(Op1Op1, InvMask) && + Mask.getBitWidth() == InvMask.getBitWidth() && + Mask == ~InvMask) + IfClr = Op1Op0; + + IsConstantMask = true; + } + + // If IfClr is not yet set, try looking for a non-constant match. + // IfClr will be set if we find a valid match amongst the eight + // possibilities. + if (!IfClr.getNode()) { + if (isBitwiseInverse(Op0Op0, Op1Op0)) { + Cond = Op1Op0; + IfSet = Op1Op1; + IfClr = Op0Op1; + } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { + Cond = Op1Op0; + IfSet = Op1Op1; + IfClr = Op0Op0; + } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { + Cond = Op1Op1; + IfSet = Op1Op0; + IfClr = Op0Op1; + } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { + Cond = Op1Op1; + IfSet = Op1Op0; + IfClr = Op0Op0; + } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { + Cond = Op0Op0; + IfSet = Op0Op1; + IfClr = Op1Op1; + } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { + Cond = Op0Op0; + IfSet = Op0Op1; + IfClr = Op1Op0; + } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { + Cond = Op0Op1; + IfSet = Op0Op0; + IfClr = Op1Op1; + } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { + Cond = Op0Op1; + IfSet = Op0Op0; + IfClr = Op1Op0; + } + } + + // At this point, IfClr will be set if we have a valid match. + if (IfClr.getNode()) { + assert(Cond.getNode() && IfSet.getNode()); + + // Fold degenerate cases. + if (IsConstantMask) { + if (Mask.isAllOnesValue()) + return IfSet; + else if (Mask == 0) + return IfClr; + } + + // Transform the DAG into an equivalent VSELECT. + return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0), Cond, + IfSet, IfClr); + } + } + + if (Res = performSet(N, DAG, DCI, Subtarget)) + return Res; + } + + // Pattern match BSTRINS. 
+ // $dst = or (and $src1 , mask0), (and (shl $src, lsb), mask1), + // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 + // => bstrins $dst, $src, lsb+size-1, lsb, $src1 if (DCI.isBeforeLegalizeOps()) return SDValue(); - if (ValBits != 32 && ValBits != 64) + SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); + uint64_t SMLsb0, SMSize0, SMLsb1, SMSize1; + ConstantSDNode *CN, *CN1; + + // See if Op's first operand matches (and $src1 , mask0). + if (And0.getOpcode() != ISD::AND) return SDValue(); -Retry: - // 1st pattern to match BSTRINS: - // R = or (and X, mask0), (and (shl Y, lsb), mask1) - // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 - // => - // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) - if (N0.getOpcode() == ISD::AND && - (CN0 = dyn_cast(N0.getOperand(1))) && - isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && - N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL && - (CN1 = dyn_cast(N1.getOperand(1))) && - isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && - MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && - (CN1 = dyn_cast(N1.getOperand(0).getOperand(1))) && - (Shamt = CN1->getZExtValue()) == MaskIdx0 && - (MaskIdx0 + MaskLen0 <= ValBits)) { - LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n"); - return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), - N1.getOperand(0).getOperand(0), - DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), - DAG.getConstant(MaskIdx0, DL, GRLenVT)); - } - - // 2nd pattern to match BSTRINS: - // R = or (and X, mask0), (shl (and Y, mask1), lsb) - // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) - // => - // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) - if (N0.getOpcode() == ISD::AND && - (CN0 = dyn_cast(N0.getOperand(1))) && - isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && - N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && - (CN1 = dyn_cast(N1.getOperand(1))) && - (Shamt = CN1->getZExtValue()) == MaskIdx0 && - (CN1 = dyn_cast(N1.getOperand(0).getOperand(1))) && - isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && - MaskLen0 == MaskLen1 && MaskIdx1 == 0 && - (MaskIdx0 + MaskLen0 <= ValBits)) { - LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n"); - return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), - N1.getOperand(0).getOperand(0), - DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), - DAG.getConstant(MaskIdx0, DL, GRLenVT)); - } - - // 3rd pattern to match BSTRINS: - // R = or (and X, mask0), (and Y, mask1) - // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 - // => - // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb - // where msb = lsb + size - 1 - if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && - (CN0 = dyn_cast(N0.getOperand(1))) && - isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && - (MaskIdx0 + MaskLen0 <= 64) && - (CN1 = dyn_cast(N1->getOperand(1))) && - (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { - LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n"); - return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), - DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1, - DAG.getConstant(MaskIdx0, DL, GRLenVT)), - DAG.getConstant(ValBits == 32 - ? 
(MaskIdx0 + (MaskLen0 & 31) - 1)
-                                           : (MaskIdx0 + MaskLen0 - 1),
-                                       DL, GRLenVT),
-                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
-  }
-
-  // 4th pattern to match BSTRINS:
-  //  R = or (and X, mask), (shl Y, shamt)
-  //  where mask = (2**shamt - 1)
-  //  =>
-  //  R = BSTRINS X, Y, ValBits - 1, shamt
-  //  where ValBits = 32 or 64
-  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
-      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
-      isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
-      MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
-      (Shamt = CN1->getZExtValue()) == MaskLen0 &&
-      (MaskIdx0 + MaskLen0 <= ValBits)) {
-    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
-    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
-                       N1.getOperand(0),
-                       DAG.getConstant((ValBits - 1), DL, GRLenVT),
-                       DAG.getConstant(Shamt, DL, GRLenVT));
-  }
-
-  // 5th pattern to match BSTRINS:
-  //  R = or (and X, mask), const
-  //  where ~mask = (2**size - 1) << lsb, mask & const = 0
-  //  =>
-  //  R = BSTRINS X, (const >> lsb), msb, lsb
-  //  where msb = lsb + size - 1
-  if (N0.getOpcode() == ISD::AND &&
-      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
-      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
-      (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
-      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
-    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
-    return DAG.getNode(
-        LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
-        DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
-        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
-        DAG.getConstant(MaskIdx0, DL, GRLenVT));
-  }
-
-  // 6th pattern.
-  // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
-  // by the incoming bits are known to be zero.
-  // =>
-  // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
-  //
-  // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
-  // pattern is more common than the 1st. So we put the 1st before the 6th in
-  // order to match as many nodes as possible.
-  ConstantSDNode *CNMask, *CNShamt;
-  unsigned MaskIdx, MaskLen;
-  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
-      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
-      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
-      MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
-      CNShamt->getZExtValue() + MaskLen <= ValBits) {
-    Shamt = CNShamt->getZExtValue();
-    APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
-    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
-      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
-      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
-                         N1.getOperand(0).getOperand(0),
-                         DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
-                         DAG.getConstant(Shamt, DL, GRLenVT));
+  if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) ||
+      !isShiftedMask(~CN->getSExtValue(), SMLsb0, SMSize0))
+    return SDValue();
+
+  // See if Op's second operand matches (and (shl $src, lsb), mask1).
+  if (And1.getOpcode() == ISD::AND &&
+      And1.getOperand(0).getOpcode() == ISD::SHL) {
+
+    if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
+        !isShiftedMask(CN->getZExtValue(), SMLsb1, SMSize1))
+      return SDValue();
+
+    // The shift masks must have the same least significant bit and size.
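+    // For example, with mask0 = ~(0xff << 8) and mask1 = 0xff << 8, both
+    // masks describe the bit field [15:8]: SMLsb0 == SMLsb1 == 8 and
+    // SMSize0 == SMSize1 == 8.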
+ if (SMLsb0 != SMLsb1 || SMSize0 != SMSize1) + return SDValue(); + + SDValue Shl = And1.getOperand(0); + + if (!(CN = dyn_cast(Shl.getOperand(1)))) + return SDValue(); + + unsigned Shamt = CN->getZExtValue(); + + // Return if the shift amount and the first bit position of mask are not the + // same. + EVT ValTy = N->getValueType(0); + if ((Shamt != SMLsb0) || (SMLsb0 + SMSize0 > ValTy.getSizeInBits())) + return SDValue(); + + SDLoc DL(N); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, Shl.getOperand(0), + DAG.getConstant((SMLsb0 + SMSize0 - 1), DL, MVT::i32), + DAG.getConstant(SMLsb0, DL, MVT::i32), + And0.getOperand(0)); + } else { + // Pattern match BSTRINS. + // $dst = or (and $src, mask0), mask1 + // where mask0 = ((1 << SMSize0) -1) << SMLsb0 + // => bstrins $dst, $src, SMLsb0+SMSize0-1, SMLsb0 + if (~CN->getSExtValue() == ((((int64_t)1 << SMSize0) - 1) << SMLsb0) && + (SMSize0 + SMLsb0 <= 64)) { + // Check if AND instruction has constant as argument + bool isConstCase = And1.getOpcode() != ISD::AND; + if (And1.getOpcode() == ISD::AND) { + if (!(CN1 = dyn_cast(And1->getOperand(1)))) + return SDValue(); + } else { + if (!(CN1 = dyn_cast(N->getOperand(1)))) + return SDValue(); + } + // Don't generate BSTRINS if constant OR operand doesn't fit into bits + // cleared by constant AND operand. + if (CN->getSExtValue() & CN1->getSExtValue()) + return SDValue(); + + SDLoc DL(N); + EVT ValTy = N->getOperand(0)->getValueType(0); + SDValue Const1; + SDValue SrlX; + if (!isConstCase) { + Const1 = DAG.getConstant(SMLsb0, DL, MVT::i32); + SrlX = DAG.getNode(ISD::SRL, DL, And1->getValueType(0), And1, Const1); + } + return DAG.getNode( + LoongArchISD::BSTRINS, DL, N->getValueType(0), + isConstCase + ? DAG.getConstant(CN1->getSExtValue() >> SMLsb0, DL, ValTy) + : SrlX, + DAG.getConstant(ValTy.getSizeInBits() / 8 < 8 ? (SMLsb0 + (SMSize0 & 31) - 1) + : (SMLsb0 + SMSize0 - 1), + DL, MVT::i32), + DAG.getConstant(SMLsb0, DL, MVT::i32), + And0->getOperand(0)); + } + return SDValue(); } +} - // 7th pattern. - // a = b | ((c << shamt) & shifted_mask), where all positions in b to be - // overwritten by the incoming bits are known to be zero. - // => - // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx +static bool +shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, + SelectionDAG &DAG, + const LoongArchSubtarget &Subtarget) { + // Estimate the number of operations the below transform will turn a + // constant multiply into. The number is approximately equal to the minimal + // number of powers of two that constant can be broken down to by adding + // or subtracting them. // - // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd - // before the 7th in order to match as many nodes as possible. 
- if (N1.getOpcode() == ISD::AND && - (CNMask = dyn_cast(N1.getOperand(1))) && - isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && - N1.getOperand(0).getOpcode() == ISD::SHL && - (CNShamt = dyn_cast(N1.getOperand(0).getOperand(1))) && - CNShamt->getZExtValue() == MaskIdx) { - APInt ShMask(ValBits, CNMask->getZExtValue()); - if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { - LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n"); - return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, - N1.getOperand(0).getOperand(0), - DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), - DAG.getConstant(MaskIdx, DL, GRLenVT)); - } - } + // If we have taken more than 10[1] / 8[2] steps to attempt the + // optimization for a native sized value, it is more than likely that this + // optimization will make things worse. + // + // [1] LA64 requires 4 instructions at most to materialize any constant, + // multiplication requires at least 4 cycles, but another cycle (or two) + // to retrieve the result from corresponding registers. + // + // [2] LA32 requires 2 instructions at most to materialize any constant, + // multiplication requires at least 4 cycles, but another cycle (or two) + // to retrieve the result from corresponding registers. + // + // TODO: + // - MaxSteps needs to consider the `VT` of the constant for the current + // target. + // - Consider to perform this optimization after type legalization. + // That allows to remove a workaround for types not supported natively. + // - Take in account `-Os, -Oz` flags because this optimization + // increases code size. + unsigned MaxSteps = Subtarget.isABI_LP32() ? 8 : 10; + + SmallVector WorkStack(1, C); + unsigned Steps = 0; + unsigned BitWidth = C.getBitWidth(); + + while (!WorkStack.empty()) { + APInt Val = WorkStack.pop_back_val(); + + if (Val == 0 || Val == 1) + continue; - // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. - if (!SwapAndRetried) { - std::swap(N0, N1); - SwapAndRetried = true; - goto Retry; - } + if (Steps >= MaxSteps) + return false; - SwapAndRetried = false; -Retry2: - // 8th pattern. - // a = b | (c & shifted_mask), where all positions in b to be overwritten by - // the incoming bits are known to be zero. - // => - // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx - // - // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So - // we put it here in order to match as many nodes as possible or generate less - // instructions. - if (N1.getOpcode() == ISD::AND && - (CNMask = dyn_cast(N1.getOperand(1))) && - isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) { - APInt ShMask(ValBits, CNMask->getZExtValue()); - if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { - LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n"); - return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, - DAG.getNode(ISD::SRL, DL, N1->getValueType(0), - N1->getOperand(0), - DAG.getConstant(MaskIdx, DL, GRLenVT)), - DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), - DAG.getConstant(MaskIdx, DL, GRLenVT)); + if (Val.isPowerOf2()) { + ++Steps; + continue; } - } - // Swap N0/N1 and retry. - if (!SwapAndRetried) { - std::swap(N0, N1); - SwapAndRetried = true; - goto Retry2; - } + + APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); + APInt Ceil = Val.isNegative() ? 
APInt(BitWidth, 0)
+                                  : APInt(BitWidth, 1) << Val.ceilLogBase2();
+
+    if ((Val - Floor).ule(Ceil - Val)) {
+      WorkStack.push_back(Floor);
+      WorkStack.push_back(Val - Floor);
+    } else {
+      WorkStack.push_back(Ceil);
+      WorkStack.push_back(Ceil - Val);
+    }
+
+    ++Steps;
+  }
+
+  // If the value being multiplied is not supported natively, we have to pay
+  // an additional legalization cost, conservatively assume an increase in the
+  // cost of 3 instructions per step. The values for this heuristic were
+  // determined experimentally.
+  unsigned RegisterSize = DAG.getTargetLoweringInfo()
+                              .getRegisterType(*DAG.getContext(), VT)
+                              .getSizeInBits();
+  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
+  if (Steps > 27)
+    return false;
+
+  return true;
+}
+
+static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
+                            EVT ShiftTy, SelectionDAG &DAG) {
+  // Return 0.
+  if (C == 0)
+    return DAG.getConstant(0, DL, VT);
+
+  // Return x.
+  if (C == 1)
+    return X;
+
+  // If c is power of 2, return (shl x, log2(c)).
+  if (C.isPowerOf2())
+    return DAG.getNode(ISD::SHL, DL, VT, X,
+                       DAG.getConstant(C.logBase2(), DL, ShiftTy));
+
+  unsigned BitWidth = C.getBitWidth();
+  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
+  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0)
+                              : APInt(BitWidth, 1) << C.ceilLogBase2();
+
+  // If |c - floor_c| <= |c - ceil_c|,
+  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
+  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
+  if ((C - Floor).ule(Ceil - C)) {
+    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
+    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
+    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
+  }
+
+  // If |c - floor_c| > |c - ceil_c|,
+  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
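+  // For example, c = 30 gives floor_c = 16 and ceil_c = 32; |30 - 32| = 2 is
+  // smaller than |30 - 16| = 14, so this path emits
+  // (sub (shl x, 5), (shl x, 1)), i.e. 32*x - 2*x.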
+  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
+  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
+  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
+}
+
+static SDValue performLogicCombine(SDNode *N, SelectionDAG &DAG,
+                                   const LoongArchSubtarget &Subtarget) {
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (!(N0->getOpcode() == ISD::TRUNCATE && N1->getOpcode() == ISD::TRUNCATE))
+    return SDValue();
+
+  if (!(N->getValueType(0).isSimple() && N0->getValueType(0).isSimple() &&
+        N1->getValueType(0).isSimple() &&
+        N0->getOperand(0)->getValueType(0).isSimple() &&
+        N1->getOperand(0)->getValueType(0).isSimple()))
+    return SDValue();
+
+  if (!(N->getSimpleValueType(0).SimpleTy == MVT::i32 &&
+        N0->getSimpleValueType(0).SimpleTy == MVT::i32 &&
+        N1->getSimpleValueType(0).SimpleTy == MVT::i32))
+    return SDValue();
+
+  if (!(N0->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64 &&
+        N1->getOperand(0)->getSimpleValueType(0).SimpleTy == MVT::i64))
+    return SDValue();
+
+  SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32);
+  SDValue Val0 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+                                            N0->getValueType(0),
+                                            N0->getOperand(0), SubReg),
+                         0);
+  SDValue Val1 = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+                                            N1->getValueType(0),
+                                            N1->getOperand(0), SubReg),
+                         0);
+
+  return DAG.getNode(N->getOpcode(), DL, N0->getValueType(0), Val0, Val1);
+}
+
+static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
+                                 const TargetLowering::DAGCombinerInfo &DCI,
+                                 const LoongArchTargetLowering *TL,
+                                 const LoongArchSubtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+
+  SDValue Res;
+  if ((Res = performLogicCombine(N, DAG, Subtarget)))
+    return Res;
+
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
+                              C->getAPIntValue(), VT, DAG, Subtarget))
+      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
+                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
+                          DAG);
+
+  return SDValue(N, 0);
+}
+
+// Fold sign-extensions into LoongArchISD::VEXTRACT_[SZ]EXT_ELT for LSX.
+//
+// Performs the following transformations:
+// - Changes LoongArchISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
+//   sign/zero-extension is completely overwritten by the new one performed by
+//   the ISD::SRA and ISD::SHL nodes.
+// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
+//   sequence.
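+//
+// For example, (sra (shl (LoongArchVExtractSExt $v, $i, i16), 16), 16) on an
+// i32 value re-creates the sign-extension already performed by the extract
+// ($d + sizeof($c) == 16 + 16 == 32), so it folds to the extract itself.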
+static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const LoongArchSubtarget &Subtarget) {
+
+  SDValue Res;
+  if ((Res = performLogicCombine(N, DAG, Subtarget)))
+    return Res;
+
+  if (Subtarget.hasLSX() || Subtarget.hasLASX()) {
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+
+    // (sra (shl (LoongArchVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
+    // where $d + sizeof($c) == 32
+    // or    $d + sizeof($c) <= 32 and SExt
+    // -> (LoongArchVExtractSExt $a, $b, $c)
+    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
+      SDValue Op0Op0 = Op0->getOperand(0);
+      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);
+
+      if (!ShAmount)
+        return SDValue();
+
+      if (Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_SEXT_ELT &&
+          Op0Op0->getOpcode() != LoongArchISD::VEXTRACT_ZEXT_ELT)
+        return SDValue();
+
+      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
+      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();
+
+      if (TotalBits == 32 ||
+          (Op0Op0->getOpcode() == LoongArchISD::VEXTRACT_SEXT_ELT &&
+           TotalBits <= 32)) {
+        SDValue Ops[] = {Op0Op0->getOperand(0), Op0Op0->getOperand(1),
+                         Op0Op0->getOperand(2)};
+        return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
+                           Op0Op0->getVTList(),
+                           makeArrayRef(Ops, Op0Op0->getNumOperands()));
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+// combine vsub/vslt/vbitsel.v to vabsd
+static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
+  assert((N->getOpcode() == ISD::VSELECT) && "Need ISD::VSELECT");
+
+  SDLoc dl(N);
+  SDValue Cond = N->getOperand(0);
+  SDValue TrueOpnd = N->getOperand(1);
+  SDValue FalseOpnd = N->getOperand(2);
+
+  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
+      FalseOpnd.getOpcode() != ISD::SUB)
+    return SDValue();
+
+  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
+    return SDValue();
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+  switch (CC) {
+  default:
+    return SDValue();
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+  case ISD::SETGT:
+  case ISD::SETGE:
+    break;
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETLT:
+  case ISD::SETLE:
+    std::swap(TrueOpnd, FalseOpnd);
+    break;
+  }
+
+  SDValue Op1 = Cond.getOperand(0);
+  SDValue Op2 = Cond.getOperand(1);
+
+  if (TrueOpnd.getOperand(0) == Op1 && TrueOpnd.getOperand(1) == Op2 &&
+      FalseOpnd.getOperand(0) == Op2 && FalseOpnd.getOperand(1) == Op1) {
+    if (ISD::isSignedIntSetCC(CC)) {
+      return DAG.getNode(LoongArchISD::VABSD, dl,
+                         N->getOperand(1).getValueType(), Op1, Op2,
+                         DAG.getTargetConstant(0, dl, MVT::i32));
+    } else {
+      return DAG.getNode(LoongArchISD::UVABSD, dl,
+                         N->getOperand(1).getValueType(), Op1, Op2,
+                         DAG.getTargetConstant(0, dl, MVT::i32));
+    }
+  }
+  return SDValue();
+}
+
+static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
+                                 const LoongArchSubtarget &Subtarget) {
+
+  EVT Ty = N->getValueType(0);
+
+  if ((Subtarget.hasLSX() && Ty.is128BitVector() && Ty.isInteger()) ||
+      (Subtarget.hasLASX() && Ty.is256BitVector() && Ty.isInteger())) {
+    // Try the following combines:
+    //   (xor (or $a, $b), (build_vector allones))
+    //   (xor (or $a, $b), (bitcast (build_vector allones)))
+    SDValue Op0 = N->getOperand(0);
+    SDValue Op1 = N->getOperand(1);
+    SDValue NotOp;
+
+    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
+      NotOp = Op1;
+    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+      NotOp = Op0;
+    else
+      return SDValue();
+
+    if (NotOp->getOpcode() == ISD::OR)
+      return DAG.getNode(LoongArchISD::VNOR, 
SDLoc(N), Ty, NotOp->getOperand(0), + NotOp->getOperand(1)); + } + + return SDValue(); +} + +// When using a 256-bit vector is less expensive than using a 128-bit vector, +// use this function to convert a 128-bit vector to a 256-bit vector. +static SDValue +performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + + assert((N->getOpcode() == ISD::CONCAT_VECTORS) && "Need ISD::CONCAT_VECTORS"); + if (DCI.isAfterLegalizeDAG()) + return SDValue(); + + SDLoc DL(N); + SDValue Top0 = N->getOperand(0); + SDValue Top1 = N->getOperand(1); + + // Check for cheaper optimizations. + if (!((Top0->getOpcode() == ISD::SIGN_EXTEND) && + (Top1->getOpcode() == ISD::SIGN_EXTEND))) + return SDValue(); + if (!((Top0->getOperand(0)->getOpcode() == ISD::ADD) && + (Top1->getOperand(0)->getOpcode() == ISD::ADD))) + return SDValue(); + + SDValue Op_a0 = Top0->getOperand(0); + SDValue Op_a1 = Top1->getOperand(0); + for (int i = 0; i < 2; i++) { + if (!((Op_a0->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR) && + (Op_a1->getOperand(i)->getOpcode() == ISD::BUILD_VECTOR))) + return SDValue(); + } + + SDValue Ops_b[] = {Op_a0->getOperand(0), Op_a0->getOperand(1), + Op_a1->getOperand(0), Op_a1->getOperand(1)}; + for (int i = 0; i < 4; i++) { + if (Ops_b[i]->getNumOperands() != 2) + return SDValue(); + } + + // Currently only a single case is handled, and more optimization scenarios + // will be added in the future. + SDValue Ops_e[] = {Ops_b[0]->getOperand(0), Ops_b[0]->getOperand(1), + Ops_b[2]->getOperand(0), Ops_b[2]->getOperand(1), + Ops_b[1]->getOperand(0), Ops_b[1]->getOperand(1), + Ops_b[3]->getOperand(0), Ops_b[3]->getOperand(1)}; + for (int i = 0; i < 8; i++) { + if (dyn_cast(Ops_e[i])) + return SDValue(); + if (i < 4) { + if (cast(Ops_e[i]->getOperand(1))->getSExtValue() != + (2 * i)) + return SDValue(); + } else { + if (cast(Ops_e[i]->getOperand(1))->getSExtValue() != + (2 * i - 7)) + return SDValue(); + } + } + + for (int i = 0; i < 5; i = i + 4) { + if (!((Ops_e[i]->getOperand(0) == Ops_e[i + 1]->getOperand(0)) && + (Ops_e[i + 1]->getOperand(0) == Ops_e[i + 2]->getOperand(0)) && + (Ops_e[i + 2]->getOperand(0) == Ops_e[i + 3]->getOperand(0)))) + return SDValue(); + } + return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64, + Ops_e[6]->getOperand(0), + Ops_e[0]->getOperand(0)), + 0); +} + +static SDValue performParity(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + + SDLoc DL(N); + SDValue T = N->getOperand(0); + if (!(N->getValueType(0).isSimple() && T->getValueType(0).isSimple())) + return SDValue(); + + if (DCI.isAfterLegalizeDAG()) + return SDValue(); + + SDValue Ops[4]; + bool pos_e = false; + bool pos_o = false; + + for (int i = 0; i < 4; i++) { + Ops[i] = T->getOperand(i); + if (!Ops[i]->getValueType(0).isSimple()) + return SDValue(); + if (Ops[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + if (!dyn_cast(Ops[i]->getOperand(1))) + return SDValue(); + + if (cast(Ops[i]->getOperand(1))->getSExtValue() == + (2 * i)) { + pos_e = true; + } else if (cast(Ops[i]->getOperand(1))->getSExtValue() == + (2 * i + 1)) { + pos_o = true; + } else + return SDValue(); + } + + if (!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 && + T->getSimpleValueType(0).SimpleTy == MVT::v4i32)) + return SDValue(); + + for (int j = 0; j < 3; j++) { + if (Ops[j]->getOperand(0) != Ops[j + 1]->getOperand(0)) + return SDValue(); + } + if (pos_e) { + 
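+    // All extracts read even lanes; emit the even-lane widening forms
+    // (xvaddwev.d.w[u] / xvsubwev.d.w[u]) below.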
if (N->getOpcode() == ISD::SIGN_EXTEND) { + if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) + return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_W, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(1), + Ops[0]->getOperand(0)->getOperand(0)), + 0); + else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) + return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_W, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(0), + Ops[0]->getOperand(0)->getOperand(1)), + 0); + } else if (N->getOpcode() == ISD::ZERO_EXTEND) { + if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) + return SDValue(DAG.getMachineNode(LoongArch::XVADDWEV_D_WU, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(1), + Ops[0]->getOperand(0)->getOperand(0)), + 0); + else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) + return SDValue(DAG.getMachineNode(LoongArch::XVSUBWEV_D_WU, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(0), + Ops[0]->getOperand(0)->getOperand(1)), + 0); + } + } else if (pos_o) { + if (N->getOpcode() == ISD::SIGN_EXTEND) { + if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) + return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_W, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(1), + Ops[0]->getOperand(0)->getOperand(0)), + 0); + else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) + return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_W, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(0), + Ops[0]->getOperand(0)->getOperand(1)), + 0); + } else if (N->getOpcode() == ISD::ZERO_EXTEND) { + if (Ops[0]->getOperand(0)->getOpcode() == ISD::ADD) + return SDValue(DAG.getMachineNode(LoongArch::XVADDWOD_D_WU, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(1), + Ops[0]->getOperand(0)->getOperand(0)), + 0); + else if (Ops[0]->getOperand(0)->getOpcode() == ISD::SUB) + return SDValue(DAG.getMachineNode(LoongArch::XVSUBWOD_D_WU, DL, + MVT::v4i64, + Ops[0]->getOperand(0)->getOperand(0), + Ops[0]->getOperand(0)->getOperand(1)), + 0); + } + } else + return SDValue(); + + return SDValue(); +} + +// Optimize zero extension and sign extension of data +static SDValue performExtend(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + + if (!Subtarget.hasLASX()) + return SDValue(); + + SDLoc DL(N); + SDValue T = N->getOperand(0); + + if (T->getOpcode() == ISD::BUILD_VECTOR) + return performParity(N, DAG, DCI, Subtarget); + + if (T->getOpcode() != ISD::ADD && T->getOpcode() != ISD::SUB) + return SDValue(); + + SDValue T0 = T->getOperand(0); + SDValue T1 = T->getOperand(1); + + if (!(T0->getOpcode() == ISD::BUILD_VECTOR && + T1->getOpcode() == ISD::BUILD_VECTOR)) + return SDValue(); + + if (DCI.isAfterLegalizeDAG()) + return SDValue(); + + if (!(T->getValueType(0).isSimple() && T0->getValueType(0).isSimple() && + T1->getValueType(0).isSimple() && N->getValueType(0).isSimple())) + return SDValue(); + + if (!(N->getSimpleValueType(0).SimpleTy == MVT::v4i64 && + T->getSimpleValueType(0).SimpleTy == MVT::v4i32 && + T0->getSimpleValueType(0).SimpleTy == MVT::v4i32 && + T1->getSimpleValueType(0).SimpleTy == MVT::v4i32)) + return SDValue(); + + SDValue Opse0[4]; + SDValue Opse1[4]; + + for (int i = 0; i < 4; i++) { + if (T->getOpcode() == ISD::ADD) { + Opse0[i] = T1->getOperand(i); + Opse1[i] = T0->getOperand(i); + } else if (T->getOpcode() == ISD::SUB) { + Opse0[i] = T0->getOperand(i); + Opse1[i] = T1->getOperand(i); + } + + if (Opse0[i]->getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Opse1[i]->getOpcode() != 
ISD::EXTRACT_VECTOR_ELT)
+      return SDValue();
+
+    if (!(dyn_cast<ConstantSDNode>(Opse0[i]->getOperand(1)) &&
+          dyn_cast<ConstantSDNode>(Opse1[i]->getOperand(1))))
+      return SDValue();
+
+    if (cast<ConstantSDNode>(Opse0[i]->getOperand(1))->getSExtValue() !=
+            (2 * i + 1) ||
+        cast<ConstantSDNode>(Opse1[i]->getOperand(1))->getSExtValue() !=
+            (2 * i))
+      return SDValue();
+
+    if (i > 0 && (Opse0[i]->getOperand(0) != Opse0[i - 1]->getOperand(0) ||
+                  Opse1[i]->getOperand(0) != Opse1[i - 1]->getOperand(0)))
+      return SDValue();
+  }
+
+  if (N->getOpcode() == ISD::SIGN_EXTEND) {
+    if (T->getOpcode() == ISD::ADD)
+      return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_D_W, DL, MVT::v4i64,
+                                        Opse0[0]->getOperand(0),
+                                        Opse1[0]->getOperand(0)),
+                     0);
+    else if (T->getOpcode() == ISD::SUB)
+      return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_D_W, DL, MVT::v4i64,
+                                        Opse0[0]->getOperand(0),
+                                        Opse1[0]->getOperand(0)),
+                     0);
+  } else if (N->getOpcode() == ISD::ZERO_EXTEND) {
+    if (T->getOpcode() == ISD::ADD)
+      return SDValue(DAG.getMachineNode(LoongArch::XVHADDW_DU_WU, DL,
+                                        MVT::v4i64, Opse0[0]->getOperand(0),
+                                        Opse1[0]->getOperand(0)),
+                     0);
+    else if (T->getOpcode() == ISD::SUB)
+      return SDValue(DAG.getMachineNode(LoongArch::XVHSUBW_DU_WU, DL,
+                                        MVT::v4i64, Opse0[0]->getOperand(0),
+                                        Opse1[0]->getOperand(0)),
+                     0);
+  }
+
+  return SDValue();
+}
+
+static SDValue performSIGN_EXTENDCombine(SDNode *N, SelectionDAG &DAG,
+                                         TargetLowering::DAGCombinerInfo &DCI,
+                                         const LoongArchSubtarget &Subtarget) {
+
+  assert((N->getOpcode() == ISD::SIGN_EXTEND) && "Need ISD::SIGN_EXTEND");
+
+  SDLoc DL(N);
+  SDValue Top = N->getOperand(0);
+
+  SDValue Res;
+  if ((Res = performExtend(N, DAG, DCI, Subtarget)))
+    return Res;
+
+  if (!(Top->getOpcode() == ISD::CopyFromReg))
+    return SDValue();
+
+  if ((Top->getOperand(0)->getOpcode() == ISD::EntryToken) &&
+      (N->getValueType(0) == MVT::i64)) {
+
+    SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32);
+    SDNode *Res = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64);
+
+    Res = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i64,
+                             SDValue(Res, 0), Top, SubReg);
+
+    return SDValue(Res, 0);
+  }
+
+  return SDValue();
+}
+
+static SDValue performZERO_EXTENDCombine(SDNode *N, SelectionDAG &DAG,
+                                         TargetLowering::DAGCombinerInfo &DCI,
+                                         const LoongArchSubtarget &Subtarget) {
+
+  assert((N->getOpcode() == ISD::ZERO_EXTEND) && "Need ISD::ZERO_EXTEND");
+
+  SDLoc DL(N);
+
+  SDValue Res;
+  if ((Res = performExtend(N, DAG, DCI, Subtarget)))
+    return Res;
+
+  return SDValue();
+}
+
+SDValue LoongArchTargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Val;
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::AND:
+    return performANDCombine(N, DAG, DCI, Subtarget);
+  case ISD::OR:
+    return performORCombine(N, DAG, DCI, Subtarget);
+  case ISD::XOR:
+    return performXORCombine(N, DAG, Subtarget);
+  case ISD::MUL:
+    return performMULCombine(N, DAG, DCI, this, Subtarget);
+  case ISD::SRA:
+    return performSRACombine(N, DAG, DCI, Subtarget);
+  case ISD::SELECT:
+    return performSELECTCombine(N, DAG, DCI, Subtarget);
+  case ISD::VSELECT:
+    return performVSELECTCombine(N, DAG);
+  case ISD::CONCAT_VECTORS:
+    return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
+  case ISD::SIGN_EXTEND:
+    return performSIGN_EXTENDCombine(N, DAG, DCI, Subtarget);
+  case ISD::ZERO_EXTEND:
+    return performZERO_EXTENDCombine(N, DAG, DCI, Subtarget);
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::SHL:
+  case ISD::SRL:
+    return performLogicCombine(N, DAG, Subtarget);
+  }
+
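+  // No LoongArch-specific combine matched; leave the node to the generic
+  // DAG combiner.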
return SDValue(); +} + +static SDValue lowerLSXSplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { + EVT ResVecTy = Op->getValueType(0); + EVT ViaVecTy = ResVecTy; + SDLoc DL(Op); + + // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and + // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating + // lanes. + SDValue LaneA = Op->getOperand(OpNr); + SDValue LaneB; + + if (ResVecTy == MVT::v2i64) { + // In case of the index being passed as an immediate value, set the upper + // lane to 0 so that the splati.d instruction can be matched. + if (isa(LaneA)) + LaneB = DAG.getConstant(0, DL, MVT::i32); + // Having the index passed in a register, set the upper lane to the same + // value as the lower - this results in the BUILD_VECTOR node not being + // expanded through stack. This way we are able to pattern match the set of + // nodes created here to splat.d. + else + LaneB = LaneA; + ViaVecTy = MVT::v4i32; + } else + LaneB = LaneA; + + SDValue Ops[16] = {LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, + LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB}; + + SDValue Result = DAG.getBuildVector( + ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); + + if (ViaVecTy != ResVecTy) { + SDValue One = DAG.getConstant(1, DL, ViaVecTy); + Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, + DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); + } + + return Result; +} + +static SDValue lowerLSXSplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, + bool IsSigned = false) { + return DAG.getConstant( + APInt(Op->getValueType(0).getScalarType().getSizeInBits(), + Op->getConstantOperandVal(ImmOp), IsSigned), + SDLoc(Op), Op->getValueType(0)); +} + +static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, + SelectionDAG &DAG) { + EVT ViaVecTy = VecTy; + SDValue SplatValueA = SplatValue; + SDValue SplatValueB = SplatValue; + SDLoc DL(SplatValue); + + if (VecTy == MVT::v2i64) { + // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. + ViaVecTy = MVT::v4i32; + + SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); + SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, + DAG.getConstant(32, DL, MVT::i32)); + SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); + } + + SDValue Ops[32] = {SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB}; + + SDValue Result = DAG.getBuildVector( + ViaVecTy, DL, makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); + + if (VecTy != ViaVecTy) + Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); + + return Result; +} + +static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + SDValue Vec = Op->getOperand(2); + MVT ResEltTy = + (ResTy == MVT::v2i64 || ResTy == MVT::v4i64) ? 
MVT::i64 : MVT::i32; + SDValue ConstValue = + DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResEltTy); + SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, DAG); + + return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); +} + +static SDValue lowerLSXBitClear(SDValue Op, SelectionDAG &DAG) { + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + SDValue One = DAG.getConstant(1, DL, ResTy); + SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); + + return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), + DAG.getNOT(DL, Bit, ResTy)); +} + +static SDValue lowerLSXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), + /* Alignment = */ 16); +} + +static SDValue lowerLASXLoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), + /* Alignment = */ 32); +} + +static SDValue lowerLASXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), + /* Alignment = */ 32); + return DAG.getNode(LoongArchISD::XVBROADCAST, DL, + DAG.getVTList(ResTy, MVT::Other), Load); +} + +static SDValue lowerLSXVLDRIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Address = Op->getOperand(2); + SDValue Offset = Op->getOperand(3); + EVT ResTy = Op->getValueType(0); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. 
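+  // Sign-extending the offset first keeps negative displacements correct
+  // across the i32 -> i64 widening before the pointer add.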
+ if (Subtarget.isABI_LP64()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + SDValue Load = DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), + /* Alignment = */ 16); + return DAG.getNode(LoongArchISD::VBROADCAST, DL, + DAG.getVTList(ResTy, MVT::Other), Load); +} + +static SDValue lowerLSXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Value = Op->getOperand(2); + SDValue Address = Op->getOperand(3); + SDValue Offset = Op->getOperand(4); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), + /* Alignment = */ 16); +} + +static SDValue lowerLASXStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue ChainIn = Op->getOperand(0); + SDValue Value = Op->getOperand(2); + SDValue Address = Op->getOperand(3); + SDValue Offset = Op->getOperand(4); + EVT PtrTy = Address->getValueType(0); + + // For LP64 addresses have the underlying type MVT::i64. This intrinsic + // however takes an i32 signed constant offset. The actual type of the + // intrinsic is a scaled signed i12. + if (Subtarget.isABI_LP64()) + Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); + + Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); + + return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), + /* Alignment = */ 32); +} + +static SDValue LowerSUINT_TO_FP(unsigned ExtOpcode, SDValue Op, SelectionDAG &DAG) { + + EVT ResTy = Op->getValueType(0); + SDValue Op0 = Op->getOperand(0); + EVT ViaTy = Op0->getValueType(0); + SDLoc DL(Op); + + if (!ResTy.isVector()) { + if(ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ResTy, Op0); + else if(ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { + Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op0); + return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Op0); + } else { + Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Op0); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::f32, Op0); + } + + } + + if (ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) { + // v4i32 => v4f32 v8i32 => v8f32 + // v2i64 => v2f64 v4i64 => v4f64 + // do nothing + } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) { + // v4i32 => v4i64 => v4f64 + Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0}); + Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0); + } else { + // v4i64 => v4f32 + SDValue Ops[4]; + for (unsigned i = 0; i < 4; i++) { + SDValue I64 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op0, + DAG.getConstant(i, DL, MVT::i32)); + Ops[i] = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, I64); + } + Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4)); + } + + return Op0; +} + +static SDValue LowerFP_TO_SUINT(unsigned FPToSUI, unsigned ExtOpcode, + SDValue Op, SelectionDAG &DAG) { + + EVT ResTy = Op->getValueType(0); + SDValue Op0 = Op->getOperand(0); + EVT ViaTy = Op0->getValueType(0); + SDLoc DL(Op); + + if 
(ResTy.getScalarSizeInBits() == ViaTy.getScalarSizeInBits()) {
+    // v4f32 => v4i32   v8f32 => v8i32
+    // v2f64 => v2i64   v4f64 => v4i64
+    // do nothing
+    Op0 = DAG.getNode(FPToSUI, DL, ResTy, Op0);
+  } else if (ResTy.getScalarSizeInBits() > ViaTy.getScalarSizeInBits()) {
+    // v4f32 => v4i32 => v4i64
+    Op0 = DAG.getNode(FPToSUI, DL, MVT::v4i32, Op0);
+    Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, {Op0, Op0});
+    Op0 = DAG.getNode(ExtOpcode, DL, MVT::v4i64, Op0);
+  } else {
+    SDValue Ops[4];
+    Ops[0] = DAG.getNode(FPToSUI, DL, MVT::i32,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0,
+                                     DAG.getConstant(0, DL, MVT::i64)));
+    Ops[1] = DAG.getNode(FPToSUI, DL, MVT::i32,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0,
+                                     DAG.getConstant(1, DL, MVT::i64)));
+    Ops[2] = DAG.getNode(FPToSUI, DL, MVT::i32,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0,
+                                     DAG.getConstant(2, DL, MVT::i64)));
+    Ops[3] = DAG.getNode(FPToSUI, DL, MVT::i32,
+                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Op0,
+                                     DAG.getConstant(3, DL, MVT::i64)));
+
+    Op0 = DAG.getBuildVector(MVT::v4i32, DL, makeArrayRef(Ops, 4));
+  }
+
+  return Op0;
+}
+
+// Lower VECTOR_SHUFFLE into SHF (if possible).
+//
+// SHF splits the vector into blocks of four elements, then shuffles these
+// elements according to a <4 x i2> constant (encoded as an integer immediate).
+//
+// It is therefore possible to lower into SHF when the mask takes the form:
+//   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
+// When undef's appear they are treated as if they were whatever value is
+// necessary in order to fit the above forms.
+//
+// For example:
+//   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
+//                      <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// is lowered to:
+//   (VSHUF4I_H $v0, $v1, 27)
+// where the 27 comes from:
+//   3 + (2 << 2) + (1 << 4) + (0 << 6)
+static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
+                                       SmallVector<int, 16> Indices,
+                                       SelectionDAG &DAG) {
+  int SHFIndices[4] = {-1, -1, -1, -1};
+
+  if (Indices.size() < 4)
+    return SDValue();
+
+  for (unsigned i = 0; i < 4; ++i) {
+    for (unsigned j = i; j < Indices.size(); j += 4) {
+      int Idx = Indices[j];
+
+      // Convert from vector index to 4-element subvector index
+      // If an index refers to an element outside of the subvector then give up
+      if (Idx != -1) {
+        Idx -= 4 * (j / 4);
+        if (Idx < 0 || Idx >= 4)
+          return SDValue();
+      }
+
+      // If the mask has an undef, replace it with the current index.
+      // Note that it might still be undef if the current index is also undef
+      if (SHFIndices[i] == -1)
+        SHFIndices[i] = Idx;
+
+      // Check that non-undef values are the same as in the mask. If they
+      // aren't then give up
+      if (!(Idx == -1 || Idx == SHFIndices[i]))
+        return SDValue();
+    }
+  }
+
+  // Calculate the immediate. Replace any remaining undefs with zero
+  APInt Imm(32, 0);
+  for (int i = 3; i >= 0; --i) {
+    int Idx = SHFIndices[i];
+
+    if (Idx == -1)
+      Idx = 0;
+
+    Imm <<= 2;
+    Imm |= Idx & 0x3;
+  }
+
+  SDLoc DL(Op);
+  return DAG.getNode(LoongArchISD::SHF, DL, ResTy,
+                     DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
+}
+
+/// Determine whether a range fits a regular pattern of values.
+/// This function accounts for the possibility of jumping over the End iterator.
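+///
+/// For example, Indices = <0, 4, 1, 5> with CheckStride = 2, ExpectedIndex = 0
+/// and ExpectedIndexStride = 1 checks only positions 0 and 2 (values 0 and 1)
+/// and therefore matches.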
+template <typename ValType>
+static bool
+fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
+                   unsigned CheckStride,
+                   typename SmallVectorImpl<ValType>::const_iterator End,
+                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
+  auto &I = Begin;
+
+  while (I != End) {
+    if (*I != -1 && *I != ExpectedIndex)
+      return false;
+    ExpectedIndex += ExpectedIndexStride;
+
+    // Incrementing past End is undefined behaviour so we must increment one
+    // step at a time and check for End at each step.
+    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
+      ; // Empty loop body.
+  }
+  return true;
+}
+
+// Determine whether VECTOR_SHUFFLE is a VREPLVEI.
+//
+// It is a VREPLVEI when the mask is:
+//   <x, x, x, ...>
+// where x is any valid index.
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
+static bool isVECTOR_SHUFFLE_VREPLVEI(SDValue Op, EVT ResTy,
+                                      SmallVector<int, 16> Indices,
+                                      SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  int SplatIndex = -1;
+  for (const auto &V : Indices) {
+    if (V != -1) {
+      SplatIndex = V;
+      break;
+    }
+  }
+
+  return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(),
+                                 SplatIndex, 0);
+}
+
+// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
+//
+// VPACKEV interleaves the even elements from each vector.
+//
+// It is possible to lower into VPACKEV when the mask consists of two of the
+// following forms interleaved:
+//   <0, 2, 4, ...>
+//   <n, n+2, n+4, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <0, 0, 2, 2, 4, 4, ...>
+//   <0, n, 2, n+2, 4, n+4, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPACKEV(SDValue Op, EVT ResTy,
+                                           SmallVector<int, 16> Indices,
+                                           SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  // Check even elements are taken from the even elements of one half or the
+  // other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  // Check odd elements are taken from the even elements of one half or the
+  // other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Vk, Vj);
+}
+
+// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
+//
+// VPACKOD interleaves the odd elements from each vector.
+//
+// It is possible to lower into VPACKOD when the mask consists of two of the
+// following forms interleaved:
+//   <1, 3, 5, ...>
+//   <n+1, n+3, n+5, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <1, 1, 3, 3, 5, 5, ...>
+//   <1, n+1, 3, n+3, 5, n+5, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
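+//
+// Illustrative v4i32 case (an added example, not from the original comment):
+// with n = 4, the mask <1, 5, 3, 7> takes the odd elements of operand 0 for
+// the even result positions and the odd elements of operand 1 (indices n+1,
+// n+3) for the odd result positions, so it lowers to VPACKOD.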
+static SDValue lowerVECTOR_SHUFFLE_VPACKOD(SDValue Op, EVT ResTy,
+                                           SmallVector<int, 16> Indices,
+                                           SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  // Check even elements are taken from the odd elements of one half or the
+  // other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  // Check odd elements are taken from the odd elements of one half or the
+  // other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Vk, Vj);
+}
+
+// Lower VECTOR_SHUFFLE into VILVL (if possible).
+//
+// VILVL interleaves consecutive elements from the right (lowest-indexed) half
+// of each vector.
+//
+// It is possible to lower into VILVL when the mask consists of two of the
+// following forms interleaved:
+//   <0, 1, 2, ...>
+//   <n, n+1, n+2, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <0, 0, 1, 1, 2, 2, ...>
+//   <0, n, 1, n+1, 2, n+2, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VILVL(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  // Check even elements are taken from the right (lowest-indexed) elements of
+  // one half or the other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  // Check odd elements are taken from the right (lowest-indexed) elements of
+  // one half or the other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Vk, Vj);
+}
+
+// Lower VECTOR_SHUFFLE into VILVH (if possible).
+//
+// VILVH interleaves consecutive elements from the left (highest-indexed) half
+// of each vector.
+//
+// It is possible to lower into VILVH when the mask consists of two of the
+// following forms interleaved:
+//   <x, x+1, x+2, ...>
+//   <n+x, n+x+1, n+x+2, ...>
+// where n is the number of elements in the vector and x is half n.
+// For example:
+//   <x, x, x+1, x+1, x+2, x+2, ...>
+//   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VILVH(SDValue Op, EVT ResTy,
+                                         SmallVector<int, 16> Indices,
+                                         SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  unsigned HalfSize = Indices.size() / 2;
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  // Check even elements are taken from the left (highest-indexed) elements of
+  // one half or the other and pick an operand accordingly.
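+  // Illustrative v8i16 case (an added example, not from the original
+  // comment): with n = 8 and x = 4, the mask <4, 12, 5, 13, 6, 14, 7, 15>
+  // interleaves the high halves of both operands and is accepted below.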
+  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize,
+                                   1))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  // Check odd elements are taken from the left (highest-indexed) elements of
+  // one half or the other and pick an operand accordingly.
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End,
+                                   Indices.size() + HalfSize, 1))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Vk, Vj);
+}
+
+// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
+//
+// VPICKEV copies the even elements of each vector into the result vector.
+//
+// It is possible to lower into VPICKEV when the mask consists of two of the
+// following forms concatenated:
+//   <0, 2, 4, ...>
+//   <n, n+2, n+4, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <0, 2, 4, ..., 0, 2, 4, ...>
+//   <0, 2, 4, ..., n, n+2, n+4, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPICKEV(SDValue Op, EVT ResTy,
+                                           SmallVector<int, 16> Indices,
+                                           SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &Mid = Indices.begin() + Indices.size() / 2;
+  const auto &End = Indices.end();
+
+  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Vk, Vj);
+}
+
+// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
+//
+// VPICKOD copies the odd elements of each vector into the result vector.
+//
+// It is possible to lower into VPICKOD when the mask consists of two of the
+// following forms concatenated:
+//   <1, 3, 5, ...>
+//   <n+1, n+3, n+5, ...>
+// where n is the number of elements in the vector.
+// For example:
+//   <1, 3, 5, ..., 1, 3, 5, ...>
+//   <1, 3, 5, ..., n+1, n+3, n+5, ...>
+//
+// When undef's appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPICKOD(SDValue Op, EVT ResTy,
+                                           SmallVector<int, 16> Indices,
+                                           SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Vj;
+  SDValue Vk;
+  const auto &Begin = Indices.begin();
+  const auto &Mid = Indices.begin() + Indices.size() / 2;
+  const auto &End = Indices.end();
+
+  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
+    Vj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
+    Vj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
+    Vk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
+    Vk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Vk, Vj);
+}
+
+// Lower VECTOR_SHUFFLE into VSHF.
+//
+// This mostly consists of converting the shuffle indices in Indices into a
+// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
+// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
+// if the type is v8i16 and all the indices are less than 8 then the second
+// operand is unused and can be replaced with anything. We choose to replace
+// it with the used operand since this reduces the number of instructions
+// overall.
+static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  SmallVector<SDValue, 16> Ops;
+  SDValue Op0;
+  SDValue Op1;
+  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
+  EVT MaskEltTy = MaskVecTy.getVectorElementType();
+  bool Using1stVec = false;
+  bool Using2ndVec = false;
+  SDLoc DL(Op);
+  int ResTyNumElts = ResTy.getVectorNumElements();
+
+  for (int i = 0; i < ResTyNumElts; ++i) {
+    // Idx == -1 means UNDEF
+    int Idx = Indices[i];
+
+    if (0 <= Idx && Idx < ResTyNumElts)
+      Using1stVec = true;
+    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
+      Using2ndVec = true;
+  }
+
+  for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
+       ++I)
+    Ops.push_back(DAG.getTargetConstant(*I, DL, MaskEltTy));
+
+  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
+
+  if (Using1stVec && Using2ndVec) {
+    Op0 = Op->getOperand(0);
+    Op1 = Op->getOperand(1);
+  } else if (Using1stVec)
+    Op0 = Op1 = Op->getOperand(0);
+  else if (Using2ndVec)
+    Op0 = Op1 = Op->getOperand(1);
+  else
+    llvm_unreachable("shuffle vector mask references neither vector operand?");
+
+  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
+  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
+  // VSHF concatenates the vectors in a bitwise fashion:
+  //   <0b00, 0b01> + <0b10, 0b11> ->
+  //   0b0100       + 0b1110       -> 0b01001110
+  //                                  <0b10, 0b11, 0b00, 0b01>
+  // We must therefore swap the operands to get the correct result.
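+  // Illustrative case (an added note, not from the original patch): for a
+  // v4i32 shuffle with mask <4, 0, 5, 1>, MaskVec is built as
+  //   (BUILD_VECTOR 4, 0, 5, 1)
+  // and, because of the bitwise concatenation described above, the node below
+  // is emitted as VSHF(MaskVec, Op1, Op0) rather than VSHF(MaskVec, Op0, Op1).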
+  return DAG.getNode(LoongArchISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVILVL(SDValue Op, EVT ResTy,
+                                          SmallVector<int, 16> Indices,
+                                          SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Indices.size(),
+                                   1) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Indices.size() + HalfSize, 1))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
+                                   Indices.size(), 1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Indices.size() + HalfSize, 1))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVL, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVILVH(SDValue Op, EVT ResTy,
+                                          SmallVector<int, 16> Indices,
+                                          SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  unsigned HalfSize = Indices.size() / 2;
+  unsigned LeftSize = HalfSize / 2;
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+
+  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
+                              1) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize,
+                              1))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
+                                   Indices.size() + HalfSize - LeftSize, 1) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Indices.size() + HalfSize + LeftSize, 1))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
+                              HalfSize - LeftSize, 1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                              HalfSize + LeftSize, 1))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
+                                   Indices.size() + HalfSize - LeftSize, 1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Indices.size() + HalfSize + LeftSize, 1))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVH, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(SDValue Op, EVT ResTy,
+                                            SmallVector<int, 16> Indices,
+                                            SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 2))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Indices.size() + HalfSize, 2))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 2))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Indices.size() + HalfSize, 2))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKEV, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static SDValue
lowerVECTOR_SHUFFLE_XVPACKOD(SDValue Op, EVT ResTy,
+                             SmallVector<int, 16> Indices,
+                             SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &End = Indices.end();
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + 1, 2))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Indices.size() + HalfSize + 1, 2))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + 1, 2))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1,
+                                   2) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Indices.size() + HalfSize + 1, 2))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKOD, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static bool isVECTOR_SHUFFLE_XVREPLVEI(SDValue Op, EVT ResTy,
+                                       SmallVector<int, 16> Indices,
+                                       SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+  unsigned HalfSize = Indices.size() / 2;
+
+  for (unsigned i = 0; i < HalfSize; i++) {
+    if (Indices[i] == -1 || Indices[HalfSize + i] == -1)
+      return false;
+    if (Indices[0] != Indices[i] || Indices[HalfSize] != Indices[HalfSize + i])
+      return false;
+  }
+  return true;
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(SDValue Op, EVT ResTy,
+                                            SmallVector<int, 16> Indices,
+                                            SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &LeftMid = Indices.begin() + Indices.size() / 4;
+  const auto &End = Indices.end();
+  const auto &RightMid = Indices.end() - Indices.size() / 4;
+  const auto &Mid = Indices.begin() + Indices.size() / 2;
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Indices.size(), 2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid,
+                                   Indices.size() + HalfSize, 2))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Indices.size(), 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End,
+                                   Indices.size() + HalfSize, 2))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKEV, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(SDValue Op, EVT ResTy,
+                                            SmallVector<int, 16> Indices,
+                                            SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+
+  SDValue Xj;
+  SDValue Xk;
+  const auto &Begin = Indices.begin();
+  const auto &LeftMid = Indices.begin() + Indices.size() / 4;
+  const auto &Mid = Indices.begin() + Indices.size() / 2;
+  const auto &RightMid = Indices.end() - Indices.size() / 4;
+  const auto &End = Indices.end();
+  unsigned HalfSize = Indices.size() / 2;
+
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
+    Xj = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Indices.size() + 1,
+                                   2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid,
+                                   Indices.size() + HalfSize + 1, 2))
+    Xj = Op->getOperand(1);
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
+    Xk = Op->getOperand(0);
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Indices.size() + 1, 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End,
+                                   Indices.size() + HalfSize + 1, 2))
+    Xk = Op->getOperand(1);
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKOD, SDLoc(Op), ResTy, Xk, Xj);
+}
+
+static SDValue lowerVECTOR_SHUFFLE_XSHF(SDValue Op, EVT ResTy,
+                                        SmallVector<int, 16> Indices,
+                                        SelectionDAG &DAG) {
+  int SHFIndices[4] = {-1, -1, -1, -1};
+
+  // If the size of the mask is 4, it should not be converted to an SHF node,
+  // because SHF only has b/h/w forms and no d form.
+  if (Indices.size() <= 4)
+    return SDValue();
+
+  int HalfSize = Indices.size() / 2;
+  for (int i = 0; i < 4; ++i) {
+    for (int j = i; j < HalfSize; j += 4) {
+      int Idx = Indices[j];
+      // Check that the high 128-bit half uses the same pattern as the low
+      // half, offset by HalfSize.
+      if (Idx + HalfSize != Indices[j + HalfSize])
+        return SDValue();
+
+      // Convert from vector index to 4-element subvector index
+      // If an index refers to an element outside of the subvector then give up
+      if (Idx != -1) {
+        Idx -= 4 * (j / 4);
+        if (Idx < 0 || Idx >= 4)
+          return SDValue();
+      }
+
+      // If the mask has an undef, replace it with the current index.
+      // Note that it might still be undef if the current index is also undef
+      if (SHFIndices[i] == -1)
+        SHFIndices[i] = Idx;
+
+      // Check that non-undef values are the same as in the mask. If they
+      // aren't then give up
+      if (!(Idx == -1 || Idx == SHFIndices[i]))
+        return SDValue();
+    }
+  }
+
+  // Calculate the immediate. Replace any remaining undefs with zero
+  APInt Imm(32, 0);
+  for (int i = 3; i >= 0; --i) {
+    int Idx = SHFIndices[i];
+
+    if (Idx == -1)
+      Idx = 0;
+
+    Imm <<= 2;
+    Imm |= Idx & 0x3;
+  }
+  SDLoc DL(Op);
+  return DAG.getNode(LoongArchISD::SHF, DL, ResTy,
+                     DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
+}
+
+static bool isConstantOrUndef(const SDValue Op) {
+  if (Op->isUndef())
+    return true;
+  if (isa<ConstantSDNode>(Op))
+    return true;
+  if (isa<ConstantFPSDNode>(Op))
+    return true;
+  return false;
+}
+
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+    if (isConstantOrUndef(Op->getOperand(i)))
+      return true;
+  return false;
+}
+
+static bool isLASXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) {
+  switch (SplatBitSize) {
+  default:
+    return false;
+  case 8:
+    ViaVecTy = MVT::v32i8;
+    break;
+  case 16:
+    ViaVecTy = MVT::v16i16;
+    break;
+  case 32:
+    ViaVecTy = MVT::v8i32;
+    break;
+  case 64:
+    ViaVecTy = MVT::v4i64;
+    break;
+  case 128:
+    // There's no fill.q to fall back on for 128-bit values
+    return false;
+  }
+
+  return true;
+}
+
+static bool isLSXBySplatBitSize(unsigned SplatBitSize, EVT &ViaVecTy) {
+  switch (SplatBitSize) {
+  default:
+    return false;
+  case 8:
+    ViaVecTy = MVT::v16i8;
+    break;
+  case 16:
+    ViaVecTy = MVT::v8i16;
+    break;
+  case 32:
+    ViaVecTy = MVT::v4i32;
+    break;
+  case 64:
+    // There's no fill.d to fall back on for 64-bit values
+    return false;
+  }
+
+  return true;
+}
+
+bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { return true; }
+
+bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { return true; }
+
+void LoongArchTargetLowering::LowerOperationWrapper(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+  if (!Res.getNode())
+    return;
+
+  assert((N->getNumValues() <= Res->getNumValues()) &&
+         "Lowering returned the wrong number of results!");
+
+  for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
+    Results.push_back(Res.getValue(I));
+}
+
+void LoongArchTargetLowering::ReplaceNodeResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+  SDLoc DL(N);
+  switch (N->getOpcode()) {
+  default:
+    return LowerOperationWrapper(N, Results, DAG);
+  case LoongArchISD::VABSD:
+  case LoongArchISD::UVABSD: {
+    EVT VT = N->getValueType(0);
+    assert(VT.isVector() && "Unexpected VT");
+    if (getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) {
+      EVT PromoteVT;
+      if (VT.getVectorNumElements() == 2)
+        PromoteVT = MVT::v2i64;
+      else if (VT.getVectorNumElements() == 4)
+        PromoteVT = MVT::v4i32;
+      else if (VT.getVectorNumElements() == 8)
+        PromoteVT = MVT::v8i16;
+      else
+        return;
+
+      SDValue N0 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, PromoteVT, N->getOperand(0));
+      SDValue N1 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, PromoteVT, N->getOperand(1));
+
+      SDValue Vabsd =
+          DAG.getNode(N->getOpcode(), DL, PromoteVT, N0, N1, N->getOperand(2));
+
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Vabsd));
+    }
+    return;
+  }
+  }
+}
+
+SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+  case ISD::STORE:
+    return lowerSTORE(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN:
+    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN:
+    return lowerINTRINSIC_W_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_VOID:
+    return lowerINTRINSIC_VOID(Op, DAG);
+  case ISD::EXTRACT_VECTOR_ELT:
+    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT:
+    return lowerINSERT_VECTOR_ELT(Op, DAG);
+  case ISD::BUILD_VECTOR:
+    return lowerBUILD_VECTOR(Op, DAG);
+  case ISD::VECTOR_SHUFFLE:
+    return lowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::UINT_TO_FP:
+    return lowerUINT_TO_FP(Op, DAG);
+  case ISD::SINT_TO_FP:
+    return lowerSINT_TO_FP(Op, DAG);
+  case ISD::FP_TO_UINT:
+    return lowerFP_TO_UINT(Op, DAG);
+  case ISD::FP_TO_SINT:
+    return lowerFP_TO_SINT(Op, DAG);
+  case ISD::BRCOND:
+    return lowerBRCOND(Op, DAG);
+  case ISD::ConstantPool:
+    return lowerConstantPool(Op, DAG);
+  case ISD::GlobalAddress:
+    return lowerGlobalAddress(Op, DAG);
+  case ISD::BlockAddress:
+    return lowerBlockAddress(Op, DAG);
+  case ISD::GlobalTLSAddress:
+    return lowerGlobalTLSAddress(Op, DAG);
+  case ISD::JumpTable:
+    return lowerJumpTable(Op, DAG);
+  case ISD::SELECT:
+    return lowerSELECT(Op, DAG);
+  case ISD::SETCC:
+    return lowerSETCC(Op, DAG);
+  case ISD::VASTART:
+    return lowerVASTART(Op, DAG);
+  case ISD::VAARG:
+    return lowerVAARG(Op, DAG);
+  case ISD::FRAMEADDR:
+    return lowerFRAMEADDR(Op, DAG);
+  case ISD::RETURNADDR:
+    return lowerRETURNADDR(Op, DAG);
+  case ISD::EH_RETURN:
+    return lowerEH_RETURN(Op, DAG);
+  case ISD::ATOMIC_FENCE:
+    return lowerATOMIC_FENCE(Op, DAG);
+  case ISD::SHL_PARTS:
+    return lowerShiftLeftParts(Op, DAG);
+  case ISD::SRA_PARTS:
+    return lowerShiftRightParts(Op, DAG, true);
+  case ISD::SRL_PARTS:
+    return lowerShiftRightParts(Op, DAG, false);
+  case ISD::EH_DWARF_CFA:
+    return lowerEH_DWARF_CFA(Op, DAG);
+  }
+  return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+//  Lower helper functions
+//===----------------------------------------------------------------------===//
+
+template <class NodeTy>
+SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
+                                         bool IsLocal) const {
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+
+  if (isPositionIndependent()) {
+    SDValue Addr = getTargetNode(N, Ty,
DAG, 0U); + if (IsLocal) + // Use PC-relative addressing to access the symbol. + return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr), + 0); + + // Use PC-relative addressing to access the GOT for this symbol, then load + // the address from the GOT. + return SDValue(DAG.getMachineNode(LoongArch::LoadAddrGlobal, DL, Ty, Addr), + 0); + } + + SDValue Addr = getTargetNode(N, Ty, DAG, 0U); + return SDValue(DAG.getMachineNode(LoongArch::LoadAddrLocal, DL, Ty, Addr), 0); +} + +// addLiveIn - This helper function adds the specified physical register to the +// MachineFunction as a live in value. It also creates a corresponding +// virtual register for it. +static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, + const TargetRegisterClass *RC) { + unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); + MF.getRegInfo().addLiveIn(PReg, VReg); + return VReg; +} + +static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, + MachineBasicBlock &MBB, + const TargetInstrInfo &TII, + bool Is64Bit) { + if (NoZeroDivCheck) + return &MBB; + + // Insert pseudo instruction(PseudoTEQ), will expand: + // beq $divisor_reg, $zero, 8 + // break 7 + MachineBasicBlock::iterator I(MI); + MachineInstrBuilder MIB; + MachineOperand &Divisor = MI.getOperand(2); + unsigned TeqOp = LoongArch::PseudoTEQ; + + MIB = BuildMI(MBB, std::next(I), MI.getDebugLoc(), TII.get(TeqOp)) + .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())); + + // Use the 32-bit sub-register if this is a 64-bit division. + //if (Is64Bit) + // MIB->getOperand(0).setSubReg(LoongArch::sub_32); + + // Clear Divisor's kill flag. + Divisor.setIsKill(false); + + // We would normally delete the original instruction here but in this case + // we only needed to inject an additional instruction rather than replace it. 
+ + return &MBB; +} + +MachineBasicBlock * +LoongArchTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected instr type to insert"); + case LoongArch::FILL_FW_PSEUDO: + return emitFILL_FW(MI, BB); + case LoongArch::FILL_FD_PSEUDO: + return emitFILL_FD(MI, BB); + case LoongArch::SNZ_B_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_B); + case LoongArch::SNZ_H_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_H); + case LoongArch::SNZ_W_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_W); + case LoongArch::SNZ_D_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETALLNEZ_D); + case LoongArch::SNZ_V_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETNEZ_V); + case LoongArch::SZ_B_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_B); + case LoongArch::SZ_H_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_H); + case LoongArch::SZ_W_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_W); + case LoongArch::SZ_D_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETANYEQZ_D); + case LoongArch::SZ_V_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::VSETEQZ_V); + case LoongArch::XSNZ_B_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_B); + case LoongArch::XSNZ_H_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_H); + case LoongArch::XSNZ_W_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_W); + case LoongArch::XSNZ_D_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETALLNEZ_D); + case LoongArch::XSNZ_V_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETNEZ_V); + case LoongArch::XSZ_B_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_B); + case LoongArch::XSZ_H_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_H); + case LoongArch::XSZ_W_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_W); + case LoongArch::XSZ_D_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETANYEQZ_D); + case LoongArch::XSZ_V_PSEUDO: + return emitLSXCBranchPseudo(MI, BB, LoongArch::XVSETEQZ_V); + case LoongArch::INSERT_FW_PSEUDO: + return emitINSERT_FW(MI, BB); + case LoongArch::INSERT_FD_PSEUDO: + return emitINSERT_FD(MI, BB); + case LoongArch::XINSERT_H_PSEUDO: + return emitXINSERT_BH(MI, BB, 2); + case LoongArch::XCOPY_FW_PSEUDO: + return emitXCOPY_FW(MI, BB); + case LoongArch::XCOPY_FD_PSEUDO: + return emitXCOPY_FD(MI, BB); + case LoongArch::XINSERT_FW_PSEUDO: + return emitXINSERT_FW(MI, BB); + case LoongArch::COPY_FW_PSEUDO: + return emitCOPY_FW(MI, BB); + case LoongArch::XFILL_FW_PSEUDO: + return emitXFILL_FW(MI, BB); + case LoongArch::XFILL_FD_PSEUDO: + return emitXFILL_FD(MI, BB); + case LoongArch::COPY_FD_PSEUDO: + return emitCOPY_FD(MI, BB); + case LoongArch::XINSERT_FD_PSEUDO: + return emitXINSERT_FD(MI, BB); + case LoongArch::XINSERT_B_PSEUDO: + return emitXINSERT_BH(MI, BB, 1); + case LoongArch::CONCAT_VECTORS_B_PSEUDO: + return emitCONCAT_VECTORS(MI, BB, 1); + case LoongArch::CONCAT_VECTORS_H_PSEUDO: + return emitCONCAT_VECTORS(MI, BB, 2); + case LoongArch::CONCAT_VECTORS_W_PSEUDO: + case LoongArch::CONCAT_VECTORS_FW_PSEUDO: + return emitCONCAT_VECTORS(MI, BB, 4); + case LoongArch::CONCAT_VECTORS_D_PSEUDO: + case LoongArch::CONCAT_VECTORS_FD_PSEUDO: + return emitCONCAT_VECTORS(MI, BB, 8); + case 
LoongArch::XCOPY_FW_GPR_PSEUDO: + return emitXCOPY_FW_GPR(MI, BB); + + case LoongArch::ATOMIC_LOAD_ADD_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_ADD_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_ADD_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_ADD_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_AND_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_AND_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_AND_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_AND_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_OR_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_OR_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_OR_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_OR_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_XOR_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_XOR_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_XOR_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_XOR_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_NAND_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_NAND_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_NAND_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_NAND_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_SUB_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_SUB_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_SUB_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_SUB_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_SWAP_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_SWAP_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_SWAP_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_SWAP_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::XINSERT_B_VIDX_PSEUDO: + case LoongArch::XINSERT_B_VIDX64_PSEUDO: + return emitXINSERT_B(MI, BB); + case LoongArch::INSERT_H_VIDX64_PSEUDO: + return emitINSERT_H_VIDX(MI, BB); + case LoongArch::XINSERT_FW_VIDX_PSEUDO: + return emitXINSERT_DF_VIDX(MI, BB, false); + case LoongArch::XINSERT_FW_VIDX64_PSEUDO: + return emitXINSERT_DF_VIDX(MI, BB, true); + + case LoongArch::ATOMIC_LOAD_MAX_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_MAX_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_MAX_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_MAX_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_MIN_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_MIN_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_MIN_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_MIN_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_UMAX_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_UMAX_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_UMAX_I32: + return emitAtomicBinary(MI, BB); + case 
LoongArch::ATOMIC_LOAD_UMAX_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::ATOMIC_LOAD_UMIN_I8: + return emitAtomicBinaryPartword(MI, BB, 1); + case LoongArch::ATOMIC_LOAD_UMIN_I16: + return emitAtomicBinaryPartword(MI, BB, 2); + case LoongArch::ATOMIC_LOAD_UMIN_I32: + return emitAtomicBinary(MI, BB); + case LoongArch::ATOMIC_LOAD_UMIN_I64: + return emitAtomicBinary(MI, BB); + + case LoongArch::I8_ATOMIC_CMP_SWAP_ACQUIRE: + case LoongArch::I8_ATOMIC_CMP_SWAP_ACQ_REL: + case LoongArch::I8_ATOMIC_CMP_SWAP_MONOTONIC: + case LoongArch::I8_ATOMIC_CMP_SWAP_RELEASE: + case LoongArch::I8_ATOMIC_CMP_SWAP_SEQ_CST: + return emitAtomicCmpSwapPartword(MI, BB, 1); + case LoongArch::I16_ATOMIC_CMP_SWAP_ACQUIRE: + case LoongArch::I16_ATOMIC_CMP_SWAP_ACQ_REL: + case LoongArch::I16_ATOMIC_CMP_SWAP_MONOTONIC: + case LoongArch::I16_ATOMIC_CMP_SWAP_RELEASE: + case LoongArch::I16_ATOMIC_CMP_SWAP_SEQ_CST: + return emitAtomicCmpSwapPartword(MI, BB, 2); + case LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE: + case LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL: + case LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC: + case LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE: + case LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST: + case LoongArch::I64_ATOMIC_CMP_SWAP_ACQUIRE: + case LoongArch::I64_ATOMIC_CMP_SWAP_ACQ_REL: + case LoongArch::I64_ATOMIC_CMP_SWAP_MONOTONIC: + case LoongArch::I64_ATOMIC_CMP_SWAP_RELEASE: + case LoongArch::I64_ATOMIC_CMP_SWAP_SEQ_CST: + return emitAtomicCmpSwap(MI, BB); + + case LoongArch::PseudoSELECT_I: + case LoongArch::PseudoSELECT_I64: + case LoongArch::PseudoSELECT_S: + case LoongArch::PseudoSELECT_D64: + return emitPseudoSELECT(MI, BB, false, LoongArch::BNE32); + + case LoongArch::PseudoSELECTFP_T_I: + case LoongArch::PseudoSELECTFP_T_I64: + return emitPseudoSELECT(MI, BB, true, LoongArch::BCNEZ); + + case LoongArch::PseudoSELECTFP_F_I: + case LoongArch::PseudoSELECTFP_F_I64: + return emitPseudoSELECT(MI, BB, true, LoongArch::BCEQZ); + case LoongArch::DIV_W: + case LoongArch::DIV_WU: + case LoongArch::MOD_W: + case LoongArch::MOD_WU: + return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false); + case LoongArch::DIV_D: + case LoongArch::DIV_DU: + case LoongArch::MOD_D: + case LoongArch::MOD_DU: + return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true); + } +} + +MachineBasicBlock *LoongArchTargetLowering::emitXINSERT_DF_VIDX( + MachineInstr &MI, MachineBasicBlock *BB, bool IsGPR64) const { + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned insertOp; + insertOp = IsGPR64 ? LoongArch::XINSERT_FW_VIDX64_PSEUDO_POSTRA + : LoongArch::XINSERT_FW_VIDX_PSEUDO_POSTRA; + + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcVecReg = MI.getOperand(1).getReg(); + unsigned LaneReg = MI.getOperand(2).getReg(); + unsigned SrcValReg = MI.getOperand(3).getReg(); + unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); + + MachineBasicBlock::iterator II(MI); + + unsigned VecCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); + unsigned LaneCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); + unsigned ValCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); + + const TargetRegisterClass *RC = + IsGPR64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; + unsigned RI = RegInfo.createVirtualRegister(RC); + + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + BuildMI(*BB, II, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) + .addImm(0) + .addReg(SrcValReg) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, II, DL, TII->get(LoongArch::XVPICKVE2GR_W), Rj) + .addReg(Xj) + .addImm(0); + + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); + + BuildMI(*BB, II, DL, TII->get(insertOp)) + .addReg(DstReg, RegState::Define | RegState::EarlyClobber) + .addReg(VecCopy) + .addReg(LaneCopy) + .addReg(ValCopy) + .addReg(Dest, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RI, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Rj, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); + + return BB; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitINSERT_H_VIDX(MachineInstr &MI, + MachineBasicBlock *BB) const { + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned insertOp; + unsigned isGP64 = 0; + switch (MI.getOpcode()) { + case LoongArch::INSERT_H_VIDX64_PSEUDO: + isGP64 = 1; + insertOp = LoongArch::INSERT_H_VIDX64_PSEUDO_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo vector for replacement!"); + } + + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcVecReg = MI.getOperand(1).getReg(); + unsigned LaneReg = MI.getOperand(2).getReg(); + unsigned SrcValReg = MI.getOperand(3).getReg(); + unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); + + MachineBasicBlock::iterator II(MI); + + unsigned VecCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); + unsigned LaneCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); + unsigned ValCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); + + const TargetRegisterClass *RC = + isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; + unsigned RI = RegInfo.createVirtualRegister(RC); + + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); + + BuildMI(*BB, II, DL, TII->get(insertOp)) + .addReg(DstReg, RegState::Define | RegState::EarlyClobber) + .addReg(VecCopy) + .addReg(LaneCopy) + .addReg(ValCopy) + .addReg(Dest, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RI, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); + + return BB; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitXINSERT_B(MachineInstr &MI, + MachineBasicBlock *BB) const { + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned insertOp; + unsigned isGP64 = 0; + switch (MI.getOpcode()) { + case LoongArch::XINSERT_B_VIDX64_PSEUDO: + isGP64 = 1; + insertOp = LoongArch::XINSERT_B_VIDX64_PSEUDO_POSTRA; + break; + case LoongArch::XINSERT_B_VIDX_PSEUDO: + insertOp = LoongArch::XINSERT_B_VIDX_PSEUDO_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo vector for replacement!"); + } + + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcVecReg = MI.getOperand(1).getReg(); + unsigned LaneReg = MI.getOperand(2).getReg(); + unsigned SrcValReg = MI.getOperand(3).getReg(); + unsigned Dest = RegInfo.createVirtualRegister(RegInfo.getRegClass(DstReg)); + + MachineBasicBlock::iterator II(MI); + + unsigned VecCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcVecReg)); + unsigned LaneCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(LaneReg)); + unsigned ValCopy = + RegInfo.createVirtualRegister(RegInfo.getRegClass(SrcValReg)); + const TargetRegisterClass *RC = + isGP64 ? 
&LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; + unsigned Rimm = RegInfo.createVirtualRegister(RC); + unsigned R4r = RegInfo.createVirtualRegister(RC); + unsigned Rib = RegInfo.createVirtualRegister(RC); + unsigned Ris = RegInfo.createVirtualRegister(RC); + unsigned R7b1 = RegInfo.createVirtualRegister(RC); + unsigned R7b2 = RegInfo.createVirtualRegister(RC); + unsigned R7b3 = RegInfo.createVirtualRegister(RC); + unsigned RI = RegInfo.createVirtualRegister(RC); + + unsigned R7r80_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r80l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r81_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r81l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r82_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R7r82l_3 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned R70 = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned tmp_Dst73 = + RegInfo.createVirtualRegister(&LoongArch::LASX256BRegClass); + + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), VecCopy).addReg(SrcVecReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), LaneCopy).addReg(LaneReg); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), ValCopy).addReg(SrcValReg); + + BuildMI(*BB, II, DL, TII->get(insertOp)) + .addReg(DstReg, RegState::Define | RegState::EarlyClobber) + .addReg(VecCopy) + .addReg(LaneCopy) + .addReg(ValCopy) + .addReg(Dest, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R4r, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Rib, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Ris, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7b1, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7b2, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7b3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r80_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r80l_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r81_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r81l_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r82_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R7r82l_3, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(RI, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(tmp_Dst73, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(Rimm, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead) + .addReg(R70, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + + MI.eraseFromParent(); + + return BB; +} + +const TargetRegisterClass * +LoongArchTargetLowering::getRepRegClassFor(MVT VT) const { + return TargetLowering::getRepRegClassFor(VT); +} + +// This function also handles LoongArch::ATOMIC_SWAP_I32 (when BinOpcode == 0), and +// LoongArch::ATOMIC_LOAD_NAND_I32 (when Nand == true) 
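+// As an illustration (an added sketch, not the literal post-RA expansion in
+// this patch), ATOMIC_LOAD_ADD_I32_POSTRA conceptually becomes an LL/SC
+// retry loop such as:
+//   retry:
+//     ll.w   $old, $ptr, 0         # linked load of the current value
+//     add.w  $scratch, $old, $incr
+//     sc.w   $scratch, $ptr, 0     # conditional store; writes 1 on success
+//     beqz   $scratch, retry       # retry if the store-conditional failed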
+MachineBasicBlock * +LoongArchTargetLowering::emitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *BB) const { + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned AtomicOp; + switch (MI.getOpcode()) { + case LoongArch::ATOMIC_LOAD_ADD_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_SUB_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_AND_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_AND_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_OR_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_OR_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_XOR_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_NAND_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I32_POSTRA; + break; + case LoongArch::ATOMIC_SWAP_I32: + AtomicOp = LoongArch::ATOMIC_SWAP_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MAX_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MIN_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I32: + AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I32_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_ADD_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I64_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_SUB_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I64_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_AND_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_AND_I64_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_OR_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_OR_I64_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_XOR_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I64_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_NAND_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I64_POSTRA; + break; + case LoongArch::ATOMIC_SWAP_I64: + AtomicOp = LoongArch::ATOMIC_SWAP_I64_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MAX_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I64_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MIN_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I64_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I64_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I64: + AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I64_POSTRA; + break; + default: + llvm_unreachable("Unknown pseudo atomic for replacement!"); + } + + unsigned OldVal = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned Incr = MI.getOperand(2).getReg(); + unsigned Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); + + MachineBasicBlock::iterator II(MI); + + // The scratch registers here with the EarlyClobber | Define | Implicit + // flags is used to persuade the register allocator and the machine + // verifier to accept the usage of this register. This has to be a real + // register which has an UNDEF value but is dead after the instruction which + // is unique among the registers chosen for the instruction. + + // The EarlyClobber flag has the semantic properties that the operand it is + // attached to is clobbered before the rest of the inputs are read. Hence it + // must be unique among the operands to the instruction. 
+  // The Define flag is needed to convince the machine verifier that an Undef
+  // value isn't a problem.
+  // The Dead flag is needed as the value in scratch isn't used by any other
+  // instruction. Kill isn't used as Dead is more precise.
+  // The implicit flag is here due to the interaction between the other flags
+  // and the machine verifier.
+
+  // For correctness purposes, a new pseudo is introduced here. We need this
+  // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence
+  // that is spread over more than one basic block. A register allocator (or
+  // in fact any codegen pass) that introduces a store can violate the
+  // expectations of the hardware.
+  //
+  // An atomic read-modify-write sequence starts with a linked load
+  // instruction and ends with a store conditional instruction. The atomic
+  // read-modify-write sequence fails if any of the following conditions
+  // occur between the execution of ll and sc:
+  //   * A coherent store is completed by another process or coherent I/O
+  //     module into the block of synchronizable physical memory containing
+  //     the word. The size and alignment of the block is
+  //     implementation-dependent.
+  //   * A coherent store is executed between an LL and SC sequence on the
+  //     same processor to the block of synchronizable physical memory
+  //     containing the word.
+  //
+
+  unsigned PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr));
+  unsigned IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr));
+
+  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), IncrCopy).addReg(Incr);
+  BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr);
+
+  BuildMI(*BB, II, DL, TII->get(AtomicOp))
+      .addReg(OldVal, RegState::Define | RegState::EarlyClobber)
+      .addReg(PtrCopy)
+      .addReg(IncrCopy)
+      .addReg(Scratch, RegState::Define | RegState::EarlyClobber |
+                           RegState::Implicit | RegState::Dead);
+
+  MI.eraseFromParent();
+
+  return BB;
+}
+
+MachineBasicBlock *LoongArchTargetLowering::emitSignExtendToI32InReg(
+    MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg,
+    unsigned SrcReg) const {
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const DebugLoc &DL = MI.getDebugLoc();
+  if (Size == 1) {
+    BuildMI(BB, DL, TII->get(LoongArch::EXT_W_B32), DstReg).addReg(SrcReg);
+    return BB;
+  }
+
+  if (Size == 2) {
+    BuildMI(BB, DL, TII->get(LoongArch::EXT_W_H32), DstReg).addReg(SrcReg);
+    return BB;
+  }
+
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+  unsigned ScrReg = RegInfo.createVirtualRegister(RC);
+
+  assert(Size < 32);
+  int64_t ShiftImm = 32 - (Size * 8);
+
+  BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ScrReg)
+      .addReg(SrcReg)
+      .addImm(ShiftImm);
+  BuildMI(BB, DL, TII->get(LoongArch::SRAI_W), DstReg)
+      .addReg(ScrReg)
+      .addImm(ShiftImm);
+
+  return BB;
+}
+
+MachineBasicBlock *LoongArchTargetLowering::emitAtomicBinaryPartword(
+    MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
+  assert((Size == 1 || Size == 2) &&
+         "Unsupported size for EmitAtomicBinaryPartial.");
+
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+  const bool ArePtrs64bit = ABI.ArePtrs64bit();
+  const TargetRegisterClass *RCp =
+      getRegClassFor(ArePtrs64bit ?
MVT::i64 : MVT::i32); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned Incr = MI.getOperand(2).getReg(); + + unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); + unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); + unsigned Mask = RegInfo.createVirtualRegister(RC); + unsigned Mask2 = RegInfo.createVirtualRegister(RC); + unsigned Incr2 = RegInfo.createVirtualRegister(RC); + unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); + unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); + unsigned MaskUpper = RegInfo.createVirtualRegister(RC); + unsigned MaskUppest = RegInfo.createVirtualRegister(RC); + unsigned Scratch = RegInfo.createVirtualRegister(RC); + unsigned Scratch2 = RegInfo.createVirtualRegister(RC); + unsigned Scratch3 = RegInfo.createVirtualRegister(RC); + unsigned Scratch4 = RegInfo.createVirtualRegister(RC); + unsigned Scratch5 = RegInfo.createVirtualRegister(RC); + + unsigned AtomicOp = 0; + switch (MI.getOpcode()) { + case LoongArch::ATOMIC_LOAD_NAND_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_NAND_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_NAND_I16_POSTRA; + break; + case LoongArch::ATOMIC_SWAP_I8: + AtomicOp = LoongArch::ATOMIC_SWAP_I8_POSTRA; + break; + case LoongArch::ATOMIC_SWAP_I16: + AtomicOp = LoongArch::ATOMIC_SWAP_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MAX_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MAX_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_MAX_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MIN_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_MIN_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_MIN_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMAX_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_UMAX_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_UMIN_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_UMIN_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_ADD_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_ADD_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_ADD_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_SUB_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_SUB_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_SUB_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_AND_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_AND_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_AND_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_AND_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_OR_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_OR_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_OR_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_OR_I16_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_XOR_I8: + AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I8_POSTRA; + break; + case LoongArch::ATOMIC_LOAD_XOR_I16: + AtomicOp = LoongArch::ATOMIC_LOAD_XOR_I16_POSTRA; + break; + default: + llvm_unreachable("Unknown subword atomic pseudo for expansion!"); + } + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = 
++BB->getIterator(); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(exitMBB, BranchProbability::getOne()); + + // thisMBB: + // addiu masklsb2,$0,-4 # 0xfffffffc + // and alignedaddr,ptr,masklsb2 + // andi ptrlsb2,ptr,3 + // sll shiftamt,ptrlsb2,3 + // ori maskupper,$0,255 # 0xff + // sll mask,maskupper,shiftamt + // nor mask2,$0,mask + // sll incr2,incr,shiftamt + + int64_t MaskImm = (Size == 1) ? 255 : 4095; + BuildMI(BB, DL, TII->get(ABI.GetPtrAddiOp()), MaskLSB2) + .addReg(ABI.GetNullPtr()).addImm(-4); + BuildMI(BB, DL, TII->get(ABI.GetPtrAndOp()), AlignedAddr) + .addReg(Ptr).addReg(MaskLSB2); + BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2) + .addReg(Ptr, 0, ArePtrs64bit ? LoongArch::sub_32 : 0).addImm(3); + BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3); + + if(MaskImm==4095){ + BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf); + BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) + .addReg(MaskUppest).addImm(MaskImm); + } + else{ + BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) + .addReg(LoongArch::ZERO).addImm(MaskImm); + } + + BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask) + .addReg(MaskUpper).addReg(ShiftAmt); + BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask); + BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Incr2).addReg(Incr).addReg(ShiftAmt); + + + // The purposes of the flags on the scratch registers is explained in + // emitAtomicBinary. In summary, we need a scratch register which is going to + // be undef, that is unique among registers chosen for the instruction. + + BuildMI(BB, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(AlignedAddr) + .addReg(Incr2) + .addReg(Mask) + .addReg(Mask2) + .addReg(ShiftAmt) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch3, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch4, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch5, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit); + + MI.eraseFromParent(); // The instruction is gone now. + + return exitMBB; +} + +// Lower atomic compare and swap to a pseudo instruction, taking care to +// define a scratch register for the pseudo instruction's expansion. The +// instruction is expanded after the register allocator as to prevent +// the insertion of stores between the linked load and the store conditional. 
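+// As an illustration (an added sketch, not the literal post-RA expansion in
+// this patch), ATOMIC_CMP_SWAP_I32_POSTRA conceptually expands to:
+//   retry:
+//     ll.w   $dest, $ptr, 0
+//     bne    $dest, $oldval, done  # current value differs; give up
+//     move   $scratch, $newval
+//     sc.w   $scratch, $ptr, 0     # conditional store; writes 1 on success
+//     beqz   $scratch, retry       # retry if the store-conditional failed
+//   done: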
+ +MachineBasicBlock * +LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + MachineBasicBlock *BB) const { + unsigned Op = MI.getOpcode(); + assert((Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE || + Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL || + Op == LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC || + Op == LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE || + Op == LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST || + Op == LoongArch::I64_ATOMIC_CMP_SWAP_ACQUIRE || + Op == LoongArch::I64_ATOMIC_CMP_SWAP_ACQ_REL || + Op == LoongArch::I64_ATOMIC_CMP_SWAP_MONOTONIC || + Op == LoongArch::I64_ATOMIC_CMP_SWAP_RELEASE || + Op == LoongArch::I64_ATOMIC_CMP_SWAP_SEQ_CST) && + "Unsupported atomic pseudo for EmitAtomicCmpSwap."); + + const unsigned Size = (Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE || + Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL || + Op == LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC || + Op == LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE || + Op == LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST) + ? 4 + : 8; + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned AtomicOp = Size == 4 ? LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA + : LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA; + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned OldVal = MI.getOperand(2).getReg(); + unsigned NewVal = MI.getOperand(3).getReg(); + + unsigned Scratch = MRI.createVirtualRegister(RC); + MachineBasicBlock::iterator II(MI); + + // We need to create copies of the various registers and kill them at the + // atomic pseudo. If the copies are not made, when the atomic is expanded + // after fast register allocation, the spills will end up outside of the + // blocks that their values are defined in, causing livein errors. + + unsigned PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr)); + unsigned OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal)); + unsigned NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal)); + + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), PtrCopy).addReg(Ptr); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), OldValCopy).addReg(OldVal); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), NewValCopy).addReg(NewVal); + + AtomicOrdering Ordering; + switch (Op) { + case LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE: + case LoongArch::I64_ATOMIC_CMP_SWAP_ACQUIRE: + Ordering = AtomicOrdering::Acquire; + break; + case LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL: + case LoongArch::I64_ATOMIC_CMP_SWAP_ACQ_REL: + Ordering = AtomicOrdering::AcquireRelease; + break; + case LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST: + case LoongArch::I64_ATOMIC_CMP_SWAP_SEQ_CST: + Ordering = AtomicOrdering::SequentiallyConsistent; + break; + case LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE: + case LoongArch::I64_ATOMIC_CMP_SWAP_RELEASE: + Ordering = AtomicOrdering::Release; + break; + case LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC: + case LoongArch::I64_ATOMIC_CMP_SWAP_MONOTONIC: + Ordering = AtomicOrdering::Monotonic; + break; + } + + // The purposes of the flags on the scratch registers are explained in + // emitAtomicBinary. In summary, we need a scratch register which is going to + // be undef, that is unique among registers chosen for the instruction.
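+ // After register allocation the POSTRA pseudo is expected to expand into an + // LL/SC retry loop roughly of the following shape (a sketch of the intent, + // not the literal expansion): + // loop: + // ll.w dest, ptr, 0 + // bne dest, oldval, done + // move scratch, newval + // sc.w scratch, ptr, 0 + // beqz scratch, loop + // done: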
+ + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(PtrCopy, RegState::Kill) + .addReg(OldValCopy, RegState::Kill) + .addReg(NewValCopy, RegState::Kill) + .addImm(static_cast<unsigned>(Ordering)) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit); + + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +} + +MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { + assert((Size == 1 || Size == 2) && + "Unsupported size for EmitAtomicCmpSwapPartial."); + unsigned Op = MI.getOpcode(); + assert((Op == LoongArch::I8_ATOMIC_CMP_SWAP_ACQUIRE || + Op == LoongArch::I8_ATOMIC_CMP_SWAP_ACQ_REL || + Op == LoongArch::I8_ATOMIC_CMP_SWAP_MONOTONIC || + Op == LoongArch::I8_ATOMIC_CMP_SWAP_RELEASE || + Op == LoongArch::I8_ATOMIC_CMP_SWAP_SEQ_CST || + Op == LoongArch::I16_ATOMIC_CMP_SWAP_ACQUIRE || + Op == LoongArch::I16_ATOMIC_CMP_SWAP_ACQ_REL || + Op == LoongArch::I16_ATOMIC_CMP_SWAP_MONOTONIC || + Op == LoongArch::I16_ATOMIC_CMP_SWAP_RELEASE || + Op == LoongArch::I16_ATOMIC_CMP_SWAP_SEQ_CST) && + "Unsupported atomic pseudo for EmitAtomicCmpSwapPartword."); + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i32); + const bool ArePtrs64bit = ABI.ArePtrs64bit(); + const TargetRegisterClass *RCp = + getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned CmpVal = MI.getOperand(2).getReg(); + unsigned NewVal = MI.getOperand(3).getReg(); + + unsigned AlignedAddr = RegInfo.createVirtualRegister(RCp); + unsigned ShiftAmt = RegInfo.createVirtualRegister(RC); + unsigned Mask = RegInfo.createVirtualRegister(RC); + unsigned Mask2 = RegInfo.createVirtualRegister(RC); + unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); + unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); + unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); + unsigned MaskUpper = RegInfo.createVirtualRegister(RC); + unsigned MaskUppest = RegInfo.createVirtualRegister(RC); + unsigned Mask3 = RegInfo.createVirtualRegister(RC); + unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); + unsigned AtomicOp = Size == 1 ? LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA + : LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA; + + // The scratch registers here with the EarlyClobber | Define | Dead | Implicit + // flags are used to coerce the register allocator and the machine verifier to + // accept the usage of these registers. + // The EarlyClobber flag has the semantic properties that the operand it is + // attached to is clobbered before the rest of the inputs are read. Hence it + // must be unique among the operands to the instruction. + // The Define flag is needed to convince the machine verifier that an undef + // value isn't a problem. + // The Dead flag is needed as the value in scratch isn't used by any other + // instruction. Kill isn't used as Dead is more precise.
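+ // As a concrete example of the masking set up below: for a halfword at byte + // offset 2 inside its aligned word, PtrLSB2 = 2, ShiftAmt = 16, Mask = + // 0xFFFF << 16, and Mask2 = ~Mask, so only the selected halfword's bits + // participate in the word-sized compare and exchange.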
+ unsigned Scratch = RegInfo.createVirtualRegister(RC); + unsigned Scratch2 = RegInfo.createVirtualRegister(RC); + + // Insert new blocks after the current block. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++BB->getIterator(); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(exitMBB, BranchProbability::getOne()); + + // thisMBB: + // addi.w masklsb2,$zero,-4 # 0xfffffffc; addi.d when pointers are 64-bit + // and alignedaddr,ptr,masklsb2 + // andi ptrlsb2,ptr,3 + // slli.w shiftamt,ptrlsb2,3 + // ori maskupper,$zero,255 # 0xff + // sll.w mask,maskupper,shiftamt + // nor mask2,$zero,mask + // andi maskedcmpval,cmpval,255 + // sll.w shiftedcmpval,maskedcmpval,shiftamt + // andi maskednewval,newval,255 + // sll.w shiftednewval,maskednewval,shiftamt + + int64_t MaskImm = (Size == 1) ? 255 : 4095; + BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::ADDI_D : LoongArch::ADDI_W), MaskLSB2) + .addReg(ABI.GetNullPtr()).addImm(-4); + BuildMI(BB, DL, TII->get(ArePtrs64bit ? LoongArch::AND : LoongArch::AND32), AlignedAddr) + .addReg(Ptr).addReg(MaskLSB2); + BuildMI(BB, DL, TII->get(LoongArch::ANDI32), PtrLSB2) + .addReg(Ptr, 0, ArePtrs64bit ? LoongArch::sub_32 : 0).addImm(3); + BuildMI(BB, DL, TII->get(LoongArch::SLLI_W), ShiftAmt).addReg(PtrLSB2).addImm(3); + + if (MaskImm == 4095) { + BuildMI(BB, DL, TII->get(LoongArch::LU12I_W32), MaskUppest).addImm(0xf); + BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) + .addReg(MaskUppest).addImm(MaskImm); + } else { + BuildMI(BB, DL, TII->get(LoongArch::ORI32), MaskUpper) + .addReg(LoongArch::ZERO).addImm(MaskImm); + } + + BuildMI(BB, DL, TII->get(LoongArch::SLL_W), Mask) + .addReg(MaskUpper).addReg(ShiftAmt); + BuildMI(BB, DL, TII->get(LoongArch::NOR32), Mask2).addReg(LoongArch::ZERO).addReg(Mask); + if (MaskImm == 4095) { + BuildMI(BB, DL, TII->get(LoongArch::ORI32), Mask3) + .addReg(MaskUppest).addImm(MaskImm); + BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedCmpVal) + .addReg(CmpVal).addReg(Mask3); + BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal) + .addReg(MaskedCmpVal).addReg(ShiftAmt); + BuildMI(BB, DL, TII->get(LoongArch::AND32), MaskedNewVal) + .addReg(NewVal).addReg(Mask3); + } else { + BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedCmpVal) + .addReg(CmpVal).addImm(MaskImm); + BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedCmpVal) + .addReg(MaskedCmpVal).addReg(ShiftAmt); + BuildMI(BB, DL, TII->get(LoongArch::ANDI32), MaskedNewVal) + .addReg(NewVal).addImm(MaskImm); + } + BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedNewVal) + .addReg(MaskedNewVal).addReg(ShiftAmt); + + AtomicOrdering Ordering; + switch (Op) { + case LoongArch::I8_ATOMIC_CMP_SWAP_ACQUIRE: + case LoongArch::I16_ATOMIC_CMP_SWAP_ACQUIRE: + Ordering = AtomicOrdering::Acquire; + break; + case LoongArch::I8_ATOMIC_CMP_SWAP_ACQ_REL: + case LoongArch::I16_ATOMIC_CMP_SWAP_ACQ_REL: + Ordering = AtomicOrdering::AcquireRelease; + break; + case LoongArch::I8_ATOMIC_CMP_SWAP_SEQ_CST: + case LoongArch::I16_ATOMIC_CMP_SWAP_SEQ_CST: + Ordering = AtomicOrdering::SequentiallyConsistent; + break; + case LoongArch::I8_ATOMIC_CMP_SWAP_RELEASE: + case LoongArch::I16_ATOMIC_CMP_SWAP_RELEASE: + Ordering = AtomicOrdering::Release; + break; + case
LoongArch::I8_ATOMIC_CMP_SWAP_MONOTONIC: + case LoongArch::I16_ATOMIC_CMP_SWAP_MONOTONIC: + Ordering = AtomicOrdering::Monotonic; + break; + } + // The purposes of the flags on the scratch registers are explained in + // emitAtomicBinary. In summary, we need a scratch register which is going to + // be undef, that is unique among the registers chosen for the instruction. + + BuildMI(BB, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(AlignedAddr) + .addReg(Mask) + .addReg(ShiftedCmpVal) + .addReg(Mask2) + .addReg(ShiftedNewVal) + .addReg(ShiftAmt) + .addImm(static_cast<unsigned>(Ordering)) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit); + + MI.eraseFromParent(); // The instruction is gone now. + + return exitMBB; +} + +SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { + // The first operand is the chain, the second is the condition, the third is + // the block to branch to if the condition is true. + SDValue Chain = Op.getOperand(0); + SDValue Dest = Op.getOperand(2); + SDLoc DL(Op); + + SDValue CondRes = createFPCmp(DAG, Op.getOperand(1)); + + // Return if flag is not set by a floating point comparison. + if (CondRes.getOpcode() != LoongArchISD::FPCmp) + return Op; + + SDValue CCNode = CondRes.getOperand(2); + LoongArch::CondCode CC = + (LoongArch::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue(); + unsigned Opc = invertFPCondCodeUser(CC) ? LoongArch::BRANCH_F : LoongArch::BRANCH_T; + SDValue BrCode = DAG.getConstant(Opc, DL, MVT::i32); + SDValue FCC0 = DAG.getRegister(LoongArch::FCC0, MVT::i32); + return DAG.getNode(LoongArchISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode, + FCC0, Dest, CondRes); +} + +SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op, + SelectionDAG &DAG) const { + SDValue Cond = createFPCmp(DAG, Op.getOperand(0)); + + // Return if flag is not set by a floating point comparison.
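+ // For example, (select (setlt f32 a, b), x, y) reaches this point as an + // FPCmp feeding the select and becomes an FSEL below; a select whose + // condition is an integer setcc keeps the generic lowering.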
+ if (Cond.getOpcode() != LoongArchISD::FPCmp) + return Op; + + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + SDLoc DL(Op); + + ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2)); + bool invert = invertFPCondCodeUser((LoongArch::CondCode)CC->getSExtValue()); + SDValue FCC = DAG.getRegister(LoongArch::FCC0, MVT::i32); + + if (Op->getSimpleValueType(0).SimpleTy == MVT::f64 || + Op->getSimpleValueType(0).SimpleTy == MVT::f32) { + if (invert) + return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N1, FCC, N2, + Cond); + else + return DAG.getNode(LoongArchISD::FSEL, DL, N1.getValueType(), N2, FCC, N1, + Cond); + + } else + return Op; +} + +SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDValue Cond = createFPCmp(DAG, Op); + + assert(Cond.getOpcode() == LoongArchISD::FPCmp && + "Floating point operand expected."); + + SDLoc DL(Op); + SDValue True = DAG.getConstant(1, DL, MVT::i32); + SDValue False = DAG.getConstant(0, DL, MVT::i32); + + return createCMovFP(DAG, Cond, True, False, DL); +} + +SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); + + const GlobalValue *GV = N->getGlobal(); + bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); + SDValue Addr = getAddr(N, DAG, IsLocal); + + return Addr; +} + +SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); + + return getAddr(N, DAG); +} + +SDValue LoongArchTargetLowering:: +lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const +{ + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + if (DAG.getTarget().useEmulatedTLS()) + return LowerToTLSEmulatedModel(GA, DAG); + + SDLoc DL(GA); + const GlobalValue *GV = GA->getGlobal(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + TLSModel::Model model = getTargetMachine().getTLSModel(GV); + + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { + // General Dynamic and Local Dynamic TLS models. + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U); + SDValue Load = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_GD, + DL, PtrVT, Addr), 0); + SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); + + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Load; + Entry.Ty = PtrTy; + Args.push_back(Entry); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args)); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + + SDValue Ret = CallResult.first; + + return Ret; + } + + SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0U); + SDValue Offset; + if (model == TLSModel::InitialExec) { + // Initial Exec TLS Model + Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_IE, DL, + PtrVT, Addr), 0); + } else { + // Local Exec TLS Model + assert(model == TLSModel::LocalExec); + Offset = SDValue(DAG.getMachineNode(LoongArch::LoadAddrTLS_LE, DL, + PtrVT, Addr), 0); + } + + SDValue ThreadPointer = DAG.getRegister((PtrVT == MVT::i32) + ?
LoongArch::TP + : LoongArch::TP_64, PtrVT); + return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset); +} + +SDValue LoongArchTargetLowering:: +lowerJumpTable(SDValue Op, SelectionDAG &DAG) const +{ + JumpTableSDNode *N = cast<JumpTableSDNode>(Op); + + return getAddr(N, DAG); +} + +SDValue LoongArchTargetLowering:: +lowerConstantPool(SDValue Op, SelectionDAG &DAG) const +{ + ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); + + return getAddr(N, DAG); +} + +SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + LoongArchFunctionInfo *FuncInfo = MF.getInfo<LoongArchFunctionInfo>(); + + SDLoc DL(Op); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy(MF.getDataLayout())); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), + MachinePointerInfo(SV)); +} + +SDValue LoongArchTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + SDValue Chain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + const Align Align = + llvm::MaybeAlign(Node->getConstantOperandVal(3)).valueOrOne(); + const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + SDLoc DL(Node); + unsigned ArgSlotSizeInBytes = (ABI.IsLPX32() || ABI.IsLP64()) ? 8 : 4; + + SDValue VAListLoad = DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain, + VAListPtr, MachinePointerInfo(SV)); + SDValue VAList = VAListLoad; + + // Re-align the pointer if necessary. + // It should only ever be necessary for 64-bit types on LP32 since the minimum + // argument alignment is the same as the maximum type alignment for LPX32/LP64. + // + // FIXME: We currently align too often. The code generator doesn't notice + // when the pointer is still aligned from the last va_arg (or pair of + // va_args for the i64 on LP32 case). + if (Align > getMinStackArgumentAlignment()) { + VAList = DAG.getNode( + ISD::ADD, DL, VAList.getValueType(), VAList, + DAG.getConstant(Align.value() - 1, DL, VAList.getValueType())); + + VAList = DAG.getNode( + ISD::AND, DL, VAList.getValueType(), VAList, + DAG.getConstant(-(int64_t)Align.value(), DL, VAList.getValueType())); + } + + // Increment the pointer, VAList, to the next vaarg. + auto &TD = DAG.getDataLayout(); + unsigned ArgSizeInBytes = + TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); + SDValue Tmp3 = + DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, + DAG.getConstant(alignTo(ArgSizeInBytes, ArgSlotSizeInBytes), + DL, VAList.getValueType())); + // Store the incremented VAList to the legalized pointer. + Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr, + MachinePointerInfo(SV)); + + // Load the actual argument out of the pointer VAList. + return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo()); +} + +SDValue LoongArchTargetLowering:: +lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { + // Check the depth. + assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) && + "Frame address can only be determined for current frame."); + + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue FrameAddr = DAG.getCopyFromReg( + DAG.getEntryNode(), DL, ABI.IsLP64() ?
LoongArch::FP_64 : LoongArch::FP, VT); + return FrameAddr; +} + +SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + // Check the depth. + assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) && + "Return address can only be determined for the current frame."); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MVT VT = Op.getSimpleValueType(); + unsigned RA = ABI.IsLP64() ? LoongArch::RA_64 : LoongArch::RA; + MFI.setReturnAddressIsTaken(true); + + // Return RA, which contains the return address. Mark it an implicit live-in. + unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT)); + return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, VT); +} + +// An EH_RETURN is the result of lowering llvm.eh.return which in turn is +// generated from __builtin_eh_return (offset, handler). +// The effect of this is to adjust the stack pointer by "offset" +// and then branch to "handler". +SDValue LoongArchTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) + const { + MachineFunction &MF = DAG.getMachineFunction(); + LoongArchFunctionInfo *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>(); + + LoongArchFI->setCallsEhReturn(); + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + SDLoc DL(Op); + EVT Ty = ABI.IsLP64() ? MVT::i64 : MVT::i32; + + // Store stack offset in A1, store jump target in A0. Glue CopyToReg and + // EH_RETURN nodes, so that instructions are emitted back-to-back. + unsigned OffsetReg = ABI.IsLP64() ? LoongArch::A1_64 : LoongArch::A1; + unsigned AddrReg = ABI.IsLP64() ? LoongArch::A0_64 : LoongArch::A0; + Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue()); + Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); + return DAG.getNode(LoongArchISD::EH_RETURN, DL, MVT::Other, Chain, + DAG.getRegister(OffsetReg, Ty), + DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())), + Chain.getValue(1)); +} + +SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + // TODO: handle SyncScope::SingleThread. + return Op; +} + +SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MVT VT = Subtarget.is64Bit() ?
MVT::i64 : MVT::i32; + + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + // if shamt < (VT.bits): + // lo = (shl lo, shamt) + // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt)) + // else: + // lo = 0 + // hi = (shl lo, shamt[4:0]) + SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, + DAG.getConstant(-1, DL, MVT::i32)); + SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, + DAG.getConstant(1, DL, VT)); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); + SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, + DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, + DAG.getConstant(0, DL, VT), ShiftLeftLo); + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, DL); +} + +SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, + bool IsSRA) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + MVT VT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; + + // if shamt < (VT.bits): + // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt)) + // if isSRA: + // hi = (sra hi, shamt) + // else: + // hi = (srl hi, shamt) + // else: + // if isSRA: + // lo = (sra hi, shamt[4:0]) + // hi = (sra hi, 31) + // else: + // lo = (srl hi, shamt[4:0]) + // hi = 0 + SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, + DAG.getConstant(-1, DL, MVT::i32)); + SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, + DAG.getConstant(1, DL, VT)); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, + DL, VT, Hi, Shamt); + SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, + DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); + SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi, + DAG.getConstant(VT.getSizeInBits() - 1, DL, VT)); + Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or); + Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, + IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, DL); +} + +// Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr). 
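+// At the C level this fuses the conversion with the store, e.g. (sketch): +// void f(float x, int *p) { *p = (int)x; } +// so the truncated value is stored directly from an FPR instead of first +// being moved into a GPR.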
+static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG, + bool SingleFloat) { + SDValue Val = SD->getValue(); + + if (Val.getOpcode() != ISD::FP_TO_SINT || + (Val.getValueSizeInBits() > 32 && SingleFloat)) + return SDValue(); + + EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits()); + SDValue Tr = DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Val), FPTy, + Val.getOperand(0)); + return DAG.getStore(SD->getChain(), SDLoc(SD), Tr, SD->getBasePtr(), + SD->getPointerInfo(), SD->getAlignment(), + SD->getMemOperand()->getFlags()); +} + +SDValue LoongArchTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode *SD = cast<StoreSDNode>(Op); + return lowerFP_TO_SINT_STORE(SD, DAG, Subtarget.isSingleFloat()); +} + +SDValue LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + unsigned Intrinsic = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue(); + switch (Intrinsic) { + default: + return SDValue(); + case Intrinsic::loongarch_lsx_vaddi_bu: + case Intrinsic::loongarch_lsx_vaddi_hu: + case Intrinsic::loongarch_lsx_vaddi_wu: + case Intrinsic::loongarch_lsx_vaddi_du: + return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), + lowerLSXSplatImm(Op, 2, DAG)); + case Intrinsic::loongarch_lsx_vand_v: + case Intrinsic::loongarch_lasx_xvand_v: + return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vbitclr_b: + case Intrinsic::loongarch_lsx_vbitclr_h: + case Intrinsic::loongarch_lsx_vbitclr_w: + case Intrinsic::loongarch_lsx_vbitclr_d: + return lowerLSXBitClear(Op, DAG); + case Intrinsic::loongarch_lsx_vdiv_b: + case Intrinsic::loongarch_lsx_vdiv_h: + case Intrinsic::loongarch_lsx_vdiv_w: + case Intrinsic::loongarch_lsx_vdiv_d: + return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vdiv_bu: + case Intrinsic::loongarch_lsx_vdiv_hu: + case Intrinsic::loongarch_lsx_vdiv_wu: + case Intrinsic::loongarch_lsx_vdiv_du: + return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vfdiv_s: + case Intrinsic::loongarch_lsx_vfdiv_d: + return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vffint_s_wu: + case Intrinsic::loongarch_lsx_vffint_d_lu: + return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vffint_s_w: + case Intrinsic::loongarch_lsx_vffint_d_l: + return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vfmul_s: + case Intrinsic::loongarch_lsx_vfmul_d: + return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vfrint_s: + case Intrinsic::loongarch_lsx_vfrint_d: + return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vfsqrt_s: + case Intrinsic::loongarch_lsx_vfsqrt_d: + return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vftintrz_wu_s: + case Intrinsic::loongarch_lsx_vftintrz_lu_d: + return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), + Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vpackev_b: + case Intrinsic::loongarch_lsx_vpackev_h: + case Intrinsic::loongarch_lsx_vpackev_w: + case Intrinsic::loongarch_lsx_vpackev_d: + return
DAG.getNode(LoongArchISD::VPACKEV, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vilvh_b: + case Intrinsic::loongarch_lsx_vilvh_h: + case Intrinsic::loongarch_lsx_vilvh_w: + case Intrinsic::loongarch_lsx_vilvh_d: + return DAG.getNode(LoongArchISD::VILVH, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vpackod_b: + case Intrinsic::loongarch_lsx_vpackod_h: + case Intrinsic::loongarch_lsx_vpackod_w: + case Intrinsic::loongarch_lsx_vpackod_d: + return DAG.getNode(LoongArchISD::VPACKOD, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vilvl_b: + case Intrinsic::loongarch_lsx_vilvl_h: + case Intrinsic::loongarch_lsx_vilvl_w: + case Intrinsic::loongarch_lsx_vilvl_d: + return DAG.getNode(LoongArchISD::VILVL, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmadd_b: + case Intrinsic::loongarch_lsx_vmadd_h: + case Intrinsic::loongarch_lsx_vmadd_w: + case Intrinsic::loongarch_lsx_vmadd_d: { + EVT ResTy = Op->getValueType(0); + return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, + Op->getOperand(2), Op->getOperand(3))); + } + case Intrinsic::loongarch_lsx_vmax_b: + case Intrinsic::loongarch_lsx_vmax_h: + case Intrinsic::loongarch_lsx_vmax_w: + case Intrinsic::loongarch_lsx_vmax_d: + return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmax_bu: + case Intrinsic::loongarch_lsx_vmax_hu: + case Intrinsic::loongarch_lsx_vmax_wu: + case Intrinsic::loongarch_lsx_vmax_du: + return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmin_b: + case Intrinsic::loongarch_lsx_vmin_h: + case Intrinsic::loongarch_lsx_vmin_w: + case Intrinsic::loongarch_lsx_vmin_d: + return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmin_bu: + case Intrinsic::loongarch_lsx_vmin_hu: + case Intrinsic::loongarch_lsx_vmin_wu: + case Intrinsic::loongarch_lsx_vmin_du: + return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmini_bu: + case Intrinsic::loongarch_lsx_vmini_hu: + case Intrinsic::loongarch_lsx_vmini_wu: + case Intrinsic::loongarch_lsx_vmini_du: + return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), Op->getOperand(1), + lowerLSXSplatImm(Op, 2, DAG)); + case Intrinsic::loongarch_lsx_vmod_b: + case Intrinsic::loongarch_lsx_vmod_h: + case Intrinsic::loongarch_lsx_vmod_w: + case Intrinsic::loongarch_lsx_vmod_d: + return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_bu: + case Intrinsic::loongarch_lsx_vmod_hu: + case Intrinsic::loongarch_lsx_vmod_wu: + case Intrinsic::loongarch_lsx_vmod_du: + return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmul_b: + case Intrinsic::loongarch_lsx_vmul_h: + case Intrinsic::loongarch_lsx_vmul_w: + case Intrinsic::loongarch_lsx_vmul_d: + return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vmsub_b: + case Intrinsic::loongarch_lsx_vmsub_h: + case Intrinsic::loongarch_lsx_vmsub_w: + case Intrinsic::loongarch_lsx_vmsub_d: { + EVT 
ResTy = Op->getValueType(0); + return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, + Op->getOperand(2), Op->getOperand(3))); + } + case Intrinsic::loongarch_lsx_vclz_b: + case Intrinsic::loongarch_lsx_vclz_h: + case Intrinsic::loongarch_lsx_vclz_w: + case Intrinsic::loongarch_lsx_vclz_d: + return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vnor_v: + case Intrinsic::loongarch_lasx_xvnor_v: { + SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); + } + case Intrinsic::loongarch_lsx_vor_v: + case Intrinsic::loongarch_lasx_xvor_v: + return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vpickev_b: + case Intrinsic::loongarch_lsx_vpickev_h: + case Intrinsic::loongarch_lsx_vpickev_w: + case Intrinsic::loongarch_lsx_vpickev_d: + return DAG.getNode(LoongArchISD::VPICKEV, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vpickod_b: + case Intrinsic::loongarch_lsx_vpickod_h: + case Intrinsic::loongarch_lsx_vpickod_w: + case Intrinsic::loongarch_lsx_vpickod_d: + return DAG.getNode(LoongArchISD::VPICKOD, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vpcnt_b: + case Intrinsic::loongarch_lsx_vpcnt_h: + case Intrinsic::loongarch_lsx_vpcnt_w: + case Intrinsic::loongarch_lsx_vpcnt_d: + return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vsat_b: + case Intrinsic::loongarch_lsx_vsat_h: + case Intrinsic::loongarch_lsx_vsat_w: + case Intrinsic::loongarch_lsx_vsat_d: + case Intrinsic::loongarch_lsx_vsat_bu: + case Intrinsic::loongarch_lsx_vsat_hu: + case Intrinsic::loongarch_lsx_vsat_wu: + case Intrinsic::loongarch_lsx_vsat_du: { + // Report an error for out of range values. 
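+ // For example (hypothetical user code; the builtins are provided by + // lsxintrin.h): + // __lsx_vsat_b(v, 7); // accepted: immediate lies in [0, elt bits - 1] + // __lsx_vsat_b(v, 8); // rejected below as "Immediate out of range"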
+ int64_t Max; + switch (Intrinsic) { + case Intrinsic::loongarch_lsx_vsat_b: + case Intrinsic::loongarch_lsx_vsat_bu: + Max = 7; + break; + case Intrinsic::loongarch_lsx_vsat_h: + case Intrinsic::loongarch_lsx_vsat_hu: + Max = 15; + break; + case Intrinsic::loongarch_lsx_vsat_w: + case Intrinsic::loongarch_lsx_vsat_wu: + Max = 31; + break; + case Intrinsic::loongarch_lsx_vsat_d: + case Intrinsic::loongarch_lsx_vsat_du: + Max = 63; + break; + default: + llvm_unreachable("Unmatched intrinsic"); + } + int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); + if (Value < 0 || Value > Max) + report_fatal_error("Immediate out of range"); + return SDValue(); + } + case Intrinsic::loongarch_lsx_vshuf4i_b: + case Intrinsic::loongarch_lsx_vshuf4i_h: + case Intrinsic::loongarch_lsx_vshuf4i_w: + // case Intrinsic::loongarch_lsx_vshuf4i_d: + { + int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); + if (Value < 0 || Value > 255) + report_fatal_error("Immediate out of range"); + return DAG.getNode(LoongArchISD::SHF, DL, Op->getValueType(0), + Op->getOperand(2), Op->getOperand(1)); + } + case Intrinsic::loongarch_lsx_vsll_b: + case Intrinsic::loongarch_lsx_vsll_h: + case Intrinsic::loongarch_lsx_vsll_w: + case Intrinsic::loongarch_lsx_vsll_d: + return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), + truncateVecElts(Op, DAG)); + case Intrinsic::loongarch_lsx_vslli_b: + case Intrinsic::loongarch_lsx_vslli_h: + case Intrinsic::loongarch_lsx_vslli_w: + case Intrinsic::loongarch_lsx_vslli_d: + return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), + lowerLSXSplatImm(Op, 2, DAG)); + case Intrinsic::loongarch_lsx_vreplve_b: + case Intrinsic::loongarch_lsx_vreplve_h: + case Intrinsic::loongarch_lsx_vreplve_w: + case Intrinsic::loongarch_lsx_vreplve_d: + // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle + // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because + // EXTRACT_VECTOR_ELT can't extract i64's on LoongArch32. + // Instead we lower to LoongArchISD::VSHF and match from there. + return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), + lowerLSXSplatZExt(Op, 2, DAG), Op->getOperand(1), + Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vreplvei_b: + case Intrinsic::loongarch_lsx_vreplvei_h: + case Intrinsic::loongarch_lsx_vreplvei_w: + case Intrinsic::loongarch_lsx_vreplvei_d: + return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), + lowerLSXSplatImm(Op, 2, DAG), Op->getOperand(1), + Op->getOperand(1)); + case Intrinsic::loongarch_lsx_vsra_b: + case Intrinsic::loongarch_lsx_vsra_h: + case Intrinsic::loongarch_lsx_vsra_w: + case Intrinsic::loongarch_lsx_vsra_d: + return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), + truncateVecElts(Op, DAG)); + case Intrinsic::loongarch_lsx_vsrari_b: + case Intrinsic::loongarch_lsx_vsrari_h: + case Intrinsic::loongarch_lsx_vsrari_w: + case Intrinsic::loongarch_lsx_vsrari_d: { + // Report an error for out of range values.
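+ // vsrari is a rounding arithmetic right shift: per element it computes + // roughly (x + (1 << (imm - 1))) >> imm for imm > 0, so the immediate is + // bounded by the element width exactly as for vsat above.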
+ int64_t Max; + switch (Intrinsic) { + case Intrinsic::loongarch_lsx_vsrari_b: + Max = 7; + break; + case Intrinsic::loongarch_lsx_vsrari_h: + Max = 15; + break; + case Intrinsic::loongarch_lsx_vsrari_w: + Max = 31; + break; + case Intrinsic::loongarch_lsx_vsrari_d: + Max = 63; + break; + default: + llvm_unreachable("Unmatched intrinsic"); + } + int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); + if (Value < 0 || Value > Max) + report_fatal_error("Immediate out of range"); + return SDValue(); + } + case Intrinsic::loongarch_lsx_vsrl_b: + case Intrinsic::loongarch_lsx_vsrl_h: + case Intrinsic::loongarch_lsx_vsrl_w: + case Intrinsic::loongarch_lsx_vsrl_d: + return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), + truncateVecElts(Op, DAG)); + case Intrinsic::loongarch_lsx_vsrli_b: + case Intrinsic::loongarch_lsx_vsrli_h: + case Intrinsic::loongarch_lsx_vsrli_w: + case Intrinsic::loongarch_lsx_vsrli_d: + return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), + lowerLSXSplatImm(Op, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrlri_b: + case Intrinsic::loongarch_lsx_vsrlri_h: + case Intrinsic::loongarch_lsx_vsrlri_w: + case Intrinsic::loongarch_lsx_vsrlri_d: { + // Report an error for out of range values. + int64_t Max; + switch (Intrinsic) { + case Intrinsic::loongarch_lsx_vsrlri_b: + Max = 7; + break; + case Intrinsic::loongarch_lsx_vsrlri_h: + Max = 15; + break; + case Intrinsic::loongarch_lsx_vsrlri_w: + Max = 31; + break; + case Intrinsic::loongarch_lsx_vsrlri_d: + Max = 63; + break; + default: + llvm_unreachable("Unmatched intrinsic"); + } + int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); + if (Value < 0 || Value > Max) + report_fatal_error("Immediate out of range"); + return SDValue(); + } + case Intrinsic::loongarch_lsx_vsubi_bu: + case Intrinsic::loongarch_lsx_vsubi_hu: + case Intrinsic::loongarch_lsx_vsubi_wu: + case Intrinsic::loongarch_lsx_vsubi_du: + return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), + lowerLSXSplatImm(Op, 2, DAG)); + case Intrinsic::loongarch_lsx_vshuf_h: + case Intrinsic::loongarch_lsx_vshuf_w: + case Intrinsic::loongarch_lsx_vshuf_d: + case Intrinsic::loongarch_lasx_xvshuf_h: + case Intrinsic::loongarch_lasx_xvshuf_w: + case Intrinsic::loongarch_lasx_xvshuf_d: + return DAG.getNode(LoongArchISD::VSHF, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); + case Intrinsic::loongarch_lsx_vxor_v: + case Intrinsic::loongarch_lasx_xvxor_v: + return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vrotr_b: + case Intrinsic::loongarch_lsx_vrotr_h: + case Intrinsic::loongarch_lsx_vrotr_w: + case Intrinsic::loongarch_lsx_vrotr_d: + return DAG.getNode(LoongArchISD::VROR, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::loongarch_lsx_vrotri_b: + case Intrinsic::loongarch_lsx_vrotri_h: + case Intrinsic::loongarch_lsx_vrotri_w: + case Intrinsic::loongarch_lsx_vrotri_d: + return DAG.getNode(LoongArchISD::VRORI, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::thread_pointer: { + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + if (PtrVT == MVT::i64) + return DAG.getRegister(LoongArch::TP_64, MVT::i64); + return DAG.getRegister(LoongArch::TP, MVT::i32); + } + } +} + +SDValue +LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); + switch (Intr)
{ + default: + return SDValue(); + case Intrinsic::loongarch_lsx_vld: + return lowerLSXLoadIntr(Op, DAG, Intr, Subtarget); + case Intrinsic::loongarch_lasx_xvld: + return lowerLASXLoadIntr(Op, DAG, Intr, Subtarget); + case Intrinsic::loongarch_lasx_xvldrepl_b: + case Intrinsic::loongarch_lasx_xvldrepl_h: + case Intrinsic::loongarch_lasx_xvldrepl_w: + case Intrinsic::loongarch_lasx_xvldrepl_d: + return lowerLASXVLDRIntr(Op, DAG, Intr, Subtarget); + case Intrinsic::loongarch_lsx_vldrepl_b: + case Intrinsic::loongarch_lsx_vldrepl_h: + case Intrinsic::loongarch_lsx_vldrepl_w: + case Intrinsic::loongarch_lsx_vldrepl_d: + return lowerLSXVLDRIntr(Op, DAG, Intr, Subtarget); + } +} + +SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); + switch (Intr) { + default: + return SDValue(); + case Intrinsic::loongarch_lsx_vst: + return lowerLSXStoreIntr(Op, DAG, Intr, Subtarget); + case Intrinsic::loongarch_lasx_xvst: + return lowerLASXStoreIntr(Op, DAG, Intr, Subtarget); + } +} + +// Lower ISD::EXTRACT_VECTOR_ELT into LoongArchISD::VEXTRACT_SEXT_ELT. +// +// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We +// choose to sign-extend but we could have equally chosen zero-extend. The +// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT +// result into this node later (possibly changing it to a zero-extend in the +// process). +SDValue +LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + SDValue Op0 = Op->getOperand(0); + EVT VecTy = Op0->getValueType(0); + + if (!VecTy.is128BitVector() && !VecTy.is256BitVector()) + return SDValue(); + + if (ResTy.isInteger()) { + SDValue Op1 = Op->getOperand(1); + EVT EltTy = VecTy.getVectorElementType(); + if (VecTy.is128BitVector()) + return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, + DAG.getValueType(EltTy)); + + ConstantSDNode *cn = dyn_cast<ConstantSDNode>(Op1); + if (!cn) + return SDValue(); + + if (EltTy == MVT::i32 || EltTy == MVT::i64) + return DAG.getNode(LoongArchISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, + DAG.getValueType(EltTy)); + } + + return SDValue(); +} + +SDValue +LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + + MVT VT = Op.getSimpleValueType(); + MVT EltVT = VT.getVectorElementType(); + + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + + if (!EltVT.isInteger()) + return Op; + + // A non-constant index is handled via the insert pseudo regardless of the + // element type; lowering through a stack slot is not implemented. + if (!isa<ConstantSDNode>(Op2)) + return Op; + + if (VT.is128BitVector()) + return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2); + + if (VT.is256BitVector()) { + + if (EltVT == MVT::i32 || EltVT == MVT::i64) + return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Op0, Op1, Op2); + + return Op; + } + + return SDValue(); +} + +// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the +// backend.
+// +// Lowers according to the following rules: +// - Constant splats are legal as-is as long as the SplatBitSize is a power of +// 2 less than or equal to 64 and the value fits into a signed 10-bit +// immediate. +// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize +// is a power of 2 less than or equal to 64 and the value does not fit into a +// signed 10-bit immediate. +// - Non-constant splats are legal as-is. +// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. +// - All others are illegal and must be expanded. +SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if ((!Subtarget.hasLSX() || !ResTy.is128BitVector()) && + (!Subtarget.hasLASX() || !ResTy.is256BitVector())) + return SDValue(); + + if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + 8) && + SplatBitSize <= 64) { + // We can only cope with 8, 16, 32, or 64-bit elements. + if ((ResTy.is128BitVector() && SplatBitSize != 8 && SplatBitSize != 16 && + SplatBitSize != 32 && SplatBitSize != 64) || + (ResTy.is256BitVector() && SplatBitSize != 8 && SplatBitSize != 16 && + SplatBitSize != 32 && SplatBitSize != 64)) + return SDValue(); + + // If the value isn't an integer type we will have to bitcast + // from an integer type first. Also, if there are any undefs, we must + // lower them to defined values first. + if (ResTy.isInteger() && !HasAnyUndefs) + return Op; + + EVT ViaVecTy; + + if ((ResTy.is128BitVector() && + !isLSXBySplatBitSize(SplatBitSize, ViaVecTy)) || + (ResTy.is256BitVector() && + !isLASXBySplatBitSize(SplatBitSize, ViaVecTy))) + return SDValue(); + + // SelectionDAG::getConstant will promote SplatValue appropriately. + SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); + + // Bitcast to the type we originally wanted. + if (ViaVecTy != ResTy) + Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); + + return Result; + } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) + return Op; + else if (!isConstantOrUndefBUILD_VECTOR(Node)) { + // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+ // The resulting code is the same length as the expansion, but it doesn't + // use memory operations. + EVT ResTy = Node->getValueType(0); + + assert(ResTy.isVector()); + + unsigned NumElts = ResTy.getVectorNumElements(); + SDValue Vector = DAG.getUNDEF(ResTy); + for (unsigned i = 0; i < NumElts; ++i) { + Vector = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, + Node->getOperand(i), DAG.getConstant(i, DL, MVT::i32)); + } + return Vector; + } + + return SDValue(); +} + +SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + Op = LowerSUINT_TO_FP(ISD::ZERO_EXTEND_VECTOR_INREG, Op, DAG); + if (!ResTy.isVector()) + return Op; + return DAG.getNode(ISD::UINT_TO_FP, DL, ResTy, Op); +} + +SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + Op = LowerSUINT_TO_FP(ISD::SIGN_EXTEND_VECTOR_INREG, Op, DAG); + if (!ResTy.isVector()) + return Op; + return DAG.getNode(ISD::SINT_TO_FP, DL, ResTy, Op); +} + +SDValue LoongArchTargetLowering::lowerFP_TO_UINT(SDValue Op, + SelectionDAG &DAG) const { + if (!Op->getValueType(0).isVector()) + return SDValue(); + return LowerFP_TO_SUINT(ISD::FP_TO_UINT, ISD::ZERO_EXTEND_VECTOR_INREG, Op, + DAG); +} + +SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, + SelectionDAG &DAG) const { + if (Op->getValueType(0).isVector()) + return LowerFP_TO_SUINT(ISD::FP_TO_SINT, ISD::SIGN_EXTEND_VECTOR_INREG, Op, + DAG); + + if (Op.getValueSizeInBits() > 32 && Subtarget.isSingleFloat()) + return SDValue(); + + EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); + SDValue Trunc = + DAG.getNode(LoongArchISD::TruncIntFP, SDLoc(Op), FPTy, Op.getOperand(0)); + return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op.getValueType(), Trunc); +} + +static bool checkUndef(ArrayRef<int> Mask, int Lo, int Hi) { + + for (int i = Lo, end = Hi; i != end; i++, Hi++) + if (!((Mask[i] == -1) || (Mask[i] == Hi))) + return false; + return true; +} + +static bool CheckRev(ArrayRef<int> Mask) { + + int Num = Mask.size() - 1; + for (long unsigned int i = 0; i < Mask.size(); i++, Num--) + if (Mask[i] != Num) + return false; + return true; +} + +static bool checkHalf(ArrayRef<int> Mask, int Lo, int Hi, int base) { + + for (int i = Lo; i < Hi; i++) + if (Mask[i] != (base + i)) + return false; + return true; +} + +static SDValue lowerHalfHalf(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2, + ArrayRef<int> Mask, SelectionDAG &DAG) { + + int Num = VT.getVectorNumElements(); + int HalfNum = Num / 2; + + if (Op1->isUndef() || Op2->isUndef() || Mask.size() > (long unsigned int)Num) + return SDValue(); + + if (checkHalf(Mask, HalfNum, Num, Num) && checkHalf(Mask, 0, HalfNum, 0)) { + return SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, + DAG.getTargetConstant(48, DL, MVT::i32)), + 0); + } + + return SDValue(); +} + +static bool checkHalfUndef(ArrayRef<int> Mask, int Lo, int Hi) { + + for (int i = Lo; i < Hi; i++) + if (Mask[i] != -1) + return false; + return true; +} + +// Lower vectors in which half of the data is undef: use EXTRACT_SUBVECTOR and +// INSERT_SUBVECTOR instead of VECTOR_SHUFFLE. +static SDValue lowerHalfUndef(const SDLoc &DL, MVT VT, SDValue Op1, SDValue Op2, + ArrayRef<int> Mask, SelectionDAG &DAG) { + + int Num = VT.getVectorNumElements(); + int HalfNum = Num / 2; + MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNum); + MVT VT1 = Op1.getSimpleValueType(); + SDValue Op; + + bool check1 = Op1->isUndef() &&
(!Op2->isUndef()); + bool check2 = Op2->isUndef() && (!Op1->isUndef()); + + if ((check1 || check2) && (VT1 == VT)) { + if (check1) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op2); + } else if (check2) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Op1); + } + + if (VT == MVT::v32i8 && CheckRev(Mask)) { + SDValue Vector; + SDValue Rev[4]; + SDValue Ext[4]; + for (int i = 0; i < 4; i++) { + Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, + DAG.getConstant(i, DL, MVT::i32)); + Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]); + } + + Vector = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, DAG.getUNDEF(VT), + Rev[3], DAG.getConstant(3, DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, + Rev[2], DAG.getConstant(2, DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, + Rev[1], DAG.getConstant(1, DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i64, Vector, + Rev[0], DAG.getConstant(0, DL, MVT::i32)); + + Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, Vector); + + return Vector; + } + } + + if (checkHalfUndef(Mask, HalfNum, Num) && checkUndef(Mask, 0, HalfNum)) { + SDValue High = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, + DAG.getConstant(HalfNum, DL, MVT::i64)); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), High, + DAG.getConstant(0, DL, MVT::i64)); + } + + if (checkHalfUndef(Mask, HalfNum, Num) && (VT == MVT::v8i32) && + (Mask[0] == 0) && (Mask[1] == 1) && (Mask[2] == (Num + 2)) && + (Mask[3] == (Num + 3))) { + + SDValue Val1 = + SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, + DAG.getTargetConstant(32, DL, MVT::i32)), + 0); + + SDValue Val2 = + SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, + DAG.getTargetConstant(12, DL, MVT::i32)), + 0); + + SDValue Val3 = SDValue( + DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), + DAG.getTargetConstant(2, DL, MVT::i32)), + 0); + return Val3; + } + + if (checkHalfUndef(Mask, 0, HalfNum) && checkUndef(Mask, HalfNum, Num)) { + SDValue Low = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op1, + DAG.getConstant(0, DL, MVT::i32)); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Low, + DAG.getConstant(HalfNum, DL, MVT::i32)); + } + + if (checkHalfUndef(Mask, 0, HalfNum) && (VT == MVT::v8i32) && + (Mask[HalfNum] == HalfNum) && (Mask[HalfNum + 1] == (HalfNum + 1)) && + (Mask[HalfNum + 2] == (2 * Num - 2)) && + (Mask[HalfNum + 3] == (2 * Num - 1))) { + + SDValue Val1 = + SDValue(DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Op2, Op1, + DAG.getTargetConstant(49, DL, MVT::i32)), + 0); + + SDValue Val2 = + SDValue(DAG.getMachineNode(LoongArch::XVPERMI_D, DL, VT, Val1, + DAG.getTargetConstant(12, DL, MVT::i32)), + 0); + + SDValue Val3 = SDValue( + DAG.getMachineNode(LoongArch::XVPERMI_Q, DL, VT, Val2, DAG.getUNDEF(VT), + DAG.getTargetConstant(32, DL, MVT::i32)), + 0); + return Val3; + } + + if ((VT == MVT::v8i32) || (VT == MVT::v4i64)) { + int def = 0; + int j = 0; + int ext[3]; + int ins[3]; + bool useOp1[3] = {true, true, true}; + bool checkdef = true; + + for (int i = 0; i < Num; i++) { + if (def > 2) { + checkdef = false; + break; + } + if (Mask[i] != -1) { + def++; + ins[j] = i; + if (Mask[i] >= Num) { + ext[j] = Mask[i] - Num; + useOp1[j] = false; + } else { + ext[j] = Mask[i]; + } + j++; + } + } + + if (checkdef) { + SDValue Vector = DAG.getUNDEF(VT); + EVT EltTy = VT.getVectorElementType(); + 
SDValue Ext[2]; + + if (check1 || check2) { + for (int i = 0; i < def; i++) { + if (check1) { + Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2, + DAG.getConstant(ext[i], DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], + DAG.getConstant(ins[i], DL, MVT::i32)); + } else if (check2) { + Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1, + DAG.getConstant(ext[i], DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], + DAG.getConstant(ins[i], DL, MVT::i32)); + } + } + return Vector; + } else { + for (int i = 0; i < def; i++) { + if (!useOp1[i]) { + Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op2, + DAG.getConstant(ext[i], DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], + DAG.getConstant(ins[i], DL, MVT::i32)); + } else { + Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, Op1, + DAG.getConstant(ext[i], DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vector, Ext[i], + DAG.getConstant(ins[i], DL, MVT::i32)); + } + } + return Vector; + } + } + } + + return SDValue(); +} + +static SDValue lowerHalfUndef_LSX(const SDLoc &DL, EVT ResTy, MVT VT, + SDValue Op1, SDValue Op2, ArrayRef<int> Mask, + SelectionDAG &DAG) { + + MVT VT1 = Op1.getSimpleValueType(); + + bool check1 = Op1->isUndef() && (!Op2->isUndef()); + bool check2 = Op2->isUndef() && (!Op1->isUndef()); + + if ((check1 || check2) && (VT1 == VT)) { + SDValue Op; + + if (VT == MVT::v16i8 && CheckRev(Mask)) { + + if (check1) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op2); + } else if (check2) { + Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op1); + } + + SDValue Vector; + SDValue Rev[2]; + SDValue Ext[2]; + for (int i = 0; i < 2; i++) { + Ext[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op, + DAG.getConstant(i, DL, MVT::i32)); + Rev[i] = DAG.getNode(LoongArchISD::REVBD, DL, MVT::i64, Ext[i]); + } + + Vector = + DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, DAG.getUNDEF(VT), + Rev[1], DAG.getConstant(1, DL, MVT::i32)); + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i64, Vector, + Rev[0], DAG.getConstant(0, DL, MVT::i32)); + + Vector = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Vector); + + return Vector; + } + } + + return SDValue(); +} + +// Use the LoongArchISD::INSVE node instead of a series of +// EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT. +static SDValue lowerVECTOR_SHUFFLE_INSVE(const SDLoc &DL, MVT VT, EVT ResTy, + SDValue Op1, SDValue Op2, + ArrayRef<int> Mask, + SelectionDAG &DAG) { + + int Num = VT.getVectorNumElements(); + if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8) + return SDValue(); + + int CheckOne = 0; + int CheckOther = 0; + int Idx; + + for (int i = 0; i < Num; i++) { + if ((Mask[i] == i) || (Mask[i] == -1)) { + CheckOther++; + } else if (Mask[i] == Num) { + CheckOne++; + Idx = i; + } else + return SDValue(); + } + + if ((CheckOne != 1) || (CheckOther != (Num - 1))) + return SDValue(); + + return DAG.getNode(LoongArchISD::INSVE, DL, ResTy, Op1, Op2, + DAG.getConstant(Idx, DL, MVT::i32)); +} + +static SDValue lowerVECTOR_SHUFFLE_XVPICKVE(const SDLoc &DL, MVT VT, EVT ResTy, + SDValue Op1, SDValue Op2, + ArrayRef<int> Mask, + SelectionDAG &DAG) { + + int Num = VT.getVectorNumElements(); + if (ResTy == MVT::v16i16 || ResTy == MVT::v32i8 || + (!ISD::isBuildVectorAllZeros(Op1.getNode()))) + return SDValue(); + + bool CheckV = true; + + if ((Mask[0] < Num) || (Mask[0] > (2 * Num - 1))) + CheckV = false; + +
for (int i = 1; i < Num; i++) { + if (Mask[i] != 0) { + CheckV = false; + break; + } + } + + if (!CheckV) + return SDValue(); + + return DAG.getNode(LoongArchISD::XVPICKVE, DL, ResTy, Op1, Op2, + DAG.getConstant(Mask[0] - Num, DL, MVT::i32)); +} + +static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, MVT VT, EVT ResTy, + SDValue Op1, SDValue Op2, + ArrayRef<int> Mask, + SelectionDAG &DAG) { + + if (VT == MVT::v4i64) { + int Num = VT.getVectorNumElements(); + + bool CheckV = true; + for (int i = 0; i < Num; i++) { + if (Mask[i] != (i * 2)) { + CheckV = false; + break; + } + } + + if (!CheckV) + return SDValue(); + + SDValue Res = DAG.getNode(LoongArchISD::XVSHUF4I, DL, ResTy, Op1, Op2, + DAG.getConstant(8, DL, MVT::i32)); + return DAG.getNode(LoongArchISD::XVPERMI, DL, ResTy, Res, + DAG.getConstant(0xD8, DL, MVT::i32)); + } else + return SDValue(); +} + +// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the +// indices in the shuffle. +SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op); + EVT ResTy = Op->getValueType(0); + ArrayRef<int> Mask = Node->getMask(); + SDValue Op1 = Op.getOperand(0); + SDValue Op2 = Op.getOperand(1); + MVT VT = Op.getSimpleValueType(); + SDLoc DL(Op); + + if (ResTy.is128BitVector()) { + + int ResTyNumElts = ResTy.getVectorNumElements(); + SmallVector<int, 16> Indices; + + for (int i = 0; i < ResTyNumElts; ++i) + Indices.push_back(Node->getMaskElt(i)); + + SDValue Result; + if (isVECTOR_SHUFFLE_VREPLVEI(Op, ResTy, Indices, DAG)) + return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); + if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VILVH(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VILVL(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerHalfUndef_LSX(DL, ResTy, VT, Op1, Op2, Mask, DAG))) + return Result; + return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); + + } else if (ResTy.is256BitVector()) { + int ResTyNumElts = ResTy.getVectorNumElements(); + SmallVector<int, 16> Indices; + + for (int i = 0; i < ResTyNumElts; ++i) + Indices.push_back(Node->getMaskElt(i)); + + SDValue Result; + if ((Result = lowerHalfHalf(DL, VT, Op1, Op2, Mask, DAG))) + return Result; + if ((Result = lowerHalfUndef(DL, VT, Op1, Op2, Mask, DAG))) + return Result; + if (isVECTOR_SHUFFLE_XVREPLVEI(Op, ResTy, Indices, DAG)) + return SDValue(); + if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_XVILVH(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_XVILVL(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = lowerVECTOR_SHUFFLE_XSHF(Op, ResTy, Indices, DAG))) + return Result; + if ((Result = + lowerVECTOR_SHUFFLE_INSVE(DL,
VT, ResTy, Op1, Op2, Mask, DAG))) + return Result; + if ((Result = + lowerVECTOR_SHUFFLE_XVPICKVE(DL, VT, ResTy, Op1, Op2, Mask, DAG))) + return Result; + if ((Result = + lowerVECTOR_SHUFFLE_XVSHUF(DL, VT, ResTy, Op1, Op2, Mask, DAG))) + return Result; + } return SDValue(); } -SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - SelectionDAG &DAG = DCI.DAG; - switch (N->getOpcode()) { - default: - break; - case ISD::AND: - return performANDCombine(N, DAG, DCI, Subtarget); - case ISD::OR: - return performORCombine(N, DAG, DCI, Subtarget); - case ISD::SRL: - return performSRLCombine(N, DAG, DCI, Subtarget); - } - return SDValue(); +SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op, + SelectionDAG &DAG) const { + + // Return a fixed StackObject with offset 0 which points to the old stack + // pointer. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + EVT ValTy = Op->getValueType(0); + int FI = MFI.CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false); + return DAG.getFrameIndex(FI, ValTy); +} + +// Check whether the tail call optimization conditions are met +bool LoongArchTargetLowering::isEligibleForTailCallOptimization( + const CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, + unsigned NextStackOffset, const LoongArchFunctionInfo &FI) const { + + auto CalleeCC = CLI.CallConv; + auto IsVarArg = CLI.IsVarArg; + auto &Outs = CLI.Outs; + auto &Caller = MF.getFunction(); + auto CallerCC = Caller.getCallingConv(); + + if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + return false; + + if (Caller.hasFnAttribute("interrupt")) + return false; + + if (IsVarArg) + return false; + + if (getTargetMachine().getCodeModel() == CodeModel::Large) + return false; + + if (getTargetMachine().getRelocationModel() == Reloc::Static) + return false; + + // Do not tail call optimize if the stack is used to pass parameters. + if (CCInfo.getNextStackOffset() != 0) + return false; + + // Do not tail call optimize functions with byval parameters. + for (auto &Arg : Outs) + if (Arg.Flags.isByVal()) + return false; + + // Do not tail call optimize if either caller or callee uses structret + // semantics. + auto IsCallerStructRet = Caller.hasStructRetAttr(); + auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); + if (IsCallerStructRet || IsCalleeStructRet) + return false; + + // The callee has to preserve all registers the caller needs to preserve. + const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + if (CalleeCC != CallerCC) { + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) + return false; + } + + // Return false if either the callee or caller has a byval argument. + if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) + return false; + + // Return true if the callee's argument area is no larger than the + // caller's. + return NextStackOffset <= FI.getIncomingArgSize(); +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// TODO: Implement a generic logic using tblgen that can support this. 
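
isEligibleForTailCallOptimization above is a pile of early-outs; condensed into one predicate it reads as below. The struct is a hypothetical stand-in for the CCState/CallLoweringInfo queries the real code makes, kept only so the sketch compiles on its own:

#include <cstdio>

// Illustrative summary of the tail-call checks; every field is a made-up
// proxy for a query the lowering performs on the caller, callee, or CCState.
struct CallSummary {
  bool CallerDisablesTailCalls; // "disable-tail-calls" fn attribute
  bool CallerIsInterrupt;       // "interrupt" fn attribute
  bool IsVarArg;
  bool LargeCodeModel;
  bool StaticReloc;
  unsigned OutgoingStackBytes;  // CCInfo.getNextStackOffset()
  bool HasByValArg;             // caller or callee byval argument
  bool HasStructRet;            // caller or callee sret
  bool CalleePreservesCallerRegs;
  unsigned CalleeArgAreaBytes;  // NextStackOffset
  unsigned CallerIncomingArgBytes;
};

static bool mayTailCall(const CallSummary &S) {
  if (S.CallerDisablesTailCalls || S.CallerIsInterrupt || S.IsVarArg ||
      S.LargeCodeModel || S.StaticReloc)
    return false;
  if (S.OutgoingStackBytes != 0 || S.HasByValArg || S.HasStructRet)
    return false;
  if (!S.CalleePreservesCallerRegs)
    return false;
  // The callee's argument area must fit in the caller's incoming area.
  return S.CalleeArgAreaBytes <= S.CallerIncomingArgBytes;
}

int main() {
  CallSummary S{};                   // nothing on the stack, no attributes
  S.CalleePreservesCallerRegs = true;
  std::printf("%d\n", mayTailCall(S)); // 1
}
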
+// LoongArch LP32 ABI rules: +// --- +// i32 - Passed in A0, A1, A2, A3 and stack +// f32 - Only passed in f32 registers if no int reg has been used yet to hold +// an argument. Otherwise, passed in A1, A2, A3 and stack. +// f64 - Only passed in two aliased f32 registers if no int reg has been used +// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is +// not used, it must be shadowed. If only A3 is available, shadow it and +// go to stack. +// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. +// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} +// with the remainder spilled to the stack. +// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases +// spilling the remainder to the stack. +// +// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. +//===----------------------------------------------------------------------===// + +static bool CC_LoongArchLP32(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State, ArrayRef F64Regs) { + static const MCPhysReg IntRegs[] = { LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3 }; + + const LoongArchCCState * LoongArchState = static_cast(&State); + + static const MCPhysReg F32Regs[] = { LoongArch::F12, LoongArch::F14 }; + + static const MCPhysReg FloatVectorIntRegs[] = { LoongArch::A0, LoongArch::A2 }; + + // Do not process byval args here. + if (ArgFlags.isByVal()) + return true; + + + // Promote i8 and i16 + if (LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + unsigned Reg; + + // f32 and f64 are allocated in A0, A1, A2, A3 when either of the following + // is true: function is vararg, argument is 3rd or higher, there is previous + // argument which is not f32 or f64. + bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 || + State.getFirstUnallocated(F32Regs) != ValNo; + Align OrigAlign = ArgFlags.getNonZeroOrigAlign(); + bool isI64 = (ValVT == MVT::i32 && OrigAlign == Align(8)); + bool isVectorFloat = LoongArchState->WasOriginalArgVectorFloat(ValNo); + + // The LoongArch vector ABI for floats passes them in a pair of registers + if (ValVT == MVT::i32 && isVectorFloat) { + // This is the start of an vector that was scalarized into an unknown number + // of components. It doesn't matter how many there are. Allocate one of the + // notional 8 byte aligned registers which map onto the argument stack, and + // shadow the register lost to alignment requirements. + if (ArgFlags.isSplit()) { + Reg = State.AllocateReg(FloatVectorIntRegs); + if (Reg == LoongArch::A2) + State.AllocateReg(LoongArch::A1); + else if (Reg == 0) + State.AllocateReg(LoongArch::A3); + } else { + // If we're an intermediate component of the split, we can just attempt to + // allocate a register directly. + Reg = State.AllocateReg(IntRegs); + } + } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { + Reg = State.AllocateReg(IntRegs); + // If this is the first part of an i64 arg, + // the allocated register must be either A0 or A2. + if (isI64 && (Reg == LoongArch::A1 || Reg == LoongArch::A3)) + Reg = State.AllocateReg(IntRegs); + LocVT = MVT::i32; + } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) { + // Allocate int register and shadow next int register. 
If first + // available register is LoongArch::A1 or LoongArch::A3, shadow it too. + Reg = State.AllocateReg(IntRegs); + if (Reg == LoongArch::A1 || Reg == LoongArch::A3) + Reg = State.AllocateReg(IntRegs); + State.AllocateReg(IntRegs); + LocVT = MVT::i32; + } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) { + // we are guaranteed to find an available float register + if (ValVT == MVT::f32) { + Reg = State.AllocateReg(F32Regs); + // Shadow int register + State.AllocateReg(IntRegs); + } else { + Reg = State.AllocateReg(F64Regs); + // Shadow int registers + unsigned Reg2 = State.AllocateReg(IntRegs); + if (Reg2 == LoongArch::A1 || Reg2 == LoongArch::A3) + State.AllocateReg(IntRegs); + State.AllocateReg(IntRegs); + } + } else + llvm_unreachable("Cannot handle this ValVT."); + + if (!Reg) { + unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), OrigAlign); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + } else + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + + return false; +} + +static bool CC_LoongArchLP32_FP32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ + LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ + LoongArch::F6_64, LoongArch::F7_64 }; + + return CC_LoongArchLP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); +} + +static bool CC_LoongArchLP32_FP64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + static const MCPhysReg F64Regs[] = {LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, \ + LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, \ + LoongArch::F6_64, LoongArch::F7_64 }; + + return CC_LoongArchLP32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); +} + +static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) LLVM_ATTRIBUTE_UNUSED; + +static bool CC_LoongArch_F128(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const MCPhysReg ArgRegs[8] = { + LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64, + LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64}; + + unsigned Idx = State.getFirstUnallocated(ArgRegs); + // Skip 'odd' register if necessary. 
+  if (!ArgFlags.isSplitEnd() && Idx != array_lengthof(ArgRegs) && Idx % 2 == 1)
+    State.AllocateReg(ArgRegs);
+  return true;
+}
+
+static bool CC_LoongArchLP32(unsigned ValNo, MVT ValVT, MVT LocVT,
+                             CCValAssign::LocInfo LocInfo,
+                             ISD::ArgFlagsTy ArgFlags,
+                             CCState &State) LLVM_ATTRIBUTE_UNUSED;
+
+#include "LoongArchGenCallingConv.inc"
+
+CCAssignFn *LoongArchTargetLowering::CCAssignFnForCall() const {
+  return CC_LoongArch;
+}
+
+CCAssignFn *LoongArchTargetLowering::CCAssignFnForReturn() const {
+  return RetCC_LoongArch;
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+SDValue LoongArchTargetLowering::passArgOnStack(SDValue StackPtr,
+                                                unsigned Offset, SDValue Chain,
+                                                SDValue Arg, const SDLoc &DL,
+                                                bool IsTailCall,
+                                                SelectionDAG &DAG) const {
+  if (!IsTailCall) {
+    SDValue PtrOff =
+        DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr,
+                    DAG.getIntPtrConstant(Offset, DL));
+    return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo());
+  }
+
+  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+  int FI = MFI.CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false);
+  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+  return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(),
+                      /* Alignment = */ 0, MachineMemOperand::MOVolatile);
+}
+
+void LoongArchTargetLowering::getOpndList(
+    SmallVectorImpl<SDValue> &Ops,
+    std::deque<std::pair<unsigned, SDValue>> &RegsToPass, bool IsPICCall,
+    bool GlobalOrExternal, bool IsCallReloc, CallLoweringInfo &CLI,
+    SDValue Callee, SDValue Chain, bool IsTailCall) const {
+  // Build a sequence of copy-to-reg nodes chained together with token
+  // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+  SDValue InFlag;
+
+  Ops.push_back(Callee);
+
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+                                 RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+                                      RegsToPass[i].second.getValueType()));
+
+  if (!IsTailCall) {
+    // Add a register mask operand representing the call-preserved registers.
+    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+    const uint32_t *Mask =
+        TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv);
+    assert(Mask && "Missing call preserved mask for calling convention");
+    Ops.push_back(CLI.DAG.getRegisterMask(Mask));
+  }
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+}
+
+void LoongArchTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                                            SDNode *Node) const {
+  switch (MI.getOpcode()) {
+  default:
+    return;
+  }
+}
+
+/// LowerCall - function arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
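
CC_LoongArch_F128 above enforces an even-register start for 128-bit values by burning one register when the next free slot is odd. A toy model of that rule, assuming the eight-entry a0-a7 array from the code:

#include <cstdio>

// Returns the register index a 128-bit value would start at, or -1 if it
// spills to the stack. Models only the even-alignment skip, not the rest
// of the calling convention.
static int allocF128(int FirstFree) {
  if (FirstFree >= 8)
    return -1;                          // no registers left
  if (FirstFree % 2 == 1)
    ++FirstFree;                        // skip the odd register
  return FirstFree + 2 <= 8 ? FirstFree : -1;
}

int main() {
  std::printf("%d\n", allocF128(0)); // 0: a0/a1
  std::printf("%d\n", allocF128(3)); // 4: a3 is skipped, a4/a5 used
}
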
+SDValue +LoongArchTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc DL = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); + bool IsPIC = isPositionIndependent(); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + LoongArchCCState CCInfo( + CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), + LoongArchCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget)); + + const ExternalSymbolSDNode *ES = + dyn_cast_or_null(Callee.getNode()); + + // There is one case where CALLSEQ_START..CALLSEQ_END can be nested, which + // is during the lowering of a call with a byval argument which produces + // a call to memcpy. For the LP32 case, this causes the caller to allocate + // stack space for the reserved argument area for the callee, then recursively + // again for the memcpy call. In the NEWABI case, this doesn't occur as those + // ABIs mandate that the callee allocates the reserved argument area. We do + // still produce nested CALLSEQ_START..CALLSEQ_END with zero space though. + // + // If the callee has a byval argument and memcpy is used, we are mandated + // to already have produced a reserved argument area for the callee for LP32. + // Therefore, the reserved argument area can be reused for both calls. + // + // Other cases of calling memcpy cannot have a chain with a CALLSEQ_START + // present, as we have yet to hook that node onto the chain. + // + // Hence, the CALLSEQ_START and CALLSEQ_END nodes can be eliminated in this + // case. GCC does a similar trick, in that wherever possible, it calculates + // the maximum out going argument area (including the reserved area), and + // preallocates the stack space on entrance to the caller. + // + // FIXME: We should do the same for efficiency and space. + + bool MemcpyInByVal = ES && + StringRef(ES->getSymbol()) == StringRef("memcpy") && + Chain.getOpcode() == ISD::CALLSEQ_START; + + CCInfo.AnalyzeCallOperands(Outs, CC_LoongArch, CLI.getArgs(), + ES ? ES->getSymbol() : nullptr); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NextStackOffset = CCInfo.getNextStackOffset(); + + // Check if it's really possible to do a tail call. Restrict it to functions + // that are part of this compilation unit. + if (IsTailCall) { + IsTailCall = isEligibleForTailCallOptimization( + CCInfo, CLI, MF, NextStackOffset, *MF.getInfo()); + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + if (G->getGlobal()->hasExternalWeakLinkage()) + IsTailCall = false; + } + } + if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); + + if (IsTailCall) + ++NumTailCalls; + + // Chain is the output chain of the last Load/Store or CopyToReg node. + // ByValChain is the output chain of the last Memcpy node created for copying + // byval arguments to the stack. 
+  unsigned StackAlignment = TFL->getStackAlignment();
+  NextStackOffset = alignTo(NextStackOffset, StackAlignment);
+  SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true);
+
+  if (!(IsTailCall || MemcpyInByVal))
+    Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL);
+
+  SDValue StackPtr =
+      DAG.getCopyFromReg(Chain, DL,
+                         ABI.IsLP64() ? LoongArch::SP_64 : LoongArch::SP,
+                         getPointerTy(DAG.getDataLayout()));
+
+  std::deque<std::pair<unsigned, SDValue>> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+
+  CCInfo.rewindByValRegsInfo();
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    SDValue Arg = OutVals[i];
+    CCValAssign &VA = ArgLocs[i];
+    MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT();
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    bool UseUpperBits = false;
+
+    // ByVal Arg.
+    if (Flags.isByVal()) {
+      unsigned FirstByValReg, LastByValReg;
+      unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+      CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
+      assert(Flags.getByValSize() &&
+             "ByVal args of size 0 should have been ignored by front-end.");
+      assert(ByValIdx < CCInfo.getInRegsParamsCount());
+      assert(!IsTailCall &&
+             "Do not tail-call optimize if there is a byval argument.");
+      passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg,
+                   FirstByValReg, LastByValReg, Flags, VA);
+      CCInfo.nextInRegsParam();
+      continue;
+    }
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      if (VA.isRegLoc()) {
+        if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
+            (ValVT == MVT::f64 && LocVT == MVT::i64) ||
+            (ValVT == MVT::i64 && LocVT == MVT::f64))
+          Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
+      }
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
+      break;
+    case CCValAssign::SExtUpper:
+      UseUpperBits = true;
+      LLVM_FALLTHROUGH;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
+      break;
+    case CCValAssign::ZExtUpper:
+      UseUpperBits = true;
+      LLVM_FALLTHROUGH;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
+      break;
+    case CCValAssign::AExtUpper:
+      UseUpperBits = true;
+      LLVM_FALLTHROUGH;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
+      break;
+    }
+
+    if (UseUpperBits) {
+      unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits();
+      unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+      Arg = DAG.getNode(
+          ISD::SHL, DL, VA.getLocVT(), Arg,
+          DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
+    }
+
+    // Arguments that can be passed in registers must be kept in the
+    // RegsToPass vector.
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      continue;
+    }
+
+    // Register can't get to this point...
+    assert(VA.isMemLoc());
+
+    // Emit an ISD::STORE which stores the
+    // parameter value to a stack location.
+    MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(),
+                                         Chain, Arg, DL, IsTailCall, DAG));
+  }
+
+  // Transform all store nodes into one single node because all store
+  // nodes are independent of each other.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
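
The *Upper location kinds used in the promotion switch above mean the narrow value travels in the high bits of the wider slot: the sender shifts left by LocSize - ValSize, and the receiver shifts back down with a logical or arithmetic shift depending on zero- versus sign-extension. The round trip in plain integer arithmetic:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t Val = -2;
  // Sender side: SHL by (64 - 32) places the i32 in the upper half.
  uint64_t Loc = (uint64_t)(uint32_t)Val << (64 - 32);
  // Receiver side: SRA recovers a sign-extended value, SRL a zero-extended one.
  int32_t BackS = (int32_t)((int64_t)Loc >> (64 - 32));
  uint32_t BackZ = (uint32_t)(Loc >> (64 - 32));
  std::printf("%d %u\n", BackS, BackZ); // -2 4294967294
}
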
+ + bool GlobalOrExternal = false, IsCallReloc = false; + + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, + getPointerTy(DAG.getDataLayout()), 0, + LoongArchII::MO_NO_FLAG); + GlobalOrExternal = true; + } + else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + const char *Sym = S->getSymbol(); + Callee = DAG.getTargetExternalSymbol( + Sym, getPointerTy(DAG.getDataLayout()), LoongArchII::MO_NO_FLAG); + + GlobalOrExternal = true; + } + + SmallVector Ops(1, Chain); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + getOpndList(Ops, RegsToPass, IsPIC, GlobalOrExternal, IsCallReloc, CLI, + Callee, Chain, IsTailCall); + + if (IsTailCall) { + MF.getFrameInfo().setHasTailCall(); + return DAG.getNode(LoongArchISD::TailCall, DL, MVT::Other, Ops); + } + + Chain = DAG.getNode(LoongArchISD::JmpLink, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); + SDValue InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node in the case of where it is not a call to + // memcpy. + if (!(MemcpyInByVal)) { + Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, + DAG.getIntPtrConstant(0, DL, true), InFlag, DL); + InFlag = Chain.getValue(1); + } + + // Handle result values, copying them out of physregs into vregs that we + // return. + return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, + InVals, CLI); +} + +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +SDValue LoongArchTargetLowering::LowerCallResult( + SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl &InVals, + TargetLowering::CallLoweringInfo &CLI) const { + // Assign locations to each value returned by this call. + SmallVector RVLocs; + LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + const ExternalSymbolSDNode *ES = + dyn_cast_or_null(CLI.Callee.getNode()); + CCInfo.AnalyzeCallResult(Ins, RetCC_LoongArch, CLI.RetTy, + ES ? ES->getSymbol() : nullptr); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(), + RVLocs[i].getLocVT(), InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + + if (VA.isUpperBitsInLoc()) { + unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits(); + unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); + unsigned Shift = + VA.getLocInfo() == CCValAssign::ZExtUpper ? 
ISD::SRL : ISD::SRA;
+      Val = DAG.getNode(
+          Shift, DL, VA.getLocVT(), Val,
+          DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
+    }
+
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::BCvt:
+      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+      break;
+    case CCValAssign::AExt:
+    case CCValAssign::AExtUpper:
+      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+      break;
+    case CCValAssign::ZExt:
+    case CCValAssign::ZExtUpper:
+      Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val,
+                        DAG.getValueType(VA.getValVT()));
+      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+      break;
+    case CCValAssign::SExt:
+    case CCValAssign::SExtUpper:
+      Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val,
+                        DAG.getValueType(VA.getValVT()));
+      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+      break;
+    }
+
+    InVals.push_back(Val);
+  }
+
+  return Chain;
+}
+
+static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA,
+                                      EVT ArgVT, const SDLoc &DL,
+                                      SelectionDAG &DAG) {
+  MVT LocVT = VA.getLocVT();
+  EVT ValVT = VA.getValVT();
+
+  // Shift into the upper bits if necessary.
+  switch (VA.getLocInfo()) {
+  default:
+    break;
+  case CCValAssign::AExtUpper:
+  case CCValAssign::SExtUpper:
+  case CCValAssign::ZExtUpper: {
+    unsigned ValSizeInBits = ArgVT.getSizeInBits();
+    unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+    unsigned Opcode =
+        VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
+    Val = DAG.getNode(
+        Opcode, DL, VA.getLocVT(), Val,
+        DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
+    break;
+  }
+  }
+
+  // If this is a value smaller than the argument slot size (32-bit for LP32,
+  // 64-bit for LPX32/LP64), it has been promoted in some way to the argument
+  // slot size. Extract the value and insert any appropriate assertions
+  // regarding sign/zero extension.
+  switch (VA.getLocInfo()) {
+  default:
+    llvm_unreachable("Unknown loc info!");
+  case CCValAssign::Full:
+    break;
+  case CCValAssign::AExtUpper:
+  case CCValAssign::AExt:
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+    break;
+  case CCValAssign::SExtUpper:
+  case CCValAssign::SExt: {
+    if ((ArgVT == MVT::i1) || (ArgVT == MVT::i8) || (ArgVT == MVT::i16)) {
+      SDValue SubReg = DAG.getTargetConstant(LoongArch::sub_32, DL, MVT::i32);
+      Val = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, ValVT,
+                                       Val, SubReg),
+                    0);
+    } else {
+      Val = DAG.getNode(ISD::AssertSext, DL, LocVT, Val,
+                        DAG.getValueType(ValVT));
+      Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+    }
+    break;
+  }
+  case CCValAssign::ZExtUpper:
+  case CCValAssign::ZExt:
+    Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT));
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+    break;
+  case CCValAssign::BCvt:
+    Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+    break;
+  }
+
+  return Val;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+/// LowerFormalArguments - transform physical registers into virtual registers
+/// and generate load operations for arguments placed on the stack.
+SDValue LoongArchTargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  LoongArchFunctionInfo *LoongArchFI = MF.getInfo<LoongArchFunctionInfo>();
+
+  LoongArchFI->setVarArgsFrameIndex(0);
+
+  // Used with varargs to accumulate store chains.
+  std::vector<SDValue> OutChains;
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  LoongArchCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
+                          *DAG.getContext());
+  const Function &Func = DAG.getMachineFunction().getFunction();
+  Function::const_arg_iterator FuncArg = Func.arg_begin();
+
+  CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_FixedArg);
+  LoongArchFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
+                                CCInfo.getInRegsParamsCount() > 0);
+
+  unsigned CurArgIdx = 0;
+  CCInfo.rewindByValRegsInfo();
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    if (Ins[i].isOrigArg()) {
+      std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx);
+      CurArgIdx = Ins[i].getOrigArgIndex();
+    }
+    EVT ValVT = VA.getValVT();
+    ISD::ArgFlagsTy Flags = Ins[i].Flags;
+    bool IsRegLoc = VA.isRegLoc();
+
+    if (Flags.isByVal()) {
+      assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit");
+      unsigned FirstByValReg, LastByValReg;
+      unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
+      CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
+
+      assert(Flags.getByValSize() &&
+             "ByVal args of size 0 should have been ignored by front-end.");
+      assert(ByValIdx < CCInfo.getInRegsParamsCount());
+      copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg,
+                    FirstByValReg, LastByValReg, VA, CCInfo);
+      CCInfo.nextInRegsParam();
+      continue;
+    }
+
+    // Arguments stored on registers
+    if (IsRegLoc) {
+      MVT RegVT = VA.getLocVT();
+      unsigned ArgReg = VA.getLocReg();
+      const TargetRegisterClass *RC = getRegClassFor(RegVT);
+
+      // Transform the arguments stored on
+      // physical registers into virtual ones
+      unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
+
+      ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
+
+      // Handle floating point arguments passed in integer registers and
+      // long double arguments passed in floating point registers.
+      if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
+          (RegVT == MVT::i64 && ValVT == MVT::f64) ||
+          (RegVT == MVT::f64 && ValVT == MVT::i64))
+        ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
+      else if (ABI.IsLP32() && RegVT == MVT::i32 && ValVT == MVT::f64) {
+        // TODO: lp32
+      }
+
+      InVals.push_back(ArgValue);
+    } else { // VA.isMemLoc()
+      MVT LocVT = VA.getLocVT();
+
+      if (ABI.IsLP32()) {
+        // We ought to be able to use LocVT directly but LP32 sets it to i32
+        // when allocating floating point values to integer registers.
+        // This shouldn't influence how we load the value into registers unless
+        // we are targeting softfloat.
+        if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat())
+          LocVT = VA.getValVT();
+      }
+
+      // Sanity check.
+      assert(VA.isMemLoc());
+
+      // The stack pointer offset is relative to the caller stack frame.
+ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, + VA.getLocMemOffset(), true); + + // Create load nodes to retrieve arguments from the stack + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue ArgValue = DAG.getLoad( + LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); + OutChains.push_back(ArgValue.getValue(1)); + + ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); + + InVals.push_back(ArgValue); + } + } + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + // The loongarch ABIs for returning structs by value requires that we copy + // the sret argument into $v0 for the return. Save the argument into + // a virtual register so that we can access it from the return points. + if (Ins[i].Flags.isSRet()) { + unsigned Reg = LoongArchFI->getSRetReturnReg(); + if (!Reg) { + Reg = MF.getRegInfo().createVirtualRegister( + getRegClassFor(ABI.IsLP64() ? MVT::i64 : MVT::i32)); + LoongArchFI->setSRetReturnReg(Reg); + } + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); + break; + } + } + + if (IsVarArg) + writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo); + + // All stores are grouped in one node to allow the matching between + // the size of Ins and InVals. This only happens when on varg functions + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + } + + return Chain; +} + +//===----------------------------------------------------------------------===// +// Return Value Calling Convention Implementation +//===----------------------------------------------------------------------===// + +bool +LoongArchTargetLowering::CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool IsVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const { + SmallVector RVLocs; + LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_LoongArch); +} + +bool +LoongArchTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { + if ((ABI.IsLPX32() || ABI.IsLP64()) && Type == MVT::i32) + return true; + + return IsSigned; } -static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, - MachineBasicBlock &MBB, - const TargetInstrInfo &TII) { - if (!ZeroDivCheck) - return &MBB; +SDValue +LoongArchTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &DL, SelectionDAG &DAG) const { + // CCValAssign - represent the assignment of + // the return value to a location + SmallVector RVLocs; + MachineFunction &MF = DAG.getMachineFunction(); - // Build instructions: - // div(or mod) $dst, $dividend, $divisor - // bnez $divisor, 8 - // break 7 - // fallthrough - MachineOperand &Divisor = MI.getOperand(2); - auto FallThrough = std::next(MI.getIterator()); + // CCState - Info about the registers and stack slot. + LoongArchCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); - BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BNEZ)) - .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())) - .addImm(8); + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_LoongArch); - // See linux header file arch/loongarch/include/uapi/asm/break.h for the - // definition of BRK_DIVZERO. 
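
shouldSignExtendTypeInLibCall above answers true for i32 under LPX32/LP64 because, as on other 64-bit RISC targets, a 32-bit value is expected to sit sign-extended in its 64-bit register. A small demonstration of the bit pattern a libcall would receive, assuming that convention:

#include <cstdint>
#include <cstdio>

int main() {
  // Even an "unsigned" 32-bit argument is handed over sign-extended, so the
  // register holds the pattern below rather than 0x0000000080000000.
  uint32_t U = 0x80000000u;
  int64_t InReg = (int64_t)(int32_t)U;
  std::printf("%016llx\n", (unsigned long long)InReg); // ffffffff80000000
}
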
- BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BREAK)) - .addImm(7/*BRK_DIVZERO*/); + SDValue Flag; + SmallVector RetOps(1, Chain); - // Clear Divisor's kill flag. - Divisor.setIsKill(false); + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + SDValue Val = OutVals[i]; + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + bool UseUpperBits = false; - return &MBB; + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val); + break; + case CCValAssign::AExtUpper: + UseUpperBits = true; + LLVM_FALLTHROUGH; + case CCValAssign::AExt: + Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val); + break; + case CCValAssign::ZExtUpper: + UseUpperBits = true; + LLVM_FALLTHROUGH; + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val); + break; + case CCValAssign::SExtUpper: + UseUpperBits = true; + LLVM_FALLTHROUGH; + case CCValAssign::SExt: + Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val); + break; + } + + if (UseUpperBits) { + unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); + unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); + Val = DAG.getNode( + ISD::SHL, DL, VA.getLocVT(), Val, + DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); + } + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + // The loongarch ABIs for returning structs by value requires that we copy + // the sret argument into $v0 for the return. We saved the argument into + // a virtual register in the entry block, so now we copy the value out + // and into $v0. + if (MF.getFunction().hasStructRetAttr()) { + LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); + unsigned Reg = LoongArchFI->getSRetReturnReg(); + + if (!Reg) + llvm_unreachable("sret virtual register not created in the entry block"); + SDValue Val = + DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout())); + unsigned A0 = ABI.IsLP64() ? LoongArch::A0_64 : LoongArch::A0; + + Chain = DAG.getCopyToReg(Chain, DL, A0, Val, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(A0, getPointerTy(DAG.getDataLayout()))); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + // Standard return on LoongArch is a "jr $ra" + return DAG.getNode(LoongArchISD::Ret, DL, MVT::Other, RetOps); } -MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( - MachineInstr &MI, MachineBasicBlock *BB) const { +//===----------------------------------------------------------------------===// +// LoongArch Inline Assembly Support +//===----------------------------------------------------------------------===// - switch (MI.getOpcode()) { +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
+LoongArchTargetLowering::ConstraintType +LoongArchTargetLowering::getConstraintType(StringRef Constraint) const { + // LoongArch specific constraints + // GCC config/loongarch/constraints.md + // + // 'f': Floating Point register + // 'G': Floating-point 0 + // 'l': Signed 16-bit constant + // 'R': Memory address that can be used in a non-macro load or store + // "ZC" Memory address with 16-bit and 4 bytes aligned offset + // "ZB" Memory address with 0 offset + + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default : break; + case 'f': + return C_RegisterClass; + case 'l': + case 'G': + return C_Other; + case 'R': + return C_Memory; + } + } + + if (Constraint == "ZC" || Constraint == "ZB") + return C_Memory; + + return TargetLowering::getConstraintType(Constraint); +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +LoongArchTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (!CallOperandVal) + return CW_Default; + Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { default: - llvm_unreachable("Unexpected instr type to insert"); - case LoongArch::DIV_W: - case LoongArch::DIV_WU: - case LoongArch::MOD_W: - case LoongArch::MOD_WU: - case LoongArch::DIV_D: - case LoongArch::DIV_DU: - case LoongArch::MOD_D: - case LoongArch::MOD_DU: - return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo()); + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'f': // FPU + if (Subtarget.hasLSX() && type->isVectorTy() && + type->getPrimitiveSizeInBits() == 128) + weight = CW_Register; + else if (Subtarget.hasLASX() && type->isVectorTy() && + type->getPrimitiveSizeInBits() == 256) + weight = CW_Register; + else if (type->isFloatTy()) + weight = CW_Register; + break; + case 'l': // signed 16 bit immediate + case 'I': // signed 12 bit immediate + case 'J': // integer zero + case 'G': // floating-point zero + case 'K': // unsigned 12 bit immediate + if (isa(CallOperandVal)) + weight = CW_Constant; + break; + case 'm': + case 'R': + weight = CW_Memory; break; } + return weight; } -const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((LoongArchISD::NodeType)Opcode) { - case LoongArchISD::FIRST_NUMBER: - break; - -#define NODE_NAME_CASE(node) \ - case LoongArchISD::node: \ - return "LoongArchISD::" #node; - - // TODO: Add more target-dependent nodes later. - NODE_NAME_CASE(CALL) - NODE_NAME_CASE(RET) - NODE_NAME_CASE(SLL_W) - NODE_NAME_CASE(SRA_W) - NODE_NAME_CASE(SRL_W) - NODE_NAME_CASE(BSTRINS) - NODE_NAME_CASE(BSTRPICK) - NODE_NAME_CASE(MOVGR2FR_W_LA64) - NODE_NAME_CASE(MOVFR2GR_S_LA64) - NODE_NAME_CASE(FTINT) - } -#undef NODE_NAME_CASE +/// This is a helper function to parse a physical register string and split it +/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag +/// that is returned indicates whether parsing was successful. The second flag +/// is true if the numeric part exists. 
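
The helper described above does ordinary string surgery: strip the braces, split at the first digit, parse the numeric tail. A standalone re-implementation with the same shape checks, simplified to a single success flag:

#include <cctype>
#include <cstdio>
#include <string>

// Parses "{$r12}"-style register strings into a prefix ("$r") and a number
// (12). Returns false when the braces are missing or there is no number.
static bool parsePhysReg(const std::string &C, std::string &Prefix,
                         unsigned &Reg) {
  if (C.size() < 3 || C.front() != '{' || C.back() != '}')
    return false;
  size_t I = 1;
  while (I + 1 < C.size() && !std::isdigit((unsigned char)C[I]))
    ++I;                                  // find the first digit
  Prefix = C.substr(1, I - 1);
  if (I + 1 == C.size())
    return false;                         // no numeric part
  Reg = (unsigned)std::stoul(C.substr(I, C.size() - 1 - I));
  return true;
}

int main() {
  std::string P;
  unsigned R;
  if (parsePhysReg("{$f5}", P, R))
    std::printf("%s %u\n", P.c_str(), R); // $f 5
}
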
+static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, + unsigned long long &Reg) { + if (C.empty() || C.front() != '{' || C.back() != '}') + return std::make_pair(false, false); + + // Search for the first numeric character. + StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; + I = std::find_if(B, E, isdigit); + + Prefix = StringRef(B, I - B); + + // The second flag is set to false if no numeric characters were found. + if (I == E) + return std::make_pair(true, false); + + // Parse the numeric characters. + return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg), + true); +} + +EVT LoongArchTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, + ISD::NodeType) const { + bool Cond = !Subtarget.isABI_LP32() && VT.getSizeInBits() == 32; + EVT MinVT = getRegisterType(Context, Cond ? MVT::i64 : MVT::i32); + return VT.bitsLT(MinVT) ? MinVT : VT; +} + +static const TargetRegisterClass *getRegisterClassForVT(MVT VT, bool Is64Bit) { + // Newer llvm versions (>= 12) do not require simple VTs for constraints and + // they use MVT::Other for constraints with complex VTs. For more details, + // please see https://reviews.llvm.org/D91710. + if (VT == MVT::Other || VT.getSizeInBits() <= 32) + return &LoongArch::GPR32RegClass; + if (VT.getSizeInBits() <= 64) + return Is64Bit ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; return nullptr; } -//===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// -// FIXME: Now, we only support CallingConv::C with fixed arguments which are -// passed with integer or floating-point registers. -const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6, - LoongArch::R7, LoongArch::R8, LoongArch::R9, - LoongArch::R10, LoongArch::R11}; -const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2, - LoongArch::F3, LoongArch::F4, LoongArch::F5, - LoongArch::F6, LoongArch::F7}; -const MCPhysReg ArgFPR64s[] = { - LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, - LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; - -// Implements the LoongArch calling convention. Returns true upon failure. -static bool CC_LoongArch(unsigned ValNo, MVT ValVT, - CCValAssign::LocInfo LocInfo, CCState &State) { - // Allocate to a register if possible. - Register Reg; - - if (ValVT == MVT::f32) - Reg = State.AllocateReg(ArgFPR32s); - else if (ValVT == MVT::f64) - Reg = State.AllocateReg(ArgFPR64s); - else - Reg = State.AllocateReg(ArgGPRs); - if (Reg) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, ValVT, LocInfo)); - return false; +std::pair LoongArchTargetLowering:: +parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { + const TargetRegisterInfo *TRI = + Subtarget.getRegisterInfo(); + const TargetRegisterClass *RC; + StringRef Prefix; + unsigned long long Reg; + + std::pair R = parsePhysicalReg(C, Prefix, Reg); + + if (!R.first) + return std::make_pair(0U, nullptr); + + if (!R.second) + return std::make_pair(0U, nullptr); + + if (Prefix == "$f") { // Parse $f0-$f31. + // If the size of FP registers is 64-bit or Reg is an even number, select + // the 64-bit register class. Otherwise, select the 32-bit register class. + if (VT == MVT::Other) + VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32; + + RC = getRegClassFor(VT); + } + else if (Prefix == "$vr") { // Parse $vr0-$vr31. 
+ RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); + } + else if (Prefix == "$xr") { // Parse $xr0-$xr31. + RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); + } + else if (Prefix == "$fcc") // Parse $fcc0-$fcc7. + RC = TRI->getRegClass(LoongArch::FCFRRegClassID); + else { // Parse $r0-$r31. + assert(Prefix == "$r"); + if ((RC = getRegisterClassForVT(VT, Subtarget.is64Bit())) == nullptr) { + // This will generate an error message. + return std::make_pair(0U, nullptr); + } } - // TODO: Handle arguments passed without register. - return true; + assert(Reg < RC->getNumRegs()); + + if (RC == &LoongArch::GPR64RegClass || RC == &LoongArch::GPR32RegClass) { + // Sync with the GPR32/GPR64 RegisterClass in LoongArchRegisterInfo.td + // that just like LoongArchAsmParser.cpp + switch (Reg) { + case 0: return std::make_pair(*(RC->begin() + 0), RC); // r0 + case 1: return std::make_pair(*(RC->begin() + 27), RC); // r1 + case 2: return std::make_pair(*(RC->begin() + 28), RC); // r2 + case 3: return std::make_pair(*(RC->begin() + 29), RC); // r3 + case 4: return std::make_pair(*(RC->begin() + 1), RC); // r4 + case 5: return std::make_pair(*(RC->begin() + 2), RC); // r5 + case 6: return std::make_pair(*(RC->begin() + 3), RC); // r6 + case 7: return std::make_pair(*(RC->begin() + 4), RC); // r7 + case 8: return std::make_pair(*(RC->begin() + 5), RC); // r8 + case 9: return std::make_pair(*(RC->begin() + 6), RC); // r9 + case 10: return std::make_pair(*(RC->begin() + 7), RC); // r10 + case 11: return std::make_pair(*(RC->begin() + 8), RC); // r11 + case 12: return std::make_pair(*(RC->begin() + 9), RC); // r12 + case 13: return std::make_pair(*(RC->begin() + 10), RC); // r13 + case 14: return std::make_pair(*(RC->begin() + 11), RC); // r14 + case 15: return std::make_pair(*(RC->begin() + 12), RC); // r15 + case 16: return std::make_pair(*(RC->begin() + 13), RC); // r16 + case 17: return std::make_pair(*(RC->begin() + 14), RC); // r17 + case 18: return std::make_pair(*(RC->begin() + 15), RC); // r18 + case 19: return std::make_pair(*(RC->begin() + 16), RC); // r19 + case 20: return std::make_pair(*(RC->begin() + 17), RC); // r20 + case 21: return std::make_pair(*(RC->begin() + 30), RC); // r21 + case 22: return std::make_pair(*(RC->begin() + 31), RC); // r22 + case 23: return std::make_pair(*(RC->begin() + 18), RC); // r23 + case 24: return std::make_pair(*(RC->begin() + 19), RC); // r24 + case 25: return std::make_pair(*(RC->begin() + 20), RC); // r25 + case 26: return std::make_pair(*(RC->begin() + 21), RC); // r26 + case 27: return std::make_pair(*(RC->begin() + 22), RC); // r27 + case 28: return std::make_pair(*(RC->begin() + 23), RC); // r28 + case 29: return std::make_pair(*(RC->begin() + 24), RC); // r29 + case 30: return std::make_pair(*(RC->begin() + 25), RC); // r30 + case 31: return std::make_pair(*(RC->begin() + 26), RC); // r31 + } + } + return std::make_pair(*(RC->begin() + Reg), RC); } -void LoongArchTargetLowering::analyzeInputArgs( - CCState &CCInfo, const SmallVectorImpl &Ins, - LoongArchCCAssignFn Fn) const { - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - MVT ArgVT = Ins[i].VT; +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. 
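
The switch above exists because, judging by its contents, the GPR register classes list the allocatable registers first and the reserved ones (r1/ra, r2/tp, r3/sp, r21, r22/fp) at the end, so an architectural register number from an asm constraint has to be remapped to its position in the class. The same mapping as a lookup table, with values copied from the switch:

#include <cstdio>

// ClassPos[n] is the position of architectural register r<n> inside the
// GPR32/GPR64 register class, mirroring the switch in
// parseRegForInlineAsmConstraint.
static const int ClassPos[32] = {
    0, 27, 28, 29,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
   13, 14, 15, 16, 17, 30, 31, 18, 19, 20, 21, 22, 23, 24, 25, 26};

int main() {
  std::printf("r1  -> class position %d\n", ClassPos[1]);  // 27
  std::printf("r23 -> class position %d\n", ClassPos[23]); // 18
}
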
+std::pair +LoongArchTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return std::make_pair(0U, getRegisterClassForVT(VT, Subtarget.is64Bit())); + case 'f': // FPU or LSX register + if (VT == MVT::v16i8) + return std::make_pair(0U, &LoongArch::LSX128BRegClass); + else if (VT == MVT::v8i16) + return std::make_pair(0U, &LoongArch::LSX128HRegClass); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return std::make_pair(0U, &LoongArch::LSX128WRegClass); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return std::make_pair(0U, &LoongArch::LSX128DRegClass); + else if (VT == MVT::v32i8) + return std::make_pair(0U, &LoongArch::LASX256BRegClass); + else if (VT == MVT::v16i16) + return std::make_pair(0U, &LoongArch::LASX256HRegClass); + else if (VT == MVT::v8i32 || VT == MVT::v8f32) + return std::make_pair(0U, &LoongArch::LASX256WRegClass); + else if (VT == MVT::v4i64 || VT == MVT::v4f64) + return std::make_pair(0U, &LoongArch::LASX256DRegClass); + else if (VT == MVT::f32) + return std::make_pair(0U, &LoongArch::FGR32RegClass); + else if (VT == MVT::f64) + return std::make_pair(0U, &LoongArch::FGR64RegClass); + break; + } + } + + std::pair R; + R = parseRegForInlineAsmConstraint(Constraint, VT); + + if (R.second) + return R; + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} - if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) { - LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString() << '\n'); - llvm_unreachable(""); +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. +void LoongArchTargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector&Ops, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Result; + + // Only support length 1 constraints for now. 
+ if (Constraint.length() > 1) return; + + char ConstraintLetter = Constraint[0]; + switch (ConstraintLetter) { + default: break; // This will fall through to the generic implementation + case 'l': // Signed 16 bit constant + // If this fails, the parent routine will give an error + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + int64_t Val = C->getSExtValue(); + if (isInt<16>(Val)) { + Result = DAG.getTargetConstant(Val, DL, Type); + break; + } + } + return; + case 'I': // Signed 12 bit constant + // If this fails, the parent routine will give an error + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + int64_t Val = C->getSExtValue(); + if (isInt<12>(Val)) { + Result = DAG.getTargetConstant(Val, DL, Type); + break; + } + } + return; + case 'J': // integer zero + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + int64_t Val = C->getZExtValue(); + if (Val == 0) { + Result = DAG.getTargetConstant(0, DL, Type); + break; + } } + return; + case 'G': // floating-point zero + if (ConstantFPSDNode *C = dyn_cast(Op)) { + if (C->isZero()) { + EVT Type = Op.getValueType(); + Result = DAG.getTargetConstantFP(0, DL, Type); + break; + } + } + return; + case 'K': // unsigned 12 bit immediate + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + uint64_t Val = (uint64_t)C->getZExtValue(); + if (isUInt<12>(Val)) { + Result = DAG.getTargetConstant(Val, DL, Type); + break; + } + } + return; + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS, Instruction *I) const { + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (!AM.HasBaseReg) // allow "r+i". + break; + return false; // disallow "r+r" or "r+r+i". + default: + return false; } + + return true; } -void LoongArchTargetLowering::analyzeOutputArgs( - CCState &CCInfo, const SmallVectorImpl &Outs, - LoongArchCCAssignFn Fn) const { - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - MVT ArgVT = Outs[i].VT; +bool +LoongArchTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The LoongArch target isn't yet aware of offsets. 
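
The immediate constraints handled in LowerAsmOperandForConstraint above reduce to simple range checks. As plain predicates (a sketch; the real code also materializes the TargetConstant node):

#include <cstdint>
#include <cstdio>

// 'l' wants a signed 16-bit constant, 'I' signed 12-bit, 'K' unsigned
// 12-bit, and 'J' the integer zero.
static bool fitsConstraint(char C, int64_t V) {
  switch (C) {
  case 'l': return V >= -(1 << 15) && V < (1 << 15);
  case 'I': return V >= -(1 << 11) && V < (1 << 11);
  case 'K': return V >= 0 && V < (1 << 12);
  case 'J': return V == 0;
  default:  return false;
  }
}

int main() {
  std::printf("%d %d\n", fitsConstraint('I', 2047),
              fitsConstraint('I', 2048)); // 1 0
}
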
+ return false; +} - if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) { - LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString() << "\n"); - llvm_unreachable(""); +EVT LoongArchTargetLowering::getOptimalMemOpType( + const MemOp &Op, const AttributeList &FuncAttributes) const { + if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) { + if (Op.size() >= 16) { + if (Op.size() >= 32 && Subtarget.hasLASX()) { + return MVT::v32i8; + } + if (Subtarget.hasLSX()) + return MVT::v16i8; } } + + if (Subtarget.is64Bit()) + return MVT::i64; + + return MVT::i32; } -static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, - const CCValAssign &VA, const SDLoc &DL, - const LoongArchTargetLowering &TLI) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - EVT LocVT = VA.getLocVT(); - const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT()); - Register VReg = RegInfo.createVirtualRegister(RC); - RegInfo.addLiveIn(VA.getLocReg(), VReg); +/// isFPImmLegal - Returns true if the target can instruction select the +/// specified FP immediate natively. If false, the legalizer will +/// materialize the FP immediate as a load from a constant pool. +bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { + if (VT != MVT::f32 && VT != MVT::f64) + return false; + if (Imm.isNegZero()) + return false; + return (Imm.isZero() || Imm.isExactlyValue(+1.0)); +} - return DAG.getCopyFromReg(Chain, DL, VReg, LocVT); +bool LoongArchTargetLowering::useSoftFloat() const { + return Subtarget.useSoftFloat(); } -// Transform physical registers into virtual registers. -SDValue LoongArchTargetLowering::LowerFormalArguments( - SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl &Ins, const SDLoc &DL, - SelectionDAG &DAG, SmallVectorImpl &InVals) const { +// Return whether the an instruction can potentially be optimized to a tail +// call. This will cause the optimizers to attempt to move, or duplicate, +// return instructions to help enable tail call optimizations for this +// instruction. +bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { + return CI->isTailCall(); +} +void LoongArchTargetLowering::copyByValRegs( + SDValue Chain, const SDLoc &DL, std::vector &OutChains, + SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, + SmallVectorImpl &InVals, const Argument *FuncArg, + unsigned FirstReg, unsigned LastReg, const CCValAssign &VA, + LoongArchCCState &State) const { MachineFunction &MF = DAG.getMachineFunction(); - - switch (CallConv) { - default: - llvm_unreachable("Unsupported calling convention"); - case CallingConv::C: - break; + MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes(); + unsigned NumRegs = LastReg - FirstReg; + unsigned RegAreaSize = NumRegs * GPRSizeInBytes; + unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize); + int FrameObjOffset; + ArrayRef ByValArgRegs = ABI.GetByValArgRegs(); + + if (RegAreaSize) + FrameObjOffset = -(int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes); + else + FrameObjOffset = VA.getLocMemOffset(); + + // Create frame object. + EVT PtrTy = getPointerTy(DAG.getDataLayout()); + // Make the fixed object stored to mutable so that the load instructions + // referencing it have their memory dependencies added. 
+ // Set the frame object as isAliased which clears the underlying objects + // vector in ScheduleDAGInstrs::buildSchedGraph() resulting in addition of all + // stores as dependencies for loads referencing this fixed object. + int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, false, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + InVals.push_back(FIN); + + if (!NumRegs) + return; + + // Copy arg registers. + MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8); + const TargetRegisterClass *RC = getRegClassFor(RegTy); + + for (unsigned I = 0; I < NumRegs; ++I) { + unsigned ArgReg = ByValArgRegs[FirstReg + I]; + unsigned VReg = addLiveIn(MF, ArgReg, RC); + unsigned Offset = I * GPRSizeInBytes; + SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN, + DAG.getConstant(Offset, DL, PtrTy)); + SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy), + StorePtr, MachinePointerInfo(FuncArg, Offset)); + OutChains.push_back(Store); } +} - // Assign locations to all of the incoming arguments. - SmallVector ArgLocs; - CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); +// Copy byVal arg to registers and stack. +void LoongArchTargetLowering::passByValArg( + SDValue Chain, const SDLoc &DL, + std::deque> &RegsToPass, + SmallVectorImpl &MemOpChains, SDValue StackPtr, + MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg, + unsigned LastReg, const ISD::ArgFlagsTy &Flags, + const CCValAssign &VA) const { + unsigned ByValSizeInBytes = Flags.getByValSize(); + unsigned OffsetInBytes = 0; // From beginning of struct + unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); + Align Alignment = + std::min(Flags.getNonZeroByValAlign(), Align(RegSizeInBytes)); + EVT PtrTy = getPointerTy(DAG.getDataLayout()), + RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); + unsigned NumRegs = LastReg - FirstReg; + + if (NumRegs) { + ArrayRef ArgRegs = ABI.GetByValArgRegs(); + bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes); + unsigned I = 0; + + // Copy words to registers. + for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) { + SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, + DAG.getConstant(OffsetInBytes, DL, PtrTy)); + SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, + MachinePointerInfo(), Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + unsigned ArgReg = ArgRegs[FirstReg + I]; + RegsToPass.push_back(std::make_pair(ArgReg, LoadVal)); + } - analyzeInputArgs(CCInfo, Ins, CC_LoongArch); + // Return if the struct has been fully copied. + if (ByValSizeInBytes == OffsetInBytes) + return; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) - InVals.push_back(unpackFromRegLoc(DAG, Chain, ArgLocs[i], DL, *this)); + // Copy the remainder of the byval argument with sub-word loads and shifts. + if (LeftoverBytes) { + SDValue Val; - return Chain; -} + for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0; + OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) { + unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes; -// Lower a call to a callseq_start + CALL + callseq_end chain, and add input -// and output parameter nodes. 
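
The leftover-bytes path that the loop above is entering assembles the tail of a byval struct from progressively smaller zero-extended loads, each shifted by the number of bytes already loaded and OR'd into the accumulator. A little-endian model of that assembly (LoongArch is little-endian; RegSize of 8 assumed, so the tail loads go 4, 2, 1):

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint64_t packTail(const uint8_t *Src, unsigned TailBytes) {
  uint64_t Val = 0;
  unsigned Off = 0;
  for (unsigned Load = 4; Off < TailBytes; Load /= 2) {
    if (TailBytes - Off < Load)
      continue;                           // halve the load size and retry
    uint64_t Piece = 0;
    std::memcpy(&Piece, Src + Off, Load); // zero-extending sub-word load
    Val |= Piece << (Off * 8);            // shift into place and merge
    Off += Load;
  }
  return Val;
}

int main() {
  uint8_t Tail[7] = {1, 2, 3, 4, 5, 6, 7};
  std::printf("%016llx\n", (unsigned long long)packTail(Tail, 7));
  // 0007060504030201
}
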
-SDValue -LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const { - SelectionDAG &DAG = CLI.DAG; - SDLoc &DL = CLI.DL; - SmallVectorImpl &Outs = CLI.Outs; - SmallVectorImpl &OutVals = CLI.OutVals; - SmallVectorImpl &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - CallingConv::ID CallConv = CLI.CallConv; - bool IsVarArg = CLI.IsVarArg; - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - CLI.IsTailCall = false; + if (RemainingSizeInBytes < LoadSizeInBytes) + continue; - if (IsVarArg) - report_fatal_error("LowerCall with varargs not implemented"); + // Load subword. + SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, + DAG.getConstant(OffsetInBytes, DL, + PtrTy)); + SDValue LoadVal = DAG.getExtLoad( + ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(), + MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); - MachineFunction &MF = DAG.getMachineFunction(); + // Shift the loaded value. + unsigned Shamt; - // Analyze the operands of the call, assigning locations to each operand. - SmallVector ArgLocs; - CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + Shamt = TotalBytesLoaded * 8; - analyzeOutputArgs(ArgCCInfo, Outs, CC_LoongArch); + SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, + DAG.getConstant(Shamt, DL, MVT::i32)); - // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = ArgCCInfo.getNextStackOffset(); + if (Val.getNode()) + Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift); + else + Val = Shift; - for (auto &Arg : Outs) { - if (!Arg.Flags.isByVal()) - continue; - report_fatal_error("Passing arguments byval not implemented"); + OffsetInBytes += LoadSizeInBytes; + TotalBytesLoaded += LoadSizeInBytes; + Alignment = std::min(Alignment, Align(LoadSizeInBytes)); + } + + unsigned ArgReg = ArgRegs[FirstReg + I]; + RegsToPass.push_back(std::make_pair(ArgReg, Val)); + return; + } } - Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); + // Copy remainder of byval arg to it with memcpy. + unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes; + SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, + DAG.getConstant(OffsetInBytes, DL, PtrTy)); + SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); + Chain = DAG.getMemcpy( + Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy), + Align(Alignment), /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); + MemOpChains.push_back(Chain); +} - // Copy argument values to their designated locations. - SmallVector> RegsToPass; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - SDValue ArgValue = OutVals[i]; +void LoongArchTargetLowering::writeVarArgRegs(std::vector &OutChains, + SDValue Chain, const SDLoc &DL, + SelectionDAG &DAG, + CCState &State) const { + ArrayRef ArgRegs = ABI.GetVarArgRegs(); + unsigned Idx = State.getFirstUnallocated(ArgRegs); + unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); + MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); + const TargetRegisterClass *RC = getRegClassFor(RegTy); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + LoongArchFunctionInfo *LoongArchFI = MF.getInfo(); - // Promote the value if needed. - // For now, only handle fully promoted arguments. 
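// The leftover-bytes path above packs a partial trailing word with
// progressively halved zero-extending loads, shifted and OR-ed together. A
// sketch in plain C++ (little-endian layout assumed, matching the shift
// amounts used by passByValArg; names here are illustrative):

#include <cstdint>
#include <cstring>

uint64_t packLeftoverBytes(const uint8_t *P, unsigned Remaining) {
  uint64_t Val = 0;
  unsigned TotalLoaded = 0;
  for (unsigned LoadSize = 4; Remaining; LoadSize /= 2) {
    if (Remaining < LoadSize)
      continue;                    // try the next smaller power-of-two load
    uint64_t Piece = 0;
    std::memcpy(&Piece, P + TotalLoaded, LoadSize); // zero-extending load
    Val |= Piece << (TotalLoaded * 8);              // shift into position
    TotalLoaded += LoadSize;
    Remaining -= LoadSize;
  }
  return Val;
}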
-  if (VA.getLocInfo() != CCValAssign::Full)
-    report_fatal_error("Unknown loc info");
+  // Offset of the first variable argument from stack pointer.
+  int VaArgOffset, VarArgsSaveSize;
 
-  if (VA.isRegLoc()) {
-    // Queue up the argument copies and emit them at the end.
-    RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
-  } else {
-    report_fatal_error("Passing arguments via the stack not implemented");
-  }
+  if (ArgRegs.size() == Idx) {
+    VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes);
+    VarArgsSaveSize = 0;
+  } else {
+    VarArgsSaveSize = (int)(RegSizeInBytes * (ArgRegs.size() - Idx));
+    VaArgOffset = -VarArgsSaveSize;
   }
 
-  SDValue Glue;
+  // Record the frame index of the first variable argument, which is a value
+  // necessary to lower VASTART.
+  int FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
+  LoongArchFI->setVarArgsFrameIndex(FI);
+
+  // If saving an odd number of registers then create an extra stack slot to
+  // ensure that the frame pointer is 2*GRLEN-aligned, which in turn ensures
+  // offsets to even-numbered registers remain 2*GRLEN-aligned.
+  if (Idx % 2) {
+    MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset - (int)RegSizeInBytes,
+                          true);
+    VarArgsSaveSize += RegSizeInBytes;
+  }
 
-  // Build a sequence of copy-to-reg nodes, chained and glued together.
-  for (auto &Reg : RegsToPass) {
-    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
-    Glue = Chain.getValue(1);
+  // Copy the integer registers that have not been used for argument passing
+  // to the argument register save area. For LP32, the save area is allocated
+  // in the caller's stack frame, while for LPX32/LP64, it is allocated in the
+  // callee's stack frame.
+  for (unsigned I = Idx; I < ArgRegs.size();
+       ++I, VaArgOffset += RegSizeInBytes) {
+    unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
+    SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
+    FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
+    SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+    SDValue Store =
+        DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo());
+    cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue(
+        (Value *)nullptr);
+    OutChains.push_back(Store);
   }
+  LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
+}
 
-  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
-  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
-  // split it and then direct call can be matched by PseudoCALL.
-  // FIXME: Add target flags for relocation.
-  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT);
-  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
-    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT);
+void LoongArchTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+                                          Align Alignment) const {
+  const TargetFrameLowering *TFL = Subtarget.getFrameLowering();
+
+  assert(Size && "Byval argument's size shouldn't be 0.");
+
+  Alignment = std::min(Alignment, TFL->getStackAlign());
+
+  unsigned FirstReg = 0;
+  unsigned NumRegs = 0;
+  unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
+  ArrayRef<MCPhysReg> IntArgRegs = ABI.GetByValArgRegs();
+  // FIXME: The LP32 case actually describes no shadow registers.
+  const MCPhysReg *ShadowRegs =
+      ABI.IsLP32() ? IntArgRegs.data() : LoongArch64DPRegs;
+
+  // We used to check the size as well but we can't do that anymore since
+  // CCState::HandleByVal() rounds up the size after calling this function.
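// Worked example for the even-register realignment performed just below
// (illustrative numbers, LP64 assumed, so RegSizeInBytes = 8): a byval
// argument with 16-byte alignment arriving when FirstReg is odd. Since
// Alignment (16) > RegSizeInBytes (8) and FirstReg % 2 == 1, one argument
// register is allocated and skipped, FirstReg becomes even, and the byval
// data then starts at a 2*GRLEN-aligned offset in the save area.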
+  assert(Alignment >= Align(RegSizeInBytes) &&
+         "Byval argument's alignment should be a multiple of RegSizeInBytes.");
+
+  FirstReg = State->getFirstUnallocated(IntArgRegs);
+
+  // If Alignment > RegSizeInBytes, the first arg register must be even.
+  // FIXME: This condition happens to do the right thing but it's not the
+  // right way to test it. We want to check that the stack frame offset
+  // of the register is aligned.
+  if ((Alignment > RegSizeInBytes) && (FirstReg % 2)) {
+    State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]);
+    ++FirstReg;
+  }
 
-  // The first call operand is the chain and the second is the target address.
-  SmallVector<SDValue, 8> Ops;
-  Ops.push_back(Chain);
-  Ops.push_back(Callee);
 
+  // Mark the registers allocated.
+  // Size = alignTo(Size, RegSizeInBytes);
+  // for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size());
+  //      Size -= RegSizeInBytes, ++I, ++NumRegs)
+  //   State->AllocateReg(IntArgRegs[I], ShadowRegs[I]);
 
-  // Add argument registers to the end of the list so that they are
-  // known live into the call.
-  for (auto &Reg : RegsToPass)
-    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
 
+  State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs);
+}
 
-  // Add a register mask operand representing the call-preserved registers.
-  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
-  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
-  assert(Mask && "Missing call preserved mask for calling convention");
-  Ops.push_back(DAG.getRegisterMask(Mask));
 
+MachineBasicBlock *
+LoongArchTargetLowering::emitPseudoSELECT(MachineInstr &MI,
+                                          MachineBasicBlock *BB, bool isFPCmp,
+                                          unsigned Opc) const {
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+
+  // To "insert" a SELECT instruction, we actually have to insert the
+  // diamond control-flow pattern. The incoming instruction knows the
+  // destination vreg to set, the condition code register to branch on, the
+  // true/false values to select between, and a branch opcode to use.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = ++BB->getIterator();
+
+  // thisMBB:
+  //  ...
+  //   TrueVal = ...
+  //   setcc r1, r2, r3
+  //   bne r1, zero, sinkMBB
+  //   fallthrough --> copy0MBB
+  MachineBasicBlock *thisMBB = BB;
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, copy0MBB);
+  F->insert(It, sinkMBB);
+
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), BB,
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Next, add the true and fallthrough blocks as its successors.
+  BB->addSuccessor(copy0MBB);
+  BB->addSuccessor(sinkMBB);
+
+  if (isFPCmp) {
+    // bcnez/bceqz $fcc, sinkMBB
+    BuildMI(BB, DL, TII->get(Opc))
+        .addReg(MI.getOperand(1).getReg())
+        .addMBB(sinkMBB);
+  } else {
+    BuildMI(BB, DL, TII->get(Opc))
+        .addReg(MI.getOperand(1).getReg())
+        .addReg(LoongArch::ZERO)
+        .addMBB(sinkMBB);
+  }
 
-  // Glue the call to the argument copies, if any.
-  if (Glue.getNode())
-    Ops.push_back(Glue);
 
+  // copy0MBB:
+  //   %FalseValue = ...
+  //   # fallthrough to sinkMBB
+  BB = copy0MBB;
 
-  // Emit the call.
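// A source-level analogy for the diamond emitPseudoSELECT builds above (a
// sketch of the semantics, not the emitted machine code): a select becomes a
// conditional branch over two blocks whose results merge in a PHI.

int selectLowered(bool Cond, int TrueVal, int FalseVal) {
  if (Cond)           // thisMBB: branch to sinkMBB when the condition holds
    return TrueVal;   // value reaching the PHI from thisMBB
  return FalseVal;    // copy0MBB: fallthrough value reaching the PHI
}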
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); - Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); - DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); - Glue = Chain.getValue(1); + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; - // Mark the end of the call, which is glued to the call itself. - Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true), - DAG.getConstant(0, DL, PtrVT, true), Glue, DL); - Glue = Chain.getValue(1); + BuildMI(*BB, BB->begin(), DL, TII->get(LoongArch::PHI), MI.getOperand(0).getReg()) + .addReg(MI.getOperand(2).getReg()) + .addMBB(thisMBB) + .addReg(MI.getOperand(3).getReg()) + .addMBB(copy0MBB); - // Assign locations to each value returned by this call. - SmallVector RVLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); - analyzeInputArgs(RetCCInfo, Ins, CC_LoongArch); + MI.eraseFromParent(); // The pseudo instruction is gone now. - // Copy all of the result registers out of their specified physreg. - for (auto &VA : RVLocs) { - // Copy the value out. - SDValue RetValue = - DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); - Chain = RetValue.getValue(1); - Glue = RetValue.getValue(2); + return BB; +} - InVals.push_back(Chain.getValue(0)); +MachineBasicBlock *LoongArchTargetLowering::emitLSXCBranchPseudo( + MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const { + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const TargetRegisterClass *RC = &LoongArch::GPR32RegClass; + DebugLoc DL = MI.getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, FBB); + F->insert(It, TBB); + F->insert(It, Sink); + + // Transfer the remainder of BB and its successor edges to Sink. + Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + // Insert the real bnz.b instruction to $BB. + BuildMI(BB, DL, TII->get(BranchOp)) + .addReg(LoongArch::FCC0) + .addReg(MI.getOperand(1).getReg()); + + BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)) + .addReg(LoongArch::FCC0) + .addMBB(TBB); + + // Fill $FBB. + unsigned RD1 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::ADDI_W), RD1) + .addReg(LoongArch::ZERO) + .addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(LoongArch::B32)).addMBB(Sink); + + // Fill $TBB. + unsigned RD2 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(LoongArch::ADDI_W), RD2) + .addReg(LoongArch::ZERO) + .addImm(1); + + // Insert phi function to $Sink. + BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), + MI.getOperand(0).getReg()) + .addReg(RD1) + .addMBB(FBB) + .addReg(RD2) + .addMBB(TBB); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return Sink; +} + +// Emit the COPY_FW pseudo instruction. 
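// In vector-extension C++ (a sketch under GCC/Clang extensions, not the MIR
// emitted below), the lane read that COPY_FW performs looks like this; lane 0
// is free because $fd overlaps the low 32 bits of the vector register.

typedef float v4f32 __attribute__((vector_size(16)));

float copyFW(v4f32 Vk, unsigned Lane) {
  if (Lane == 0)
    return Vk[0];  // register overlap: potentially zero instructions
  // vreplvei.w broadcasts lane n; the sub_lo copy then reads element 0.
  v4f32 Rep = {Vk[Lane], Vk[Lane], Vk[Lane], Vk[Lane]};
  return Rep[0];
}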
+// +// copy_fw_pseudo $fd, $vk, n +// => +// vreplvei.w $rt, $vk, $n +// copy $rt, $fd +// +// When n is zero, the equivalent operation can be performed with (potentially) +// zero instructions due to register overlaps. +MachineBasicBlock * +LoongArchTargetLowering::emitCOPY_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Fd = MI.getOperand(0).getReg(); + unsigned Vk = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + + if (Lane == 0) { + unsigned Vj = Vk; + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Vj, 0, LoongArch::sub_lo); + } else { + unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vj) + .addReg(Vk) + .addImm(Lane); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Vj, 0, LoongArch::sub_lo); } - return Chain; + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the COPY_FD pseudo instruction. +// +// copy_fd_pseudo $fd, $vj, n +// => +// vreplvei.d $vd, $vj, $n +// copy $fd, $vd:sub_64 +// +// When n is zero, the equivalent operation can be performed with (potentially) +// zero instructions due to register overlaps. +MachineBasicBlock * +LoongArchTargetLowering::emitCOPY_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.isFP64bit()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + unsigned Fd = MI.getOperand(0).getReg(); + unsigned Vk = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + DebugLoc DL = MI.getDebugLoc(); + + if (Lane == 0) + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Vk, 0, LoongArch::sub_64); + else { + unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); + assert(Lane == 1); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vj) + .addReg(Vk) + .addImm(Lane); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Vj, 0, LoongArch::sub_64); + } + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; } -bool LoongArchTargetLowering::CanLowerReturn( - CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, - const SmallVectorImpl &Outs, LLVMContext &Context) const { - // Any return value split in to more than two values can't be returned - // directly. - return Outs.size() <= 2; +MachineBasicBlock * +LoongArchTargetLowering::emitXCOPY_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Fd = MI.getOperand(0).getReg(); + unsigned Xk = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned Xj = Xk; + + if (Lane == 0) { + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Xj, 0, LoongArch::sub_lo); + } else { + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) + .addReg(Xk) + .addImm(Lane); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); + } + + MI.eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; } -SDValue LoongArchTargetLowering::LowerReturn( - SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, const SDLoc &DL, - SelectionDAG &DAG) const { - // Stores the assignment of the return value to a location. - SmallVector RVLocs; +MachineBasicBlock * +LoongArchTargetLowering::emitXCOPY_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.isFP64bit()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + unsigned Fd = MI.getOperand(0).getReg(); + unsigned Xk = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); + if (Lane == 0) { + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd) + .addReg(Xk, 0, LoongArch::sub_64); + } else { + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_DU), Rj) + .addReg(Xk) + .addImm(Lane); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Fd).addReg(Rj); + } - // Info about the registers and stack slot. - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, - *DAG.getContext()); + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} - analyzeOutputArgs(CCInfo, Outs, CC_LoongArch); +MachineBasicBlock *LoongArchTargetLowering::emitCONCAT_VECTORS( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Bytes) const { - SDValue Glue; - SmallVector RetOps(1, Chain); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned SubReg1 = MI.getOperand(1).getReg(); + unsigned SubReg2 = MI.getOperand(2).getReg(); + const TargetRegisterClass *RC = nullptr; - // Copy the result values into the output registers. - for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { - CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); + switch (Bytes) { + default: + llvm_unreachable("Unexpected size"); + case 1: + RC = &LoongArch::LASX256BRegClass; + break; + case 2: + RC = &LoongArch::LASX256HRegClass; + break; + case 4: + RC = &LoongArch::LASX256WRegClass; + break; + case 8: + RC = &LoongArch::LASX256DRegClass; + break; + } - // Handle a 'normal' return. - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue); + unsigned X0 = RegInfo.createVirtualRegister(RC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X0) + .addImm(0) + .addReg(SubReg1) + .addImm(LoongArch::sub_128); + unsigned X1 = RegInfo.createVirtualRegister(RC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), X1) + .addImm(0) + .addReg(SubReg2) + .addImm(LoongArch::sub_128); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) + .addReg(X0) + .addReg(X1) + .addImm(2); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} - // Guarantee that all emitted copies are stuck together. 
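// Semantically, emitCONCAT_VECTORS above makes two 128-bit LSX values the low
// and high halves of one 256-bit LASX register. A sketch in plain C++ with
// vector extensions (the real expansion uses SUBREG_TO_REG plus XVPERMI_Q):

#include <cstdint>
typedef int8_t v16i8 __attribute__((vector_size(16)));
typedef int8_t v32i8 __attribute__((vector_size(32)));

v32i8 concat(v16i8 Lo, v16i8 Hi) {
  v32i8 R;
  for (int I = 0; I < 16; ++I) {
    R[I] = Lo[I];       // low 128 bits from the first operand
    R[I + 16] = Hi[I];  // high 128 bits from the second operand
  }
  return R;
}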
- Glue = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); +// xcopy_fw_gpr_pseudo $fd, $xs, $rk +// => +// bb: addi.d $rt1, zero, 4 +// bge $lane, $rt1 hbb +// lbb:xvreplve.w $xt1, $xs, $lane +// copy $rf0, $xt1 +// b sink +// hbb: addi.d $rt2, $lane, -4 +// xvpermi.q $xt2 $xs, 1 +// xvreplve.w $xt3, $xt2, $rt2 +// copy $rf1, $xt3 +// sink:phi +MachineBasicBlock * +LoongArchTargetLowering::emitXCOPY_FW_GPR(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xs = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getReg(); + + const TargetRegisterClass *RC = &LoongArch::GPR64RegClass; + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *HBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *LBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, LBB); + F->insert(It, HBB); + F->insert(It, Sink); + + Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(LBB); + BB->addSuccessor(HBB); + HBB->addSuccessor(Sink); + LBB->addSuccessor(Sink); + + unsigned Rt1 = RegInfo.createVirtualRegister(RC); + BuildMI(BB, DL, TII->get(LoongArch::ADDI_D), Rt1) + .addReg(LoongArch::ZERO_64) + .addImm(4); + BuildMI(BB, DL, TII->get(LoongArch::BGE)) + .addReg(Lane) + .addReg(Rt1) + .addMBB(HBB); + + unsigned Xt1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Rf0 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); + BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt1) + .addReg(Xs) + .addReg(Lane); + BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::COPY), Rf0) + .addReg(Xt1, 0, LoongArch::sub_lo); + BuildMI(*LBB, LBB->end(), DL, TII->get(LoongArch::B)).addMBB(Sink); + + unsigned Xt2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Xt3 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Rt2 = RegInfo.createVirtualRegister(RC); + unsigned Rf1 = RegInfo.createVirtualRegister(&LoongArch::FGR32RegClass); + BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::ADDI_D), Rt2) + .addReg(Lane) + .addImm(-4); + BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVPERMI_Q), Xt2) + .addReg(Xs) + .addReg(Xs) + .addImm(1); + BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::XVREPLVE_W_N), Xt3) + .addReg(Xt2) + .addReg(Rt2); + BuildMI(*HBB, HBB->end(), DL, TII->get(LoongArch::COPY), Rf1) + .addReg(Xt3, 0, LoongArch::sub_lo); + + BuildMI(*Sink, Sink->begin(), DL, TII->get(LoongArch::PHI), + MI.getOperand(0).getReg()) + .addReg(Rf0) + .addMBB(LBB) + .addReg(Rf1) + .addMBB(HBB); + + MI.eraseFromParent(); // The pseudo instruction is gone now. 
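// The three-block expansion above handles a variable lane index: lanes 0-3
// live in the low 128-bit half, while lanes 4-7 need an xvpermi.q half-swap
// first. A semantic sketch, assuming an 8 x f32 LASX register model:

typedef float v8f32 __attribute__((vector_size(32)));

float xcopyFWGPR(v8f32 Xs, long Lane) {
  if (Lane < 4)              // the BGE guard emitted above
    return Xs[Lane];         // LBB: xvreplve.w + copy of sub_lo
  v8f32 Swapped;             // HBB: xvpermi.q moves the high half low
  for (int I = 0; I < 4; ++I) {
    Swapped[I] = Xs[I + 4];
    Swapped[I + 4] = Xs[I];
  }
  return Swapped[Lane - 4];  // addi.d $rt2, $lane, -4
}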
+ return Sink; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, + unsigned Size) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Xd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + const TargetRegisterClass *VecRC = nullptr; + const TargetRegisterClass *SubVecRC = nullptr; + unsigned HalfSize = 0; + unsigned InsertOp = 0; + + if (Size == 1) { + VecRC = &LoongArch::LASX256BRegClass; + SubVecRC = &LoongArch::LSX128BRegClass; + HalfSize = 16; + InsertOp = LoongArch::VINSGR2VR_B; + } else if (Size == 2) { + VecRC = &LoongArch::LASX256HRegClass; + SubVecRC = &LoongArch::LSX128HRegClass; + HalfSize = 8; + InsertOp = LoongArch::VINSGR2VR_H; + } else { + llvm_unreachable("Unexpected type"); + } + + unsigned Xk = Xd_in; + unsigned Imm = Lane; + if (Lane >= HalfSize) { + Xk = RegInfo.createVirtualRegister(VecRC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xk) + .addReg(Xd_in) + .addReg(Xd_in) + .addImm(1); + Imm = Lane - HalfSize; } - RetOps[0] = Chain; // Update chain. + unsigned Xk128 = RegInfo.createVirtualRegister(SubVecRC); + unsigned Xd128 = RegInfo.createVirtualRegister(SubVecRC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), Xk128) + .addReg(Xk, 0, LoongArch::sub_128); + BuildMI(*BB, MI, DL, TII->get(InsertOp), Xd128) + .addReg(Xk128) + .addReg(Fs) + .addImm(Imm); + + unsigned Xd256 = Xd; + if (Lane >= HalfSize) { + Xd256 = RegInfo.createVirtualRegister(VecRC); + } + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xd256) + .addImm(0) + .addReg(Xd128) + .addImm(LoongArch::sub_128); - // Add the glue node if we have it. - if (Glue.getNode()) - RetOps.push_back(Glue); + if (Lane >= HalfSize) { + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), Xd) + .addReg(Xd_in) + .addReg(Xd256) + .addImm(2); + } - return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps); + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; } -bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const { - assert((VT == MVT::f32 || VT == MVT::f64) && "Unexpected VT"); +MachineBasicBlock * +LoongArchTargetLowering::emitXINSERT_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Xd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) + .addImm(0) + .addReg(Fs) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Rj) + .addReg(Xj) + .addImm(0); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSGR2VR_W), Xd) + .addReg(Xd_in) + .addReg(Rj) + .addImm(Lane); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the INSERT_FW pseudo instruction. 
+// +// insert_fw_pseudo $vd, $vd_in, $n, $fs +// => +// subreg_to_reg $vj:sub_lo, $fs +// vpickve2gr.w rj, vj, 0 +// vinsgr2vr.w, vd, rj, lane +MachineBasicBlock * +LoongArchTargetLowering::emitINSERT_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Vd = MI.getOperand(0).getReg(); + unsigned Vd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR32RegClass); + unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) + .addImm(0) + .addReg(Fs) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_W), Rj) + .addReg(Vj) + .addImm(0); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_W), Vd) + .addReg(Vd_in) + .addReg(Rj) + .addImm(Lane); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the INSERT_FD pseudo instruction. +// insert_fd_pseudo $vd, $fs, n +// => +// subreg_to_reg $vk:sub_64, $fs +// vpickve2gr.d rj, vk, 0 +// vinsgr2vr.d vd, rj, lane +MachineBasicBlock * +LoongArchTargetLowering::emitINSERT_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.isFP64bit()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Vd = MI.getOperand(0).getReg(); + unsigned Vd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + unsigned Vj = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); + unsigned Rj = RegInfo.createVirtualRegister(&LoongArch::GPR64RegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Vj) + .addImm(0) + .addReg(Fs) + .addImm(LoongArch::sub_64); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_D), Rj) + .addReg(Vj) + .addImm(0); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VINSGR2VR_D), Vd) + .addReg(Vd_in) + .addReg(Rj) + .addImm(Lane); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock * +LoongArchTargetLowering::emitXINSERT_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.isFP64bit()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Xd_in = MI.getOperand(1).getReg(); + unsigned Lane = MI.getOperand(2).getImm(); + unsigned Fs = MI.getOperand(3).getReg(); + unsigned Xj = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), Xj) + .addImm(0) + .addReg(Fs) + .addImm(LoongArch::sub_64); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVINSVE0_D), Xd) + .addReg(Xd_in) + .addReg(Xj) + .addImm(Lane); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the FILL_FW pseudo instruction. 
+// +// fill_fw_pseudo $vd, $fs +// => +// implicit_def $vt1 +// insert_subreg $vt2:subreg_lo, $vt1, $fs +// vreplvei.w vd, vt2, 0 +MachineBasicBlock * +LoongArchTargetLowering::emitFILL_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Vd = MI.getOperand(0).getReg(); + unsigned Fs = MI.getOperand(1).getReg(); + unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); + unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128WRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); + BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) + .addReg(Vj1) + .addReg(Fs) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_W), Vd) + .addReg(Vj2) + .addImm(0); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the FILL_FD pseudo instruction. +// +// fill_fd_pseudo $vd, $fs +// => +// implicit_def $vt1 +// insert_subreg $vt2:subreg_64, $vt1, $fs +// vreplvei.d vd, vt2, 0 +MachineBasicBlock * +LoongArchTargetLowering::emitFILL_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.isFP64bit()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Vd = MI.getOperand(0).getReg(); + unsigned Fs = MI.getOperand(1).getReg(); + unsigned Vj1 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); + unsigned Vj2 = RegInfo.createVirtualRegister(&LoongArch::LSX128DRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Vj1); + BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Vj2) + .addReg(Vj1) + .addReg(Fs) + .addImm(LoongArch::sub_64); + BuildMI(*BB, MI, DL, TII->get(LoongArch::VREPLVEI_D), Vd) + .addReg(Vj2) + .addImm(0); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the XFILL_FW pseudo instruction. +// +// xfill_fw_pseudo $xd, $fs +// => +// implicit_def $xt1 +// insert_subreg $xt2:subreg_lo, $xt1, $fs +// xvreplve0.w xd, xt2, 0 +MachineBasicBlock * +LoongArchTargetLowering::emitXFILL_FW(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Fs = MI.getOperand(1).getReg(); + unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256WRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); + BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) + .addReg(Xj1) + .addReg(Fs) + .addImm(LoongArch::sub_lo); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_W), Xd).addReg(Xj2); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the XFILL_FD pseudo instruction. 
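// The FILL/XFILL family all reduce to one idea: broadcast a scalar FP value
// to every lane. A vector-extension C++ sketch of that semantic, standing in
// for the insert_subreg + vreplvei/xvreplve0 sequences in this section:

typedef double v4f64 __attribute__((vector_size(32)));

v4f64 xfillFD(double Fs) {
  v4f64 R = {Fs, Fs, Fs, Fs}; // xvreplve0.d: lane 0 replicated everywhere
  return R;
}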
+// +// xfill_fd_pseudo $xd, $fs +// => +// implicit_def $xt1 +// insert_subreg $xt2:subreg_64, $xt1, $fs +// xvreplve0.d xd, xt2, 0 +MachineBasicBlock * +LoongArchTargetLowering::emitXFILL_FD(MachineInstr &MI, + MachineBasicBlock *BB) const { + assert(Subtarget.isFP64bit()); + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Xd = MI.getOperand(0).getReg(); + unsigned Fs = MI.getOperand(1).getReg(); + unsigned Xj1 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); + unsigned Xj2 = RegInfo.createVirtualRegister(&LoongArch::LASX256DRegClass); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::IMPLICIT_DEF), Xj1); + BuildMI(*BB, MI, DL, TII->get(LoongArch::INSERT_SUBREG), Xj2) + .addReg(Xj1) + .addReg(Fs) + .addImm(LoongArch::sub_64); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVREPLVE0_D), Xd).addReg(Xj2); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const { + bool IsLegal = false; + if (Subtarget.hasLSX() || Subtarget.hasLASX()) { + return isUInt<5>(Imm); + } + return IsLegal; +} - if (VT == MVT::f32 && !Subtarget.hasBasicF()) +bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd( + const MachineFunction &MF, EVT VT) const { + + VT = VT.getScalarType(); + + if (!VT.isSimple()) return false; - if (VT == MVT::f64 && !Subtarget.hasBasicD()) + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + default: + break; + } + + return false; +} + +bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const { + if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) return false; - return (Imm.isZero() || Imm.isExactlyValue(+1.0)); + + return ( + (ResVT != MVT::v16i8) && (ResVT != MVT::v8i16) && + (Index == 0 || (Index == ResVT.getVectorNumElements() && + (ResVT.getSizeInBits() == SrcVT.getSizeInBits() / 2)))); +} + +Register +LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT, + const MachineFunction &MF) const { + // Named registers is expected to be fairly rare. For now, just support $r2 + // and $r21 since the linux kernel uses them. + if (Subtarget.is64Bit()) { + Register Reg = StringSwitch(RegName) + .Case("$r2", LoongArch::TP_64) + .Case("$r21", LoongArch::T9_64) + .Default(Register()); + if (Reg) + return Reg; + } else { + Register Reg = StringSwitch(RegName) + .Case("$r2", LoongArch::TP) + .Case("$r21", LoongArch::T9) + .Default(Register()); + if (Reg) + return Reg; + } + report_fatal_error("Invalid register name global variable"); } diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 141f1fd3a55d..ea23b6350369 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -1,4 +1,4 @@ -//=- LoongArchISelLowering.h - LoongArch DAG Lowering Interface -*- C++ -*-===// +//===- LoongArchISelLowering.h - LoongArch DAG Lowering Interface ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,115 +6,551 @@ // //===----------------------------------------------------------------------===// // -// This file defines the interfaces that LoongArch uses to lower LLVM code into -// a selection DAG. 
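// A usage sketch for the getRegisterByName hook above, which backs named
// register reads such as the Linux kernel's use of $r21. A plain inline-asm
// equivalent is shown, since global register variables are
// compiler-dependent; the register choice here is purely illustrative.

static inline unsigned long readR21() {
  unsigned long V;
  __asm__("move %0, $r21" : "=r"(V)); // move is the or-with-zero alias
  return V;
}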
+// This file defines the interfaces that LoongArch uses to lower LLVM code into a +// selection DAG. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHISELLOWERING_H +#include "MCTargetDesc/LoongArchABIInfo.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "LoongArch.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/MachineValueType.h" +#include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include +#include +#include namespace llvm { + +class Argument; +class CCState; +class CCValAssign; +class FastISel; +class FunctionLoweringInfo; +class MachineBasicBlock; +class MachineFrameInfo; +class MachineInstr; +class LoongArchCCState; +class LoongArchFunctionInfo; class LoongArchSubtarget; -struct LoongArchRegisterInfo; -namespace LoongArchISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // TODO: add more LoongArchISDs - CALL, - RET, - // 32-bit shifts, directly matching the semantics of the named LoongArch - // instructions. - SLL_W, - SRA_W, - SRL_W, - - // FPR<->GPR transfer operations - MOVGR2FR_W_LA64, - MOVFR2GR_S_LA64, - - FTINT, - - BSTRINS, - BSTRPICK, - -}; -} // end namespace LoongArchISD - -class LoongArchTargetLowering : public TargetLowering { - const LoongArchSubtarget &Subtarget; - -public: - explicit LoongArchTargetLowering(const TargetMachine &TM, - const LoongArchSubtarget &STI); - - const LoongArchSubtarget &getSubtarget() const { return Subtarget; } - - // Provide custom lowering hooks for some operations. - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, - SelectionDAG &DAG) const override; - - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - - // This method returns the name of a target specific DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - - // Lower incoming arguments, copy physregs into vregs. - SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, - bool IsVarArg, - const SmallVectorImpl &Ins, - const SDLoc &DL, SelectionDAG &DAG, - SmallVectorImpl &InVals) const override; - bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, - bool IsVarArg, - const SmallVectorImpl &Outs, - LLVMContext &Context) const override; - SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, const SDLoc &DL, - SelectionDAG &DAG) const override; - SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const override; - -private: - /// Target-specific function used to lower LoongArch calling conventions. 
-  typedef bool LoongArchCCAssignFn(unsigned ValNo, MVT ValVT,
-                                   CCValAssign::LocInfo LocInfo,
-                                   CCState &State);
-
-  void analyzeInputArgs(CCState &CCInfo,
-                        const SmallVectorImpl<ISD::InputArg> &Ins,
-                        LoongArchCCAssignFn Fn) const;
-  void analyzeOutputArgs(CCState &CCInfo,
-                         const SmallVectorImpl<ISD::OutputArg> &Outs,
-                         LoongArchCCAssignFn Fn) const;
-
-  SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
-
-  MachineBasicBlock *
-  EmitInstrWithCustomInserter(MachineInstr &MI,
-                              MachineBasicBlock *BB) const override;
-  SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
-  SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
-
-  bool isFPImmLegal(const APFloat &Imm, EVT VT,
-                    bool ForCodeSize) const override;
-
-  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
-    return isa<LoadInst>(I) || isa<StoreInst>(I);
-  }
-};
+class LoongArchTargetMachine;
+class SelectionDAG;
+class TargetLibraryInfo;
+class TargetRegisterClass;
+
+  namespace LoongArchISD {
+
+  enum NodeType : unsigned {
+    // Start the numbering from where ISD NodeType finishes.
+    FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+    // Jump and link (call)
+    JmpLink,
+
+    // Tail call
+    TailCall,
+
+    // Global address
+    GlobalAddress,
+
+    // Floating-point branch conditional
+    FPBrcond,
+
+    // Floating-point compare
+    FPCmp,
+
+    // Floating-point conditional moves
+    CMovFP_T,
+    CMovFP_F,
+    FSEL,
+
+    // FP-to-int truncation node.
+    TruncIntFP,
+
+    // Return
+    Ret,
+
+    // Exception return
+    ERet,
+
+    // Software exception return.
+    EH_RETURN,
+
+    BSTRPICK,
+    BSTRINS,
+
+    // Vector comparisons.
+    // These take a vector and return a boolean.
+    VALL_ZERO,
+    VANY_ZERO,
+    VALL_NONZERO,
+    VANY_NONZERO,
+
+    // Vector shuffle with mask as an operand
+    VSHF,    // Generic shuffle
+    SHF,     // 4-element set shuffle.
+    VPACKEV, // Interleave even elements
+    VPACKOD, // Interleave odd elements
+    VILVH,   // Interleave left elements
+    VILVL,   // Interleave right elements
+    VPICKEV, // Pack even elements
+    VPICKOD, // Pack odd elements
+
+    // Vector lane copy
+    INSVE, // Copy element from one vector to another
+
+    // Combined (XOR (OR $a, $b), -1)
+    VNOR,
+
+    VROR,
+    VRORI,
+    XVPICKVE,
+    XVPERMI,
+    XVSHUF4I,
+    REVBD,
+
+    // Extended vector element extraction
+    VEXTRACT_SEXT_ELT,
+    VEXTRACT_ZEXT_ELT,
+
+    XVBROADCAST,
+    VBROADCAST,
+    VABSD,
+    UVABSD,
+  };
+
+  } // end namespace LoongArchISD
+
+  //===--------------------------------------------------------------------===//
+  // TargetLowering Implementation
+  //===--------------------------------------------------------------------===//
+
+  class LoongArchTargetLowering : public TargetLowering {
+  public:
+    explicit LoongArchTargetLowering(const LoongArchTargetMachine &TM,
+                                     const LoongArchSubtarget &STI);
+
+    bool allowsMisalignedMemoryAccesses(
+        EVT VT, unsigned AS = 0, Align Alignment = Align(1),
+        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+        bool *Fast = nullptr) const override;
+
+    /// Enable LSX support for the given integer type and Register
+    /// class.
+    void addLSXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC);
+
+    /// Enable LSX support for the given floating-point type and
+    /// Register class.
+ void addLSXFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC); + + /// Enable LASX support for the given integer type and Register + /// class. + void addLASXIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + + /// Enable LASX support for the given floating-point type and + /// Register class. + void addLASXFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC); + + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } + + EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, + ISD::NodeType) const override; + + bool isCheapToSpeculateCttz() const override; + bool isCheapToSpeculateCtlz() const override; + + bool isLegalAddImmediate(int64_t) const override; + + /// Return the correct alignment for the current calling convention. + Align getABIAlignmentForCallingConv(Type *ArgTy, + const DataLayout &DL) const override { + const Align ABIAlign = DL.getABITypeAlign(ArgTy); + if (ArgTy->isVectorTy()) + return std::min(ABIAlign, Align(8)); + return ABIAlign; + } + + ISD::NodeType getExtendForAtomicOps() const override { + return ISD::SIGN_EXTEND; + } + + bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const override; + + void LowerOperationWrapper(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + /// LowerOperation - Provide custom lowering hooks for some operations. + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override; + + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + /// + void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, + SelectionDAG &DAG) const override; + + /// getTargetNodeName - This method returns the name of a target specific + // DAG node. + const char *getTargetNodeName(unsigned Opcode) const override; + + /// getSetCCResultType - get the ISD::SETCC result ValueType + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; + + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; + + bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override { + return false; + } + + const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; + + void AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const override; + + void HandleByVal(CCState *, unsigned &, Align) const override; + + Register getRegisterByName(const char* RegName, LLT VT, + const MachineFunction &MF) const override; + + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + Register + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return ABI.IsLP64() ? LoongArch::A0_64 : LoongArch::A0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + Register + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return ABI.IsLP64() ? 
LoongArch::A1_64 : LoongArch::A1; + } + + bool isJumpTableRelative() const override { + return getTargetMachine().isPositionIndependent(); + } + + CCAssignFn *CCAssignFnForCall() const; + + CCAssignFn *CCAssignFnForReturn() const; + + private: + template + SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; + + /// This function fills Ops, which is the list of operands that will later + /// be used when a function call node is created. It also generates + /// copyToReg nodes to set up argument registers. + void getOpndList(SmallVectorImpl &Ops, + std::deque> &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool IsCallReloc, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain, + bool IsTailCall) const; + + SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; + + // Subtarget Info + const LoongArchSubtarget &Subtarget; + // Cache the ABI from the TargetMachine, we use it everywhere. + const LoongArchABIInfo &ABI; + + // Create a TargetGlobalAddress node. + SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + + // Create a TargetExternalSymbol node. + SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + + // Create a TargetBlockAddress node. + SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + + // Create a TargetJumpTable node. + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + + // Create a TargetConstantPool node. + SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + + // Lower Operand helpers + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals, + TargetLowering::CallLoweringInfo &CLI) const; + + // Lower Operand specifics + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + /// Lower VECTOR_SHUFFLE into one of a number of instructions + /// depending on the indices in the shuffle. 
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; + SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG, + bool IsSRA) const; + SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; + + /// isEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. + bool + isEligibleForTailCallOptimization(const CCState &CCInfo, + CallLoweringInfo &CLI, MachineFunction &MF, + unsigned NextStackOffset, + const LoongArchFunctionInfo &FI) const; + + /// copyByValArg - Copy argument registers which were used to pass a byval + /// argument to the stack. Create a stack frame object for the byval + /// argument. + void copyByValRegs(SDValue Chain, const SDLoc &DL, + std::vector &OutChains, SelectionDAG &DAG, + const ISD::ArgFlagsTy &Flags, + SmallVectorImpl &InVals, + const Argument *FuncArg, unsigned FirstReg, + unsigned LastReg, const CCValAssign &VA, + LoongArchCCState &State) const; + + /// passByValArg - Pass a byval argument in registers or on stack. + void passByValArg(SDValue Chain, const SDLoc &DL, + std::deque> &RegsToPass, + SmallVectorImpl &MemOpChains, SDValue StackPtr, + MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, + unsigned FirstReg, unsigned LastReg, + const ISD::ArgFlagsTy &Flags, + const CCValAssign &VA) const; + + /// writeVarArgRegs - Write variable function arguments passed in registers + /// to the stack. Also create a stack frame object for the first variable + /// argument. 
+ void writeVarArgRegs(std::vector &OutChains, SDValue Chain, + const SDLoc &DL, SelectionDAG &DAG, + CCState &State) const; + + SDValue + LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + + SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, + SDValue Arg, const SDLoc &DL, bool IsTailCall, + SelectionDAG &DAG) const; + + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &dl, SelectionDAG &DAG) const override; + + bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; + + // Inline asm support + ConstraintType getConstraintType(StringRef Constraint) const override; + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const override; + + /// This function parses registers that appear in inline-asm constraints. + /// It returns pair (0, 0) on failure. + std::pair + parseRegForInlineAsmConstraint(StringRef C, MVT VT) const; + + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops + /// vector. If it is invalid, don't add anything to Ops. If hasMemory is + /// true it means one of the asm constraint of the inline asm instruction + /// being processed is 'm'. + void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const override; + + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode == "R") + return InlineAsm::Constraint_R; + else if (ConstraintCode == "ZC") + return InlineAsm::Constraint_ZC; + else if (ConstraintCode == "ZB") + return InlineAsm::Constraint_ZB; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS, + Instruction *I = nullptr) const override; + + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + EVT getOptimalMemOpType(const MemOp &Op, + const AttributeList &FuncAttributes) const override; + + /// isFPImmLegal - Returns true if the target can instruction select the + /// specified FP immediate natively. If false, the legalizer will + /// materialize the FP immediate as a load from a constant pool. + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + + bool useSoftFloat() const override; + + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return isa(I) || isa(I); + } + + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + + /// Emit a sign-extension using sll/sra, seb, or seh appropriately. 
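// Illustrative use of the memory constraints handled by
// getInlineAsmMemConstraint above: "ZC" requests an address form suitable
// for ll/sc-style accesses, "ZB" a plain base register with zero offset.
// A hedged sketch; exact constraint semantics follow the LoongArch
// toolchain documentation.

static inline int loadLinked(volatile int *P) {
  int V;
  __asm__ __volatile__("ll.w %0, %1" : "=&r"(V) : "ZC"(*P) : "memory");
  return V;
}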
+ MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size, unsigned DstReg, + unsigned SrcRec) const; + + MachineBasicBlock *emitLoadAddress(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const; + + MachineBasicBlock *emitXINSERT_B(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitINSERT_H_VIDX(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *emitSEL_D(MachineInstr &MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB, + bool isFPCmp, unsigned Opc) const; + + /// SE + MachineBasicBlock *emitLSXCBranchPseudo(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned BranchOp) const; + /// Emit the COPY_FW pseudo instruction + MachineBasicBlock *emitCOPY_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + /// Emit the COPY_FD pseudo instruction + MachineBasicBlock *emitCOPY_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXCOPY_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXCOPY_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitCONCAT_VECTORS(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Bytes) const; + + MachineBasicBlock *emitXCOPY_FW_GPR(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXINSERT_BH(MachineInstr &MI, MachineBasicBlock *BB, + unsigned EltSizeInBytes) const; + + MachineBasicBlock *emitXINSERT_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + + /// Emit the INSERT_FW pseudo instruction + MachineBasicBlock *emitINSERT_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + /// Emit the INSERT_FD pseudo instruction + MachineBasicBlock *emitINSERT_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXINSERT_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXINSERT_DF_VIDX(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsGPR64) const; + /// Emit the FILL_FW pseudo instruction + MachineBasicBlock *emitFILL_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + /// Emit the FILL_FD pseudo instruction + MachineBasicBlock *emitFILL_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitXFILL_FW(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitXFILL_FD(MachineInstr &MI, + MachineBasicBlock *BB) const; + }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td index bebc83a861ae..d75d5198bde1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td @@ -1,4 +1,4 @@ -//===- LoongArchInstrFormats.td - LoongArch Instr. Formats -*- tablegen -*-===// +//===-- LoongArchInstrFormats.td - LoongArch Instruction Formats -----*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -9,396 +9,782 @@
 //===----------------------------------------------------------------------===//
 // Describe LoongArch instructions format
 //
-// opcode - operation code.
-// rd - destination register operand.
-// r{j/k} - source register operand.
-// immN - immediate data operand.
+// CPU INSTRUCTION FORMATS
+//
+//  opcode          - operation code.
+//  rd              - destination register operand.
+//  rj/rk           - source register operands.
+//  immN/siN/offsN  - N-bit immediate, signed-immediate, or offset operand.
+//  csr             - control/status register number.
 //
 //===----------------------------------------------------------------------===//
 
-class LAInst<dag outs, dag ins, string opcstr, string opnstr,
-             list<dag> pattern = []>
-    : Instruction {
+class StdArch {
+  bits<32> Inst;
+}
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<4> val> {
+  bits<4> Value = val;
+}
+
+def Pseudo   : Format<0>;
+def FrmR     : Format<1>;
+def FrmI     : Format<2>;
+def FrmJ     : Format<3>;
+def FrmFR    : Format<4>;
+def FrmFI    : Format<5>;
+def FrmOther : Format<6>;
+
+// Generic LoongArch instruction format.
+class InstLA<dag outs, dag ins, string asmstr, list<dag> pattern,
+             Format f> : Instruction {
   field bits<32> Inst;
-  // SoftFail is a field the disassembler can use to provide a way for
-  // instructions to not match without killing the whole decode process. It is
-  // mainly used for ARM, but Tablegen expects this field to exist or it fails
-  // to build the decode table.
-  field bits<32> SoftFail = 0;
+  Format Form = f;
 
   let Namespace = "LoongArch";
+  let Size = 4;
 
   let OutOperandList = outs;
-  let InOperandList = ins;
-  let AsmString = opcstr # "\t" # opnstr;
-  let Pattern = pattern;
+  let InOperandList = ins;
+  let AsmString = asmstr;
+  let Pattern = pattern;
+
+  //
+  // Attributes specific to LoongArch instructions.
+  //
+  bits<4> FormBits = Form.Value;
+  bit isCTI = 0;            // Any form of Control Transfer Instruction.
+                            // Required for LoongArch.
+  bit hasForbiddenSlot = 0; // Instruction has a forbidden slot.
+  bit IsPCRelativeLoad = 0; // Load instruction with implicit source register
+                            // ($pc) and with explicit offset and destination
+                            // register.
+  bit hasFCCRegOperand = 0; // Instruction uses the $fcc register.
+
+  // TSFlags layout should be kept in sync with
+  // MCTargetDesc/LoongArchBaseInfo.h.
+  let TSFlags{3-0} = FormBits;
+  let TSFlags{4} = isCTI;
+  let TSFlags{5} = hasForbiddenSlot;
+  let TSFlags{6} = IsPCRelativeLoad;
+  let TSFlags{7} = hasFCCRegOperand;
+
+  let DecoderNamespace = "LoongArch";
+
+  field bits<32> SoftFail = 0;
+}
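+
+// As an illustration of how these classes compose (a sketch only -- the
+// actual instruction definitions and the GPR32Opnd operand class live
+// elsewhere in this port), a 3-register ALU instruction could be written:
+//
+//   def ADD_W : InstForm<(outs GPR32Opnd:$rd),
+//                        (ins GPR32Opnd:$rj, GPR32Opnd:$rk),
+//                        "add.w\t$rd, $rj, $rk", [], FrmR, "add.w">,
+//               R3I<0b0100000>;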
 
-// Pseudo instructions
-class Pseudo<dag outs, dag ins, list<dag> pattern = [], string opcstr = "",
-             string opnstr = "">
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  let isPseudo = 1;
-  let isCodeGenOnly = 1;
-}
+class InstForm<dag outs, dag ins, string asmstr, list<dag> pattern,
+               Format f, string opstr = ""> :
+  InstLA<outs, ins, asmstr, pattern, f> {
+  string BaseOpcode = opstr;
+  string Arch;
+}
+
+class LoongArch_str<string opstr> {
+  string Arch;
+  string BaseOpcode = opstr;
+}
+
+//===-----------------------------------------------------------===//
+// Format instruction classes for LoongArch
+//===-----------------------------------------------------------===//
+
+// R2 classes: 2 registers
+//
+class R2 : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R2I<bits<5> op> : R2 {
+  let Inst{31-15} = 0x0;
+  let Inst{14-10} = op;
+}
+
+class R2F<bits<10> op> : R2 {
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-20} = 0x11;
+  let Inst{19-10} = op;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+class MOVFI<bits<10> op> : R2 {
+  bits<5> rj;
+  bits<5> fd;
+
+  let Inst{31-20} = 0x11;
+  let Inst{19-10} = op;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = fd;
+}
+
+class MOVIF<bits<10> op> : R2 {
+  bits<5> fj;
+  bits<5> rd;
+
+  let Inst{31-20} = 0x11;
+  let Inst{19-10} = op;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = rd;
+}
+
+class R2P<bits<3> op> : R2 {
+  let Inst{31-13} = 0x3240;
+  let Inst{12-10} = op;
+}
+
+class R2_CSR<bits<8> op> : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<14> csr;
+
+  let Inst{31-24} = op;
+  let Inst{23-10} = csr;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// 2R-type
-// <opcode | rj | rd>
-class Fmt2R<bits<22> op, dag outs, dag ins, string opcstr, string opnstr,
-            list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-10} = op;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+class R2_SI16<bits<6> op> : StdArch {
+  bits<5> rd;
+  bits<5> rj;
+  bits<16> si16;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = si16;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R2_COND<bits<2> op, bits<5> cond> : StdArch {
+  bits<5> fj;
+  bits<5> fk;
+  bits<3> cd;
+
+  let Inst{31-22} = 0x30;
+  let Inst{21-20} = op;
+  let Inst{19-15} = cond;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-3} = 0b00;
+  let Inst{2-0} = cd;
+}
+
+class R2_LEVEL<bits<14> op> : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<8> level;
+
+  let Inst{31-18} = op;
+  let Inst{17-10} = level;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// 3R-type
-// <opcode | rk | rj | rd>
-class Fmt3R<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
-            list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<5> rk;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-15} = op;
-  let Inst{14-10} = rk;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+class IMM32<bits<6> op> : StdArch {
+  let Inst{31-16} = 0x0648;
+  let Inst{15-10} = op;
+  let Inst{9-0} = 0;
+}
+
+class WAIT_FM : StdArch {
+  bits<15> hint;
+
+  let Inst{31-15} = 0xc91;
+  let Inst{14-0} = hint;
+}
+
+class R2_INVTLB : StdArch {
+  bits<5> rj;
+  bits<5> op;
+  bits<5> rk;
+
+  let Inst{31-15} = 0xc93;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = op;
+}
+
+class BAR_FM<bits<1> op> : StdArch {
+  bits<15> hint;
+
+  let Inst{31-16} = 0x3872;
+  let Inst{15} = op;
+  let Inst{14-0} = hint;
+}
+
+class PRELD_FM : StdArch {
+  bits<5> rj;
+  bits<5> hint;
+  bits<12> imm12;
+
+  let Inst{31-22} = 0xab;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = hint;
+}
+
+// R3 classes: 3 registers
+//
+class R3 : StdArch {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// 3RI2-type
-// <opcode | I2 | rk | rj | rd>
-class Fmt3RI2<bits<15> op, dag outs, dag ins, string opcstr, string opnstr,
-              list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<2> imm2;
-  bits<5> rk;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-17} = op;
-  let Inst{16-15} = imm2;
-  let Inst{14-10} = rk;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+class R3I<bits<7> op> : R3 {
+  let Inst{31-22} = 0x0;
+  let Inst{21-15} = op;
+}
+
+class R3F<bits<6> op> : R3 {
+  bits<5> fk;
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-21} = 0x8;
+  let Inst{20-15} = op;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+class R3MI<bits<8> op> : R3 {
+  let Inst{31-23} = 0x70;
+  let Inst{22-15} = op;
+}
+
+class AM<bits<6> op> : StdArch {
+  bits<5> rk;
+  bits<17> addr; // rj + 12-bit offset (must be zero)
+  bits<5> rd;
+
+  let Inst{31-21} = 0x1c3;
+  let Inst{20-15} = op;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = addr{16-12};
+  let Inst{4-0} = rd;
+}
+
+class R3MF<bits<8> op> : R3 {
+  bits<5> fd;
+
+  let Inst{31-23} = 0x70;
+  let Inst{22-15} = op;
+  let Inst{4-0} = fd;
+}
+
+class R3_SA2<bits<5> op> : StdArch {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> rd;
+  bits<2> sa;
+
+  let Inst{31-22} = 0x0;
+  let Inst{21-17} = op;
+  let Inst{16-15} = sa;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// 3RI3-type
-// <opcode | I3 | rk | rj | rd>
-class Fmt3RI3<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
-              list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<3> imm3;
-  bits<5> rk;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-18} = op;
-  let Inst{17-15} = imm3;
-  let Inst{14-10} = rk;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+class R3_SA3 : StdArch {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> rd;
+  bits<3> sa;
+
+  let Inst{31-18} = 3;
+  let Inst{17-15} = sa;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// 2RI5-type
-// <opcode | I5 | rj | rd>
-class Fmt2RI5<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
-              list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<5> imm5;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-15} = op;
-  let Inst{14-10} = imm5;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+// R4 classes: 4 registers
+//
+class R4MUL<bits<4> op> : StdArch {
+  bits<5> fa;
+  bits<5> fk;
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-24} = 0x8;
+  let Inst{23-20} = op;
+  let Inst{19-15} = fa;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+class R4CMP<bits<2> op> : StdArch {
+  bits<5> cond;
+  bits<5> fk;
+  bits<5> fj;
+  bits<3> cd;
+
+  let Inst{31-22} = 0x30;
+  let Inst{21-20} = op;
+  let Inst{19-15} = cond;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-3} = 0;
+  let Inst{2-0} = cd;
+}
+
+class R4SEL : StdArch {
+  bits<3> ca;
+  bits<5> fk;
+  bits<5> fj;
+  bits<5> fd;
+
+  let Inst{31-18} = 0x340;
+  let Inst{17-15} = ca;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-0} = fd;
+}
+
+// R2_IMM5 classes: 2 registers and a 5-bit immediate
+//
+class R2_IMM5<bits<2> op> : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<5> imm5;
+
+  let Inst{31-20} = 0x4;
+  let Inst{19-18} = op;
+  let Inst{17-15} = 0x1;
+  let Inst{14-10} = imm5;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// 2RI6-type
-// <opcode | I6 | rj | rd>
-class Fmt2RI6<bits<16> op, dag outs, dag ins, string opcstr, string opnstr,
-              list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<6> imm6;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-16} = op;
-  let Inst{15-10} = imm6;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+// R2_IMM6 classes: 2 registers and a 6-bit immediate
+//
+class R2_IMM6<bits<2> op> : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<6> imm6;
+
+  let Inst{31-20} = 0x4;
+  let Inst{19-18} = op;
+  let Inst{17-16} = 0x1;
+  let Inst{15-10} = imm6;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// 2RI8-type
-// <opcode | I8 | rj | rd>
-class Fmt2RI8<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
-              list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<8> imm8;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-18} = op;
-  let Inst{17-10} = imm8;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+// R2_IMM12 classes: 2 registers and a 12-bit immediate
+//
+class LOAD_STORE<bits<4> op> : StdArch {
+  bits<5> rd;
+  bits<17> addr;
+
+  let Inst{31-26} = 0xa;
+  let Inst{25-22} = op;
+  let Inst{21-10} = addr{11-0};
+  let Inst{9-5} = addr{16-12};
+  let Inst{4-0} = rd;
+}
+
+// for reloc
+class LOAD_STORE_RRI<bits<4> op> : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<12> imm12;
+
+  let Inst{31-26} = 0xa;
+  let Inst{25-22} = op;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// 2RI12-type
-// <opcode | I12 | rj | rd>
-class Fmt2RI12<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
-               list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<12> imm12;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-22} = op;
-  let Inst{21-10} = imm12;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+class R2_IMM12<bits<3> op> : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<12> imm12;
+
+  let Inst{31-25} = 0x1;
+  let Inst{24-22} = op;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class LEA_ADDI_FM<bits<3> op> : StdArch {
+  bits<5> rd;
+  bits<17> addr;
+
+  let Inst{31-25} = 0x1;
+  let Inst{24-22} = op;
+  let Inst{21-10} = addr{11-0};
+  let Inst{9-5} = addr{16-12};
+  let Inst{4-0} = rd;
+}
 
-// 2RI14-type
-// <opcode | I14 | rj | rd>
-class Fmt2RI14<bits<8> op, dag outs, dag ins, string opcstr, string opnstr,
-               list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<14> imm14;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-24} = op;
-  let Inst{23-10} = imm14;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+// R2_IMM14 classes: 2 registers and a 14-bit immediate
+//
+class LL_SC<bits<3> op> : StdArch {
+  bits<5> rd;
+  bits<19> addr;
+
+  let Inst{31-27} = 4;
+  let Inst{26-24} = op;
+  let Inst{23-10} = addr{13-0};
+  let Inst{9-5} = addr{18-14};
+  let Inst{4-0} = rd;
+}
 
-// 2RI16-type
-// <opcode | I16 | rj | rd>
-class Fmt2RI16<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
-               list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<16> imm16;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-26} = op;
-  let Inst{25-10} = imm16;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+// R2_IMM16 classes: 2 registers and a 16-bit immediate
+//
+class R2_IMM16BEQ<bits<6> op> : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<16> offs16;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = offs16;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class R2_IMM16JIRL : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<16> offs16;
+
+  let Inst{31-26} = 0x13;
+  let Inst{25-10} = offs16;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// 1RI20-type
-// <opcode | I20 | rd>
-class Fmt1RI20<bits<7> op, dag outs, dag ins, string opcstr, string opnstr,
-               list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<20> imm20;
-  bits<5> rd;
-
-  let Inst{31-25} = op;
-  let Inst{24-5} = imm20;
-  let Inst{4-0} = rd;
-}
+// R1_IMM21 classes: 1 register and a 21-bit immediate
+//
+class R1_IMM21BEQZ<bits<6> op> : StdArch {
+  bits<5> rj;
+  bits<21> offs21;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = offs21{15-0};
+  let Inst{9-5} = rj;
+  let Inst{4-0} = offs21{20-16};
+}
+
+class R1_CSR<bits<13> op> : StdArch {
+  bits<5> rd;
+  bits<14> csr;
+
+  let Inst{31-24} = op{7-0};
+  let Inst{23-10} = csr;
+  let Inst{9-5} = op{12-8};
+  let Inst{4-0} = rd;
+}
 
-// 1RI21-type
-// <opcode | I21[15:0] | rj | I21[20:16]>
-class Fmt1RI21<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
-               list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<21> imm21;
-  bits<5> rj;
-
-  let Inst{31-26} = op;
-  let Inst{25-10} = imm21{15-0};
-  let Inst{9-5} = rj;
-  let Inst{4-0} = imm21{20-16};
-}
+class R1_SI20<bits<7> op> : StdArch {
+  bits<5> rd;
+  bits<20> si20;
+
+  let Inst{31-25} = op;
+  let Inst{24-5} = si20;
+  let Inst{4-0} = rd;
+}
+
+class R1_CACHE : StdArch {
+  bits<5> rj;
+  bits<5> op;
+  bits<12> si12;
+
+  let Inst{31-22} = 0x18;
+  let Inst{21-10} = si12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = op;
+}
+
+class R1_SEQ<bits<14> op> : StdArch {
+  bits<5> rj;
+  bits<5> offset;
+  bits<8> seq;
+
+  let Inst{31-18} = op;
+  let Inst{17-10} = seq;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = 0b00000;
+}
 
-// I15-type
-// <opcode | I15>
-class FmtI15<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
-             list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<15> imm15;
-
-  let Inst{31-15} = op;
-  let Inst{14-0} = imm15;
-}
+class R1_BCEQZ<bits<2> op> : StdArch {
+  bits<21> offset;
+  bits<3> cj;
+
+  let Inst{31-26} = 0x12;
+  let Inst{25-10} = offset{15-0};
+  let Inst{9-8} = op;
+  let Inst{7-5} = cj;
+  let Inst{4-0} = offset{20-16};
+}
 
-// I26-type
-// <opcode | I26[15:0] | I26[25:16]>
-class FmtI26<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
-             list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<26> imm26;
-
-  let Inst{31-26} = op;
-  let Inst{25-10} = imm26{15-0};
-  let Inst{9-0} = imm26{25-16};
-}
+// IMM26 classes: a 26-bit immediate
+//
+class IMM26B<bits<6> op> : StdArch {
+  bits<26> offs26;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = offs26{15-0};
+  let Inst{9-0} = offs26{25-16};
+}
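+
+// Worked example (illustrative): for "b" (op = 0x14) with a forward branch
+// of 0x8000 bytes, offs26 holds the word offset 0x2000, so
+// Inst{25-10} = offs26{15-0} = 0x2000 and Inst{9-0} = offs26{25-16} = 0.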
+
+// LoongArch Pseudo Instructions Format
+class LoongArchPseudo<dag outs, dag ins, list<dag> pattern> :
+  InstLA<outs, ins, "", pattern, Pseudo> {
+  let isCodeGenOnly = 1;
+  let isPseudo = 1;
+}
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class LoongArchAsmPseudoInst<dag outs, dag ins, string asmstr> :
+  InstLA<outs, ins, asmstr, [], Pseudo> {
+  let isPseudo = 1;
+  let Pattern = [];
+}
 
-// FmtBSTR_W
-// <opcode[11:1] | msbw | opcode[0] | lsbw | rj | rd>
-class FmtBSTR_W<bits<12> op, dag outs, dag ins, string opcstr, string opnstr,
-                list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<5> msbw;
-  bits<5> lsbw;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-21} = op{11-1};
-  let Inst{20-16} = msbw;
-  let Inst{15} = op{0};
-  let Inst{14-10} = lsbw;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+//
+// Misc instruction classes
+class ASSERT<bits<2> op> : StdArch {
+  bits<5> rk;
+  bits<5> rj;
+
+  let Inst{31-17} = 0x0;
+  let Inst{16-15} = op;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = 0x0;
+}
+
+class CODE15<bits<7> op> : StdArch {
+  bits<15> Code;
+
+  let Inst{31-22} = 0x0;
+  let Inst{21-15} = op;
+  let Inst{14-0} = Code;
+}
+
+class INSERT_BIT32<bits<1> op> : StdArch {
+  bits<5> msbw;
+  bits<5> lsbw;
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{31-21} = 0x3;
+  let Inst{20-16} = msbw;
+  let Inst{15} = op;
+  let Inst{14-10} = lsbw;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// FmtBSTR_D
-// <opcode | msbd | lsbd | rj | rd>
-class FmtBSTR_D<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
-                list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<6> msbd;
-  bits<6> lsbd;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-22} = op;
-  let Inst{21-16} = msbd;
-  let Inst{15-10} = lsbd;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+class INSERT_BIT64<bits<1> op> : StdArch {
+  bits<6> msbd;
+  bits<6> lsbd;
+  bits<5> rj;
+  bits<5> rd;
+
+  let Inst{31-23} = 0x1;
+  let Inst{22} = op;
+  let Inst{21-16} = msbd;
+  let Inst{15-10} = lsbd;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// FmtASRT
-// <opcode | rk | rj | 0x0>
-class FmtASRT<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
-              list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<5> rk;
-  bits<5> rj;
-
-  let Inst{31-15} = op;
-  let Inst{14-10} = rk;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = 0x0;
-}
+class MOVGPR2FCSR : StdArch {
+  bits<5> fcsr;
+  bits<5> rj;
+
+  let Inst{31-10} = 0x4530;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = fcsr;
+}
+
+class MOVFCSR2GPR : StdArch {
+  bits<5> fcsr;
+  bits<5> rd;
+
+  let Inst{31-10} = 0x4532;
+  let Inst{9-5} = fcsr;
+  let Inst{4-0} = rd;
+}
 
-// FmtPRELD
-// < 0b0010101011 | I12 | rj | I5>
-class FmtPRELD<dag outs, dag ins, string opcstr, string opnstr,
-               list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<12> imm12;
-  bits<5> rj;
-  bits<5> imm5;
-
-  let Inst{31-22} = 0b0010101011;
-  let Inst{21-10} = imm12;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = imm5;
-}
+class MOVFGR2FCFR : StdArch {
+  bits<3> cd;
+  bits<5> fj;
+
+  let Inst{31-10} = 0x4534;
+  let Inst{9-5} = fj;
+  let Inst{4-3} = 0;
+  let Inst{2-0} = cd;
+}
+
+class MOVFCFR2FGR : StdArch {
+  bits<3> cj;
+  bits<5> fd;
+
+  let Inst{31-10} = 0x4535;
+  let Inst{9-8} = 0;
+  let Inst{7-5} = cj;
+  let Inst{4-0} = fd;
+}
 
-// FmtPRELDX
-// < 0b00111000001011000 | rk | rj | I5>
-class FmtPRELDX<dag outs, dag ins, string opcstr, string opnstr,
-                list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<5> rk;
-  bits<5> rj;
-  bits<5> imm5;
-
-  let Inst{31-15} = 0b00111000001011000;
-  let Inst{14-10} = rk;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = imm5;
-}
+class MOVGPR2FCFR : StdArch {
+  bits<3> cd;
+  bits<5> rj;
+
+  let Inst{31-10} = 0x4536;
+  let Inst{9-5} = rj;
+  let Inst{4-3} = 0;
+  let Inst{2-0} = cd;
+}
 
-// FmtCSR
-// <opcode[12:5] | csr_num | opcode[4:0] | rd>
-class FmtCSR<bits<13> op, dag outs, dag ins, string opcstr, string opnstr,
-             list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<14> csr_num;
-  bits<5> rd;
-
-  let Inst{31-24} = op{12-5};
-  let Inst{23-10} = csr_num;
-  let Inst{9-5} = op{4-0};
-  let Inst{4-0} = rd;
-}
+class MOVFCFR2GPR : StdArch {
+  bits<3> cj;
+  bits<5> rd;
+
+  let Inst{31-10} = 0x4537;
+  let Inst{9-8} = 0;
+  let Inst{7-5} = cj;
+  let Inst{4-0} = rd;
+}
 
-// FmtCSRXCHG
-// <opcode | csr_num | rj | rd>
-class FmtCSRXCHG<bits<8> op, dag outs, dag ins, string opcstr, string opnstr,
-                 list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<14> csr_num;
-  bits<5> rj;
-  bits<5> rd;
-
-  let Inst{31-24} = op;
-  let Inst{23-10} = csr_num;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = rd;
-}
+class LoongArchInst : InstLA<(outs), (ins), "", [], FrmOther> {
+}
+
+class JMP_OFFS_2R<bits<6> op> : LoongArchInst {
+  bits<5> rs;
+  bits<5> rd;
+  bits<16> offset;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = offset;
+  let Inst{9-5} = rs;
+  let Inst{4-0} = rd;
+}
 
-// FmtCACOP
-// <0b0000011000 | I12 | rj | I5>
-class FmtCACOP<dag outs, dag ins, string opcstr, string opnstr,
-               list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<12> imm12;
-  bits<5> rj;
-  bits<5> op;
-
-  let Inst{31-22} = 0b0000011000;
-  let Inst{21-10} = imm12;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = op;
-}
+class FJ<bits<6> op> : StdArch {
+  bits<26> target;
+
+  let Inst{31-26} = op;
+  let Inst{25-10} = target{15-0};
+  let Inst{9-0} = target{25-16};
+}
 
-// FmtIMM32
-// <I32>
-class FmtI32<bits<32> op, string opstr, list<dag> pattern = []>
-    : LAInst<(outs), (ins), opstr, "", pattern> {
-  let Inst{31-0} = op;
-}
+class LUI_FM : StdArch {
+  bits<5> rt;
+  bits<16> imm16;
+
+  let Inst{31-26} = 0xf;
+  let Inst{25-21} = 0;
+  let Inst{20-16} = rt;
+  let Inst{15-0} = imm16;
+}
 
-// FmtINVTLB
-// <0b00000110010010011 | rk | rj | I5>
-class FmtINVTLB<dag outs, dag ins, string opcstr, string opnstr,
-                list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<5> rk;
-  bits<5> rj;
-  bits<5> op;
-
-  let Inst{31-15} = 0b00000110010010011;
-  let Inst{14-10} = rk;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = op;
-}
+class R2_IMM12M_STD<bits<4> op> : StdArch {
+  bits<5> rj;
+  bits<5> rd;
+  bits<12> imm12;
+
+  let Inst{31-26} = 0xa;
+  let Inst{25-22} = op;
+  let Inst{21-10} = imm12;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
 
-// FmtLDPTE
-// <0b00000110010001 | seq | rj | 00000>
-class FmtLDPTE<dag outs, dag ins, string opcstr, string opnstr,
-               list<dag> pattern = []>
-    : LAInst<outs, ins, opcstr, opnstr, pattern> {
-  bits<8> seq;
-  bits<5> rj;
-
-  let Inst{31-18} = 0b00000110010001;
-  let Inst{17-10} = seq;
-  let Inst{9-5} = rj;
-  let Inst{4-0} = 0b00000;
-}
+class LLD_2R<bits<3> Code> : LoongArchInst {
+  bits<5> rd;
+  bits<19> addr;
+  bits<5> rj = addr{18-14};
+  bits<14> offset = addr{13-0};
+
+  bits<32> Inst;
+
+  let Inst{31-27} = 0x4;
+  let Inst{26-24} = Code;
+  let Inst{23-10} = offset;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = rd;
+}
+
+class CEQS_FM<bits<2> op> {
+  bits<5> fj;
+  bits<5> fk;
+  bits<3> cd;
+  bits<5> cond;
+
+  bits<32> Inst;
+
+  let Inst{31-22} = 0x30;
+  let Inst{21-20} = op;
+  let Inst{19-15} = cond;
+  let Inst{14-10} = fk;
+  let Inst{9-5} = fj;
+  let Inst{4-3} = 0b00;
+  let Inst{2-0} = cd;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index bcbd4b28f3c7..00abd91679f9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -1,4 +1,4 @@
-//=- LoongArchInstrInfo.cpp - LoongArch Instruction Information -*- C++ -*-===//
+//===- LoongArchInstrInfo.cpp - LoongArch Instruction Information ---------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -11,105 +11,1030 @@
 //===----------------------------------------------------------------------===//
 
 #include "LoongArchInstrInfo.h"
-#include "LoongArch.h"
-#include "LoongArchMachineFunctionInfo.h"
+#include "LoongArchSubtarget.h"
+#include "MCTargetDesc/LoongArchAnalyzeImmediate.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cstdint>
 
 using namespace llvm;
 
 #define GET_INSTRINFO_CTOR_DTOR
 #include "LoongArchGenInstrInfo.inc"
 
-LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI)
+// Pin the vtable to this file.
+void LoongArchInstrInfo::anchor() {}
+
+LoongArchInstrInfo::LoongArchInstrInfo(const LoongArchSubtarget &STI)
     : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN,
-                            LoongArch::ADJCALLSTACKUP) {}
+                            LoongArch::ADJCALLSTACKUP),
+      RI(), Subtarget(STI) {}
+
+const LoongArchRegisterInfo &LoongArchInstrInfo::getRegisterInfo() const {
+  return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned LoongArchInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                                 int &FrameIndex) const {
+  unsigned Opc = MI.getOpcode();
+  if ((Opc == LoongArch::LD_W) || (Opc == LoongArch::LD_D) ||
+      (Opc == LoongArch::FLD_S) || (Opc == LoongArch::FLD_D)) {
+    if ((MI.getOperand(1).isFI()) &&  // is a stack slot
+        (MI.getOperand(2).isImm()) && // the imm is zero
+        (isZeroImm(MI.getOperand(2)))) {
+      FrameIndex = MI.getOperand(1).getIndex();
+      return MI.getOperand(0).getReg();
+    }
+  }
+  return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
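+/// For example, "st.d $val, <fi#N>, 0" is recognized (the base is a
+/// FrameIndex and the immediate offset is zero), while a store with any
+/// nonzero offset is not.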
+unsigned LoongArchInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                                int &FrameIndex) const {
+  unsigned Opc = MI.getOpcode();
+  if ((Opc == LoongArch::ST_D) || (Opc == LoongArch::ST_W) ||
+      (Opc == LoongArch::FST_S) || (Opc == LoongArch::FST_D)) {
+    if ((MI.getOperand(1).isFI()) &&  // is a stack slot
+        (MI.getOperand(2).isImm()) && // the imm is zero
+        (isZeroImm(MI.getOperand(2)))) {
+      FrameIndex = MI.getOperand(1).getIndex();
+      return MI.getOperand(0).getReg();
+    }
+  }
+  return 0;
+}
 
 void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator MBBI,
-                                     const DebugLoc &DL, MCRegister DstReg,
-                                     MCRegister SrcReg, bool KillSrc) const {
-  if (LoongArch::GPRRegClass.contains(DstReg, SrcReg)) {
-    BuildMI(MBB, MBBI, DL, get(LoongArch::OR), DstReg)
-        .addReg(SrcReg, getKillRegState(KillSrc))
-        .addReg(LoongArch::R0);
-    return;
-  }
-
-  // FPR->FPR copies.
-  unsigned Opc;
-  if (LoongArch::FPR32RegClass.contains(DstReg, SrcReg)) {
-    Opc = LoongArch::FMOV_S;
-  } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) {
-    Opc = LoongArch::FMOV_D;
-  } else {
-    // TODO: support other copies.
-    llvm_unreachable("Impossible reg-to-reg copy");
-  }
-
-  BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
-      .addReg(SrcReg, getKillRegState(KillSrc));
+                                     MachineBasicBlock::iterator I,
+                                     const DebugLoc &DL, MCRegister DestReg,
+                                     MCRegister SrcReg, bool KillSrc) const {
+  unsigned Opc = 0, ZeroReg = 0;
+  unsigned ZeroImm = 1;
+
+  if (LoongArch::GPR32RegClass.contains(DestReg)) { // Copy to CPU reg.
+    if (LoongArch::GPR32RegClass.contains(SrcReg))
+      Opc = LoongArch::OR32, ZeroReg = LoongArch::ZERO;
+    else if (LoongArch::FGR32RegClass.contains(SrcReg))
+      Opc = LoongArch::MOVFR2GR_S;
+    else if (LoongArch::FCFRRegClass.contains(SrcReg))
+      Opc = LoongArch::MOVCF2GR;
+  } else if (LoongArch::GPR32RegClass.contains(SrcReg)) { // Copy from CPU reg.
+    if (LoongArch::FGR32RegClass.contains(DestReg))
+      Opc = LoongArch::MOVGR2FR_W;
+    else if (LoongArch::FCFRRegClass.contains(DestReg))
+      Opc = LoongArch::MOVGR2CF;
+  } else if (LoongArch::FGR32RegClass.contains(DestReg, SrcReg)) {
+    Opc = LoongArch::FMOV_S;
+  } else if (LoongArch::FGR64RegClass.contains(DestReg, SrcReg)) {
+    Opc = LoongArch::FMOV_D;
+  } else if (LoongArch::GPR64RegClass.contains(DestReg)) { // To CPU64 reg.
+    if (LoongArch::GPR64RegClass.contains(SrcReg))
+      Opc = LoongArch::OR, ZeroReg = LoongArch::ZERO_64;
+    else if (LoongArch::FGR64RegClass.contains(SrcReg))
+      Opc = LoongArch::MOVFR2GR_D;
+    else if (LoongArch::FCFRRegClass.contains(SrcReg))
+      Opc = LoongArch::MOVCF2GR;
+  } else if (LoongArch::GPR64RegClass.contains(SrcReg)) { // From CPU64 reg.
+    if (LoongArch::FGR64RegClass.contains(DestReg))
+      Opc = LoongArch::MOVGR2FR_D;
+    else if (LoongArch::FCFRRegClass.contains(DestReg))
+      Opc = LoongArch::MOVGR2CF;
+  } else if (LoongArch::FGR32RegClass.contains(DestReg)) { // To FGR32 reg.
+    Opc = LoongArch::MOVCF2FR;
+  } else if (LoongArch::FGR32RegClass.contains(SrcReg)) { // From FGR32 reg.
+    Opc = LoongArch::MOVFR2CF;
+  } else if (LoongArch::FGR64RegClass.contains(DestReg)) { // To FGR64 reg.
+    Opc = LoongArch::MOVCF2FR;
+  } else if (LoongArch::FGR64RegClass.contains(SrcReg)) { // From FGR64 reg.
+    Opc = LoongArch::MOVFR2CF;
+  } else if (LoongArch::LSX128BRegClass.contains(DestReg)) { // To LSX reg.
+    if (LoongArch::LSX128BRegClass.contains(SrcReg))
+      Opc = LoongArch::VORI_B, ZeroImm = 0;
+  } else if (LoongArch::LASX256BRegClass.contains(DestReg)) { // To LASX reg.
+    if (LoongArch::LASX256BRegClass.contains(SrcReg))
+      Opc = LoongArch::XVORI_B, ZeroImm = 0;
+  }
+
+  assert(Opc && "Cannot copy registers");
+
+  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+  if (DestReg)
+    MIB.addReg(DestReg, RegState::Define);
+
+  if (SrcReg)
+    MIB.addReg(SrcReg, getKillRegState(KillSrc));
+
+  if (ZeroReg)
+    MIB.addReg(ZeroReg);
+
+  if (!ZeroImm)
+    MIB.addImm(0);
 }
+
+static bool isORCopyInst(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case LoongArch::OR:
+    if (MI.getOperand(2).getReg() == LoongArch::ZERO_64)
+      return true;
+    break;
+  case LoongArch::OR32:
+    if (MI.getOperand(2).getReg() == LoongArch::ZERO)
+      return true;
+    break;
+  }
+  return false;
+}
+
+/// We check for the common case of 'or', as it's LoongArch's preferred
+/// instruction for GPRs, but we have to check the operands to ensure that is
+/// the case. Other move instructions for LoongArch are directly identifiable.
+Optional<DestSourcePair>
+LoongArchInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
+  if (MI.isMoveReg() || isORCopyInst(MI))
+    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+  return None;
+}
 
-void LoongArchInstrInfo::storeRegToStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
-    bool IsKill, int FI, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI) const {
-  DebugLoc DL;
-  if (I != MBB.end())
-    DL = I->getDebugLoc();
-  MachineFunction *MF = MBB.getParent();
-  MachineFrameInfo &MFI = MF->getFrameInfo();
-
-  unsigned Opcode;
-  if (LoongArch::GPRRegClass.hasSubClassEq(RC))
-    Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
-                 ? LoongArch::ST_W
-                 : LoongArch::ST_D;
-  else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
-    Opcode = LoongArch::FST_S;
-  else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
-    Opcode = LoongArch::FST_D;
-  else
-    llvm_unreachable("Can't store this register to stack slot");
-
-  MachineMemOperand *MMO = MF->getMachineMemOperand(
-      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
-      MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
-
-  BuildMI(MBB, I, DL, get(Opcode))
-      .addReg(SrcReg, getKillRegState(IsKill))
-      .addFrameIndex(FI)
-      .addImm(0)
-      .addMemOperand(MMO);
+void LoongArchInstrInfo::storeRegToStack(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator I,
+                                         Register SrcReg, bool isKill, int FI,
+                                         const TargetRegisterClass *RC,
+                                         const TargetRegisterInfo *TRI,
+                                         int64_t Offset) const {
+  DebugLoc DL;
+  MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+
+  unsigned Opc = 0;
+  if (LoongArch::GPR32RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::ST_W;
+  else if (LoongArch::GPR64RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::ST_D;
+  else if (LoongArch::FGR64RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::FST_D;
+  else if (LoongArch::FGR32RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::FST_S;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8))
+    Opc = LoongArch::VST;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16))
+    Opc = LoongArch::VST_H;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v4f32))
+    Opc = LoongArch::VST_W;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v2f64))
+    Opc = LoongArch::VST_D;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8))
+    Opc = LoongArch::XVST;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16))
+    Opc = LoongArch::XVST_H;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v8f32))
+    Opc = LoongArch::XVST_W;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v4f64))
+    Opc = LoongArch::XVST_D;
+
+  assert(Opc && "Register class not handled!");
+  BuildMI(MBB, I, DL, get(Opc))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addFrameIndex(FI)
+      .addImm(Offset)
+      .addMemOperand(MMO);
 }
 
-void LoongArchInstrInfo::loadRegFromStackSlot(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg,
-    int FI, const TargetRegisterClass *RC,
-    const TargetRegisterInfo *TRI) const {
-  DebugLoc DL;
-  if (I != MBB.end())
-    DL = I->getDebugLoc();
-  MachineFunction *MF = MBB.getParent();
-  MachineFrameInfo &MFI = MF->getFrameInfo();
-
-  unsigned Opcode;
-  if (LoongArch::GPRRegClass.hasSubClassEq(RC))
-    Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32
-                 ? LoongArch::LD_W
-                 : LoongArch::LD_D;
-  else if (LoongArch::FPR32RegClass.hasSubClassEq(RC))
-    Opcode = LoongArch::FLD_S;
-  else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
-    Opcode = LoongArch::FLD_D;
-  else
-    llvm_unreachable("Can't load this register from stack slot");
-
-  MachineMemOperand *MMO = MF->getMachineMemOperand(
-      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
-      MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
-
-  BuildMI(MBB, I, DL, get(Opcode), DstReg)
-      .addFrameIndex(FI)
-      .addImm(0)
-      .addMemOperand(MMO);
+void LoongArchInstrInfo::loadRegFromStack(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator I,
+                                          Register DestReg, int FI,
+                                          const TargetRegisterClass *RC,
+                                          const TargetRegisterInfo *TRI,
+                                          int64_t Offset) const {
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+  MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
+  unsigned Opc = 0;
+
+  if (LoongArch::GPR32RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::LD_W;
+  else if (LoongArch::GPR64RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::LD_D;
+  else if (LoongArch::FGR32RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::FLD_S;
+  else if (LoongArch::FGR64RegClass.hasSubClassEq(RC))
+    Opc = LoongArch::FLD_D;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8))
+    Opc = LoongArch::VLD;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16))
+    Opc = LoongArch::VLD_H;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v4f32))
+    Opc = LoongArch::VLD_W;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v2f64))
+    Opc = LoongArch::VLD_D;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v32i8))
+    Opc = LoongArch::XVLD;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v16i16))
+    Opc = LoongArch::XVLD_H;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v8i32) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v8f32))
+    Opc = LoongArch::XVLD_W;
+  else if (TRI->isTypeLegalForClass(*RC, MVT::v4i64) ||
+           TRI->isTypeLegalForClass(*RC, MVT::v4f64))
+    Opc = LoongArch::XVLD_D;
+
+  assert(Opc && "Register class not handled!");
+
+  BuildMI(MBB, I, DL, get(Opc), DestReg)
+      .addFrameIndex(FI)
+      .addImm(Offset)
+      .addMemOperand(MMO);
 }
+
+bool LoongArchInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  MachineBasicBlock &MBB = *MI.getParent();
+
+  switch (MI.getDesc().getOpcode()) {
+  default:
+    return false;
+  case LoongArch::RetRA:
+    expandRetRA(MBB, MI);
+    break;
+  case LoongArch::ERet:
+    expandERet(MBB, MI);
+    break;
+  case LoongArch::PseudoFFINT_S_W:
+    expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_W, LoongArch::MOVGR2FR_W, false);
+    break;
+  case LoongArch::PseudoFFINT_S_L:
+    expandCvtFPInt(MBB, MI, LoongArch::FFINT_S_L, LoongArch::MOVGR2FR_D, true);
+    break;
+  case LoongArch::PseudoFFINT_D_W:
+    expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_W, LoongArch::MOVGR2FR_W, true);
+    break;
+  case LoongArch::PseudoFFINT_D_L:
+    expandCvtFPInt(MBB, MI, LoongArch::FFINT_D_L, LoongArch::MOVGR2FR_D, true);
+    break;
+  case LoongArch::LoongArcheh_return32:
+  case LoongArch::LoongArcheh_return64:
+    expandEhReturn(MBB, MI);
+    break;
+  }
+
+  MBB.erase(MI);
+  return true;
+}
+
+/// getOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
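+/// The condition vector built by AnalyzeCondBr stores this opcode as
+/// Cond[0], so reversing a branch amounts to rewriting that immediate.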
+unsigned LoongArchInstrInfo::getOppositeBranchOpc(unsigned Opc) const { + switch (Opc) { + default: llvm_unreachable("Illegal opcode!"); + case LoongArch::BEQ32: return LoongArch::BNE32; + case LoongArch::BEQ: return LoongArch::BNE; + case LoongArch::BNE32: return LoongArch::BEQ32; + case LoongArch::BNE: return LoongArch::BEQ; + case LoongArch::BEQZ32: return LoongArch::BNEZ32; + case LoongArch::BEQZ: return LoongArch::BNEZ; + case LoongArch::BNEZ32: return LoongArch::BEQZ32; + case LoongArch::BNEZ: return LoongArch::BEQZ; + case LoongArch::BCEQZ: return LoongArch::BCNEZ; + case LoongArch::BCNEZ: return LoongArch::BCEQZ; + case LoongArch::BLT32: return LoongArch::BGE32; + case LoongArch::BLT: return LoongArch::BGE; + case LoongArch::BGE32: return LoongArch::BLT32; + case LoongArch::BGE: return LoongArch::BLT; + case LoongArch::BLTU32: return LoongArch::BGEU32; + case LoongArch::BLTU: return LoongArch::BGEU; + case LoongArch::BGEU32: return LoongArch::BLTU32; + case LoongArch::BGEU: return LoongArch::BLTU; + } +} + +void LoongArchInstrInfo::adjustReg(unsigned DestReg, unsigned SrcReg, + int64_t Amount, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineInstr::MIFlag Flag) const { + LoongArchABIInfo ABI = Subtarget.getABI(); + DebugLoc DL; + unsigned ADDI = ABI.GetPtrAddiOp(); + + if (Amount == 0) + return; + + if (isInt<12>(Amount)) { + // addi $DestReg, $SrcReg, amount + BuildMI(MBB, I, DL, get(ADDI), DestReg) + .addReg(SrcReg) + .addImm(Amount) + .setMIFlag(Flag); + } else { + // For numbers which are not 12bit integers we synthesize Amount inline + // then add or subtract it from $SrcReg. + unsigned Opc = ABI.GetPtrAddOp(); + if (Amount < 0) { + Opc = ABI.GetPtrSubOp(); + Amount = -Amount; + } + unsigned Reg = loadImmediate(Amount, MBB, I, DL); + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg) + .addReg(Reg, RegState::Kill) + .setMIFlag(Flag); + } +} + +/// This function generates the sequence of instructions needed to get the +/// result of adding register REG and immediate IMM. +unsigned LoongArchInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, + const DebugLoc &DL) const { + const TargetRegisterClass *RC = Subtarget.isABI_LP64() + ? &LoongArch::GPR64RegClass + : &LoongArch::GPR32RegClass; + LoongArchAnalyzeImmediate::InstSeq Seq = + LoongArchAnalyzeImmediate::generateInstSeq(Imm, Subtarget.is64Bit()); + unsigned DstReg = MBB.getParent()->getRegInfo().createVirtualRegister(RC); + unsigned SrcReg = + Subtarget.isABI_LP64() ? LoongArch::ZERO_64 : LoongArch::ZERO; + + // Build the instructions in Seq. 
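+  // For example (assuming the usual lu12i.w + ori decomposition produced by
+  // LoongArchAnalyzeImmediate), Imm = 0x12345678 expands to:
+  //   lu12i.w $dst, 0x12345
+  //   ori     $dst, $dst, 0x678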
+  for (auto &Inst : Seq) {
+    if (Inst.Opc == LoongArch::LU12I_W || Inst.Opc == LoongArch::LU12I_W32)
+      BuildMI(MBB, II, DL, get(Inst.Opc), DstReg).addImm(Inst.Imm);
+    else
+      BuildMI(MBB, II, DL, get(Inst.Opc), DstReg)
+          .addReg(SrcReg, RegState::Kill)
+          .addImm(Inst.Imm);
+    SrcReg = DstReg;
+  }
+  return DstReg;
+}
+
+unsigned LoongArchInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
+  return (Opc == LoongArch::B || Opc == LoongArch::B32 ||
+          Opc == LoongArch::BEQZ || Opc == LoongArch::BEQZ32 ||
+          Opc == LoongArch::BNEZ || Opc == LoongArch::BNEZ32 ||
+          Opc == LoongArch::BCEQZ || Opc == LoongArch::BCNEZ ||
+          Opc == LoongArch::BEQ || Opc == LoongArch::BEQ32 ||
+          Opc == LoongArch::BNE || Opc == LoongArch::BNE32 ||
+          Opc == LoongArch::BLT || Opc == LoongArch::BLT32 ||
+          Opc == LoongArch::BGE || Opc == LoongArch::BGE32 ||
+          Opc == LoongArch::BLTU || Opc == LoongArch::BLTU32 ||
+          Opc == LoongArch::BGEU || Opc == LoongArch::BGEU32)
+             ? Opc
+             : 0;
+}
+
+void LoongArchInstrInfo::expandRetRA(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const {
+  MachineInstrBuilder MIB;
+
+  if (Subtarget.is64Bit())
+    MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn64))
+              .addReg(LoongArch::RA_64, RegState::Undef);
+  else
+    MIB = BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::PseudoReturn))
+              .addReg(LoongArch::RA, RegState::Undef);
+
+  // Retain any imp-use flags.
+  for (auto &MO : I->operands()) {
+    if (MO.isImplicit())
+      MIB.add(MO);
+  }
+}
+
+void LoongArchInstrInfo::expandERet(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I) const {
+  BuildMI(MBB, I, I->getDebugLoc(), get(LoongArch::ERTN));
+}
+
+std::pair<bool, bool>
+LoongArchInstrInfo::compareOpndSize(unsigned Opc,
+                                    const MachineFunction &MF) const {
+  const MCInstrDesc &Desc = get(Opc);
+  assert(Desc.NumOperands == 2 && "Unary instruction expected.");
+  const LoongArchRegisterInfo *RI = &getRegisterInfo();
+  unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF));
+  unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF));
+
+  return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize);
+}
+
+void LoongArchInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator I,
+                                        unsigned CvtOpc, unsigned MovOpc,
+                                        bool IsI64) const {
+  const MCInstrDesc &CvtDesc = get(CvtOpc), &MovDesc = get(MovOpc);
+  const MachineOperand &Dst = I->getOperand(0), &Src = I->getOperand(1);
+  unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg;
+  unsigned KillSrc = getKillRegState(Src.isKill());
+  DebugLoc DL = I->getDebugLoc();
+  bool DstIsLarger, SrcIsLarger;
+
+  std::tie(DstIsLarger, SrcIsLarger) =
+      compareOpndSize(CvtOpc, *MBB.getParent());
+
+  if (DstIsLarger)
+    TmpReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo);
+
+  if (SrcIsLarger)
+    DstReg = getRegisterInfo().getSubReg(DstReg, LoongArch::sub_lo);
+
+  BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc);
+  BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill);
+}
+
+void LoongArchInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator I) const {
+  // This pseudo instruction is generated as part of the lowering of
+  // ISD::EH_RETURN. We convert it to a stack increment by OffsetReg and an
+  // indirect jump to TargetReg.
+  LoongArchABIInfo ABI = Subtarget.getABI();
+  unsigned ADD = ABI.GetPtrAddOp();
+  unsigned SP = Subtarget.is64Bit() ? LoongArch::SP_64 : LoongArch::SP;
+  unsigned RA = Subtarget.is64Bit() ? LoongArch::RA_64 : LoongArch::RA;
+  unsigned T8 = Subtarget.is64Bit() ? LoongArch::T8_64 : LoongArch::T8;
+  unsigned ZERO = Subtarget.is64Bit() ? LoongArch::ZERO_64 : LoongArch::ZERO;
+  unsigned OffsetReg = I->getOperand(0).getReg();
+  unsigned TargetReg = I->getOperand(1).getReg();
+
+  // add $ra, $TargetReg, $zero
+  // add $sp, $sp, $OffsetReg
+  // jr  $ra (via RetRA)
+  const TargetMachine &TM = MBB.getParent()->getTarget();
+  if (TM.isPositionIndependent())
+    BuildMI(MBB, I, I->getDebugLoc(), get(ADD), T8)
+        .addReg(TargetReg)
+        .addReg(ZERO);
+  BuildMI(MBB, I, I->getDebugLoc(), get(ADD), RA)
+      .addReg(TargetReg)
+      .addReg(ZERO);
+  BuildMI(MBB, I, I->getDebugLoc(), get(ADD), SP).addReg(SP).addReg(OffsetReg);
+  expandRetRA(MBB, I);
+}
+
+bool LoongArchInstrInfo::isZeroImm(const MachineOperand &op) const {
+  return op.isImm() && op.getImm() == 0;
+}
+
+/// insertNoop - If data hazard condition is found insert the target nop
+/// instruction.
+// FIXME: This appears to be dead code.
+void LoongArchInstrInfo::insertNoop(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI) const {
+  DebugLoc DL;
+  BuildMI(MBB, MI, DL, get(LoongArch::NOP));
+}
+
+MachineMemOperand *
+LoongArchInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
+                                  MachineMemOperand::Flags Flags) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+                                 Flags, MFI.getObjectSize(FI),
+                                 MFI.getObjectAlign(FI));
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+
+void LoongArchInstrInfo::AnalyzeCondBr(
+    const MachineInstr *Inst, unsigned Opc, MachineBasicBlock *&BB,
+    SmallVectorImpl<MachineOperand> &Cond) const {
+  assert(getAnalyzableBrOpc(Opc) && "Not an analyzable branch");
+  int NumOp = Inst->getNumExplicitOperands();
+
+  // For both int and fp branches, the last explicit operand is the MBB.
+  BB = Inst->getOperand(NumOp - 1).getMBB();
+  Cond.push_back(MachineOperand::CreateImm(Opc));
+
+  for (int i = 0; i < NumOp - 1; i++)
+    Cond.push_back(Inst->getOperand(i));
+}
+
+bool LoongArchInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+                                       MachineBasicBlock *&TBB,
+                                       MachineBasicBlock *&FBB,
+                                       SmallVectorImpl<MachineOperand> &Cond,
+                                       bool AllowModify) const {
+  SmallVector<MachineInstr *, 2> BranchInstrs;
+  BranchType BT = analyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs);
+
+  return (BT == BT_None) || (BT == BT_Indirect);
+}
+
+MachineInstr *
+LoongArchInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                const DebugLoc &DL,
+                                ArrayRef<MachineOperand> Cond) const {
+  unsigned Opc = Cond[0].getImm();
+  const MCInstrDesc &MCID = get(Opc);
+  MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
+
+  for (unsigned i = 1; i < Cond.size(); ++i) {
+    assert((Cond[i].isImm() || Cond[i].isReg()) &&
+           "Cannot copy operand for conditional branch!");
+    MIB.add(Cond[i]);
+  }
+  MIB.addMBB(TBB);
+  return MIB.getInstr();
+}
+
+unsigned LoongArchInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                          MachineBasicBlock *TBB,
+                                          MachineBasicBlock *FBB,
+                                          ArrayRef<MachineOperand> Cond,
+                                          const DebugLoc &DL,
+                                          int *BytesAdded) const {
+  unsigned UncondBrOpc = LoongArch::B;
+  // Shouldn't be a fall through.
+  assert(TBB && "insertBranch must not be told to insert a fallthrough");
+  if (BytesAdded)
+    *BytesAdded = 0;
+
+  // # of condition operands:
+  //  Unconditional branches: 0
+  //  Floating point branches: 1 (opc)
+  //  Int BranchZero: 2 (opc, reg)
+  //  Int Branch: 3 (opc, reg0, reg1)
+  assert((Cond.size() <= 3) &&
+         "# of LoongArch branch conditions must be <= 3!");
+
+  // Two-way conditional branch.
+  if (FBB) {
+    MachineInstr &MI1 = *BuildCondBr(MBB, TBB, DL, Cond);
+    if (BytesAdded)
+      *BytesAdded += getInstSizeInBytes(MI1);
+    MachineInstr &MI2 = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB);
+    if (BytesAdded)
+      *BytesAdded += getInstSizeInBytes(MI2);
+    return 2;
+  }
+
+  // One-way branch.
+  if (Cond.empty()) {
+    // Unconditional branch.
+    MachineInstr &MI = *BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB);
+    if (BytesAdded)
+      *BytesAdded += getInstSizeInBytes(MI);
+  } else {
+    // Conditional branch.
+    MachineInstr &MI = *BuildCondBr(MBB, TBB, DL, Cond);
+    if (BytesAdded)
+      *BytesAdded += getInstSizeInBytes(MI);
+  }
+  return 1;
+}
+
+void LoongArchInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                              MachineBasicBlock &DestBB,
+                                              MachineBasicBlock &RestoreBB,
+                                              const DebugLoc &DL,
+                                              int64_t BrOffset,
+                                              RegScavenger *RS) const {
+  assert(RS && "RegScavenger required for long branching");
+  assert(MBB.empty() &&
+         "new block should be inserted for expanding unconditional branch");
+  assert(MBB.pred_size() == 1);
+
+  MachineFunction *MF = MBB.getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const LoongArchSubtarget &Subtarget =
+      MF->getSubtarget<LoongArchSubtarget>();
+  bool is64 = Subtarget.isABI_LP64();
+  const TargetRegisterClass *RC =
+      is64 ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass;
+
+  if (!is64 && !isInt<32>(BrOffset))
+    report_fatal_error(
+        "Branch offsets outside of the signed 32-bit range not supported");
+
+  unsigned ScratchReg = MRI.createVirtualRegister(RC);
+  unsigned ZeroReg = is64 ? LoongArch::ZERO_64 : LoongArch::ZERO;
+  auto II = MBB.end();
+
+  MachineInstr &Pcaddu12iMI =
+      *BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_PCADDU12I), ScratchReg)
+           .addMBB(&DestBB, LoongArchII::MO_PCREL_HI);
+  BuildMI(MBB, II, DL, get(LoongArch::LONG_BRANCH_ADDID2Op), ScratchReg)
+      .addReg(ScratchReg)
+      .addMBB(&DestBB, LoongArchII::MO_PCREL_LO);
+  BuildMI(MBB, II, DL, get(LoongArch::JIRL))
+      .addReg(ZeroReg)
+      .addReg(ScratchReg, RegState::Kill)
+      .addImm(0);
+  RS->enterBasicBlockEnd(MBB);
+  unsigned Scav = RS->scavengeRegisterBackwards(
+      *RC, MachineBasicBlock::iterator(Pcaddu12iMI), false, 0);
+  MRI.replaceRegWith(ScratchReg, Scav);
+  MRI.clearVirtRegs();
+  RS->setRegUsed(Scav);
+}
+
+unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                          int *BytesRemoved) const {
+  if (BytesRemoved)
+    *BytesRemoved = 0;
+
+  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+  unsigned removed = 0;
+
+  // Up to 2 branches are removed.
+  // Note that indirect branches are not removed.
+  while (I != REnd && removed < 2) {
+    // Skip past debug instructions.
+    if (I->isDebugInstr()) {
+      ++I;
+      continue;
+    }
+    if (!getAnalyzableBrOpc(I->getOpcode()))
+      break;
+    // Account for the branch's size before erasing it; the instruction is
+    // no longer valid afterwards.
+    if (BytesRemoved)
+      *BytesRemoved += getInstSizeInBytes(*I);
+    // Remove the branch.
+    I->eraseFromParent();
+    I = MBB.rbegin();
+    ++removed;
+  }
+
+  return removed;
+}
+
+/// reverseBranchCondition - Invert the branch opcode stored in Cond[0];
+/// returns false on success.
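+/// e.g. a (BLT, $rj, $rd) condition from analyzeBranch becomes
+/// (BGE, $rj, $rd), so the original taken target can become the fall-through.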
+bool LoongArchInstrInfo::reverseBranchCondition(
+    SmallVectorImpl<MachineOperand> &Cond) const {
+  assert((Cond.size() && Cond.size() <= 3) &&
+         "Invalid LoongArch branch condition!");
+  Cond[0].setImm(getOppositeBranchOpc(Cond[0].getImm()));
+  return false;
+}
+
+LoongArchInstrInfo::BranchType LoongArchInstrInfo::analyzeBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+    SmallVectorImpl<MachineOperand> &Cond, bool AllowModify,
+    SmallVectorImpl<MachineInstr *> &BranchInstrs) const {
+  MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+  // Skip all the debug instructions.
+  while (I != REnd && I->isDebugInstr())
+    ++I;
+
+  if (I == REnd || !isUnpredicatedTerminator(*I)) {
+    // This block ends with no branches (it just falls through to its succ).
+    // Leave TBB/FBB null.
+    TBB = FBB = nullptr;
+    return BT_NoBranch;
+  }
+
+  MachineInstr *LastInst = &*I;
+  unsigned LastOpc = LastInst->getOpcode();
+  BranchInstrs.push_back(LastInst);
+
+  // Not an analyzable branch (e.g., indirect jump).
+  if (!getAnalyzableBrOpc(LastOpc))
+    return LastInst->isIndirectBranch() ? BT_Indirect : BT_None;
+
+  // Get the second to last instruction in the block.
+  unsigned SecondLastOpc = 0;
+  MachineInstr *SecondLastInst = nullptr;
+
+  // Skip past any debug instruction to see if the second last actual
+  // instruction is a branch.
+  ++I;
+  while (I != REnd && I->isDebugInstr())
+    ++I;
+
+  if (I != REnd) {
+    SecondLastInst = &*I;
+    SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+    // Not an analyzable branch (must be an indirect jump).
+    if (isUnpredicatedTerminator(*SecondLastInst) && !SecondLastOpc)
+      return BT_None;
+  }
+
+  // If there is only one terminator instruction, process it.
+  if (!SecondLastOpc) {
+    // Unconditional branch.
+    if (LastInst->isUnconditionalBranch()) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return BT_Uncond;
+    }
+
+    // Conditional branch.
+    AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+    return BT_Cond;
+  }
+
+  // If we reached here, there are two branches.
+  // If there are three terminators, we don't know what sort of block this is.
+  if (++I != REnd && isUnpredicatedTerminator(*I))
+    return BT_None;
+
+  BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst);
+
+  // If the second to last instruction is an unconditional branch,
+  // analyze it and remove the last instruction.
+  if (SecondLastInst->isUnconditionalBranch()) {
+    // Return if the last instruction cannot be removed.
+    if (!AllowModify)
+      return BT_None;
+
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    LastInst->eraseFromParent();
+    BranchInstrs.pop_back();
+    return BT_Uncond;
+  }
+
+  // Conditional branch followed by an unconditional branch.
+  // The last one must be unconditional.
+  if (!LastInst->isUnconditionalBranch())
+    return BT_None;
+
+  AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+  FBB = LastInst->getOperand(0).getMBB();
+
+  return BT_CondUncond;
+}
+
+MachineBasicBlock *
+LoongArchInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
+  assert(MI.getDesc().isBranch() && "Unexpected opcode!");
+  // The branch target is always the last operand.
+  int NumOp = MI.getNumExplicitOperands();
+  return MI.getOperand(NumOp - 1).getMBB();
+}
+
+bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOpc,
+                                               int64_t BrOffset) const {
+  switch (BranchOpc) {
+  case LoongArch::B: case LoongArch::B32:
+    return isInt<28>(BrOffset);
+
+  case LoongArch::BEQZ: case LoongArch::BEQZ32:
+  case LoongArch::BNEZ: case LoongArch::BNEZ32:
+  case LoongArch::BCEQZ:
+  case LoongArch::BCNEZ:
+    return isInt<23>(BrOffset);
+
+  case LoongArch::BEQ: case LoongArch::BEQ32:
+  case LoongArch::BNE: case LoongArch::BNE32:
+  case LoongArch::BLT: case LoongArch::BLT32:
+  case LoongArch::BGE: case LoongArch::BGE32:
+  case LoongArch::BLTU: case LoongArch::BLTU32:
+  case LoongArch::BGEU: case LoongArch::BGEU32:
+    return isInt<18>(BrOffset);
+  }
+
+  llvm_unreachable("Unknown branch instruction!");
+}
+
+/// Predicate for distinguishing between control transfer instructions and all
+/// other instructions for handling forbidden slots. Consider inline assembly
+/// as unsafe as well.
+bool LoongArchInstrInfo::SafeInForbiddenSlot(const MachineInstr &MI) const {
+  if (MI.isInlineAsm())
+    return false;
+
+  return (MI.getDesc().TSFlags & LoongArchII::IsCTI) == 0;
+}
+
+/// Predicate for distinguishing instructions that have forbidden slots.
+bool LoongArchInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const {
+  return (MI.getDesc().TSFlags & LoongArchII::HasForbiddenSlot) != 0;
+}
+
+/// Return the number of bytes of code the specified instruction may be.
+unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  default:
+    return MI.getDesc().getSize();
+  case TargetOpcode::INLINEASM: { // Inline Asm: Variable size.
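+    // Conservative upper bound: the asm string is scanned and each
+    // separator-delimited statement is assumed to occupy one 4-byte
+    // instruction (see the FIXME in LoongArchInstrInfo.h).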
+ const MachineFunction *MF = MI.getParent()->getParent(); + const char *AsmStr = MI.getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + } +} + +MachineInstrBuilder +LoongArchInstrInfo::genInstrWithNewOpc(unsigned NewOpc, + MachineBasicBlock::iterator I) const { + MachineInstrBuilder MIB; + + int ZeroOperandPosition = -1; + bool BranchWithZeroOperand = false; + if (I->isBranch() && !I->isPseudo()) { + auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo(); + ZeroOperandPosition = I->findRegisterUseOperandIdx(LoongArch::ZERO, false, TRI); + BranchWithZeroOperand = ZeroOperandPosition != -1; + } + + MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc)); + + if (NewOpc == LoongArch::JIRL) { + MIB->removeOperand(0); + for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { + MIB.add(I->getOperand(J)); + } + MIB.addImm(0); + } else { + for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) { + if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J) + continue; + + MIB.add(I->getOperand(J)); + } + } + + MIB.copyImplicitOps(*I); + MIB.cloneMemRefs(*I); + return MIB; +} + +bool LoongArchInstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { + assert(!MI.isBundle() && + "TargetInstrInfo::findCommutedOpIndices() can't handle bundles"); + + const MCInstrDesc &MCID = MI.getDesc(); + if (!MCID.isCommutable()) + return false; + + return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); +} + +// bstrins, bstrpick have the following constraints: +// 0 <= lsb <= msb <= High +static bool verifyBstrInstruction(const MachineInstr &MI, StringRef &ErrInfo, + const int64_t High) { + MachineOperand MOMsb = MI.getOperand(2); + if (!MOMsb.isImm()) { + ErrInfo = "Msb operand is not an immediate!"; + return false; + } + MachineOperand MOLsb = MI.getOperand(3); + if (!MOLsb.isImm()) { + ErrInfo = "Lsb operand is not an immediate!"; + return false; + } + + int64_t Lsb = MOLsb.getImm(); + if (!((0 <= Lsb) && (Lsb <= High))) { + ErrInfo = "Lsb operand is out of range!"; + return false; + } + + int64_t Msb = MOMsb.getImm(); + if (!((0 <= Msb) && (Msb <= High))) { + ErrInfo = "Msb operand is out of range!"; + return false; + } + + if (!(Lsb <= Msb)) { + ErrInfo = "Lsb operand is not less than or equal to msb operand!"; + return false; + } + + return true; +} + +// Perform target specific instruction verification. +bool LoongArchInstrInfo::verifyInstruction(const MachineInstr &MI, + StringRef &ErrInfo) const { + // Verify that bstrins and bstrpick instructions are well formed. 
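+  // For example, "bstrpick.w $rd, $rj, 7, 0" (extract bits [7:0]) satisfies
+  // 0 <= lsb(0) <= msb(7) <= 31, while an encoding with msb < lsb is
+  // rejected.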
+  switch (MI.getOpcode()) {
+  case LoongArch::BSTRINS_W:
+  case LoongArch::BSTRPICK_W:
+    return verifyBstrInstruction(MI, ErrInfo, 31);
+  case LoongArch::BSTRINS_D:
+  case LoongArch::BSTRPICK_D:
+    return verifyBstrInstruction(MI, ErrInfo, 63);
+  default:
+    return true;
+  }
+}
+
+std::pair<unsigned, unsigned>
+LoongArchInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+  return std::make_pair(TF, 0u);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+  using namespace LoongArchII;
+
+  static const std::pair<unsigned, const char *> Flags[] = {
+      {MO_PCREL_HI, "larch-pcrel-hi"},
+      {MO_PCREL_LO, "larch-pcrel-lo"},
+      {MO_TLSGD_HI, "larch-tlsgd-hi"},
+      {MO_TLSGD_LO, "larch-tlsgd-lo"},
+      {MO_TLSIE_HI, "larch-tlsie-hi"},
+      {MO_TLSIE_LO, "larch-tlsie-lo"},
+      {MO_TLSLE_HI, "larch-tlsle-hi"},
+      {MO_TLSLE_LO, "larch-tlsle-lo"},
+      {MO_ABS_HI, "larch-abs-hi"},
+      {MO_ABS_LO, "larch-abs-lo"},
+      {MO_ABS_HIGHER, "larch-abs-higher"},
+      {MO_ABS_HIGHEST, "larch-abs-highest"},
+      {MO_GOT_HI, "larch-got-hi"},
+      {MO_GOT_LO, "larch-got-lo"},
+      {MO_CALL_HI, "larch-call-hi"},
+      {MO_CALL_LO, "larch-call-lo"}};
+  return makeArrayRef(Flags);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index 0a8c86a5e0c2..272e1e25e54d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -1,4 +1,4 @@
-//=- LoongArchInstrInfo.h - LoongArch Instruction Information ---*- C++ -*-===//
+//===- LoongArchInstrInfo.h - LoongArch Instruction Information -*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -8,39 +8,237 @@
 //
 // This file contains the LoongArch implementation of the TargetInstrInfo class.
 //
+// FIXME: We need to override the TargetInstrInfo::getInlineAsmLength method
+// in order for the LoongArchLongBranch pass to work correctly when the code
+// has inline assembly. The returned value doesn't have to be the asm
+// instruction's exact size in bytes; LoongArchLongBranch only expects it to
+// be a correct upper bound.
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
 #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
 
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "LoongArch.h"
 #include "LoongArchRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include <cstdint>
 
 #define GET_INSTRINFO_HEADER
 #include "LoongArchGenInstrInfo.inc"
 
 namespace llvm {
 
+class MachineInstr;
+class MachineOperand;
 class LoongArchSubtarget;
+class TargetRegisterClass;
+class TargetRegisterInfo;
 
 class LoongArchInstrInfo : public LoongArchGenInstrInfo {
+  virtual void anchor();
+  const LoongArchRegisterInfo RI;
+  const LoongArchSubtarget &Subtarget;
+
 public:
-  explicit LoongArchInstrInfo(LoongArchSubtarget &STI);
+  enum BranchType {
+    BT_None,       // Couldn't analyze branch.
+    BT_NoBranch,   // No branches found.
+    BT_Uncond,     // One unconditional branch.
+    BT_Cond,       // One conditional branch.
+    BT_CondUncond, // A conditional branch followed by an unconditional branch.
+    BT_Indirect    // One indirect branch.
+  };
+
+  explicit LoongArchInstrInfo(const LoongArchSubtarget &STI);
+
+  /// isLoadFromStackSlot - If the specified machine instruction is a direct
+  /// load from a stack slot, return the virtual or physical register number of
+  /// the destination along with the FrameIndex of the loaded stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than loading from the stack slot.
+  unsigned isLoadFromStackSlot(const MachineInstr &MI,
+                               int &FrameIndex) const override;
 
-  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-                   const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
+  /// isStoreToStackSlot - If the specified machine instruction is a direct
+  /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stored stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than storing to the stack slot.
+  unsigned isStoreToStackSlot(const MachineInstr &MI,
+                              int &FrameIndex) const override;
+
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc) const override;
 
+  /// Branch Analysis
+  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify) const override;
+
+  unsigned removeBranch(MachineBasicBlock &MBB,
+                        int *BytesRemoved = nullptr) const override;
+
+  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+                        const DebugLoc &DL,
+                        int *BytesAdded = nullptr) const override;
+
+  void insertIndirectBranch(MachineBasicBlock &MBB,
+                            MachineBasicBlock &NewDestBB,
+                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+                            int64_t BrOffset,
+                            RegScavenger *RS = nullptr) const override;
+
+  bool
+  reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+  BranchType analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                           MachineBasicBlock *&FBB,
+                           SmallVectorImpl<MachineOperand> &Cond,
+                           bool AllowModify,
+                           SmallVectorImpl<MachineInstr *> &BranchInstrs) const;
+
+  /// Get the block that the branch instruction jumps to.
+  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+
+  /// Determine if the branch target is in range.
+  bool isBranchOffsetInRange(unsigned BranchOpc,
+                             int64_t BrOffset) const override;
+
+  /// Predicate to determine if an instruction can go in a forbidden slot.
+  bool SafeInForbiddenSlot(const MachineInstr &MI) const;
+
+  /// Predicate to determine if an instruction has a forbidden slot.
+  bool HasForbiddenSlot(const MachineInstr &MI) const;
+
+  /// Insert a nop instruction when a hazard condition is found.
+  void insertNoop(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator MI) const override;
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegisterInfo. As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  const LoongArchRegisterInfo &getRegisterInfo() const;
+
+  bool expandPostRAPseudo(MachineInstr &MI) const override;
+
+  unsigned getOppositeBranchOpc(unsigned Opc) const;
+
+  /// Emit a series of instructions to load an immediate.
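+  /// On LA64 an arbitrary 64-bit immediate takes at most four instructions;
+  /// an illustrative expansion (the exact sequence emitted may differ) is:
+  ///   lu12i.w $rd, imm[31:12]
+  ///   ori     $rd, $rd, imm[11:0]
+  ///   lu32i.d $rd, imm[51:32]
+  ///   lu52i.d $rd, $rd, imm[63:52]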
+  unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator II,
+                         const DebugLoc &DL) const;
+
+  /// Return the number of bytes of code the specified instruction may be.
+  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
+
   void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, Register SrcReg,
-                           bool IsKill, int FrameIndex,
+                           MachineBasicBlock::iterator MBBI,
+                           Register SrcReg, bool isKill, int FrameIndex,
                            const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI) const override;
+                           const TargetRegisterInfo *TRI) const override {
+    storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
+  }
+
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, Register DstReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI) const override;
+                            MachineBasicBlock::iterator MBBI,
+                            Register DestReg, int FrameIndex,
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const override {
+    loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
+  }
+
+  void storeRegToStack(MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator MI,
+                       Register SrcReg, bool isKill, int FrameIndex,
+                       const TargetRegisterClass *RC,
+                       const TargetRegisterInfo *TRI,
+                       int64_t Offset) const;
+
+  void loadRegFromStack(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MI,
+                        Register DestReg, int FrameIndex,
+                        const TargetRegisterClass *RC,
+                        const TargetRegisterInfo *TRI,
+                        int64_t Offset) const;
+
+  /// Adjust a register value (DestReg = SrcReg + Amount).
+  void
+  adjustReg(unsigned DestReg, unsigned SrcReg, int64_t Amount,
+            MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+            MachineInstr::MIFlag Flag = MachineInstr::MIFlag::NoFlags) const;
+
+  /// Create an instruction which has the same operands and memory operands
+  /// as MI but has a new opcode.
+  MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
+                                         MachineBasicBlock::iterator I) const;
+
+  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
+                             unsigned &SrcOpIdx2) const override;
+
+  /// Perform target specific instruction verification.
+  bool verifyInstruction(const MachineInstr &MI,
+                         StringRef &ErrInfo) const override;
+
+  std::pair<unsigned, unsigned>
+  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+  ArrayRef<std::pair<unsigned, const char *>>
+  getSerializableDirectMachineOperandTargetFlags() const override;
+
+protected:
+  /// If the specified machine instruction is an instruction that moves/copies
+  /// a value from one register to another register, return true along with
+  /// the @Source machine operand and @Destination machine operand.
+  Optional<DestSourcePair>
+  isCopyInstrImpl(const MachineInstr &MI) const override;
+
+private:
+  bool isZeroImm(const MachineOperand &op) const;
+
+  MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI,
+                                   MachineMemOperand::Flags Flags) const;
+
+  unsigned getAnalyzableBrOpc(unsigned Opc) const;
+
+  void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc,
+                     MachineBasicBlock *&BB,
+                     SmallVectorImpl<MachineOperand> &Cond) const;
+
+  MachineInstr *
+  BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+              const DebugLoc &DL, ArrayRef<MachineOperand> Cond) const;
+
+  void expandRetRA(MachineBasicBlock &MBB,
+                   MachineBasicBlock::iterator I) const;
+
+  void expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
+
+  std::pair<bool, bool> compareOpndSize(unsigned Opc,
+                                        const MachineFunction &MF) const;
+
+  /// Expand pseudo Int-to-FP conversion instructions.
+  ///
+  /// For example, the following pseudo instruction
+  ///   PseudoCVT_D32_W D2, A5
+  /// gets expanded into these two instructions:
+  ///   MTC1 F4, A5
+  ///   CVT_D32_W D2, F4
+  ///
+  /// We do this expansion post-RA to avoid inserting a floating point copy
+  /// instruction between MTC1 and CVT_D32_W.
+  void expandCvtFPInt(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                      unsigned CvtOpc, unsigned MovOpc, bool IsI64) const;
+
+  void expandEhReturn(MachineBasicBlock &MBB,
+                      MachineBasicBlock::iterator I) const;
 };
 
 } // end namespace llvm
+
 #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index d07d086bd7da..96eb554c93a1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1,4 +1,4 @@
-//== LoongArchInstrInfo.td - Target Description for LoongArch -*- tablegen -*-//
+//===- LoongArchInstrInfo.td - Target Description for LoongArch -*- tablegen -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,929 +6,1942 @@
 //
 // This file describes the LoongArch instructions in TableGen format.
 //
 //===----------------------------------------------------------------------===//
 
+include "LoongArchInstrFormats.td"
 
-//===----------------------------------------------------------------------===//
-// LoongArch specific DAG Nodes.
-//===----------------------------------------------------------------------===//
+def SDT_Bstrpick : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                        SDTCisVT<2, i32>, SDTCisSameAs<2, 3>]>;
+def SDT_Bstrins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                       SDTCisVT<2, i32>, SDTCisSameAs<2, 3>,
+                                       SDTCisSameAs<0, 4>]>;
 
-// Target-independent type requirements, but with target-specific formats.
-def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
-                                       SDTCisVT<1, i32>]>;
-def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
-                                   SDTCisVT<1, i32>]>;
-
-// Target-dependent type requirements.
-def SDT_LoongArchCall : SDTypeProfile<0, -1, [SDTCisVT<0, GRLenVT>]>;
-def SDT_LoongArchIntBinOpW : SDTypeProfile<1, 2, [
-  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>
-]>;
-
-def SDT_LoongArchBStrIns: SDTypeProfile<1, 4, [
-  SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>,
-  SDTCisSameAs<3, 4>
-]>;
-
-def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [
-  SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3>
-]>;
-
-// TODO: Add LoongArch specific DAG Nodes
-// Target-independent nodes, but with target-specific formats.
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
-                           [SDNPHasChain, SDNPOutGlue]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
-                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-// Target-dependent nodes.
-def loongarch_call : SDNode<"LoongArchISD::CALL", SDT_LoongArchCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; -def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; -def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; -def loongarch_bstrins - : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>; -def loongarch_bstrpick - : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>; +def SDT_REVBD : SDTypeProfile<1, 1, [SDTCisInt<0>]>; +def LoongArchREVBD : SDNode<"LoongArchISD::REVBD", SDT_REVBD>; -//===----------------------------------------------------------------------===// -// Operand and SDNode transformation definitions. -//===----------------------------------------------------------------------===// +def LoongArchBstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_Bstrpick>; -class ImmAsmOperand - : AsmOperandClass { - let Name = prefix # "Imm" # width # suffix; - let DiagnosticType = !strconcat("Invalid", Name); +def LoongArchBstrins : SDNode<"LoongArchISD::BSTRINS", SDT_Bstrins>; + +def SDT_LoongArchEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + +def LoongArchehret : SDNode<"LoongArchISD::EH_RETURN", SDT_LoongArchEHRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +//===---------------------------------------------------------------------===/ +// Operand, Complex Patterns and Transformations Definitions. +//===---------------------------------------------------------------------===/ + +def assertzext_lt_i32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ + return cast(N->getOperand(1))->getVT().bitsLT(MVT::i32); +}]>; + +def immz : PatLeaf<(imm), [{ return N->getSExtValue() == 0; }]>; +def immZExt12 : PatLeaf<(imm), [{ return isUInt<12>(N->getZExtValue()); }]>; +def immSExt12 : PatLeaf<(imm), [{ return isInt<12>(N->getSExtValue()); }]>; +def immSExt13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>; + +def immZExt2Alsl : ImmLeaf(Imm - 1);}]>; +//class ImmAsmOperand : AsmOperandClass { +// let RenderMethod = "addImmOperands"; +// let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; +// let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; +//} +// +//def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; } +//def imm8 : Operand, ImmLeaf { +// let ParserMatchClass = Imm8AsmOperand; +//} + +def HasLSX : Predicate<"Subtarget->hasLSX()">, + AssemblerPredicate<(all_of FeatureLSX)>; +def HasLASX : Predicate<"Subtarget->hasLASX()">, + AssemblerPredicate<(all_of FeatureLASX)>; + +class EXT_LSX { + list ExtPredicate = [HasLSX]; +} + +class EXT_LASX { + list ExtPredicate = [HasLASX]; +} + +class SImmOperand : AsmOperandClass { + let Name = "SImm" # width; + let DiagnosticType = "InvalidSImm" # width; let RenderMethod = "addImmOperands"; + let PredicateMethod = "isSImm<" # width # ">"; } -class SImmAsmOperand - : ImmAsmOperand<"S", width, suffix> { +def SImm2Operand : SImmOperand<2>; +def simm2 : Operand, ImmLeaf= -2 && Imm < 2; }]> { + let ParserMatchClass = SImm2Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>"; +} +def SImm3Operand : SImmOperand<3>; +def simm3 : Operand, ImmLeaf= -4 && Imm < 4; }]> { + let ParserMatchClass = SImm3Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<3>"; } -class UImmAsmOperand - : 
ImmAsmOperand<"U", width, suffix> { +def SImm5Operand : SImmOperand<5>; +def simm5 : Operand, ImmLeaf= -16 && Imm < 16; }]> { + let ParserMatchClass = SImm5Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>"; } -def uimm2 : Operand { - let ParserMatchClass = UImmAsmOperand<2>; +def simm5_32 : Operand, ImmLeaf= -16 && Imm < 16; }]> { + let ParserMatchClass = SImm5Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<5>"; } -def uimm2_plus1 : Operand { - let ParserMatchClass = UImmAsmOperand<2, "plus1">; - let EncoderMethod = "getImmOpValueSub1"; - let DecoderMethod = "decodeUImmOperand<2, 1>"; +def SImm8Operand : SImmOperand<8>; +def simm8 : Operand, ImmLeaf= -128 && Imm < 128; }]> { + let ParserMatchClass = SImm8Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; +} +def simm8_32 : Operand, ImmLeaf= -128 && Imm < 128; }]> { + let ParserMatchClass = SImm8Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>"; } -def uimm3 : Operand { - let ParserMatchClass = UImmAsmOperand<3>; +def SImm12Operand : SImmOperand<12>; +def simm12 : Operand, ImmLeaf= -2048 && Imm < 2048; }]> { + let ParserMatchClass = SImm12Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; +} +def simm12_32 : Operand, ImmLeaf= -2048 && Imm < 2048; }]> { + let ParserMatchClass = SImm12Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>"; } -def uimm5 : Operand, ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<5>; +def SImm14Operand : SImmOperand<14>; +def simm14 : Operand, ImmLeaf= -8192 && Imm < 8192; }]> { + let ParserMatchClass = SImm14Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<14>"; } -def uimm6 : Operand, ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<6>; +def SImm15Operand : SImmOperand<15>; +def simm15 : Operand, ImmLeaf= -16384 && Imm < 16384; }]> { + let ParserMatchClass = SImm15Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<15>"; } -def uimm8 : Operand { - let ParserMatchClass = UImmAsmOperand<8>; +def SImm16Operand : SImmOperand<16>; +def simm16 : Operand, ImmLeaf= -32768 && Imm < 32768; }]> { + let ParserMatchClass = SImm16Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<16>"; } -def uimm12 : Operand, ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<12>; +def SImm20Operand : SImmOperand<20>; +def simm20 : Operand, ImmLeaf= -524288 && Imm < 524288; }]> { + let ParserMatchClass = SImm20Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>"; +} +def simm20_32 : Operand, ImmLeaf= -524288 && Imm < 524288; }]> { + let ParserMatchClass = SImm20Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<20>"; } -def uimm14 : Operand { - let ParserMatchClass = UImmAsmOperand<14>; +def SImm21Operand : SImmOperand<21>; +def simm21 : Operand, ImmLeaf= -1048576 && Imm < 1048576; }]> { + let ParserMatchClass = SImm21Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<21>"; } -def uimm15 : Operand { - let ParserMatchClass = UImmAsmOperand<15>; +def SImm26Operand : SImmOperand<26>; +def simm26 : Operand, ImmLeaf= -33554432 && Imm < 33554432; }]> { + let ParserMatchClass = SImm26Operand; + let DecoderMethod = "DecodeSImmWithOffsetAndScale<26>"; } -def simm12 : Operand, ImmLeaf(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<12>; - let DecoderMethod = "decodeSImmOperand<12>"; +def UImm1Operand : AsmOperandClass { + let Name = "UImm1"; + let RenderMethod = "addUImmOperands<1>"; + let PredicateMethod = "isUImm<1>"; + let DiagnosticType = "InvalidImm0_1"; } -def 
simm14_lsl2 : Operand { - let ParserMatchClass = SImmAsmOperand<14, "lsl2">; - let EncoderMethod = "getImmOpValueAsr2"; - let DecoderMethod = "decodeSImmOperand<14, 2>"; +def UImm2Operand : AsmOperandClass { + let Name = "UImm2"; + let RenderMethod = "addUImmOperands<2>"; + let PredicateMethod = "isUImm<2>"; + let DiagnosticType = "InvalidImm0_3"; } -def simm16 : Operand { - let ParserMatchClass = SImmAsmOperand<16>; - let DecoderMethod = "decodeSImmOperand<16>"; +def UImm3Operand : AsmOperandClass { + let Name = "UImm3"; + let RenderMethod = "addUImmOperands<3>"; + let PredicateMethod = "isUImm<3>"; + let DiagnosticType = "InvalidImm0_7"; } -def simm16_lsl2 : Operand, - ImmLeaf(Imm>>2);}]> { - let ParserMatchClass = SImmAsmOperand<16, "lsl2">; - let EncoderMethod = "getImmOpValueAsr2"; - let DecoderMethod = "decodeSImmOperand<16, 2>"; +def UImm4Operand : AsmOperandClass { + let Name = "UImm4"; + let RenderMethod = "addUImmOperands<4>"; + let PredicateMethod = "isUImm<4>"; + let DiagnosticType = "InvalidImm0_15"; } -def simm16_lsl2_br : Operand { - let ParserMatchClass = SImmAsmOperand<16, "lsl2">; - let EncoderMethod = "getImmOpValueAsr2"; - let DecoderMethod = "decodeSImmOperand<16, 2>"; +def UImm5Operand : AsmOperandClass { + let Name = "UImm5"; + let RenderMethod = "addUImmOperands<5>"; + let PredicateMethod = "isUImm<5>"; + let DiagnosticType = "InvalidImm0_31"; } -def simm20 : Operand { - let ParserMatchClass = SImmAsmOperand<20>; - let DecoderMethod = "decodeSImmOperand<20>"; +def uimm1i : Operand, ImmLeaf= 0 && Imm < 2; }]> { + let PrintMethod = "printUImm<1>"; + let ParserMatchClass = UImm1Operand; } -def simm21_lsl2 : Operand { - let ParserMatchClass = SImmAsmOperand<21, "lsl2">; - let EncoderMethod = "getImmOpValueAsr2"; - let DecoderMethod = "decodeSImmOperand<21, 2>"; +def uimm2 : Operand, ImmLeaf= 0 && Imm < 4; }]> { + let PrintMethod = "printUImm<2>"; + let ParserMatchClass = UImm2Operand; } -def simm26_lsl2 : Operand { - let ParserMatchClass = SImmAsmOperand<26, "lsl2">; - let EncoderMethod = "getImmOpValueAsr2"; - let DecoderMethod = "decodeSImmOperand<26, 2>"; +def uimm3 : Operand, ImmLeaf= 0 && Imm < 8; }]> { + let PrintMethod = "printUImm<3>"; + let ParserMatchClass = UImm3Operand; } -// Standalone (codegen-only) immleaf patterns. +def uimm4i : Operand, ImmLeaf= 0 && Imm < 16; }]> { + let PrintMethod = "printUImm<4>"; + let ParserMatchClass = UImm4Operand; +} -// A 12-bit signed immediate plus one where the imm range will be [-2047, 2048]. -def simm12_plus1 : ImmLeaf(Imm) && Imm != -2048) || Imm == 2048;}]>; +def uimm5 : Operand, ImmLeaf= 0 && Imm < 32; }]> { + let PrintMethod = "printUImm<5>"; + let ParserMatchClass = UImm5Operand; +} -// Return the negation of an immediate value. 
-def NegImm : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N),
-                                   N->getValueType(0));
-}]>;
+def UImm6Operand : AsmOperandClass {
+  let Name = "UImm6";
+  let RenderMethod = "addUImmOperands<6>";
+  let PredicateMethod = "isUImm<6>";
+  let DiagnosticType = "InvalidImm0_63";
+}
+
+def uimm6 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
+  let PrintMethod = "printUImm<6>";
+  let ParserMatchClass = UImm6Operand;
+}
+
+def UImm7Operand : AsmOperandClass {
+  let Name = "UImm7";
+  let RenderMethod = "addUImmOperands<7>";
+  let PredicateMethod = "isUImm<7>";
+  let DiagnosticType = "InvalidImm0_127";
+}
+
+def uimm7i : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 128; }]> {
+  let PrintMethod = "printUImm<7>";
+  let ParserMatchClass = UImm7Operand;
+}
+
+def UImm12Operand : AsmOperandClass {
+  let Name = "UImm12";
+  let RenderMethod = "addUImmOperands<12>";
+  let PredicateMethod = "isUImm<12>";
+  let DiagnosticType = "InvalidImm0_4095";
+}
+def uimm12 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 4096; }]> {
+  let PrintMethod = "printUImm<12>";
+  let ParserMatchClass = UImm12Operand;
+}
+def uimm12_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 4096; }]> {
+  let PrintMethod = "printUImm<12>";
+  let ParserMatchClass = UImm12Operand;
+}
+
+def UImm15Operand : AsmOperandClass {
+  let Name = "UImm15";
+  let RenderMethod = "addUImmOperands<15>";
+  let PredicateMethod = "isUImm<15>";
+  let DiagnosticType = "InvalidImm0_32767";
+}
+def uimm15 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32768; }]> {
+  let PrintMethod = "printUImm<15>";
+  let ParserMatchClass = UImm15Operand;
+}
+
+def UImm14Operand : AsmOperandClass {
+  let Name = "UImm14";
+  let RenderMethod = "addUImmOperands<14>";
+  let PredicateMethod = "isUImm<14>";
+  let DiagnosticType = "InvalidImm0_16383";
+}
+def uimm14 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 16384; }]> {
+  let PrintMethod = "printUImm<14>";
+  let ParserMatchClass = UImm14Operand;
+}
+def uimm14_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 16384; }]> {
+  let PrintMethod = "printUImm<14>";
+  let ParserMatchClass = UImm14Operand;
+}
+
+def UImm8Operand : AsmOperandClass {
+  let Name = "UImm8";
+  let RenderMethod = "addUImmOperands<8>";
+  let PredicateMethod = "isUImm<8>";
+  let DiagnosticType = "InvalidImm0_255";
+}
+def uimm8_64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm < 256; }]> {
+  let PrintMethod = "printUImm<8>";
+  let ParserMatchClass = UImm8Operand;
+}
+
+def uimm8_32 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+  let PrintMethod = "printUImm<8>";
+  let ParserMatchClass = UImm8Operand;
+}
+
+def addr : ComplexPattern<iPTR, 2, "selectIntAddr", [frameindex]>;
+
+def addrDefault : ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>;
+
+def addrRegImm : ComplexPattern<iPTR, 2, "selectAddrRegImm", [frameindex]>;
+
+def addrimm14lsl2 : ComplexPattern<iPTR, 2, "selectIntAddrSImm14Lsl2",
+                                   [frameindex]>;
+
+class ConstantUImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [],
+                                  int Offset = 0> : AsmOperandClass {
+  let Name = "ConstantUImm" # Bits # "_" # Offset;
+  let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">";
+  let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "UImm" # Bits # "_" # Offset;
+}
+class SImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
+    : AsmOperandClass {
+  let Name = "SImm" # Bits;
+  let RenderMethod = "addSImmOperands<" # Bits # ">";
+  let PredicateMethod = "isSImm<" # Bits # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "SImm" # Bits;
+}
+class UImmAnyAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
+    : AsmOperandClass {
+  let Name = "ImmAny";
+  let RenderMethod = "addConstantUImmOperands<32>";
+  let PredicateMethod = "isSImm<" # Bits # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "ImmAny";
+}
+
+def UImm32CoercedAsmOperandClass :
UImmAnyAsmOperandClass<33, []> { + let Name = "UImm32_Coerced"; + let DiagnosticType = "UImm32_Coerced"; +} +def SImm32RelaxedAsmOperandClass + : SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> { + let Name = "SImm32_Relaxed"; + let PredicateMethod = "isAnyImm<33>"; + let DiagnosticType = "SImm32_Relaxed"; +} +def SImm32AsmOperandClass + : SImmAsmOperandClass<32, [SImm32RelaxedAsmOperandClass]>; +def ConstantUImm26AsmOperandClass + : ConstantUImmAsmOperandClass<26, [SImm32AsmOperandClass]>; + +def ConstantUImm20AsmOperandClass + : ConstantUImmAsmOperandClass<20, [ConstantUImm26AsmOperandClass]>; + +def ConstantUImm2Plus1AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm20AsmOperandClass], 1>; + +class UImmAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "UImm" # Bits; + let RenderMethod = "addUImmOperands<" # Bits # ">"; + let PredicateMethod = "isUImm<" # Bits # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImm" # Bits; +} -// FP immediate patterns. -def fpimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.0);}]>; -def fpimm0neg : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.0);}]>; -def fpimm1 : PatLeaf<(fpimm), [{return N->isExactlyValue(+1.0);}]>; +def UImm16RelaxedAsmOperandClass + : UImmAsmOperandClass<16, [ConstantUImm20AsmOperandClass]> { + let Name = "UImm16_Relaxed"; + let PredicateMethod = "isAnyImm<16>"; + let DiagnosticType = "UImm16_Relaxed"; +} -def CallSymbol: AsmOperandClass { - let Name = "CallSymbol"; +def ConstantSImm14Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm14Lsl2"; let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<14, 2>"; + let SuperClasses = [UImm16RelaxedAsmOperandClass]; + let DiagnosticType = "SImm14_Lsl2"; +} + +foreach I = {2} in + def simm14_lsl # I : Operand { + let DecoderMethod = "DecodeSImmWithOffsetAndScale<14, " # I # ">"; + let ParserMatchClass = + !cast("ConstantSImm14Lsl" # I # "AsmOperandClass"); + } + +def uimm16_64_relaxed : Operand { + let PrintMethod = "printUImm<16>"; + let ParserMatchClass = + !cast("UImm16RelaxedAsmOperandClass"); +} + +def uimm2_plus1 : Operand { + let PrintMethod = "printUImm<2, 1>"; + let EncoderMethod = "getUImmWithOffsetEncoding<2, 1>"; + let DecoderMethod = "DecodeUImmWithOffset<2, 1>"; + let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass; +} + +// like simm32 but coerces simm32 to uimm32. 
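+// That is, the operand accepts a 32-bit constant written in either signed or
+// unsigned form; e.g. -1 and 0xffffffff denote the same operand value.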
+def uimm32_coerced : Operand { + let ParserMatchClass = !cast("UImm32CoercedAsmOperandClass"); +} + +def imm64: Operand; + +def LoongArchMemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let ParserMethod = "parseMemOperand"; +} + +def LoongArchAMemAsmOperand : AsmOperandClass { + let Name = "AMem"; + let ParserMethod = "parseAMemOperand"; + let RenderMethod = "addMemOperands"; + let PredicateMethod = "isZeroMemOff"; + let DiagnosticType = "MemZeroOff"; +} + +def LoongArchMemSimm14AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm14"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<14>"; + let DiagnosticType = "MemSImm14"; +} + +foreach I = {2} in + def LoongArchMemSimm14Lsl # I # AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm14_" # I; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<14, " # I # ">"; + let DiagnosticType = "MemSImm14Lsl" # I; + } + +def LoongArchMemSimmPtrAsmOperand : AsmOperandClass { + let Name = "MemOffsetSimmPtr"; + let SuperClasses = [LoongArchMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithPtrSizeOffset"; + let DiagnosticType = "MemSImmPtr"; +} + +class mem_generic : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops ptr_rc, simm12); + let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = LoongArchMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; +} + +// Address operand +def mem : mem_generic; + +def amem : mem_generic { + let PrintMethod = "printAMemOperand"; + let EncoderMethod = "getAMemEncoding"; + let ParserMatchClass = LoongArchAMemAsmOperand; +} + +def mem_simmptr : mem_generic { + let ParserMatchClass = LoongArchMemSimmPtrAsmOperand; +} + +foreach I = {2} in + def mem_simm14_lsl # I : mem_generic { + let MIOperandInfo = (ops ptr_rc, !cast("simm14_lsl" # I)); + let EncoderMethod = "getSimm14MemEncoding<" # I # ">"; + let ParserMatchClass = + !cast("LoongArchMemSimm14Lsl" # I # "AsmOperand"); + } + +def mem_ea : Operand { + let PrintMethod = "printMemOperandEA"; + let MIOperandInfo = (ops ptr_rc, simm12); + let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; +} + +def LoongArchJumpTargetAsmOperand : AsmOperandClass { + let Name = "JumpTarget"; + let ParserMethod = "parseJumpTarget"; let PredicateMethod = "isImm"; + let RenderMethod = "addImmOperands"; +} + +def jmptarget : Operand { + let EncoderMethod = "getJumpTargetOpValue"; + let ParserMatchClass = LoongArchJumpTargetAsmOperand; } -// A bare symbol used in call only. 
-def call_symbol : Operand { - let ParserMatchClass = CallSymbol; +def brtarget : Operand { + let EncoderMethod = "getBranchTargetOpValue"; + let OperandType = "OPERAND_PCREL"; + let DecoderMethod = "DecodeBranchTarget"; + let ParserMatchClass = LoongArchJumpTargetAsmOperand; } -def BaseAddr : ComplexPattern; +def calltarget : Operand { + let EncoderMethod = "getJumpTargetOpValue"; + let ParserMatchClass = LoongArchJumpTargetAsmOperand; +} -//===----------------------------------------------------------------------===// -// Instruction Formats -//===----------------------------------------------------------------------===// +// +//SDNode +// +def IsGP64bit : Predicate<"Subtarget->is64Bit()">, + AssemblerPredicate<(all_of Feature64Bit)>; +def IsGP32bit : Predicate<"!Subtarget->is64Bit()">, + AssemblerPredicate<(all_of (not Feature64Bit))>; +def SDT_LoongArchCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def SDT_LoongArchCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def LoongArchRet : SDNode<"LoongArchISD::Ret", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def LoongArchERet : SDNode<"LoongArchISD::ERet", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPSideEffect]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_LoongArchCallSeqStart, + [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_LoongArchCallSeqEnd, + [SDNPHasChain, SDNPSideEffect, + SDNPOptInGlue, SDNPOutGlue]>; +def LoongArchAddress : SDNode<"LoongArchISD::GlobalAddress", SDTIntUnaryOp>; + +// Return RA. +let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, isCTI=1 in { + def RetRA : LoongArchPseudo<(outs), (ins), [(LoongArchRet)]>; + + let hasSideEffects=1 in + def ERet : LoongArchPseudo<(outs), (ins), [(LoongArchERet)]>; +} -include "LoongArchInstrFormats.td" -include "LoongArchFloatInstrFormats.td" +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { +def ADJCALLSTACKDOWN : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(callseq_start timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKUP : LoongArchPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(callseq_end timm:$amt1, timm:$amt2)]>; +} -//===----------------------------------------------------------------------===// +class LoongArchPat : Pat, PredicateControl; + +def SDT_LoongArchJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; + +def LoongArchJmpLink : SDNode<"LoongArchISD::JmpLink",SDT_LoongArchJmpLink, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; + +def LoongArchTailCall : SDNode<"LoongArchISD::TailCall", SDT_LoongArchJmpLink, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +class GPR_32 { list GPRPredicates = [IsGP32bit]; } +class GPR_64 { list GPRPredicates = [IsGP64bit]; } + +//===---------------------------------------------------------------------===/ // Instruction Class Templates -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===/ +///R2 +class Int_Reg2 + : InstForm<(outs RO:$rd), (ins RO:$rj), + !strconcat(opstr, "\t$rd, $rj"), + [(set RO:$rd, (OpNode RO:$rj))], + FrmR, opstr>; + +class Int_Reg2_Iocsrrd + : InstForm<(outs RD:$rd), (ins RS:$rj), + !strconcat(opstr, "\t$rd, $rj"), + [(set RD:$rd, (OpNode RS:$rj))], + FrmR, opstr>; + +class Int_Reg2_Rdtime + : InstForm<(outs RO:$rd, RO:$rj), (ins), + !strconcat(opstr, "\t$rd, $rj"), + [(set (OpNode RO:$rd, RO:$rj))], + FrmR, opstr>; + +class Int_Reg2_Iocsrwr + : 
InstForm<(outs), (ins RD:$rd, RS:$rj), + !strconcat(opstr, "\t$rd, $rj"), + [(set (OpNode RD:$rd, RS:$rj))], + FrmR, opstr>; + +class Float_Reg2 + : InstForm<(outs RO:$fd), (ins RO:$fj), + !strconcat(opstr, "\t$fd, $fj"), + [(set RO:$fd, (OpNode RO:$fj))], + FrmFR, opstr>; + +class Count1 + : InstForm<(outs RO:$rd), (ins RO:$rj), + !strconcat(opstr, "\t$rd, $rj"), + [(set RO:$rd, (OpNode (not RO:$rj)))], + FrmR, opstr>; + +class SignExtInReg + : InstForm<(outs RO:$rd), (ins RO:$rj), !strconcat(opstr, "\t$rd, $rj"), + [(set RO:$rd, (sext_inreg RO:$rj, vt))], FrmR, opstr>; + +///R3 +class Int_Reg3 + : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set RO:$rd, (OpNode RO:$rj, RO:$rk))], + FrmR, opstr>; + +class Int_Reg3_Crc + : InstForm<(outs RS:$rd), (ins RD:$rj, RS:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set RS:$rd, (OpNode RD:$rj, RS:$rk))], + FrmR, opstr>; + +class SetCC_R + : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set GPR32Opnd:$rd, (OpNode RO:$rj, RO:$rk))], + FrmR, opstr>; + +class SetCC_I + : InstForm<(outs GPR32Opnd:$rd), (ins RO:$rj, ImmOpnd:$imm12), + !strconcat(opstr, "\t$rd, $rj, $imm12"), + [(set GPR32Opnd:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], + FrmR, opstr>; + +class ATOMIC + : InstForm<(outs RD:$rd), (ins RD:$rk, MO:$addr), + !strconcat(opstr, "\t$rd, $rk, $addr"), + [(set RD:$rd, (OpNode RD:$rk, Addr:$addr))], + FrmR, opstr> { + let DecoderMethod = "DecodeAMem"; + let canFoldAsLoad = 1; + string BaseOpcode = opstr; + let mayLoad = 1; + let mayStore = 1; + let Constraints = "@earlyclobber $rd"; +} -class ALU_3R op, string opstr> - : Fmt3R; -class ALU_2R op, string opstr> - : Fmt2R; - -class ALU_3RI2 op, string opstr, Operand ImmOpnd> - : Fmt3RI2; -class ALU_3RI3 op, string opstr, Operand ImmOpnd> - : Fmt3RI3; -class ALU_2RI5 op, string opstr, Operand ImmOpnd> - : Fmt2RI5; -class ALU_2RI6 op, string opstr, Operand ImmOpnd> - : Fmt2RI6; -class ALU_2RI12 op, string opstr, Operand ImmOpnd> - : Fmt2RI12; -class ALU_2RI16 op, string opstr, Operand ImmOpnd> - : Fmt2RI16; -class ALU_1RI20 op, string opstr, Operand ImmOpnd> - : Fmt1RI20; - -class MISC_I15 op, string opstr> - : FmtI15; - -class RDTIME_2R op, string opstr> - : Fmt2R; - -class BrCC_2RI16 op, string opstr> - : Fmt2RI16 { - let isBranch = 1; - let isTerminator = 1; +class Nor + : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set RO:$rd, (not (or RO:$rj, RO:$rk)))], + FrmR, opstr>; + +class Shift_Var + : InstForm<(outs RO:$rd), (ins RO:$rj, GPR32Opnd:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [(set RO:$rd, (OpNode RO:$rj, GPR32Opnd:$rk))], + FrmR, opstr>; + +class Float_Reg3 + : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), + !strconcat(opstr, "\t$fd, $fj, $fk"), + [(set RO:$fd, (OpNode RO:$fj, RO:$fk))], + FrmR, opstr>; + +class Float_Reg3_MA + : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk), + !strconcat(opstr, "\t$fd, $fj, $fk"), + [(set RO:$fd, (OpNode (fabs RO:$fj), (fabs RO:$fk)))], + FrmR, opstr>; + +class Float_Int_Reg3 + : InstForm<(outs RD:$fd), (ins RS:$rj, RS:$rk), + !strconcat(opstr, "\t$fd, $rj, $rk"), + [(set RS:$fd, (OpNode RS:$rj, RS:$rk))], + FrmR, opstr>; + +///R4 +class Mul_Reg4 + : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), + !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), + [], + FrmFR, opstr>; + +class NMul_Reg4 + : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk, RO:$fa), + !strconcat(opstr, "\t$fd, $fj, $fk, $fa"), + [], + FrmFR, opstr>; + 
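+// For reference, each template above is paired with an encoding format class
+// when instantiated; a typical (illustrative) use later in this file is
+//   def ADD_D : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>;
+// which yields the assembly string "add.d $rd, $rj, $rk" and selects the
+// generic add node into the instruction.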
+///R2_IMM5 +class Shift_Imm32 + : InstForm<(outs RO:$rd), (ins RO:$rj, uimm5:$imm5), + !strconcat(opstr, "\t$rd, $rj, $imm5"), + [(set RO:$rd, (OpNode RO:$rj, uimm5:$imm5))], + FrmR, opstr>; + +///R2_IMM6 +class Shift_Imm64 + : InstForm<(outs RO:$rd), (ins RO:$rj, uimm6:$imm6), + !strconcat(opstr, "\t$rd, $rj, $imm6"), + [(set RO:$rd, (OpNode RO:$rj, uimm6:$imm6))], + FrmR, opstr>; + +///LOAD_STORE +class FLd + : InstForm<(outs RD:$rd), (ins MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(set RD:$rd, (OpNode addrDefault:$addr))], + FrmR, opstr> { + let DecoderMethod = "DecodeFMem"; + let mayLoad = 1; +} + +class Ld + : InstForm<(outs RD:$rd), (ins MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(set RD:$rd, (OpNode Addr:$addr))], + FrmR, opstr> { + let DecoderMethod = "DecodeMem"; + let canFoldAsLoad = 1; + string BaseOpcode = opstr; + let mayLoad = 1; +} + +class FSt + : InstForm<(outs), (ins RD:$rd, MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(OpNode RD:$rd, addrDefault:$addr)], + FrmR, opstr> { + let DecoderMethod = "DecodeFMem"; + let mayStore = 1; +} + +class St + : InstForm<(outs), (ins RS:$rd, MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(OpNode RS:$rd, addr:$addr)], + FrmR, opstr> { + let DecoderMethod = "DecodeMem"; + string BaseOpcode = opstr; + let mayStore = 1; +} + +/// R2_IMM12 +class Int_Reg2_Imm12 + : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12), + !strconcat(opstr, "\t$rd, $rj, $imm12"), + [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12))], + FrmR, opstr>; +class RELOC_rrii + : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12), + !strconcat(opstr, "\t$rd, $rj, $imm12"), + [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$imm12, ImmOpnd:$i12))], + FrmR, opstr>; + +///R2_IMM14 +class LdPtr + : InstForm<(outs RO:$rd), (ins mem_simm14_lsl2:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [], FrmI, opstr>{ + let DecoderMethod = "DecodeMemSimm14"; + let canFoldAsLoad = 1; + string BaseOpcode = opstr; + let mayLoad = 1; +} + +class StPtr + : InstForm<(outs), (ins RO:$rd, mem_simm14_lsl2:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [], FrmI, opstr> { + let DecoderMethod = "DecodeMemSimm14"; + string BaseOpcode = opstr; + let mayStore = 1; +} + +///R2_IMM16 +class FJirl + : InstForm<(outs RO:$rd), (ins RO:$rj, opnd:$offs16), + !strconcat(opstr, "\t$rd, $rj, $offs16"), + [], FrmJ, opstr>; + +class Beq + : InstForm<(outs), (ins RO:$rj, RO:$rd, opnd:$offs16), + !strconcat(opstr, "\t$rj, $rd, $offs16"), + [(brcond (i32 (cond_op RO:$rj, RO:$rd)), bb:$offs16)], + FrmI, opstr> { + let isBranch = 1; + let isTerminator = 1; + bit isCTI = 1; +} + +///R1_IMM21 +class Beqz + : InstForm<(outs), (ins RO:$rj, opnd:$offs21), + !strconcat(opstr, "\t$rj, $offs21"), + [(brcond (i32 (cond_op RO:$rj, 0)), bb:$offs21)], + FrmI, opstr> { + let isBranch = 1; + let isTerminator = 1; + bit isCTI = 1; +} + +///IMM26 +class JumpFB : + InstForm<(outs), (ins opnd:$offset26), !strconcat(opstr, "\t$offset26"), + [(operator targetoperator:$offset26)], FrmJ, opstr> { + let isBranch = 1; + let isTerminator=1; + let isBarrier=1; + let DecoderMethod = "DecodeJumpTarget"; + bit isCTI = 1; +} + +/// R3_SA +class Reg3_Sa + : InstForm<(outs RO:$rd), (ins RO:$rj, RO:$rk, ImmOpnd:$sa), + !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), + [(set RO:$rd, (OpNode RO:$rj, RO:$rk, ImmOpnd:$sa))], + FrmR, opstr>; + +class Reg3_SaU + : InstForm<(outs RD:$rd), (ins RS:$rj, RS:$rk, ImmOpnd:$sa), + !strconcat(opstr, "\t$rd, $rj, $rk, $sa"), + [(set RD:$rd, (OpNode RS:$rj, RS:$rk, ImmOpnd:$sa))], + FrmR, 
opstr>; + +/// Assert +class Assert + : InstForm<(outs), (ins RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rj, $rk"), + [(set (OpNode RO:$rj, RO:$rk))], + FrmR, opstr>; + +class Code15 + : InstForm<(outs), (ins uimm15:$Code), + !strconcat(opstr, "\t$Code"), + [(set (OpNode uimm15:$Code))], + FrmOther, opstr>; + +class TrapBase + : LoongArchPseudo<(outs), (ins), [(trap)]>, + PseudoInstExpansion<(RealInst 0)> { + let isBarrier = 1; + let isTerminator = 1; + let isCodeGenOnly = 1; + let isCTI = 1; +} + +class CSR + : InstForm<(outs RO:$rd), (ins ImmOpnd:$csr), + !strconcat(opstr, "\t$rd, $csr"), + [(set RO:$rd, (OpNode ImmOpnd:$csr))], + FrmOther, opstr>; + +class CSRW + : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$csr), + !strconcat(opstr, "\t$rd, $csr"), + [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$csr))], + FrmOther, opstr>{ + let Constraints = "$rd = $dst"; +} + +class CSRX + : InstForm<(outs RO:$dst), (ins RO:$rd, RO:$rj, ImmOpnd:$csr), + !strconcat(opstr, "\t$rd, $rj, $csr"), + [(set RO:$dst, (OpNode RO:$rd, RO:$rj, ImmOpnd:$csr))], + FrmOther, opstr>{ + let Constraints = "$rd = $dst"; +} + +class CAC + : InstForm<(outs), (ins uimm5:$op, RO:$rj, ImmOpnd:$si12), + !strconcat(opstr, "\t$op, $rj, $si12"), + [(set (OpNode uimm5:$op, RO:$rj, ImmOpnd:$si12))], + FrmOther, opstr>; + +class LEVEL + : InstForm<(outs RO:$rd), (ins RO:$rj, uimm8_64:$level), + !strconcat(opstr, "\t$rd, $rj, $level"), + [(set RO:$rd, (OpNode RO:$rj, uimm8_64:$level))], + FrmOther, opstr>; + +class SEQ + : InstForm<(outs), (ins RO:$rj, uimm8_64:$seq), + !strconcat(opstr, "\t$rj, $seq"), + [(set (OpNode RO:$rj, uimm8_64:$seq))], + FrmOther, opstr>; + +class Wait + : InstForm<(outs), (ins uimm15:$hint), + !strconcat(opstr, "\t$hint"), + [(set (OpNode uimm15:$hint))], + FrmOther, opstr>; + +class Invtlb + : InstForm<(outs), (ins uimm5:$op, RO:$rj, RO:$rk), + !strconcat(opstr, "\t$op, $rj, $rk"), + [(set (OpNode uimm5:$op, RO:$rj, RO:$rk))], + FrmOther, opstr>; + +class OP32 + : InstForm<(outs), (ins), + !strconcat(opstr, ""), + [(set (OpNode))], + FrmOther, opstr>; + +class Bar + : InstForm<(outs), (ins uimm15:$hint), + !strconcat(opstr, "\t$hint"), + [(set (OpNode uimm15:$hint))], + FrmOther, opstr>; + +//class CA op, string opstr> +// : R3_CA; + +class SI16_R2 + : InstForm<(outs RO:$rd), (ins RO:$rj, simm16:$si16), + !strconcat(opstr, "\t$rd, $rj, $si16"), + [(set RO:$rd, (OpNode RO:$rj, simm16:$si16))], + FrmR, opstr>; + +class SI20 + : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20), + !strconcat(opstr, "\t$rd, $si20"), + [(set RO:$rd, (OpNode ImmOpnd:$si20))], + FrmR, opstr>; +let isCodeGenOnly = 1, Constraints = "$dst = $rd" in +class SI20_R2 + : InstForm<(outs RO:$dst), (ins RO:$rd, ImmOpnd:$si20), + !strconcat(opstr, "\t$rd, $si20"), + [(set RO:$dst, (OpNode RO:$rd, ImmOpnd:$si20))], + FrmR, opstr>; +class RELOC_rii + : InstForm<(outs RO:$rd), (ins ImmOpnd:$si20, ImmOpnd:$i20), + !strconcat(opstr, "\t$rd, $si20"), + [(set RO:$rd, (OpNode ImmOpnd:$si20, ImmOpnd:$i20))], + FrmR, opstr>; + +// preld +class Preld + : InstForm<(outs), (ins RO:$rj, MemOpnd:$addr, uimm5:$hint), + !strconcat(opstr, "\t$hint, $rj, $addr"), + [(set (OpNode RO:$rj, MemOpnd:$addr, uimm5:$hint))], + FrmR, opstr>; +class Preld_Raw + : InstForm<(outs), (ins RO:$rj, simm12:$imm12, uimm5:$hint), + !strconcat(opstr, "\t$hint, $rj, $imm12"), + [], + FrmR, opstr>; +class IsCall { + bit isCall = 1; + bit isCTI = 1; +} + +class EffectiveAddress + : InstForm<(outs RO:$rd), (ins mem_ea:$addr), + !strconcat(opstr, "\t$rd, $addr"), + [(set RO:$rd, addr:$addr)], 
FrmI, + !strconcat(opstr, "_lea")> { + let isCodeGenOnly = 1; + let hasNoSchedulingInfo = 1; + let DecoderMethod = "DecodeMem"; +} + +def PtrRC : Operand { + let MIOperandInfo = (ops ptr_rc); + let DecoderMethod = "DecodePtrRegisterClass"; + let ParserMatchClass = GPR32AsmOperand; +} + +class Atomic2Ops : + LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr), + [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>; + +class Atomic2OpsPostRA : + LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr), []> { + let mayLoad = 1; + let mayStore = 1; +} + +class Atomic2OpsSubwordPostRA : + LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$incr, RC:$mask, RC:$mask2, + RC:$shiftamnt), []>; +class AtomicCmpSwap : + LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), + [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; + +// These atomic cmpxchg PatFrags only care about the failure ordering. +// In llvm <= 13, the PatFrags defined by multiclass `ternary_atomic_op_ord` +// in TargetSelectionDAG.td only care about the success ordering while llvm > 13 +// care about the `merged` ordering which is the stronger one of success and +// failure. See https://reviews.llvm.org/D106729. But for LoongArch LL-SC we +// only need to care about the failure ordering as explained in +// https://github.com/llvm/llvm-project/pull/67391. So we defined these +// PatFrags. +multiclass ternary_atomic_op_failure_ord { + def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), + (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getFailureOrdering(); + return Ordering == AtomicOrdering::Monotonic; + }]>; + def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), + (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getFailureOrdering(); + return Ordering == AtomicOrdering::Acquire; + }]>; + def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), + (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getFailureOrdering(); + return Ordering == AtomicOrdering::Release; + }]>; + def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), + (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getFailureOrdering(); + return Ordering == AtomicOrdering::AcquireRelease; + }]>; + def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), + (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getFailureOrdering(); + return Ordering == AtomicOrdering::SequentiallyConsistent; + }]>; +} + +defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord; + +multiclass AtomicCmpSwapInstrs { + def ATOMIC_CMP_SWAP_MONOTONIC : + AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_monotonic"), + RC>; + def ATOMIC_CMP_SWAP_ACQUIRE : + AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_acquire"), + RC>; + def ATOMIC_CMP_SWAP_RELEASE : + AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_release"), + RC>; + def ATOMIC_CMP_SWAP_ACQ_REL : + AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_acq_rel"), + RC>; + def ATOMIC_CMP_SWAP_SEQ_CST : + AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_seq_cst"), + RC>; +} + +class AtomicCmpSwapPostRA : + LoongArchPseudo<(outs RC:$dst), + (ins PtrRC:$ptr, RC:$cmp, RC:$swap, i32imm:$ordering), []> { + let mayLoad = 1; + let mayStore = 1; +} + +class AtomicCmpSwapSubwordPostRA : + LoongArchPseudo<(outs RC:$dst), + (ins PtrRC:$ptr, RC:$mask, 
RC:$ShiftCmpVal, RC:$mask2, + RC:$ShiftNewVal, RC:$ShiftAmt, i32imm:$ordering), []> { + let mayLoad = 1; + let mayStore = 1; +} + +class LoongArchInstAlias : + InstAlias, PredicateControl; + +//===---------------------------------------------------------------------===/ +// Instruction Definitions. +//===---------------------------------------------------------------------===/ +/// +/// R2 +/// + +def CLO_D : Count1<"clo.d", GPR64Opnd, ctlz>, R2I<0b01000>; +def CLZ_D : Int_Reg2<"clz.d", GPR64Opnd, ctlz>, R2I<0b01001>; +def CTO_D : Count1<"cto.d", GPR64Opnd, cttz>, R2I<0b01010>; +def CTZ_D : Int_Reg2<"ctz.d", GPR64Opnd, cttz>, R2I<0b01011>; + +def REVB_4H : Int_Reg2<"revb.4h", GPR64Opnd>, R2I<0b01101>; //[] +def REVB_2W : Int_Reg2<"revb.2w", GPR64Opnd>, R2I<0b01110>; +def REVB_D : Int_Reg2<"revb.d", GPR64Opnd, LoongArchREVBD>, R2I<0b01111>; +def REVH_2W : Int_Reg2<"revh.2w", GPR64Opnd>, R2I<0b10000>; +def REVH_D : Int_Reg2<"revh.d", GPR64Opnd>, R2I<0b10001>; //[] + +def BITREV_8B : Int_Reg2<"bitrev.8b", GPR64Opnd>, R2I<0b10011>; //[] +def BITREV_D : Int_Reg2<"bitrev.d", GPR64Opnd, bitreverse>, R2I<0b10101>; + +def EXT_W_H : SignExtInReg<"ext.w.h", GPR64Opnd, i16>, R2I<0b10110>; +def EXT_W_B : SignExtInReg<"ext.w.b", GPR64Opnd, i8>, R2I<0b10111>; + +def RDTIME_D : Int_Reg2_Rdtime<"rdtime.d", GPR64Opnd>, R2I<0b11010>; +def RDTIMEL_W : Int_Reg2_Rdtime<"rdtimel.w", GPR64Opnd>, R2I<0b11000>; +def RDTIMEH_W : Int_Reg2_Rdtime<"rdtimeh.w", GPR64Opnd>, R2I<0b11001>; +/// +/// R3 +/// +def ADD_D : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>; +def SUB_D : Int_Reg3<"sub.d", GPR64Opnd, sub>, R3I<0b0100011>; + +def SLT : SetCC_R<"slt", GPR64Opnd, setlt>, R3I<0b0100100>; +def SLTU : SetCC_R<"sltu", GPR64Opnd, setult>, R3I<0b0100101>; +def MASKEQZ : Int_Reg3<"maskeqz", GPR64Opnd>, R3I<0b0100110>; //[] +def MASKNEZ : Int_Reg3<"masknez", GPR64Opnd>, R3I<0b0100111>; //[] + +def NOR : Nor<"nor", GPR64Opnd>, R3I<0b0101000>; +def AND : Int_Reg3<"and", GPR64Opnd, and>, R3I<0b0101001>; +def OR : Int_Reg3<"or", GPR64Opnd, or>, R3I<0b0101010>; +def XOR : Int_Reg3<"xor", GPR64Opnd, xor>, R3I<0b0101011>; +def ORN : Int_Reg3<"orn", GPR64Opnd>, R3I<0b0101100>; +def ANDN : Int_Reg3<"andn", GPR64Opnd>, R3I<0b0101101>; + +def SLL_D : Shift_Var<"sll.d", GPR64Opnd, shl>, R3I<0b0110001>; +def SRL_D : Shift_Var<"srl.d", GPR64Opnd, srl>, R3I<0b0110010>; +def SRA_D : Shift_Var<"sra.d", GPR64Opnd, sra>, R3I<0b0110011>; +def ROTR_D: Shift_Var<"rotr.d", GPR64Opnd, rotr>, R3I<0b0110111>; + +def MUL_D : Int_Reg3<"mul.d", GPR64Opnd, mul>, R3I<0b0111011>; +def MULH_D : Int_Reg3<"mulh.d", GPR64Opnd, mulhs>, R3I<0b0111100>; +def MULH_DU : Int_Reg3<"mulh.du", GPR64Opnd, mulhu>, R3I<0b0111101>; +def MULW_D_W : Int_Reg3<"mulw.d.w", GPR64Opnd>, R3I<0b0111110>; +def MULW_D_WU : Int_Reg3<"mulw.d.wu", GPR64Opnd>, R3I<0b0111111>; + +let usesCustomInserter = 1 in { +def DIV_D : Int_Reg3<"div.d", GPR64Opnd, sdiv>, R3I<0b1000100>; +def MOD_D : Int_Reg3<"mod.d", GPR64Opnd, srem>, R3I<0b1000101>; +def DIV_DU : Int_Reg3<"div.du", GPR64Opnd, udiv>, R3I<0b1000110>; +def MOD_DU : Int_Reg3<"mod.du", GPR64Opnd, urem>, R3I<0b1000111>; +} + +def CRC_W_D_W : Int_Reg3_Crc<"crc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crc_w_d_w>, R3I<0b1001011>; +def CRCC_W_D_W : Int_Reg3_Crc<"crcc.w.d.w", GPR64Opnd, GPR32Opnd, int_loongarch_crcc_w_d_w>, R3I<0b1001111>; +/// +/// SLLI +/// +def SLLI_D : Shift_Imm64<"slli.d", GPR64Opnd, shl>, R2_IMM6<0b00>; +def SRLI_D : Shift_Imm64<"srli.d", GPR64Opnd, srl>, R2_IMM6<0b01>; +def SRAI_D : Shift_Imm64<"srai.d", GPR64Opnd, 
sra>, R2_IMM6<0b10>; +def ROTRI_D : Shift_Imm64<"rotri.d", GPR64Opnd, rotr>, R2_IMM6<0b11>; +/// +/// Misc +/// +def ALSL_WU : Reg3_SaU<"alsl.wu", GPR64Opnd, GPR32Opnd, uimm2_plus1>, R3_SA2<0b00011> { + let Pattern = [(set GPR64Opnd:$rd, + (i64 (zext (add GPR32Opnd:$rk, (shl GPR32Opnd:$rj, immZExt2Alsl:$sa)))))]; +} + +def ALSL_D : Reg3_Sa<"alsl.d", GPR64Opnd, uimm2_plus1>, R3_SA2<0b10110> { + let Pattern = [(set GPR64Opnd:$rd, + (add GPR64Opnd:$rk, (shl GPR64Opnd:$rj, immZExt2Alsl:$sa)))]; +} +def BYTEPICK_D : Reg3_Sa<"bytepick.d", GPR64Opnd, uimm3>, R3_SA3; //[] + +def ASRTLE_D : Assert<"asrtle.d", GPR64Opnd, int_loongarch_asrtle_d>, ASSERT<0b10>; +def ASRTGT_D : Assert<"asrtgt.d", GPR64Opnd, int_loongarch_asrtgt_d>, ASSERT<0b11>; + +def DBCL : Code15<"dbcl">, CODE15<0b1010101>; +def HYPCALL : Code15<"hypcall">, CODE15<0b1010111>; + +/// +/// R2_IMM12 +/// +def SLTI : SetCC_I<"slti", GPR64Opnd, simm12, setlt>, R2_IMM12<0b000>; +def SLTUI : SetCC_I<"sltui", GPR64Opnd, simm12, setult>, R2_IMM12<0b001>; +def ADDI_W64 : Int_Reg2_Imm12<"addi.w", GPR64Opnd, simm12>, R2_IMM12<0b010>; +def ADDI_D : Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>; +def LU52I_D : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>; +def ANDI : Int_Reg2_Imm12<"andi", GPR64Opnd, uimm12, and>, R2_IMM12<0b101>; +def ORI : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>; +def XORI : Int_Reg2_Imm12<"xori", GPR64Opnd, uimm12, xor>, R2_IMM12<0b111>; + +/// +/// Privilege Instructions +/// +def CSRRD : CSR<"csrrd", GPR64Opnd, uimm14, int_loongarch_csrrd_d>, R1_CSR<0b0000000000100>; +def CSRWR : CSRW<"csrwr", GPR64Opnd, uimm14, int_loongarch_csrwr_d>, R1_CSR<0b0000100000100>; +def CSRXCHG : CSRX<"csrxchg", GPR64Opnd, uimm14, int_loongarch_csrxchg_d>, R2_CSR<0b00000100>; +def IOCSRRD_D : Int_Reg2_Iocsrrd<"iocsrrd.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrrd_d>, R2P<0b011>; +def IOCSRWR_D : Int_Reg2_Iocsrwr<"iocsrwr.d", GPR64Opnd, GPR32Opnd, int_loongarch_iocsrwr_d>, R2P<0b111>; +def CACOP : CAC<"cacop", GPR64Opnd, simm12, int_loongarch_cacop_d>, R1_CACHE; +def LDDIR : LEVEL<"lddir", GPR64Opnd>, R2_LEVEL<0b00000110010000>; +def LDPTE : SEQ<"ldpte", GPR64Opnd>, R1_SEQ<0b00000110010001>; + +def IDLE : Wait<"idle">, WAIT_FM; +def INVTLB : Invtlb<"invtlb", GPR64Opnd>, R2_INVTLB; +// +def IOCSRRD_B : Int_Reg2<"iocsrrd.b", GPR64Opnd>, R2P<0b000>; +def IOCSRRD_H : Int_Reg2<"iocsrrd.h", GPR64Opnd>, R2P<0b001>; +def IOCSRRD_W : Int_Reg2<"iocsrrd.w", GPR64Opnd>, R2P<0b010>; +// +def TLBCLR : OP32<"tlbclr", int_loongarch_tlbclr>, IMM32<0b001000>; +def TLBFLUSH : OP32<"tlbflush", int_loongarch_tlbflush>, IMM32<0b001001>; +def TLBSRCH : OP32<"tlbsrch", int_loongarch_tlbsrch>, IMM32<0b001010>; +def TLBRD : OP32<"tlbrd", int_loongarch_tlbrd>, IMM32<0b001011>; +def TLBWR : OP32<"tlbwr", int_loongarch_tlbwr>, IMM32<0b001100>; +def TLBFILL : OP32<"tlbfill", int_loongarch_tlbfill>, IMM32<0b001101>; +def ERTN : OP32<"ertn">, IMM32<0b001110>; + +/// +/// R1_IMM20 +/// +def ADDU16I_D : SI16_R2<"addu16i.d", GPR64Opnd>, R2_SI16<0b000100>; +def LU12I_W : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>; +def LU32I_D : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; +def LU32I_D_R2 : SI20_R2<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>; +def PCADDI : SI20<"pcaddi", GPR64Opnd, simm20>, R1_SI20<0b0001100>; +def PCALAU12I : SI20<"pcalau12i", GPR64Opnd, simm20>, R1_SI20<0b0001101>; +def PCADDU12I : SI20<"pcaddu12i", GPR64Opnd, simm20>, R1_SI20<0b0001110>; +def PCADDU18I : SI20<"pcaddu18i", 
GPR64Opnd, simm20>, R1_SI20<0b0001111>; + + +def BEQZ : Beqz<"beqz", brtarget, seteq, GPR64Opnd>, R1_IMM21BEQZ<0b010000>; +def BNEZ : Beqz<"bnez", brtarget, setne, GPR64Opnd>, R1_IMM21BEQZ<0b010001>; + +def JIRL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; +let isCall = 1, isCTI=1, isCodeGenOnly = 1 in { +def JIRL_CALL : FJirl<"jirl", simm16, GPR64Opnd>, R2_IMM16JIRL; +} + +def B : JumpFB, IMM26B<0b010100>; + +def BEQ : Beq<"beq", brtarget, seteq, GPR64Opnd>, R2_IMM16BEQ<0b010110>; +def BNE : Beq<"bne", brtarget, setne, GPR64Opnd>, R2_IMM16BEQ<0b010111>; +def BLT : Beq<"blt", brtarget, setlt, GPR64Opnd>, R2_IMM16BEQ<0b011000>; +def BGE : Beq<"bge", brtarget, setge, GPR64Opnd>, R2_IMM16BEQ<0b011001>; +def BLTU : Beq<"bltu", brtarget, setult, GPR64Opnd>, R2_IMM16BEQ<0b011010>; +def BGEU : Beq<"bgeu", brtarget, setuge, GPR64Opnd>, R2_IMM16BEQ<0b011011>; + +/// +/// Mem access +/// +class LLBase : + InstForm<(outs RO:$rd), (ins MO:$addr), !strconcat(opstr, "\t$rd, $addr"), + [], FrmI, opstr> { + let DecoderMethod = "DecodeMemSimm14"; + let mayLoad = 1; +} + +class SCBase : + InstForm<(outs RO:$dst), (ins RO:$rd, MO:$addr), + !strconcat(opstr, "\t$rd, $addr"), [], FrmI> { + let DecoderMethod = "DecodeMemSimm14"; + let mayStore = 1; + let Constraints = "$rd = $dst"; } -class BrCCZ_1RI21 op, string opstr> - : Fmt1RI21 { - let isBranch = 1; + +class STGT_LE : + InstForm<(outs), (ins RO:$rd, RO:$rj, RO:$rk), + !strconcat(opstr, "\t$rd, $rj, $rk"), + [], FrmI, opstr>; + +class Float_STGT_LE + : InstForm<(outs), (ins RD:$fd, RS:$rj, RS:$rk), + !strconcat(opstr, "\t$fd, $rj, $rk"), + [], FrmR, opstr>; + +def LL_D : LLBase<"ll.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b010>; +def SC_D : SCBase<"sc.d", GPR64Opnd, mem_simm14_lsl2>, LL_SC<0b011>; + +def LDPTR_W : LdPtr<"ldptr.w", GPR64Opnd>, LL_SC<0b100>; +def STPTR_W : StPtr<"stptr.w", GPR64Opnd>, LL_SC<0b101>; +def LDPTR_D : LdPtr<"ldptr.d", GPR64Opnd>, LL_SC<0b110>; +def STPTR_D : StPtr<"stptr.d", GPR64Opnd>, LL_SC<0b111>; + +def LD_B : Ld<"ld.b", GPR64Opnd, mem, sextloadi8>, LOAD_STORE<0b0000>; +def LD_H : Ld<"ld.h", GPR64Opnd, mem, sextloadi16>, LOAD_STORE<0b0001>; +def LD_W : Ld<"ld.w", GPR64Opnd, mem, sextloadi32>, LOAD_STORE<0b0010>; +def LD_D : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>; +def ST_B : St<"st.b", GPR64Opnd, mem, truncstorei8>, LOAD_STORE<0b0100>; +def ST_H : St<"st.h", GPR64Opnd, mem, truncstorei16>, LOAD_STORE<0b0101>; +def ST_W : St<"st.w", GPR64Opnd, mem, truncstorei32>, LOAD_STORE<0b0110>; +def ST_D : St<"st.d", GPR64Opnd, mem_simmptr, store>, LOAD_STORE<0b0111>; +def LD_BU : Ld<"ld.bu", GPR64Opnd, mem, zextloadi8>, LOAD_STORE<0b1000>; +def LD_HU : Ld<"ld.hu", GPR64Opnd, mem, zextloadi16>, LOAD_STORE<0b1001>; +def LD_WU : Ld<"ld.wu", GPR64Opnd, mem, zextloadi32>, LOAD_STORE<0b1010>; + +def AMSWAP_W : ATOMIC<"amswap.w", GPR32Opnd, amem>, AM<0b000000>; +def AMSWAP_D : ATOMIC<"amswap.d", GPR64Opnd, amem>, AM<0b000001>; +def AMADD_W : ATOMIC<"amadd.w", GPR32Opnd, amem>, AM<0b000010>; +def AMADD_D : ATOMIC<"amadd.d", GPR64Opnd, amem>, AM<0b000011>; +def AMAND_W : ATOMIC<"amand.w", GPR32Opnd, amem>, AM<0b000100>; +def AMAND_D : ATOMIC<"amand.d", GPR64Opnd, amem>, AM<0b000101>; +def AMOR_W : ATOMIC<"amor.w", GPR32Opnd, amem>, AM<0b000110>; +def AMOR_D : ATOMIC<"amor.d", GPR64Opnd, amem>, AM<0b000111>; +def AMXOR_W : ATOMIC<"amxor.w", GPR32Opnd, amem>, AM<0b001000>; +def AMXOR_D : ATOMIC<"amxor.d", GPR64Opnd, amem>, AM<0b001001>; +def AMMAX_W : ATOMIC<"ammax.w", GPR32Opnd, amem>, AM<0b001010>; +def AMMAX_D : 
ATOMIC<"ammax.d", GPR64Opnd, amem>, AM<0b001011>; +def AMMIN_W : ATOMIC<"ammin.w", GPR32Opnd, amem>, AM<0b001100>; +def AMMIN_D : ATOMIC<"ammin.d", GPR64Opnd, amem>, AM<0b001101>; +def AMMAX_WU : ATOMIC<"ammax.wu", GPR32Opnd, amem>, AM<0b001110>; +def AMMAX_DU : ATOMIC<"ammax.du", GPR64Opnd, amem>, AM<0b001111>; +def AMMIN_WU : ATOMIC<"ammin.wu", GPR32Opnd, amem>, AM<0b010000>; +def AMMIN_DU : ATOMIC<"ammin.du", GPR64Opnd, amem>, AM<0b010001>; + + +def AMSWAP_DB_W : ATOMIC<"amswap_db.w", GPR32Opnd, amem>, AM<0b010010>; +def AMSWAP_DB_D : ATOMIC<"amswap_db.d", GPR64Opnd, amem>, AM<0b010011>; +def AMADD_DB_W : ATOMIC<"amadd_db.w", GPR32Opnd, amem>, AM<0b010100>; +def AMADD_DB_D : ATOMIC<"amadd_db.d", GPR64Opnd, amem>, AM<0b010101>; +def AMAND_DB_W : ATOMIC<"amand_db.w", GPR32Opnd, amem>, AM<0b010110>; +def AMAND_DB_D : ATOMIC<"amand_db.d", GPR64Opnd, amem>, AM<0b010111>; +def AMOR_DB_W : ATOMIC<"amor_db.w", GPR32Opnd, amem>, AM<0b011000>; +def AMOR_DB_D : ATOMIC<"amor_db.d", GPR64Opnd, amem>, AM<0b011001>; +def AMXOR_DB_W : ATOMIC<"amxor_db.w", GPR32Opnd, amem>, AM<0b011010>; +def AMXOR_DB_D : ATOMIC<"amxor_db.d", GPR64Opnd, amem>, AM<0b011011>; +def AMMAX_DB_W : ATOMIC<"ammax_db.w", GPR32Opnd, amem>, AM<0b011100>; +def AMMAX_DB_D : ATOMIC<"ammax_db.d", GPR64Opnd, amem>, AM<0b011101>; +def AMMIN_DB_W : ATOMIC<"ammin_db.w", GPR32Opnd, amem>, AM<0b011110>; +def AMMIN_DB_D : ATOMIC<"ammin_db.d", GPR64Opnd, amem>, AM<0b011111>; +def AMMAX_DB_WU : ATOMIC<"ammax_db.wu", GPR32Opnd, amem>, AM<0b100000>; +def AMMAX_DB_DU : ATOMIC<"ammax_db.du", GPR64Opnd, amem>, AM<0b100001>; +def AMMIN_DB_WU : ATOMIC<"ammin_db.wu", GPR32Opnd, amem>, AM<0b100010>; +def AMMIN_DB_DU : ATOMIC<"ammin_db.du", GPR64Opnd, amem>, AM<0b100011>; + +def LDGT_B : Int_Reg3<"ldgt.b", GPR64Opnd>, R3MI<0b11110000>; +def LDGT_H : Int_Reg3<"ldgt.h", GPR64Opnd>, R3MI<0b11110001>; +def LDGT_W : Int_Reg3<"ldgt.w", GPR64Opnd>, R3MI<0b11110010>; +def LDGT_D : Int_Reg3<"ldgt.d", GPR64Opnd>, R3MI<0b11110011>; +def LDLE_B : Int_Reg3<"ldle.b", GPR64Opnd>, R3MI<0b11110100>; +def LDLE_H : Int_Reg3<"ldle.h", GPR64Opnd>, R3MI<0b11110101>; +def LDLE_W : Int_Reg3<"ldle.w", GPR64Opnd>, R3MI<0b11110110>; +def LDLE_D : Int_Reg3<"ldle.d", GPR64Opnd>, R3MI<0b11110111>; +def STGT_B : STGT_LE<"stgt.b", GPR64Opnd>, R3MI<0b11111000>; +def STGT_H : STGT_LE<"stgt.h", GPR64Opnd>, R3MI<0b11111001>; +def STGT_W : STGT_LE<"stgt.w", GPR64Opnd>, R3MI<0b11111010>; +def STGT_D : STGT_LE<"stgt.d", GPR64Opnd>, R3MI<0b11111011>; +def STLE_B : STGT_LE<"stle.b", GPR64Opnd>, R3MI<0b11111100>; +def STLE_H : STGT_LE<"stle.h", GPR64Opnd>, R3MI<0b11111101>; +def STLE_W : STGT_LE<"stle.w", GPR64Opnd>, R3MI<0b11111110>; +def STLE_D : STGT_LE<"stle.d", GPR64Opnd>, R3MI<0b11111111>; + +let isCodeGenOnly = 1 in { +def PRELD : Preld<"preld", mem, GPR64Opnd>, PRELD_FM; +} + +def PRELD_Raw : Preld_Raw<"preld", GPR64Opnd>, PRELD_FM; + +let isCall=1, isCTI=1, Defs = [RA] in { + class JumpLink : + InstForm<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), + [(LoongArchJmpLink tglobaladdr:$target)], FrmJ, opstr> { + let DecoderMethod = "DecodeJumpTarget"; + } +} +def LONG_BRANCH_PCADDU12I : LoongArchPseudo<(outs GPR64Opnd:$dst), + (ins brtarget:$tgt), []>, GPR_64; + +def LONG_BRANCH_ADDID2Op : LoongArchPseudo<(outs GPR64Opnd:$dst), + (ins GPR64Opnd:$src, brtarget:$tgt), []>, GPR_64; + +def LONG_BRANCH_ADDID : LoongArchPseudo<(outs GPR64Opnd:$dst), + (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>, GPR_64; + +def LEA_ADDI_D: EffectiveAddress<"addi.d", GPR64Opnd>, 
+
+class PseudoReturnBase : LoongArchPseudo<(outs), (ins RO:$rs),
+                                         []> {
+  let isTerminator = 1;
+  let isBarrier = 1;
+  let isReturn = 1;
+  let isCodeGenOnly = 1;
+  let hasCtrlDep = 1;
+  let hasExtraSrcRegAllocReq = 1;
+  bit isCTI = 1;
+}
+
+def PseudoReturn64 : PseudoReturnBase;
+//def PseudoReturn : PseudoReturnBase;
+
+
+let isCall=1, isCTI=1, Defs=[RA], isCodeGenOnly=1 in {
+def PseudoCall : LoongArchPseudo<(outs), (ins calltarget:$target),
+                                 []>;
 }
-class Br_I26 op, string opstr>
-    : FmtI26 {
-  let isBranch = 1;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in
+def PseudoTailCall : LoongArchPseudo<(outs), (ins calltarget:$target),
+                                     []>;
+
+class PseudoTailBase : LoongArchPseudo<(outs), (ins opnd:$offset26),
+                                       []> {
+  let isTerminator = 1;
+  let isBarrier = 1;
+  let isReturn = 1;
+  let isCodeGenOnly = 1;
+}
+def PseudoTailReturn : PseudoTailBase;
-let mayLoad = 1 in {
-class LOAD_3R op, string opstr>
-    : Fmt3R;
-class LOAD_2RI12 op, string opstr>
-    : Fmt2RI12;
-class LOAD_2RI14 op, string opstr>
-    : Fmt2RI14;
-} // mayLoad = 1
-
-let mayStore = 1 in {
-class STORE_3R op, string opstr>
-    : Fmt3R;
-class STORE_2RI12 op, string opstr>
-    : Fmt2RI12;
-class STORE_2RI14 op, string opstr>
-    : Fmt2RI14;
-} // mayStore = 1
-
-let mayLoad = 1, mayStore = 1 in
-class AM_3R op, string opstr>
-    : Fmt3R;
-
-let mayLoad = 1 in
-class LLBase op, string opstr>
-    : Fmt2RI14;
-
-let mayStore = 1, Constraints = "$rd = $dst" in
-class SCBase op, string opstr>
-    : Fmt2RI14;
-
-class IOCSRRD op, string opstr>
-    : Fmt2R;
-
-class IOCSRWR op, string opstr>
-    : Fmt2R;
-//===----------------------------------------------------------------------===//
-// Basic Integer Instructions
-//===----------------------------------------------------------------------===//
+def : LoongArchPat<(LoongArchTailCall tglobaladdr:$dst),
+                   (PseudoTailCall tglobaladdr:$dst)>;
-// Arithmetic Operation Instructions
-def ADD_W : ALU_3R<0b00000000000100000, "add.w">;
-def SUB_W : ALU_3R<0b00000000000100010, "sub.w">;
-def ADDI_W : ALU_2RI12<0b0000001010, "addi.w", simm12>;
-def ALSL_W : ALU_3RI2<0b000000000000010, "alsl.w", uimm2_plus1>;
-def LU12I_W : ALU_1RI20<0b0001010, "lu12i.w", simm20>;
-def SLT : ALU_3R<0b00000000000100100, "slt">;
-def SLTU : ALU_3R<0b00000000000100101, "sltu">;
-def SLTI : ALU_2RI12<0b0000001000, "slti", simm12>;
-def SLTUI : ALU_2RI12<0b0000001001, "sltui", simm12>;
-def PCADDI : ALU_1RI20<0b0001100, "pcaddi", simm20>;
-def PCADDU12I : ALU_1RI20<0b0001110, "pcaddu12i", simm20>;
-def PCALAU12I : ALU_1RI20<0b0001101, "pcalau12i", simm20>;
-def AND : ALU_3R<0b00000000000101001, "and">;
-def OR : ALU_3R<0b00000000000101010, "or">;
-def NOR : ALU_3R<0b00000000000101000, "nor">;
-def XOR : ALU_3R<0b00000000000101011, "xor">;
-def ANDN : ALU_3R<0b00000000000101101, "andn">;
-def ORN : ALU_3R<0b00000000000101100, "orn">;
-def ANDI : ALU_2RI12<0b0000001101, "andi", uimm12>;
-def ORI : ALU_2RI12<0b0000001110, "ori", uimm12>;
-def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>;
-def MUL_W : ALU_3R<0b00000000000111000, "mul.w">;
-def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">;
-def MULH_WU : ALU_3R<0b00000000000111010, "mulh.wu">;
-let usesCustomInserter = true in {
-def DIV_W : ALU_3R<0b00000000001000000, "div.w">;
-def MOD_W : ALU_3R<0b00000000001000001, "mod.w">;
-def DIV_WU : ALU_3R<0b00000000001000010, "div.wu">;
-def MOD_WU : ALU_3R<0b00000000001000011, "mod.wu">;
-} // usesCustomInserter = true
-
-// Bit-shift Instructions
-def SLL_W : ALU_3R<0b00000000000101110, "sll.w">;
-def SRL_W : ALU_3R<0b00000000000101111, "srl.w">;
-def SRA_W : ALU_3R<0b00000000000110000, "sra.w">;
-def ROTR_W : ALU_3R<0b00000000000110110, "rotr.w">;
-
-def SLLI_W : ALU_2RI5<0b00000000010000001, "slli.w", uimm5>;
-def SRLI_W : ALU_2RI5<0b00000000010001001, "srli.w", uimm5>;
-def SRAI_W : ALU_2RI5<0b00000000010010001, "srai.w", uimm5>;
-def ROTRI_W : ALU_2RI5<0b00000000010011001, "rotri.w", uimm5>;
-
-// Bit-manipulation Instructions
-def EXT_W_B : ALU_2R<0b0000000000000000010111, "ext.w.b">;
-def EXT_W_H : ALU_2R<0b0000000000000000010110, "ext.w.h">;
-def CLO_W : ALU_2R<0b0000000000000000000100, "clo.w">;
-def CLZ_W : ALU_2R<0b0000000000000000000101, "clz.w">;
-def CTO_W : ALU_2R<0b0000000000000000000110, "cto.w">;
-def CTZ_W : ALU_2R<0b0000000000000000000111, "ctz.w">;
-def BYTEPICK_W : ALU_3RI2<0b000000000000100, "bytepick.w", uimm2>;
-def REVB_2H : ALU_2R<0b0000000000000000001100, "revb.2h">;
-def BITREV_4B : ALU_2R<0b0000000000000000010010, "bitrev.4b">;
-def BITREV_W : ALU_2R<0b0000000000000000010100, "bitrev.w">;
-let Constraints = "$rd = $dst" in {
-def BSTRINS_W : FmtBSTR_W<0b000000000110, (outs GPR:$dst),
-                          (ins GPR:$rd, GPR:$rj, uimm5:$msbw, uimm5:$lsbw),
-                          "bstrins.w", "$rd, $rj, $msbw, $lsbw">;
-}
-def BSTRPICK_W : FmtBSTR_W<0b000000000111, (outs GPR:$rd),
-                           (ins GPR:$rj, uimm5:$msbw, uimm5:$lsbw),
-                           "bstrpick.w", "$rd, $rj, $msbw, $lsbw">;
-def MASKEQZ : ALU_3R<0b00000000000100110, "maskeqz">;
-def MASKNEZ : ALU_3R<0b00000000000100111, "masknez">;
-
-// Branch Instructions
-def BEQ : BrCC_2RI16<0b010110, "beq">;
-def BNE : BrCC_2RI16<0b010111, "bne">;
-def BLT : BrCC_2RI16<0b011000, "blt">;
-def BGE : BrCC_2RI16<0b011001, "bge">;
-def BLTU : BrCC_2RI16<0b011010, "bltu">;
-def BGEU : BrCC_2RI16<0b011011, "bgeu">;
-def BEQZ : BrCCZ_1RI21<0b010000, "beqz">;
-def BNEZ : BrCCZ_1RI21<0b010001, "bnez">;
-def B : Br_I26<0b010100, "b">;
-
-let isCall = 1 in
-def BL : FmtI26<0b010101, (outs), (ins simm26_lsl2:$imm26), "bl", "$imm26">;
-def JIRL : Fmt2RI16<0b010011, (outs GPR:$rd),
-                    (ins GPR:$rj, simm16_lsl2:$imm16), "jirl",
-                    "$rd, $rj, $imm16">;
-
-// Common Memory Access Instructions
-def LD_B : LOAD_2RI12<0b0010100000, "ld.b">;
-def LD_H : LOAD_2RI12<0b0010100001, "ld.h">;
-def LD_W : LOAD_2RI12<0b0010100010, "ld.w">;
-def LD_BU : LOAD_2RI12<0b0010101000, "ld.bu">;
-def LD_HU : LOAD_2RI12<0b0010101001, "ld.hu">;
-def ST_B : STORE_2RI12<0b0010100100, "st.b">;
-def ST_H : STORE_2RI12<0b0010100101, "st.h">;
-def ST_W : STORE_2RI12<0b0010100110, "st.w">;
-def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), "preld",
-                     "$imm5, $rj, $imm12">;
-
-// Atomic Memory Access Instructions
-def LL_W : LLBase<0b00100000, "ll.w">;
-def SC_W : SCBase<0b00100001, "sc.w">;
-
-// Barrier Instructions
-def DBAR : MISC_I15<0b00111000011100100, "dbar">;
-def IBAR : MISC_I15<0b00111000011100101, "ibar">;
-
-// Other Miscellaneous Instructions
-def SYSCALL : MISC_I15<0b00000000001010110, "syscall">;
-def BREAK : MISC_I15<0b00000000001010100, "break">;
-def RDTIMEL_W : RDTIME_2R<0b0000000000000000011000, "rdtimel.w">;
-def RDTIMEH_W : RDTIME_2R<0b0000000000000000011001, "rdtimeh.w">;
-def CPUCFG : ALU_2R<0b0000000000000000011011, "cpucfg">;
-
-/// LA64 instructions
-
-let Predicates = [IsLA64] in {
-
-// Arithmetic Operation Instructions for 64-bits
-def ADD_D : ALU_3R<0b00000000000100001, "add.d">;
-def SUB_D : ALU_3R<0b00000000000100011, "sub.d">;
-def ADDI_D : ALU_2RI12<0b0000001011, "addi.d", simm12>;
-def ADDU16I_D : ALU_2RI16<0b000100, "addu16i.d", simm16>;
-def ALSL_WU : ALU_3RI2<0b000000000000011, "alsl.wu", uimm2_plus1>;
-def ALSL_D : ALU_3RI2<0b000000000010110, "alsl.d", uimm2_plus1>;
-let Constraints = "$rd = $dst" in {
-def LU32I_D : Fmt1RI20<0b0001011, (outs GPR:$dst),
-                       (ins GPR:$rd, simm20:$imm20), "lu32i.d",
-                       "$rd, $imm20">;
-}
-def LU52I_D : ALU_2RI12<0b0000001100, "lu52i.d", simm12>;
-def PCADDU18I : ALU_1RI20<0b0001111, "pcaddu18i", simm20>;
-def MUL_D : ALU_3R<0b00000000000111011, "mul.d">;
-def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">;
-def MULH_DU : ALU_3R<0b00000000000111101, "mulh.du">;
-def MULW_D_W : ALU_3R<0b00000000000111110, "mulw.d.w">;
-def MULW_D_WU : ALU_3R<0b00000000000111111, "mulw.d.wu">;
-let usesCustomInserter = true in {
-def DIV_D : ALU_3R<0b00000000001000100, "div.d">;
-def MOD_D : ALU_3R<0b00000000001000101, "mod.d">;
-def DIV_DU : ALU_3R<0b00000000001000110, "div.du">;
-def MOD_DU : ALU_3R<0b00000000001000111, "mod.du">;
-} // usesCustomInserter = true
-
-// Bit-shift Instructions for 64-bits
-def SLL_D : ALU_3R<0b00000000000110001, "sll.d">;
-def SRL_D : ALU_3R<0b00000000000110010, "srl.d">;
-def SRA_D : ALU_3R<0b00000000000110011, "sra.d">;
-def ROTR_D : ALU_3R<0b00000000000110111, "rotr.d">;
-def SLLI_D : ALU_2RI6<0b0000000001000001, "slli.d", uimm6>;
-def SRLI_D : ALU_2RI6<0b0000000001000101, "srli.d", uimm6>;
-def SRAI_D : ALU_2RI6<0b0000000001001001, "srai.d", uimm6>;
-def ROTRI_D : ALU_2RI6<0b0000000001001101, "rotri.d", uimm6>;
-
-// Bit-manipulation Instructions for 64-bits
-def CLO_D : ALU_2R<0b0000000000000000001000, "clo.d">;
-def CLZ_D : ALU_2R<0b0000000000000000001001, "clz.d">;
-def CTO_D : ALU_2R<0b0000000000000000001010, "cto.d">;
-def CTZ_D : ALU_2R<0b0000000000000000001011, "ctz.d">;
-def BYTEPICK_D : ALU_3RI3<0b00000000000011, "bytepick.d", uimm3>;
-def REVB_4H : ALU_2R<0b0000000000000000001101, "revb.4h">;
-def REVB_2W : ALU_2R<0b0000000000000000001110, "revb.2w">;
-def REVB_D : ALU_2R<0b0000000000000000001111, "revb.d">;
-def REVH_2W : ALU_2R<0b0000000000000000010000, "revh.2w">;
-def REVH_D : ALU_2R<0b0000000000000000010001, "revh.d">;
-def BITREV_8B : ALU_2R<0b0000000000000000010011, "bitrev.8b">;
-def BITREV_D : ALU_2R<0b0000000000000000010101, "bitrev.d">;
-let Constraints = "$rd = $dst" in {
-def BSTRINS_D : FmtBSTR_D<0b0000000010, (outs GPR:$dst),
-                          (ins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
-                          "bstrins.d", "$rd, $rj, $msbd, $lsbd">;
-}
-def BSTRPICK_D : FmtBSTR_D<0b0000000011, (outs GPR:$rd),
-                           (ins GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
-                           "bstrpick.d", "$rd, $rj, $msbd, $lsbd">;
-
-// Common Memory Access Instructions for 64-bits
-def LD_WU : LOAD_2RI12<0b0010101010, "ld.wu">;
-def LD_D : LOAD_2RI12<0b0010100011, "ld.d">;
-def ST_D : STORE_2RI12<0b0010100111, "st.d">;
-def LDX_B : LOAD_3R<0b00111000000000000, "ldx.b">;
-def LDX_H : LOAD_3R<0b00111000000001000, "ldx.h">;
-def LDX_W : LOAD_3R<0b00111000000010000, "ldx.w">;
-def LDX_D : LOAD_3R<0b00111000000011000, "ldx.d">;
-def LDX_BU : LOAD_3R<0b00111000001000000, "ldx.bu">;
-def LDX_HU : LOAD_3R<0b00111000001001000, "ldx.hu">;
-def LDX_WU : LOAD_3R<0b00111000001010000, "ldx.wu">;
-def STX_B : STORE_3R<0b00111000000100000, "stx.b">;
-def STX_H : STORE_3R<0b00111000000101000, "stx.h">;
-def STX_W : STORE_3R<0b00111000000110000, "stx.w">;
-def STX_D : STORE_3R<0b00111000000111000, "stx.d">;
-def LDPTR_W : LOAD_2RI14<0b00100100, "ldptr.w">;
-def LDPTR_D : LOAD_2RI14<0b00100110, "ldptr.d">;
-def STPTR_W : STORE_2RI14<0b00100101, "stptr.w">;
-def STPTR_D : STORE_2RI14<0b00100111, "stptr.d">;
-def PRELDX : FmtPRELDX<(outs), (ins uimm5:$imm5, GPR:$rj, GPR:$rk), "preldx",
-                       "$imm5, $rj, $rk">;
-
-// Bound Check Memory Access Instructions
-def LDGT_B : LOAD_3R<0b00111000011110000, "ldgt.b">;
-def LDGT_H : LOAD_3R<0b00111000011110001, "ldgt.h">;
-def LDGT_W : LOAD_3R<0b00111000011110010, "ldgt.w">;
-def LDGT_D : LOAD_3R<0b00111000011110011, "ldgt.d">;
-def LDLE_B : LOAD_3R<0b00111000011110100, "ldle.b">;
-def LDLE_H : LOAD_3R<0b00111000011110101, "ldle.h">;
-def LDLE_W : LOAD_3R<0b00111000011110110, "ldle.w">;
-def LDLE_D : LOAD_3R<0b00111000011110111, "ldle.d">;
-def STGT_B : STORE_3R<0b00111000011111000, "stgt.b">;
-def STGT_H : STORE_3R<0b00111000011111001, "stgt.h">;
-def STGT_W : STORE_3R<0b00111000011111010, "stgt.w">;
-def STGT_D : STORE_3R<0b00111000011111011, "stgt.d">;
-def STLE_B : STORE_3R<0b00111000011111100, "stle.b">;
-def STLE_H : STORE_3R<0b00111000011111101, "stle.h">;
-def STLE_W : STORE_3R<0b00111000011111110, "stle.w">;
-def STLE_D : STORE_3R<0b00111000011111111, "stle.d">;
-
-// Atomic Memory Access Instructions for 64-bits
-def AMSWAP_W : AM_3R<0b00111000011000000, "amswap.w">;
-def AMSWAP_D : AM_3R<0b00111000011000001, "amswap.d">;
-def AMADD_W : AM_3R<0b00111000011000010, "amadd.w">;
-def AMADD_D : AM_3R<0b00111000011000011, "amadd.d">;
-def AMAND_W : AM_3R<0b00111000011000100, "amand.w">;
-def AMAND_D : AM_3R<0b00111000011000101, "amand.d">;
-def AMOR_W : AM_3R<0b00111000011000110, "amor.w">;
-def AMOR_D : AM_3R<0b00111000011000111, "amor.d">;
-def AMXOR_W : AM_3R<0b00111000011001000, "amxor.w">;
-def AMXOR_D : AM_3R<0b00111000011001001, "amxor.d">;
-def AMMAX_W : AM_3R<0b00111000011001010, "ammax.w">;
-def AMMAX_D : AM_3R<0b00111000011001011, "ammax.d">;
-def AMMIN_W : AM_3R<0b00111000011001100, "ammin.w">;
-def AMMIN_D : AM_3R<0b00111000011001101, "ammin.d">;
-def AMMAX_WU : AM_3R<0b00111000011001110, "ammax.wu">;
-def AMMAX_DU : AM_3R<0b00111000011001111, "ammax.du">;
-def AMMIN_WU : AM_3R<0b00111000011010000, "ammin.wu">;
-def AMMIN_DU : AM_3R<0b00111000011010001, "ammin.du">;
-def AMSWAP_DB_W : AM_3R<0b00111000011010010, "amswap_db.w">;
-def AMSWAP_DB_D : AM_3R<0b00111000011010011, "amswap_db.d">;
-def AMADD_DB_W : AM_3R<0b00111000011010100, "amadd_db.w">;
-def AMADD_DB_D : AM_3R<0b00111000011010101, "amadd_db.d">;
-def AMAND_DB_W : AM_3R<0b00111000011010110, "amand_db.w">;
-def AMAND_DB_D : AM_3R<0b00111000011010111, "amand_db.d">;
-def AMOR_DB_W : AM_3R<0b00111000011011000, "amor_db.w">;
-def AMOR_DB_D : AM_3R<0b00111000011011001, "amor_db.d">;
-def AMXOR_DB_W : AM_3R<0b00111000011011010, "amxor_db.w">;
-def AMXOR_DB_D : AM_3R<0b00111000011011011, "amxor_db.d">;
-def AMMAX_DB_W : AM_3R<0b00111000011011100, "ammax_db.w">;
-def AMMAX_DB_D : AM_3R<0b00111000011011101, "ammax_db.d">;
-def AMMIN_DB_W : AM_3R<0b00111000011011110, "ammin_db.w">;
-def AMMIN_DB_D : AM_3R<0b00111000011011111, "ammin_db.d">;
-def AMMAX_DB_WU : AM_3R<0b00111000011100000, "ammax_db.wu">;
-def AMMAX_DB_DU : AM_3R<0b00111000011100001, "ammax_db.du">;
-def AMMIN_DB_WU : AM_3R<0b00111000011100010, "ammin_db.wu">;
-def AMMIN_DB_DU : AM_3R<0b00111000011100011, "ammin_db.du">;
-def LL_D : LLBase<0b00100010, "ll.d">;
-def SC_D : SCBase<0b00100011, "sc.d">;
-
-// CRC Check Instructions
-def CRC_W_B_W : ALU_3R<0b00000000001001000, "crc.w.b.w">;
-def CRC_W_H_W : ALU_3R<0b00000000001001001, "crc.w.h.w">;
-def CRC_W_W_W : ALU_3R<0b00000000001001010, "crc.w.w.w">;
-def CRC_W_D_W : ALU_3R<0b00000000001001011, "crc.w.d.w">;
-def CRCC_W_B_W : ALU_3R<0b00000000001001100, "crcc.w.b.w">;
-def CRCC_W_H_W : ALU_3R<0b00000000001001101, "crcc.w.h.w">;
-def CRCC_W_W_W : ALU_3R<0b00000000001001110, "crcc.w.w.w">;
-def CRCC_W_D_W : ALU_3R<0b00000000001001111, "crcc.w.d.w">;
-
-// Other Miscellaneous Instructions for 64-bits
-def ASRTLE_D : FmtASRT<0b00000000000000010, (outs), (ins GPR:$rj, GPR:$rk),
-                       "asrtle.d", "$rj, $rk">;
-def ASRTGT_D : FmtASRT<0b00000000000000011, (outs), (ins GPR:$rj, GPR:$rk),
-                       "asrtgt.d", "$rj, $rk">;
-def RDTIME_D : RDTIME_2R<0b0000000000000000011010, "rdtime.d">;
-} // Predicates = [IsLA64]
+def : LoongArchPat<(LoongArchTailCall texternalsym:$dst),
+                   (PseudoTailCall texternalsym:$dst)>;
-//===----------------------------------------------------------------------===//
-// Pseudo-instructions and codegen patterns
-//
-// Naming convention: For 'generic' pattern classes, we use the naming
-// convention PatTy1Ty2.
-//===----------------------------------------------------------------------===//
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isIndirectBranch = 1, Uses = [SP] in
+def PseudoTAILIndirect : LoongArchPseudo<(outs), (ins GPRTC64Opnd:$rj), [(LoongArchTailCall GPRTC64Opnd:$rj)]>,
+                         PseudoInstExpansion<(JIRL ZERO_64, GPR64Opnd:$rj, 0)>;
-/// Generic pattern classes
-
-class PatGprGpr
-    : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
-class PatGprGpr_32
-    : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
-
-class PatGprImm
-    : Pat<(OpNode GPR:$rj, ImmOpnd:$imm),
-          (Inst GPR:$rj, ImmOpnd:$imm)>;
-class PatGprImm_32
-    : Pat<(sext_inreg (OpNode GPR:$rj, ImmOpnd:$imm), i32),
-          (Inst GPR:$rj, ImmOpnd:$imm)>;
-
-/// Simple arithmetic operations
-
-// Match both a plain shift and one where the shift amount is masked (this is
-// typically introduced when the legalizer promotes the shift amount and
-// zero-extends it). For LoongArch, the mask is unnecessary as shifts in the
-// base ISA only read the least significant 5 bits (LA32) or 6 bits (LA64).
-def shiftMaskGRLen
-    : ComplexPattern;
-def shiftMask32 : ComplexPattern;
-
-def sexti32 : ComplexPattern;
-def zexti32 : ComplexPattern;
-
-class shiftop
-    : PatFrag<(ops node:$val, node:$count),
-              (operator node:$val, (GRLenVT (shiftMaskGRLen node:$count)))>;
-class shiftopw
-    : PatFrag<(ops node:$val, node:$count),
-              (operator node:$val, (i64 (shiftMask32 node:$count)))>;
-
-let Predicates = [IsLA32] in {
-def : PatGprGpr;
-def : PatGprImm;
-def : PatGprGpr;
-def : PatGprGpr;
-def : PatGprGpr;
-def : PatGprGpr;
-def : PatGprGpr;
-def : PatGprGpr;
-def : PatGprGpr;
-def : PatGprGpr;
-} // Predicates = [IsLA32]
-
-let Predicates = [IsLA64] in {
-def : PatGprGpr;
-def : PatGprGpr_32;
-def : PatGprImm;
-def : PatGprImm_32;
-def : PatGprGpr;
-def : PatGprGpr_32;
-def : PatGprGpr;
-def : PatGprGpr;
-def : PatGprGpr;
-def : PatGprGpr;
-// TODO: Select "_W[U]" instructions for i32xi32 if only lower 32 bits of the
-// product are used.
-def : PatGprGpr;
-def : PatGprGpr;
-def : PatGprGpr;
-// Select MULW_D_W for calculating the full 64 bits product of i32xi32 signed
-// multiplication.
-def : Pat<(i64 (mul (sext_inreg GPR:$rj, i32), (sext_inreg GPR:$rk, i32))),
-          (MULW_D_W GPR:$rj, GPR:$rk)>;
-// Select MULW_D_WU for calculating the full 64 bits product of i32xi32
-// unsigned multiplication.
-def : Pat<(i64 (mul (loongarch_bstrpick GPR:$rj, (i64 31), (i64 0)),
-                    (loongarch_bstrpick GPR:$rk, (i64 31), (i64 0)))),
-          (MULW_D_WU GPR:$rj, GPR:$rk)>;
-} // Predicates = [IsLA64]
-
-def : PatGprGpr;
-def : PatGprImm;
-def : PatGprGpr;
-def : PatGprImm;
-def : PatGprGpr;
-def : PatGprImm;
-
-/// Shift
-
-let Predicates = [IsLA32] in {
-def : PatGprGpr, SLL_W>;
-def : PatGprGpr, SRA_W>;
-def : PatGprGpr, SRL_W>;
-def : PatGprImm;
-def : PatGprImm;
-def : PatGprImm;
-} // Predicates = [IsLA32]
-
-let Predicates = [IsLA64] in {
-def : PatGprGpr, SLL_W>;
-def : PatGprGpr, SRA_W>;
-def : PatGprGpr, SRL_W>;
-def : PatGprGpr, SLL_D>;
-def : PatGprGpr, SRA_D>;
-def : PatGprGpr, SRL_D>;
-def : PatGprImm;
-def : PatGprImm;
-def : PatGprImm;
-} // Predicates = [IsLA64]
-
-/// sext and zext
-
-def : Pat<(sext_inreg GPR:$rj, i8), (EXT_W_B GPR:$rj)>;
-def : Pat<(sext_inreg GPR:$rj, i16), (EXT_W_H GPR:$rj)>;
-
-let Predicates = [IsLA64] in {
-def : Pat<(sext_inreg GPR:$rj, i32), (ADDI_W GPR:$rj, 0)>;
-} // Predicates = [IsLA64]
-
-/// Setcc
-
-def : PatGprGpr;
-def : PatGprImm;
-def : PatGprGpr;
-def : PatGprImm;
-
-// Define pattern expansions for setcc operations that aren't directly
-// handled by a LoongArch instruction.
-def : Pat<(seteq GPR:$rj, 0), (SLTUI GPR:$rj, 1)>;
-def : Pat<(seteq GPR:$rj, GPR:$rk), (SLTUI (XOR GPR:$rj, GPR:$rk), 1)>;
-let Predicates = [IsLA32] in {
-def : Pat<(seteq GPR:$rj, simm12_plus1:$imm12),
-          (SLTUI (ADDI_W GPR:$rj, (NegImm simm12_plus1:$imm12)), 1)>;
-} // Predicates = [IsLA32]
-let Predicates = [IsLA64] in {
-def : Pat<(seteq GPR:$rj, simm12_plus1:$imm12),
-          (SLTUI (ADDI_D GPR:$rj, (NegImm simm12_plus1:$imm12)), 1)>;
-} // Predicates = [IsLA64]
-def : Pat<(setne GPR:$rj, 0), (SLTU R0, GPR:$rj)>;
-def : Pat<(setne GPR:$rj, GPR:$rk), (SLTU R0, (XOR GPR:$rj, GPR:$rk))>;
-let Predicates = [IsLA32] in {
-def : Pat<(setne GPR:$rj, simm12_plus1:$imm12),
-          (SLTU R0, (ADDI_W GPR:$rj, (NegImm simm12_plus1:$imm12)))>;
-} // Predicates = [IsLA32]
-let Predicates = [IsLA64] in {
-def : Pat<(setne GPR:$rj, simm12_plus1:$imm12),
-          (SLTU R0, (ADDI_D GPR:$rj, (NegImm simm12_plus1:$imm12)))>;
-} // Predicates = [IsLA64]
-def : Pat<(setugt GPR:$rj, GPR:$rk), (SLTU GPR:$rk, GPR:$rj)>;
-def : Pat<(setuge GPR:$rj, GPR:$rk), (XORI (SLTU GPR:$rj, GPR:$rk), 1)>;
-def : Pat<(setule GPR:$rj, GPR:$rk), (XORI (SLTU GPR:$rk, GPR:$rj), 1)>;
-def : Pat<(setgt GPR:$rj, GPR:$rk), (SLT GPR:$rk, GPR:$rj)>;
-def : Pat<(setge GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rj, GPR:$rk), 1)>;
-def : Pat<(setle GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rk, GPR:$rj), 1)>;
-
-/// Select
-
-def : Pat<(select GPR:$cond, GPR:$t, GPR:$f),
-          (OR (MASKEQZ GPR:$t, GPR:$cond), (MASKNEZ GPR:$f, GPR:$cond))>;
-
-/// Branches and jumps
-
-class BccPat
-    : Pat<(brcond (GRLenVT (CondOp GPR:$rj, GPR:$rd)), bb:$imm16),
-          (Inst GPR:$rj, GPR:$rd, bb:$imm16)>;
-
-def : BccPat;
-def : BccPat;
-def : BccPat;
-def : BccPat;
-def : BccPat;
-def : BccPat;
-
-class BccSwapPat
-    : Pat<(brcond (GRLenVT (CondOp GPR:$rd, GPR:$rj)), bb:$imm16),
-          (InstBcc GPR:$rj, GPR:$rd, bb:$imm16)>;
-
-// Condition codes that don't have matching LoongArch branch instructions, but
-// are trivially supported by swapping the two input operands.
-def : BccSwapPat;
-def : BccSwapPat;
-def : BccSwapPat;
-def : BccSwapPat;
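These expansions synthesize comparisons that have no dedicated instruction; for instance (seteq a, b) becomes SLTUI(XOR(a, b), 1). A C rendering of that identity (illustrative only):

    #include <stdint.h>

    uint64_t is_equal(uint64_t a, uint64_t b) {
        uint64_t x = a ^ b;   /* xor: zero iff a == b        */
        return x < 1;         /* sltui x, 1: tests x == 0    */
    }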
-
-// An extra pattern is needed for a brcond without a setcc (i.e. where the
-// condition was calculated elsewhere).
-def : Pat<(brcond GPR:$rj, bb:$imm21), (BNEZ GPR:$rj, bb:$imm21)>;
-
-let isBarrier = 1, isBranch = 1, isTerminator = 1 in
-def PseudoBR : Pseudo<(outs), (ins simm26_lsl2:$imm26), [(br bb:$imm26)]>,
-               PseudoInstExpansion<(B simm26_lsl2:$imm26)>;
-
-let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
-def PseudoBRIND : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16), []>,
-                  PseudoInstExpansion<(JIRL R0, GPR:$rj, simm16_lsl2:$imm16)>;
-
-def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>;
-def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)),
-          (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>;
-
-let isCall = 1, Defs = [R1] in
-def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> {
-  let AsmString = "bl\t$func";
-}
-
-def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
-def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
-
-let isCall = 1, Defs = [R1] in
-def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj),
-                                [(loongarch_call GPR:$rj)]>,
-                         PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>;
-
-let isBarrier = 1, isReturn = 1, isTerminator = 1 in
-def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>,
-                PseudoInstExpansion<(JIRL R0, R1, 0)>;
-
-/// BSTRINS and BSTRPICK
-
-let Predicates = [IsLA32] in {
-def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
-          (BSTRINS_W GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
-def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd),
-          (BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>;
-} // Predicates = [IsLA32]
-
-let Predicates = [IsLA64] in {
-def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
-          (BSTRINS_D GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
-def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
-          (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>;
-} // Predicates = [IsLA64]
-
-/// Loads
-
-multiclass LdPat {
-  def : Pat<(vt (LoadOp BaseAddr:$rj)), (Inst BaseAddr:$rj, 0)>;
-  def : Pat<(vt (LoadOp (add BaseAddr:$rj, simm12:$imm12))),
-            (Inst BaseAddr:$rj, simm12:$imm12)>;
-}
-
-defm : LdPat;
-defm : LdPat;
-defm : LdPat;
-defm : LdPat;
-defm : LdPat, Requires<[IsLA32]>;
-defm : LdPat;
-defm : LdPat;
-let Predicates = [IsLA64] in {
-defm : LdPat;
-defm : LdPat;
-defm : LdPat;
-defm : LdPat;
-} // Predicates = [IsLA64]
-
-/// Stores
-
-multiclass StPat {
-  def : Pat<(StoreOp (vt StTy:$rd), BaseAddr:$rj),
-            (Inst StTy:$rd, BaseAddr:$rj, 0)>;
-  def : Pat<(StoreOp (vt StTy:$rd), (add BaseAddr:$rj, simm12:$imm12)),
-            (Inst StTy:$rd, BaseAddr:$rj, simm12:$imm12)>;
-}
-
-defm : StPat;
-defm : StPat;
-defm : StPat, Requires<[IsLA32]>;
-let Predicates = [IsLA64] in {
-defm : StPat;
-defm : StPat;
-} // Predicates = [IsLA64]
-
-/// Atomic loads and stores
-
-def : Pat<(atomic_fence timm, timm), (DBAR 0)>;
-
-defm : LdPat;
-defm : LdPat;
-defm : LdPat;
-
-defm : StPat;
-defm : StPat;
-defm : StPat, Requires<[IsLA32]>;
-let Predicates = [IsLA64] in {
-defm : LdPat;
-defm : StPat;
-defm : StPat;
-} // Predicates = [IsLA64]
-
-/// Other pseudo-instructions
-
-// Pessimistically assume the stack pointer will be clobbered
-let Defs = [R3], Uses = [R3] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
-                              [(callseq_start timm:$amt1, timm:$amt2)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
-                            [(callseq_end timm:$amt1, timm:$amt2)]>;
-} // Defs = [R3], Uses = [R3]
-//===----------------------------------------------------------------------===//
-// Assembler Pseudo Instructions
-//===----------------------------------------------------------------------===//
+def : LoongArchPat<(LoongArchJmpLink tglobaladdr:$dst),
+                   (PseudoCall tglobaladdr:$dst)>;
-def : InstAlias<"nop", (ANDI R0, R0, 0)>;
-def : InstAlias<"move $dst, $src", (OR GPR:$dst, GPR:$src, R0)>;
+def : LoongArchPat<(LoongArchJmpLink (i32 texternalsym:$dst)),
+                   (PseudoCall texternalsym:$dst)>;
+def : LoongArchPat<(LoongArchJmpLink (i64 texternalsym:$dst)),
+                   (PseudoCall texternalsym:$dst)>;
+
+def BL : JumpLink<"bl", calltarget>, FJ<0b010101>;
+
+class IsAsCheapAsAMove {
+  bit isAsCheapAsAMove = 1;
+}
+class LoadUpper:
+  InstForm<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
+           [], FrmI, opstr>, IsAsCheapAsAMove {
+  let hasSideEffects = 0;
+  let isReMaterializable = 1;
+  let mayLoad = 1;
+}
+
+let isCodeGenOnly = 1 in {
+def LAPCREL : LoadUpper<"la.pcrel", GPR64Opnd, uimm16_64_relaxed>, LUI_FM, GPR_64;
+}
+
+def NOP : LoongArchPseudo<(outs), (ins), []>,
+          PseudoInstExpansion<(ANDI ZERO_64, ZERO_64, 0)>;
+
+def : LoongArchInstAlias<"nop", (ANDI ZERO_64, ZERO_64, 0), 1>;
+def : LoongArchInstAlias<"jr $rd", (JIRL ZERO_64, GPR64Opnd:$rd, 0), 1>;
+def : LoongArchInstAlias<"move $dst, $src",
+                         (OR GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, GPR_64;
+
+def UImm12RelaxedAsmOperandClass
+  : UImmAsmOperandClass<12, [ConstantUImm20AsmOperandClass]> {
+  let Name = "UImm12_Relaxed";
+  let PredicateMethod = "isAnyImm<12>";
+  let DiagnosticType = "UImm12_Relaxed";
+}
+
+def SImm12RelaxedAsmOperandClass
+  : SImmAsmOperandClass<12, [UImm12RelaxedAsmOperandClass]> {
+  let Name = "SImm12_Relaxed";
+  let PredicateMethod = "isAnyImm<12>";
+  let DiagnosticType = "SImm12_Relaxed";
+}
+
+def simm12_relaxed : Operand {
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<12>";
+  let ParserMatchClass = !cast("SImm12RelaxedAsmOperandClass");
+}
+
+def : LoongArchPat<(i64 (anyext GPR32:$src)),
+                   (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>, GPR_64;
+
+let usesCustomInserter = 1 in {
+  def ATOMIC_LOAD_ADD_I64 : Atomic2Ops;
+  def ATOMIC_LOAD_SUB_I64 : Atomic2Ops;
+  def ATOMIC_LOAD_AND_I64 : Atomic2Ops;
+  def ATOMIC_LOAD_OR_I64 : Atomic2Ops;
+  def ATOMIC_LOAD_XOR_I64 : Atomic2Ops;
+  def ATOMIC_LOAD_NAND_I64 : Atomic2Ops;
+  def ATOMIC_SWAP_I64 : Atomic2Ops;
+
+  defm I64_ : AtomicCmpSwapInstrs<"64", GPR64>;
+
+  def ATOMIC_LOAD_MAX_I64 : Atomic2Ops;
+  def ATOMIC_LOAD_MIN_I64 : Atomic2Ops;
+  def ATOMIC_LOAD_UMAX_I64 : Atomic2Ops;
+  def ATOMIC_LOAD_UMIN_I64 : Atomic2Ops;
+}
+
+def ATOMIC_LOAD_ADD_I64_POSTRA : Atomic2OpsPostRA;
+def ATOMIC_LOAD_SUB_I64_POSTRA : Atomic2OpsPostRA;
+def ATOMIC_LOAD_AND_I64_POSTRA : Atomic2OpsPostRA;
+def ATOMIC_LOAD_OR_I64_POSTRA : Atomic2OpsPostRA;
+def ATOMIC_LOAD_XOR_I64_POSTRA : Atomic2OpsPostRA;
+def ATOMIC_LOAD_NAND_I64_POSTRA : Atomic2OpsPostRA;
+
+def ATOMIC_SWAP_I64_POSTRA : Atomic2OpsPostRA;
+
+def ATOMIC_CMP_SWAP_I64_POSTRA : AtomicCmpSwapPostRA;
+
+def ATOMIC_LOAD_MAX_I64_POSTRA : Atomic2OpsPostRA;
+
+def ATOMIC_LOAD_MIN_I64_POSTRA : Atomic2OpsPostRA;
+
+def ATOMIC_LOAD_UMAX_I64_POSTRA : Atomic2OpsPostRA;
+
+def ATOMIC_LOAD_UMIN_I64_POSTRA : Atomic2OpsPostRA;
+
+def : LoongArchPat<(atomic_load_8 addr:$a), (LD_B addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_16 addr:$a), (LD_H addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_32 addrimm14lsl2:$a), (LDPTR_W addrimm14lsl2:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_32 addr:$a), (LD_W addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_64 addrimm14lsl2:$a), (LDPTR_D addrimm14lsl2:$a)>, GPR_64;
+def : LoongArchPat<(atomic_load_64 addr:$a), (LD_D addr:$a)>, GPR_64;
+
+def : LoongArchPat<(atomic_store_8 addr:$a, GPR64:$v),
+                   (ST_B GPR64:$v, addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_16 addr:$a, GPR64:$v),
+                   (ST_H GPR64:$v, addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR64:$v),
+                   (STPTR_W GPR64:$v, addrimm14lsl2:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_32 addr:$a, GPR64:$v),
+                   (ST_W GPR64:$v, addr:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_64 addrimm14lsl2:$a, GPR64:$v),
+                   (STPTR_D GPR64:$v, addrimm14lsl2:$a)>, GPR_64;
+def : LoongArchPat<(atomic_store_64 addr:$a, GPR64:$v),
+                   (ST_D GPR64:$v, addr:$a)>, GPR_64;
+
+def : LoongArchPat<(bswap GPR64:$rt), (REVH_D (REVB_4H GPR64:$rt))>;
+
+def immZExt5 : ImmLeaf;
+
+def immZExtRange2To64 : PatLeaf<(imm), [{
+  return isUInt<7>(N->getZExtValue()) && (N->getZExtValue() >= 2) &&
+         (N->getZExtValue() <= 64);
+}]>;
+
+// bstrins and bstrpick
+class InsBase
+    : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src),
+               !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"),
+               [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd, RO:$src))],
+               FrmR, opstr> {
+  let Constraints = "$src = $rd";
+}
+
+class InsBase_32
+    : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src),
+               !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"),
+               [(set RO:$rd, (OpNode RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw, RO:$src))],
+               FrmR, opstr> {
+  let Constraints = "$src = $rd";
+}
+
+class PickBase
+    : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd),
+               !strconcat(opstr, "\t$rd, $rj, $msbd, $lsbd"),
+               [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbd, ImmOpnd:$lsbd))],
+               FrmR, opstr>;
+
+class PickBase_32
+    : InstForm<(outs RO:$rd), (ins RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw),
+               !strconcat(opstr, "\t$rd, $rj, $msbw, $lsbw"),
+               [(set RO:$rd, (Op RO:$rj, ImmOpnd:$msbw, ImmOpnd:$lsbw))],
+               FrmR, opstr>;
+
+def BSTRINS_D : InsBase<"bstrins.d", GPR64Opnd, uimm6, LoongArchBstrins>,
+                INSERT_BIT64<0>;
+def BSTRPICK_D : PickBase<"bstrpick.d", GPR64Opnd, uimm6, LoongArchBstrpick>,
+                 INSERT_BIT64<1>;
+
+let isCodeGenOnly = 1 in {
+  def ZEXT64_32 : InstForm<(outs GPR64Opnd:$rd),
+                           (ins GPR32Opnd:$rj, uimm6:$msbd, uimm6:$lsbd),
+                           "bstrpick.d $rd, $rj, $msbd, $lsbd", [], FrmR, "bstrpick.d">,
+                  INSERT_BIT64<1>;
+}
+
+// 32-to-64-bit extension
+def : LoongArchPat<(i64 (zext GPR32:$src)), (ZEXT64_32 GPR32:$src, 31, 0)>;
+def : LoongArchPat<(i64 (extloadi1 addr:$src)), (LD_B addr:$src)>,
+      GPR_64;
+def : LoongArchPat<(i64 (extloadi8 addr:$src)), (LD_B addr:$src)>,
+      GPR_64;
+def : LoongArchPat<(i64 (extloadi16 addr:$src)), (LD_H addr:$src)>,
+      GPR_64;
+def : LoongArchPat<(i64 (extloadi32 addr:$src)), (LD_W addr:$src)>,
+      GPR_64;
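The ZEXT64_32 definition above prints as bstrpick.d $rd, $rj, 31, 0: it extracts bit field [31:0] and clears the upper 32 bits, which is exactly a 32-to-64-bit zero extension. In C (a sketch):

    #include <stdint.h>

    uint64_t zext32(uint32_t x) {
        return (uint64_t)x;   /* selected as bstrpick.d $a0, $a0, 31, 0 */
    }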
+class LDX_FT_LA :
+  InstForm<(outs DRC:$rd), (ins PtrRC:$rj, PtrRC:$rk),
+           !strconcat(opstr, "\t$rd, $rj, $rk"),
+           [(set DRC:$rd, (OpNode (add iPTR:$rj, iPTR:$rk)))],
+           FrmR, opstr> {
+  let AddedComplexity = 20;
+  let canFoldAsLoad = 1;
+  string BaseOpcode = opstr;
+  let mayLoad = 1;
+}
+
+class STX_FT_LA :
+  InstForm<(outs), (ins DRC:$rd, PtrRC:$rj, PtrRC:$rk),
+           !strconcat(opstr, "\t$rd, $rj, $rk"),
+           [(OpNode DRC:$rd, (add iPTR:$rj, iPTR:$rk))],
+           FrmI, opstr> {
+  string BaseOpcode = opstr;
+  let mayStore = 1;
+  let AddedComplexity = 20;
+}
+
+
+def LDX_B : LDX_FT_LA<"ldx.b", GPR64Opnd, sextloadi8>,
+            R3MI<0b00000000>;
+def LDX_H : LDX_FT_LA<"ldx.h", GPR64Opnd, sextloadi16>,
+            R3MI<0b00001000>;
+def LDX_W : LDX_FT_LA<"ldx.w", GPR64Opnd, sextloadi32>,
+            R3MI<0b00010000>;
+def LDX_D : LDX_FT_LA<"ldx.d", GPR64Opnd, load>,
+            R3MI<0b00011000>;
+def STX_B : STX_FT_LA<"stx.b", GPR64Opnd, truncstorei8>,
+            R3MI<0b00100000>;
+def STX_H : STX_FT_LA<"stx.h", GPR64Opnd, truncstorei16>,
+            R3MI<0b00101000>;
+def STX_W : STX_FT_LA<"stx.w", GPR64Opnd, truncstorei32>,
+            R3MI<0b00110000>;
+def STX_D : STX_FT_LA<"stx.d", GPR64Opnd, store>,
+            R3MI<0b00111000>;
+def LDX_BU : LDX_FT_LA<"ldx.bu", GPR64Opnd, extloadi8>,
+             R3MI<0b01000000>;
+def LDX_HU : LDX_FT_LA<"ldx.hu", GPR64Opnd, extloadi16>,
+             R3MI<0b01001000>;
+def LDX_WU : LDX_FT_LA<"ldx.wu", GPR64Opnd, zextloadi32>,
+             R3MI<0b01010000>;
+
+//def : LoongArchPat<(bswap GPR64:$rj), (REVH_D (REVB_4H GPR64:$rj))>;
+//def : LoongArchPat<(bswap GPR64:$rj), (ROTRI_D (REVB_2W GPR64:$rj), 32)>;
+def : LoongArchPat<(bswap GPR64:$rj), (REVB_D GPR64:$rj)>;
+
+let isCodeGenOnly = 1 in {
+  def SLLI_D_64_32 : Shift_Imm64<"", GPR64Opnd>, R2_IMM6<0b00>, GPR_64 {
+    let imm6 = 0;
+    let AsmString = "slli.d\t$rd, $rj, 32";
+    let InOperandList = (ins GPR32:$rj);
+    let OutOperandList = (outs GPR64:$rd);
+  }
+
+  let isMoveReg = 1, imm5 = 0,
+      AsmString = "slli.w\t$rd, $rj, 0",
+      OutOperandList = (outs GPR64:$rd) in {
+    let InOperandList = (ins GPR32:$rj) in
+    def SLLI_W_64_32 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64;
+    let InOperandList = (ins GPR64:$rj) in
+    def SLLI_W_64_64 : Shift_Imm32<"", GPR32Opnd>, R2_IMM5<0b00>, GPR_64;
+  }
+
+  let AsmString = "sltui\t$rd, $rj, $imm12",
+      OutOperandList = (outs GPR64:$rd) in {
+    let InOperandList = (ins GPR64:$rj, simm12:$imm12) in
+    def SLTUI_64 : SetCC_I<"", GPR64Opnd, simm12>, R2_IMM12<0b001>, GPR_64;
+  }
+}
+
+// 32-to-64-bit extension
+//def : LoongArchPat<(i64 (zext GPR32:$src)), (SRLI_D (SLLI_D_64_32 GPR32:$src), 32)>, GPR_64;
+def : LoongArchPat<(i64 (sext GPR32:$src)), (SLLI_W_64_32 GPR32:$src)>, GPR_64;
+def : LoongArchPat<(i64 (sext_inreg GPR64:$src, i32)), (SLLI_W_64_64 GPR64:$src)>, GPR_64;
+
+let Uses = [A0, A1], isTerminator = 1, isReturn = 1, isBarrier = 1, isCTI = 1 in {
+  def LoongArcheh_return32 : LoongArchPseudo<(outs), (ins GPR32:$spoff, GPR32:$dst),
+                                             [(LoongArchehret GPR32:$spoff, GPR32:$dst)]>;
+  def LoongArcheh_return64 : LoongArchPseudo<(outs), (ins GPR64:$spoff, GPR64:$dst),
+                                             [(LoongArchehret GPR64:$spoff, GPR64:$dst)]>;
+}
+
+def : LoongArchPat<(select i32:$cond, i64:$t, i64:$f),
+                   (OR (MASKEQZ i64:$t, (SLLI_W_64_32 i32:$cond)),
+                       (MASKNEZ i64:$f, (SLLI_W_64_32 i32:$cond)))>;
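The select pattern above builds a branchless conditional move: MASKEQZ keeps $t when the condition is non-zero, MASKNEZ keeps $f when it is zero, and OR merges the two. The equivalent C dataflow (not an instruction-for-instruction transcription):

    #include <stdint.h>

    int64_t sel(int32_t c, int64_t t, int64_t f) {
        int64_t mask = (c != 0) ? -1 : 0;   /* all-ones when the condition holds */
        return (t & mask) | (f & ~mask);
    }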
+// setcc patterns
+multiclass SeteqPats {
+  def : LoongArchPat<(seteq RC:$lhs, 0),
+                     (SLTiuOp RC:$lhs, 1)>;
+  def : LoongArchPat<(setne RC:$lhs, 0),
+                     (SLTuOp ZEROReg, RC:$lhs)>;
+  def : LoongArchPat<(seteq RC:$lhs, RC:$rhs),
+                     (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>;
+  def : LoongArchPat<(setne RC:$lhs, RC:$rhs),
+                     (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
+}
+
+multiclass SetlePats {
+  def : LoongArchPat<(setle RC:$lhs, RC:$rhs),
+                     (XORiOp (SLTOp RC:$rhs, RC:$lhs), 1)>;
+  def : LoongArchPat<(setule RC:$lhs, RC:$rhs),
+                     (XORiOp (SLTuOp RC:$rhs, RC:$lhs), 1)>;
+}
+
+multiclass SetgtPats {
+  def : LoongArchPat<(setgt RC:$lhs, RC:$rhs),
+                     (SLTOp RC:$rhs, RC:$lhs)>;
+  def : LoongArchPat<(setugt RC:$lhs, RC:$rhs),
+                     (SLTuOp RC:$rhs, RC:$lhs)>;
+}
+
+multiclass SetgePats {
+  def : LoongArchPat<(setge RC:$lhs, RC:$rhs),
+                     (XORiOp (SLTOp RC:$lhs, RC:$rhs), 1)>;
+  def : LoongArchPat<(setuge RC:$lhs, RC:$rhs),
+                     (XORiOp (SLTuOp RC:$lhs, RC:$rhs), 1)>;
+}
+
+multiclass SetgeImmPats {
+  def : LoongArchPat<(setge RC:$lhs, immSExt12:$rhs),
+                     (XORiOp (SLTiOp RC:$lhs, immSExt12:$rhs), 1)>;
+  def : LoongArchPat<(setuge RC:$lhs, immSExt12:$rhs),
+                     (XORiOp (SLTiuOp RC:$lhs, immSExt12:$rhs), 1)>;
+}
+
+class LoadRegImmPat :
+  LoongArchPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>;
+
+class StoreRegImmPat :
+  LoongArchPat<(Node ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>;
+
+class LoadRegImm14Lsl2Pat :
+  LoongArchPat<(ValTy (Node addrimm14lsl2:$a)), (LoadInst addrimm14lsl2:$a)>;
+
+class StoreRegImm14Lsl2Pat :
+  LoongArchPat<(Node ValTy:$v, addrimm14lsl2:$a), (StoreInst ValTy:$v, addrimm14lsl2:$a)>;
+
+// Patterns for loads/stores with a reg+imm operand.
+// let AddedComplexity = 40 so that these instructions are selected instead of
+// LDX/STX, which need one more register and an extra instruction to form the
+// address.
+let AddedComplexity = 40 in {
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : LoadRegImmPat;
+  def : StoreRegImmPat;
+  def : StoreRegImmPat;
+  def : StoreRegImmPat;
+  def : StoreRegImmPat;
+
+  def : LoadRegImm14Lsl2Pat;
+  def : LoadRegImm14Lsl2Pat;
+  def : StoreRegImm14Lsl2Pat;
+  def : StoreRegImm14Lsl2Pat;
+}
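As the comment above explains, AddedComplexity = 40 biases selection toward the reg+imm forms; the indexed forms would spend a scratch register (and an extra instruction) on the offset. Two C accesses that should land on each form (the instructions in the comments are a best guess, not verified output):

    #include <stdint.h>

    int64_t at_const(const int64_t *p)         { return p[2]; }  /* likely ld.d $a0, $a0, 16        */
    int64_t at_index(const int64_t *p, long i) { return p[i]; }  /* likely slli.d of i, then ldx.d  */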
 //===----------------------------------------------------------------------===//
-// Basic Floating-Point Instructions
+// Base Extension Support
 //===----------------------------------------------------------------------===//
-include "LoongArchFloat32InstrInfo.td"
-include "LoongArchFloat64InstrInfo.td"
+include "LoongArch32InstrInfo.td"
+include "LoongArchInstrInfoF.td"
+include "LoongArchLSXInstrFormats.td"
+include "LoongArchLSXInstrInfo.td"
+include "LoongArchLASXInstrFormats.td"
+include "LoongArchLASXInstrInfo.td"
+
+defm : SeteqPats, GPR_64;
+defm : SetlePats, GPR_64;
+defm : SetgtPats, GPR_64;
+defm : SetgePats, GPR_64;
+defm : SetgeImmPats, GPR_64;
+
+///
+/// for relocation
+///
+let isCodeGenOnly = 1 in {
+def PCADDU12I_ri : SI20<"pcaddu12i", GPR64Opnd, simm20>, R1_SI20<0b0001110>;
+def PCADDU12I_rii : RELOC_rii<"pcaddu12i", GPR64Opnd, simm20>, R1_SI20<0b0001110>;
+def ORI_rri : Int_Reg2_Imm12<"ori", GPR64Opnd, uimm12, or>, R2_IMM12<0b110>;
+def ORI_rrii : RELOC_rrii<"ori", GPR64Opnd, uimm12>, R2_IMM12<0b110>;
+def LU12I_W_ri : SI20<"lu12i.w", GPR64Opnd, simm20>, R1_SI20<0b0001010>;
+def LU32I_D_ri : SI20<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>;
+def LU32I_D_rii : RELOC_rii<"lu32i.d", GPR64Opnd, simm20>, R1_SI20<0b0001011>;
+def LU52I_D_rri : Int_Reg2_Imm12<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>;
+def LU52I_D_rrii : RELOC_rrii<"lu52i.d", GPR64Opnd, simm12>, R2_IMM12<0b100>;
+def ADDI_D_rri : Int_Reg2_Imm12<"addi.d", GPR64Opnd, simm12, add>, R2_IMM12<0b011>;
+def ADDI_D_rrii : RELOC_rrii<"addi.d", GPR64Opnd, simm12>, R2_IMM12<0b011>;
+def LD_D_rri : Ld<"ld.d", GPR64Opnd, mem_simmptr, load>, LOAD_STORE<0b0011>;
+def LD_D_rrii : RELOC_rrii<"ld.d", GPR64Opnd, simm12>, LOAD_STORE_RRI<0b0011>;
+def ADD_D_rrr : Int_Reg3<"add.d", GPR64Opnd, add>, R3I<0b0100001>;
+def LDX_D_rrr : LDX_FT_LA<"ldx.d", GPR64Opnd, load>,
+                R3MI<0b00011000>;
+}
 //===----------------------------------------------------------------------===//
-// Privilege Instructions
+// Assembler Pseudo Instructions
 //===----------------------------------------------------------------------===//
-
-// CSR Access Instructions
-def CSRRD : FmtCSR<0b0000010000000, (outs GPR:$rd), (ins uimm14:$csr_num),
-                   "csrrd", "$rd, $csr_num">;
-let Constraints = "$rd = $dst" in {
-def CSRWR : FmtCSR<0b0000010000001, (outs GPR:$dst),
-                   (ins GPR:$rd, uimm14:$csr_num), "csrwr", "$rd, $csr_num">;
-def CSRXCHG : FmtCSRXCHG<0b00000100, (outs GPR:$dst),
-                         (ins GPR:$rd, GPR:$rj, uimm14:$csr_num),
-                         "csrxchg", "$rd, $rj, $csr_num">;
-} // Constraints = "$rd = $dst"
-
-// IOCSR Access Instructions
-def IOCSRRD_B : IOCSRRD<0b0000011001001000000000, "iocsrrd.b">;
-def IOCSRRD_H : IOCSRRD<0b0000011001001000000001, "iocsrrd.h">;
-def IOCSRRD_W : IOCSRRD<0b0000011001001000000010, "iocsrrd.w">;
-def IOCSRWR_B : IOCSRWR<0b0000011001001000000100, "iocsrwr.b">;
-def IOCSRWR_H : IOCSRWR<0b0000011001001000000101, "iocsrwr.h">;
-def IOCSRWR_W : IOCSRWR<0b0000011001001000000110, "iocsrwr.w">;
-let Predicates = [IsLA64] in {
-def IOCSRRD_D : IOCSRRD<0b0000011001001000000011, "iocsrrd.d">;
-def IOCSRWR_D : IOCSRWR<0b0000011001001000000111, "iocsrwr.d">;
-} // Predicates = [IsLA64]
-
-// Cache Maintenance Instructions
-def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop",
-                     "$op, $rj, $imm12">;
-
-// TLB Maintenance Instructions
-def TLBSRCH : FmtI32<0b00000110010010000010100000000000, "tlbsrch">;
-def TLBRD : FmtI32<0b00000110010010000010110000000000, "tlbrd">;
-def TLBWR : FmtI32<0b00000110010010000011000000000000, "tlbwr">;
-def TLBFILL : FmtI32<0b00000110010010000011010000000000, "tlbfill">;
-def TLBCLR : FmtI32<0b00000110010010000010000000000000, "tlbclr">;
-def TLBFLUSH : FmtI32<0b00000110010010000010010000000000, "tlbflush">;
-def INVTLB : FmtINVTLB<(outs), (ins GPR:$rk, GPR:$rj, uimm5:$op), "invtlb",
-                       "$op, $rj, $rk">;
-
-// Software Page Walking Instructions
-def LDDIR : Fmt2RI8<0b00000110010000, (outs GPR:$rd),
-                    (ins GPR:$rj, uimm8:$imm8), "lddir", "$rd, $rj, $imm8">;
-def LDPTE : FmtLDPTE<(outs), (ins GPR:$rj, uimm8:$seq), "ldpte", "$rj, $seq">;
-
-
-// Other Miscellaneous Instructions
-def ERTN : FmtI32<0b00000110010010000011100000000000, "ertn">;
-def DBCL : MISC_I15<0b00000000001010101, "dbcl">;
-def IDLE : MISC_I15<0b00000110010010001, "idle">;
+def LoadImm32 : LoongArchAsmPseudoInst<(outs GPR32Opnd:$rd),
+                                       (ins uimm32_coerced:$imm32),
+                                       "li.w\t$rd, $imm32">;
+def LoadImm64 : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                       (ins imm64:$imm64),
+                                       "li.d\t$rd, $imm64">;
+// load address
+def LoadAddrLocal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                           (ins imm64:$imm64),
+                                           "la.local\t$rd, $imm64">;
+def : InstAlias<"la.pcrel $rd, $imm",
+                (LoadAddrLocal GPR64Opnd:$rd, imm64:$imm), 1>;
+def LoadAddrGlobal : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                            (ins imm64:$imm64),
+                                            "la.global\t$rd, $imm64">;
+def LoadAddrGlobal_Alias : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                                  (ins imm64:$imm64),
+                                                  "la\t$rd, $imm64">;
+def : InstAlias<"la.got $rd, $imm",
+                (LoadAddrGlobal GPR64Opnd:$rd, imm64:$imm), 1>;
+
+def LoadAddrTLS_LE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                            (ins imm64:$imm64),
+                                            "la.tls.le\t$rd, $imm64">;
+def LoadAddrTLS_IE : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                            (ins imm64:$imm64),
+                                            "la.tls.ie\t$rd, $imm64">;
+def LoadAddrTLS_GD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                            (ins imm64:$imm64),
+                                            "la.tls.gd\t$rd, $imm64">;
+def LoadAddrTLS_LD : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd),
+                                            (ins imm64:$imm64),
+                                            "la.tls.ld\t$rd, $imm64">;
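The li.*/la.* definitions above are assembler pseudos: the assembler picks the concrete expansion (a pc-relative pair for la.local/la.pcrel, a GOT load for la.global/la.got). A usage sketch, assuming a GNU-style LoongArch toolchain that accepts the pseudo in inline assembly:

    extern long g;

    long addr_of_g(void) {
        long a;
        /* expands to a multi-instruction sequence of the assembler's choice */
        __asm__("la.global %0, g" : "=r"(a));
        return a;
    }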
$rt, $imm64">; +def LoadAddrTLS_IE_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins GPR64Opnd:$rt, imm64:$imm64), + "la.tls.ie\t$rd, $rt, $imm64">; +def LoadAddrTLS_GD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins GPR64Opnd:$rt, imm64:$imm64), + "la.tls.gd\t$rd, $rt, $imm64">; +def LoadAddrTLS_LD_RR : LoongArchAsmPseudoInst<(outs GPR64Opnd:$rd), + (ins GPR64Opnd:$rt, imm64:$imm64), + "la.tls.ld\t$rd, $rt, $imm64">; + +// trap when div zero +def PseudoTEQ : LoongArchPseudo<(outs), (ins GPR64Opnd:$rt), []>; + + +def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, immSExt12:$imm12)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (ADDI_W GPR32:$src, immSExt12:$imm12), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (add GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (ADD_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SUB_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MUL_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, immZExt5:$imm5)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SLLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (shl GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SLL_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, immZExt5:$imm5)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SRLI_W GPR32:$src, immZExt5:$imm5), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (srl GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SRL_W GPR32:$src, GPR32:$src2), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, immZExt5:$imm5)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SRAI_W GPR32:$src, immZExt5:$imm5), sub_32)>; + +def : LoongArchPat<(i64 (sext (i32 (sra GPR32:$src, GPR32:$src2)))), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (SRA_W GPR32:$src, GPR32:$src2), sub_32)>; + + +def : LoongArchPat<(i64 (xor GPR64:$rj, (i64 -1))), + (NOR ZERO_64, GPR64:$rj)>; + +def : LoongArchPat<(and GPR64:$rj, (i64 (xor GPR64:$rk, (i64 -1)))), + (ANDN GPR64:$rj, GPR64:$rk)>; + +def : LoongArchPat<(i64 (or GPR64:$rj, (xor GPR64:$rk, (i64 -1)))), + (ORN GPR64:$rj, GPR64:$rk)>; + +def : LoongArchPat<(i64 (zext (i32 (seteq GPR64:$rj, (i64 0))))), + (SLTUI_64 GPR64:$rj, (i64 1))>; + + +def : LoongArchPat<(i64 (zext (i32 (srl GPR32:$src, immZExt5:$imm5)))), + (BSTRPICK_D (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), + (i32 31), immZExt5:$imm5)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td new file mode 100644 index 000000000000..4df5fc88e6e4 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td @@ -0,0 +1,630 @@ +//===- LoongArchInstrInfoF.td - Target Description for LoongArch Target -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the LoongArch implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// +// FP immediate patterns. 
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td
new file mode 100644
index 000000000000..4df5fc88e6e4
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfoF.td
@@ -0,0 +1,630 @@
+//===- LoongArchInstrInfoF.td - Target Description for LoongArch Target -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the LoongArch floating-point instructions.
+//
+//===----------------------------------------------------------------------===//
+// FP immediate patterns.
+def fpimm0 : PatLeaf<(fpimm), [{
+  return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimm0neg : PatLeaf<(fpimm), [{
+  return N->isExactlyValue(-0.0);
+}]>;
+
+def fpimm1 : PatLeaf<(fpimm), [{
+  return N->isExactlyValue(+1.0);
+}]>;
+
+def IsNotSoftFloat : Predicate<"!Subtarget->useSoftFloat()">,
+                     AssemblerPredicate<(all_of FeatureSoftFloat)>;
+
+class HARDFLOAT { list HardFloatPredicate = [IsNotSoftFloat]; }
+
+def SDT_LoongArchTruncIntFP : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
+
+def LoongArchTruncIntFP : SDNode<"LoongArchISD::TruncIntFP", SDT_LoongArchTruncIntFP>;
+
+def SDT_LoongArchFPBrcond : SDTypeProfile<0, 3, [SDTCisInt<0>,
+                                                 SDTCisVT<1, i32>,
+                                                 SDTCisVT<2, OtherVT>]>;
+
+def LoongArchFPBrcond : SDNode<"LoongArchISD::FPBrcond", SDT_LoongArchFPBrcond,
+                               [SDNPHasChain, SDNPOptInGlue]>;
+
+def SDT_LoongArchCMovFP : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, i32>,
+                                               SDTCisSameAs<1, 3>]>;
+
+def LoongArchCMovFP_T : SDNode<"LoongArchISD::CMovFP_T", SDT_LoongArchCMovFP, [SDNPInGlue]>;
+
+def LoongArchCMovFP_F : SDNode<"LoongArchISD::CMovFP_F", SDT_LoongArchCMovFP, [SDNPInGlue]>;
+
+def SDT_LoongArchFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>,
+                                              SDTCisVT<2, i32>]>;
+
+def LoongArchFPCmp : SDNode<"LoongArchISD::FPCmp", SDT_LoongArchFPCmp, [SDNPOutGlue]>;
+
+def SDT_LoongArchFSEL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+                                             SDTCisVT<2, i32>,
+                                             SDTCisSameAs<1, 3>]>;
+
+def LoongArchFSEL : SDNode<"LoongArchISD::FSEL", SDT_LoongArchFSEL,
+                           [SDNPInGlue]>;
+
+//===---------------------------------------------------------------------===/
+// Instruction Class Templates
+//===---------------------------------------------------------------------===/
+
+class Float_MOVF
+    : InstForm<(outs RO:$rd), (ins RC:$fj),
+               !strconcat(opstr, "\t$rd, $fj"),
+               [(set RO:$rd, (OpNode RC:$fj))],
+               FrmFR, opstr>, HARDFLOAT {
+  let isMoveReg = 1;
+}
+
+class Float_MOVT
+    : InstForm<(outs RO:$fd), (ins RC:$rj),
+               !strconcat(opstr, "\t$fd, $rj"),
+               [(set RO:$fd, (OpNode RC:$rj))],
+               FrmFR, opstr>, HARDFLOAT {
+  let isMoveReg = 1;
+}
+
+class Float_CVT
+    : InstForm<(outs RO:$fd), (ins RS:$fj),
+               !strconcat(opstr, "\t$fd, $fj"),
+               [(set RO:$fd, (OpNode RS:$fj))],
+               FrmFR, opstr>,
+      HARDFLOAT {
+  let hasSideEffects = 0;
+}
+
+/// float mov
+class Gpr_2_Fcsr
+    : InstForm<(outs FCSROpnd:$fcsr), (ins RO:$rj),
+               !strconcat(opstr, "\t$fcsr, $rj"),
+               [(set FCSROpnd:$fcsr, (OpNode RO:$rj))],
+               FrmR, opstr>;
+class Fcsr_2_Gpr
+    : InstForm<(outs RO:$rd), (ins FCSROpnd:$fcsr),
+               !strconcat(opstr, "\t$rd, $fcsr"),
+               [(set RO:$rd, (OpNode FCSROpnd:$fcsr))],
+               FrmR, opstr>;
+class Fgr_2_Fcfr
+    : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj),
+               !strconcat(opstr, "\t$cd, $fj"),
+               [(set FCFROpnd:$cd, (OpNode RO:$fj))],
+               FrmR, opstr>;
+class Fcfr_2_Fgr
+    : InstForm<(outs RO:$fd), (ins FCFROpnd:$cj),
+               !strconcat(opstr, "\t$fd, $cj"),
+               [(set RO:$fd, (OpNode FCFROpnd:$cj))],
+               FrmR, opstr>;
+class Gpr_2_Fcfr
+    : InstForm<(outs FCFROpnd:$cd), (ins RO:$rj),
+               !strconcat(opstr, "\t$cd, $rj"),
+               [(set FCFROpnd:$cd, (OpNode RO:$rj))],
+               FrmR, opstr>;
+class Fcfr_2_Gpr
+    : InstForm<(outs RO:$rd), (ins FCFROpnd:$cj),
+               !strconcat(opstr, "\t$rd, $cj"),
+               [(set RO:$rd, (OpNode FCFROpnd:$cj))],
+               FrmR, opstr>;
+
+class FLDX :
+  InstForm<(outs DRC:$fd), (ins PtrRC:$rj, PtrRC:$rk),
+           !strconcat(opstr, "\t$fd, $rj, $rk"),
+           [(set DRC:$fd, (OpNode (add iPTR:$rj, iPTR:$rk)))],
+           FrmR, opstr> {
+  let AddedComplexity = 20;
+}
+
+class FSTX :
+  InstForm<(outs), (ins DRC:$fd, PtrRC:$rj, PtrRC:$rk),
+           !strconcat(opstr, "\t$fd, $rj, $rk"),
+           [(OpNode DRC:$fd, (add iPTR:$rj, iPTR:$rk))],
+           FrmR, opstr> {
+  let AddedComplexity = 20;
+}
+
+/// f{maxa/mina}.{s/d}
+class Float_Reg3_Fmaxa
+    : InstForm<(outs RO:$fd), (ins RO:$fj, RO:$fk),
+               !strconcat(opstr, "\t$fd, $fj, $fk"),
+               [], FrmR, opstr>;
+/// frecip
+class Float_Reg2_Frecip
+    : InstForm<(outs RO:$fd), (ins RO:$fj),
+               !strconcat(opstr, "\t$fd, $fj"),
+               [(set RO:$fd, (OpNode fpimm1, RO:$fj))],
+               FrmR, opstr>;
+/// frsqrt
+class Float_Reg2_Frsqrt
+    : InstForm<(outs RO:$fd), (ins RO:$fj),
+               !strconcat(opstr, "\t$fd, $fj"),
+               [(set RO:$fd, (OpNode fpimm1, (fsqrt RO:$fj)))],
+               FrmR, opstr>;
+
+class BceqzBr :
+  InstForm<(outs), (ins FCFROpnd:$cj, opnd:$offset),
+           !strconcat(opstr, "\t$cj, $offset"),
+           [(LoongArchFPBrcond Op, FCFROpnd:$cj, bb:$offset)],
+           FrmFI, opstr>, HARDFLOAT {
+  let isBranch = 1;
+  let isTerminator = 1;
+  let hasFCCRegOperand = 1;
+}
+
+class FCMP_COND
+    : InstForm<(outs FCFROpnd:$cd), (ins RO:$fj, RO:$fk),
+               !strconcat("fcmp.", CondStr, ".", TypeStr, "\t$cd, $fj, $fk"),
+               [(set FCFROpnd:$cd, (OpNode RO:$fj, RO:$fk))],
+               FrmOther,
+               !strconcat("fcmp.", CondStr, ".", TypeStr)> {
+  bit isCTI = 1; // Carried over from Mips32r6InstrInfo.td (line 219); purpose unclear.
+}
+
+class FIELD_CMP_COND Val> {
+  bits<5> Value = Val;
+}
+def FIELD_CMP_COND_CAF : FIELD_CMP_COND<0x0>;
+def FIELD_CMP_COND_CUN : FIELD_CMP_COND<0x8>;
+def FIELD_CMP_COND_CEQ : FIELD_CMP_COND<0x4>;
+def FIELD_CMP_COND_CUEQ : FIELD_CMP_COND<0xC>;
+def FIELD_CMP_COND_CLT : FIELD_CMP_COND<0x2>;
+def FIELD_CMP_COND_CULT : FIELD_CMP_COND<0xA>;
+def FIELD_CMP_COND_CLE : FIELD_CMP_COND<0x6>;
+def FIELD_CMP_COND_CULE : FIELD_CMP_COND<0xE>;
+def FIELD_CMP_COND_CNE : FIELD_CMP_COND<0x10>;
+def FIELD_CMP_COND_COR : FIELD_CMP_COND<0x14>;
+def FIELD_CMP_COND_CUNE : FIELD_CMP_COND<0x18>;
+def FIELD_CMP_COND_SAF : FIELD_CMP_COND<0x1>;
+def FIELD_CMP_COND_SUN : FIELD_CMP_COND<0x9>;
+def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0x5>;
+def FIELD_CMP_COND_SUEQ : FIELD_CMP_COND<0xD>;
+def FIELD_CMP_COND_SLT : FIELD_CMP_COND<0x3>;
+def FIELD_CMP_COND_SULT : FIELD_CMP_COND<0xB>;
+def FIELD_CMP_COND_SLE : FIELD_CMP_COND<0x7>;
+def FIELD_CMP_COND_SULE : FIELD_CMP_COND<0xF>;
+def FIELD_CMP_COND_SNE : FIELD_CMP_COND<0x11>;
+def FIELD_CMP_COND_SOR : FIELD_CMP_COND<0x15>;
+def FIELD_CMP_COND_SUNE : FIELD_CMP_COND<0x19>;
+
+multiclass FCMP_COND_M op, string TypeStr,
+                       RegisterOperand RO> {
+  def FCMP_CAF_#NAME : FCMP_COND<"caf", TypeStr, RO>,
+                       R2_COND;
+  def FCMP_CUN_#NAME : FCMP_COND<"cun", TypeStr, RO, setuo>,
+                       R2_COND;
+  def FCMP_CEQ_#NAME : FCMP_COND<"ceq", TypeStr, RO, setoeq>,
+                       R2_COND;
+  def FCMP_CUEQ_#NAME : FCMP_COND<"cueq", TypeStr, RO, setueq>,
+                        R2_COND;
+  def FCMP_CLT_#NAME : FCMP_COND<"clt", TypeStr, RO, setolt>,
+                       R2_COND;
+  def FCMP_CULT_#NAME : FCMP_COND<"cult", TypeStr, RO, setult>,
+                        R2_COND;
+  def FCMP_CLE_#NAME : FCMP_COND<"cle", TypeStr, RO, setole>,
+                       R2_COND;
+  def FCMP_CULE_#NAME : FCMP_COND<"cule", TypeStr, RO, setule>,
+                        R2_COND;
+  def FCMP_CNE_#NAME : FCMP_COND<"cne", TypeStr, RO, setone>,
+                       R2_COND;
+  def FCMP_COR_#NAME : FCMP_COND<"cor", TypeStr, RO, seto>,
+                       R2_COND;
+  def FCMP_CUNE_#NAME : FCMP_COND<"cune", TypeStr, RO, setune>,
+                        R2_COND;
+
+  def FCMP_SAF_#NAME : FCMP_COND<"saf", TypeStr, RO>,
+                       R2_COND;
+  def FCMP_SUN_#NAME : FCMP_COND<"sun", TypeStr, RO>,
+                       R2_COND;
+  def FCMP_SEQ_#NAME : FCMP_COND<"seq", TypeStr, RO>,
+                       R2_COND;
+  def FCMP_SUEQ_#NAME : FCMP_COND<"sueq", TypeStr, RO>,
+                        R2_COND;
+  def FCMP_SLT_#NAME : FCMP_COND<"slt", TypeStr, RO>,
+                       R2_COND;
+  def FCMP_SULT_#NAME : FCMP_COND<"sult", TypeStr, RO>,
+                        R2_COND;
+  def FCMP_SLE_#NAME : FCMP_COND<"sle", TypeStr, RO>,
+                       R2_COND;
+  def FCMP_SULE_#NAME : FCMP_COND<"sule", TypeStr, RO>,
+                        R2_COND;
+  def FCMP_SNE_#NAME : FCMP_COND<"sne", TypeStr, RO>,
+                       R2_COND;
+  def FCMP_SOR_#NAME : FCMP_COND<"sor", TypeStr, RO>,
+                       R2_COND;
+  def FCMP_SUNE_#NAME : FCMP_COND<"sune", TypeStr, RO>,
+                        R2_COND;
+}
+
+//// comparisons supported via another comparison
+//multiclass FCmp_Pats {
+//  def : LoongArchPat<(seteq VT:$lhs, VT:$rhs),
+//                     (!cast("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>;
+//  def : LoongArchPat<(setgt VT:$lhs, VT:$rhs),
+//                     (!cast("FCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>;
+//  def : LoongArchPat<(setge VT:$lhs, VT:$rhs),
+//                     (!cast("FCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>;
+//  def : LoongArchPat<(setlt VT:$lhs, VT:$rhs),
+//                     (!cast("FCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>;
+//  def : LoongArchPat<(setle VT:$lhs, VT:$rhs),
+//                     (!cast("FCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>;
+//  def : LoongArchPat<(setne VT:$lhs, VT:$rhs),
+//                     (NOROp
+//                      (!cast("FCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs),
+//                      ZEROReg)>;
+//}
+
+
+///
+/// R2
+///
+def FABS_S : Float_Reg2<"fabs.s", FGR32Opnd, fabs>, R2F<0b0100000001>;
+def FABS_D : Float_Reg2<"fabs.d", FGR64Opnd, fabs>, R2F<0b0100000010>;
+def FNEG_S : Float_Reg2<"fneg.s", FGR32Opnd, fneg>, R2F<0b0100000101>;
+def FNEG_D : Float_Reg2<"fneg.d", FGR64Opnd, fneg>, R2F<0b0100000110>;
+def FLOGB_S : Float_Reg2<"flogb.s", FGR32Opnd>, R2F<0b0100001001>;
+def FLOGB_D : Float_Reg2<"flogb.d", FGR64Opnd>, R2F<0b0100001010>;
+def FCLASS_S : Float_Reg2<"fclass.s", FGR32Opnd>, R2F<0b0100001101>;
+def FCLASS_D : Float_Reg2<"fclass.d", FGR64Opnd>, R2F<0b0100001110>;
+def FSQRT_S : Float_Reg2<"fsqrt.s", FGR32Opnd, fsqrt>, R2F<0b0100010001>;
+def FSQRT_D : Float_Reg2<"fsqrt.d", FGR64Opnd, fsqrt>, R2F<0b0100010010>;
+def FRECIP_S : Float_Reg2_Frecip<"frecip.s", FGR32Opnd, fdiv>, R2F<0b0100010101>;
+def FRECIP_D : Float_Reg2_Frecip<"frecip.d", FGR64Opnd, fdiv>, R2F<0b0100010110>;
+def FRSQRT_S : Float_Reg2_Frsqrt<"frsqrt.s", FGR32Opnd, fdiv>, R2F<0b0100011001>;
+def FRSQRT_D : Float_Reg2_Frsqrt<"frsqrt.d", FGR64Opnd, fdiv>, R2F<0b0100011010>;
+def FMOV_S : Float_Reg2<"fmov.s", FGR32Opnd>, R2F<0b0100100101>;
+def FMOV_D : Float_Reg2<"fmov.d", FGR64Opnd>, R2F<0b0100100110>;
+
+def MOVGR2FR_W : Float_MOVT<"movgr2fr.w", FGR32Opnd, GPR32Opnd, bitconvert>, MOVFI<0b0100101001>;
+def MOVGR2FR_D : Float_MOVT<"movgr2fr.d", FGR64Opnd, GPR64Opnd, bitconvert>, MOVFI<0b0100101010>;
+def MOVGR2FRH_W : Float_MOVT<"movgr2frh.w", FGR64Opnd, GPR32Opnd>, MOVFI<0b0100101011>; // not implemented yet
+def MOVFR2GR_S : Float_MOVF<"movfr2gr.s", GPR32Opnd, FGR32Opnd, bitconvert>, MOVIF<0b0100101101>;
+def MOVFR2GR_D : Float_MOVF<"movfr2gr.d", GPR64Opnd, FGR64Opnd, bitconvert>, MOVIF<0b0100101110>;
+def MOVFRH2GR_S : Float_MOVF<"movfrh2gr.s", GPR32Opnd, FGR32Opnd>, MOVIF<0b0100101111>; // not implemented yet
+
+let isCodeGenOnly = 1 in {
+  def MOVFR2GR_DS : Float_MOVF<"movfr2gr.s", GPR64Opnd, FGR32Opnd>, MOVIF<0b0100101101>;
+}
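MOVGR2FR_W and MOVFR2GR_S above are selected for bitconvert, i.e. a raw bit-for-bit move between an integer and a floating-point register. The portable C spelling of that bitcast:

    #include <stdint.h>
    #include <string.h>

    float bits_to_float(uint32_t u) {
        float f;
        memcpy(&f, &u, sizeof f);   /* movgr2fr.w $fa0, $a0 */
        return f;
    }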
Float_Reg2<"ftintrp.w.d", FGR64Opnd>, R2F<0b1010010010>; +def FTINTRP_L_S : Float_Reg2<"ftintrp.l.s", FGR32Opnd>, R2F<0b1010011001>; +def FTINTRP_L_D : Float_Reg2<"ftintrp.l.d", FGR64Opnd>, R2F<0b1010011010>; +def FTINTRZ_W_S : Float_Reg2<"ftintrz.w.s", FGR32Opnd>, R2F<0b1010100001>; +def FTINTRZ_L_D : Float_Reg2<"ftintrz.l.d", FGR64Opnd>, R2F<0b1010101010>; +def FTINTRNE_W_S : Float_Reg2<"ftintrne.w.s", FGR32Opnd>, R2F<0b1010110001>; +def FTINTRNE_W_D : Float_Reg2<"ftintrne.w.d", FGR64Opnd>, R2F<0b1010110010>; +def FTINTRNE_L_S : Float_Reg2<"ftintrne.l.s", FGR32Opnd>, R2F<0b1010111001>; +def FTINTRNE_L_D : Float_Reg2<"ftintrne.l.d", FGR64Opnd>, R2F<0b1010111010>; + +def FTINT_W_S : Float_CVT<"ftint.w.s", FGR32Opnd, FGR32Opnd>, R2F<0b1011000001>; +def FTINT_W_D : Float_CVT<"ftint.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1011000010>; +def FTINT_L_S : Float_CVT<"ftint.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1011001001>; +def FTINT_L_D : Float_CVT<"ftint.l.d", FGR64Opnd, FGR64Opnd>, R2F<0b1011001010>; +def FFINT_S_W : Float_CVT<"ffint.s.w", FGR32Opnd, FGR32Opnd>, R2F<0b1101000100>; +def FFINT_S_L : Float_CVT<"ffint.s.l", FGR32Opnd, FGR64Opnd>, R2F<0b1101000110>; +def FFINT_D_W : Float_CVT<"ffint.d.w", FGR64Opnd, FGR32Opnd>, R2F<0b1101001000>; +def FFINT_D_L : Float_CVT<"ffint.d.l", FGR64Opnd, FGR64Opnd>, R2F<0b1101001010>; + +def FRINT_S : Float_Reg2<"frint.s", FGR32Opnd, frint>, R2F<0b1110010001>; +def FRINT_D : Float_Reg2<"frint.d", FGR64Opnd, frint>, R2F<0b1110010010>; + +/// +/// R3 +/// +def FADD_S : Float_Reg3<"fadd.s", FGR32Opnd, fadd>, R3F<0b000001>; +def FADD_D : Float_Reg3<"fadd.d", FGR64Opnd, fadd>, R3F<0b000010>; +def FSUB_S : Float_Reg3<"fsub.s", FGR32Opnd, fsub>, R3F<0b000101>; +def FSUB_D : Float_Reg3<"fsub.d", FGR64Opnd, fsub>, R3F<0b000110>; +def FMUL_S : Float_Reg3<"fmul.s", FGR32Opnd, fmul>, R3F<0b001001>; +def FMUL_D : Float_Reg3<"fmul.d", FGR64Opnd, fmul>, R3F<0b001010>; +def FDIV_S : Float_Reg3<"fdiv.s", FGR32Opnd, fdiv>, R3F<0b001101>; +def FDIV_D : Float_Reg3<"fdiv.d", FGR64Opnd, fdiv>, R3F<0b001110>; +def FMAX_S : Float_Reg3<"fmax.s", FGR32Opnd, fmaxnum_ieee>, R3F<0b010001>; +def FMAX_D : Float_Reg3<"fmax.d", FGR64Opnd, fmaxnum_ieee>, R3F<0b010010>; +def FMIN_S : Float_Reg3<"fmin.s", FGR32Opnd, fminnum_ieee>, R3F<0b010101>; +def FMIN_D : Float_Reg3<"fmin.d", FGR64Opnd, fminnum_ieee>, R3F<0b010110>; +def FMAXA_S : Float_Reg3_Fmaxa<"fmaxa.s", FGR32Opnd>, R3F<0b011001>; +def FMAXA_D : Float_Reg3_Fmaxa<"fmaxa.d", FGR64Opnd>, R3F<0b011010>; +def FMINA_S : Float_Reg3_Fmaxa<"fmina.s", FGR32Opnd>, R3F<0b011101>; +def FMINA_D : Float_Reg3_Fmaxa<"fmina.d", FGR64Opnd>, R3F<0b011110>; +def FSCALEB_S : Float_Reg3<"fscaleb.s", FGR32Opnd>, R3F<0b100001>; +def FSCALEB_D : Float_Reg3<"fscaleb.d", FGR64Opnd>, R3F<0b100010>; +def FCOPYSIGN_S : Float_Reg3<"fcopysign.s", FGR32Opnd, fcopysign>, R3F<0b100101>; +def FCOPYSIGN_D : Float_Reg3<"fcopysign.d", FGR64Opnd, fcopysign>, R3F<0b100110>; +/// +/// R4_IMM21 +/// +def FMADD_S : Mul_Reg4<"fmadd.s", FGR32Opnd>, R4MUL<0b0001>; +def FMADD_D : Mul_Reg4<"fmadd.d", FGR64Opnd>, R4MUL<0b0010>; +def FMSUB_S : Mul_Reg4<"fmsub.s", FGR32Opnd>, R4MUL<0b0101>; +def FMSUB_D : Mul_Reg4<"fmsub.d", FGR64Opnd>, R4MUL<0b0110>; +def FNMADD_S : NMul_Reg4<"fnmadd.s", FGR32Opnd>, R4MUL<0b1001>; +def FNMADD_D : NMul_Reg4<"fnmadd.d", FGR64Opnd>, R4MUL<0b1010>; +def FNMSUB_S : NMul_Reg4<"fnmsub.s", FGR32Opnd>, R4MUL<0b1101>; +def FNMSUB_D : NMul_Reg4<"fnmsub.d", FGR64Opnd>, R4MUL<0b1110>; + + +// fmadd: fj * fk + fa +def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, 
FGR64Opnd:$fa), + (FMADD_D $fj, $fk, $fa)>; + +def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa), + (FMADD_S $fj, $fk, $fa)>; + + +// fmsub: fj * fk - fa +def : LoongArchPat<(fma FGR64Opnd:$fj, FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), + (FMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; + +def : LoongArchPat<(fma FGR32Opnd:$fj, FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), + (FMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; + + +// fnmadd: -(fj * fk + fa) +def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, (fneg FGR64Opnd:$fa)), + (FNMADD_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; + +def : LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, (fneg FGR32Opnd:$fa)), + (FNMADD_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; + +// fnmsub: -(fj * fk - fa) +def : LoongArchPat<(fma (fneg FGR64Opnd:$fj), FGR64Opnd:$fk, FGR64Opnd:$fa), + (FNMSUB_D FGR64Opnd:$fj, FGR64Opnd:$fk, FGR64Opnd:$fa)>; + +def : LoongArchPat<(fma (fneg FGR32Opnd:$fj), FGR32Opnd:$fk, FGR32Opnd:$fa), + (FNMSUB_S FGR32Opnd:$fj, FGR32Opnd:$fk, FGR32Opnd:$fa)>; + +let Pattern = [] in { +defm S : FCMP_COND_M<0b01, "s", FGR32Opnd>; +defm D : FCMP_COND_M<0b10, "d", FGR64Opnd>; +} +// +//defm S : FCmp_Pats; +//defm D : FCmp_Pats; + +/// +/// Float point branching +/// +def LoongArch_BRANCH_F : PatLeaf<(i32 0)>; +def LoongArch_BRANCH_T : PatLeaf<(i32 1)>; + +def BCEQZ : BceqzBr<"bceqz", brtarget, LoongArch_BRANCH_F>, R1_BCEQZ<0>; +def BCNEZ : BceqzBr<"bcnez", brtarget, LoongArch_BRANCH_T>, R1_BCEQZ<1>; + +/// +/// FMOV +/// +def MOVGR2FCSR : Gpr_2_Fcsr<"movgr2fcsr", GPR64Opnd>, MOVGPR2FCSR; +def MOVFCSR2GR : Fcsr_2_Gpr<"movfcsr2gr", GPR64Opnd>, MOVFCSR2GPR; +def MOVFR2CF : Fgr_2_Fcfr<"movfr2cf", FGR64Opnd>, MOVFGR2FCFR; +def MOVCF2FR : Fcfr_2_Fgr<"movcf2fr", FGR64Opnd>, MOVFCFR2FGR; +def MOVGR2CF : Gpr_2_Fcfr<"movgr2cf", GPR64Opnd>, MOVGPR2FCFR; +def MOVCF2GR : Fcfr_2_Gpr<"movcf2gr", GPR64Opnd>, MOVFCFR2GPR; + +let isCodeGenOnly = 1 in { + def MOVFR2CF32 : Fgr_2_Fcfr<"movfr2cf", FGR32Opnd>, MOVFGR2FCFR; + def MOVCF2FR32 : Fcfr_2_Fgr<"movcf2fr", FGR32Opnd>, MOVFCFR2FGR; + def MOVGR2CF32 : Gpr_2_Fcfr<"movgr2cf", GPR32Opnd>, MOVGPR2FCFR; + def MOVCF2GR32 : Fcfr_2_Gpr<"movcf2gr", GPR32Opnd>, MOVFCFR2GPR; +} + +class Sel_Reg4 + : InstForm<(outs RO:$fd), (ins FCFROpnd:$ca, RO:$fj, RO:$fk), + !strconcat(opstr, "\t$fd, $fj, $fk, $ca"), + [(set RO:$fd, (LoongArchFSEL RO:$fj, FCFROpnd:$ca, RO:$fk))], + FrmR, opstr>{ + let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6]; + let hasFCCRegOperand = 1; + } + +def FSEL_T_S : Sel_Reg4<"fsel", FGR32Opnd>, R4SEL; +let isCodeGenOnly = 1 in { + def FSEL_T_D : Sel_Reg4<"fsel", FGR64Opnd>, R4SEL; +} + +/// +/// Mem access +/// +def FLD_S : FLd<"fld.s", FGR32Opnd, mem, load>, LOAD_STORE<0b1100>; +def FST_S : FSt<"fst.s", FGR32Opnd, mem, store>, LOAD_STORE<0b1101>; +def FLD_D : FLd<"fld.d", FGR64Opnd, mem, load>, LOAD_STORE<0b1110>; +def FST_D : FSt<"fst.d", FGR64Opnd, mem, store>, LOAD_STORE<0b1111>; + +def FLDX_S : FLDX<"fldx.s", FGR32Opnd, load>, R3MF<0b01100000>; +def FLDX_D : FLDX<"fldx.d", FGR64Opnd, load>, R3MF<0b01101000>; +def FSTX_S : FSTX<"fstx.s", FGR32Opnd, store>, R3MF<0b01110000>; +def FSTX_D : FSTX<"fstx.d", FGR64Opnd, store>, R3MF<0b01111000>; + +def FLDGT_S : Float_Int_Reg3<"fldgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101000>; +def FLDGT_D : Float_Int_Reg3<"fldgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101001>; +def FLDLE_S : Float_Int_Reg3<"fldle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101010>; +def FLDLE_D : Float_Int_Reg3<"fldle.d", FGR64Opnd, GPR64Opnd>, 
R3MF<0b11101011>; +def FSTGT_S : Float_STGT_LE<"fstgt.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101100>; +def FSTGT_D : Float_STGT_LE<"fstgt.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101101>; +def FSTLE_S : Float_STGT_LE<"fstle.s", FGR32Opnd, GPR64Opnd>, R3MF<0b11101110>; +def FSTLE_D : Float_STGT_LE<"fstle.d", FGR64Opnd, GPR64Opnd>, R3MF<0b11101111>; + +let isPseudo = 1, isCodeGenOnly = 1 in { + def PseudoFFINT_S_W : Float_CVT<"", FGR32Opnd, GPR32Opnd>; + def PseudoFFINT_D_W : Float_CVT<"", FGR64Opnd, GPR32Opnd>; + def PseudoFFINT_S_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; + def PseudoFFINT_D_L : Float_CVT<"", FGR64Opnd, GPR64Opnd>; +} + +def : LoongArchPat<(f32 (fpround FGR64Opnd:$src)), + (FCVT_S_D FGR64Opnd:$src)>; +def : LoongArchPat<(f64 (fpextend FGR32Opnd:$src)), + (FCVT_D_S FGR32Opnd:$src)>; + +def : LoongArchPat<(f32 (sint_to_fp GPR32Opnd:$src)), + (PseudoFFINT_S_W GPR32Opnd:$src)>; +def : LoongArchPat<(f64 (sint_to_fp GPR32Opnd:$src)), + (PseudoFFINT_D_W GPR32Opnd:$src)>; +def : LoongArchPat<(f32 (sint_to_fp GPR64Opnd:$src)), + (EXTRACT_SUBREG (PseudoFFINT_S_L GPR64Opnd:$src), sub_lo)>; +def : LoongArchPat<(f64 (sint_to_fp GPR64Opnd:$src)), + (PseudoFFINT_D_L GPR64Opnd:$src)>; + +def : LoongArchPat<(f32 fpimm0), (MOVGR2FR_W ZERO)>; +def : LoongArchPat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W ZERO))>; +def : LoongArchPat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W ZERO, 1)))>; +def : LoongArchPat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D ZERO_64, 1)))>; + +// Patterns for loads/stores with a reg+imm operand. +let AddedComplexity = 40 in { + def : LoadRegImmPat; + def : StoreRegImmPat; + def : LoadRegImmPat; + def : StoreRegImmPat; +} + +def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), + (FTINTRZ_W_S FGR32Opnd:$src)>; + +def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), + (FTINTRZ_L_D FGR64Opnd:$src)>; + +def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), + (FCVT_D_S (FTINTRZ_W_S FGR32Opnd:$src))>; + +def : LoongArchPat<(f32 (fcopysign FGR32Opnd:$lhs, FGR64Opnd:$rhs)), + (FCOPYSIGN_S FGR32Opnd:$lhs, (FCVT_S_D FGR64Opnd:$rhs))>; +def : LoongArchPat<(f64 (fcopysign FGR64Opnd:$lhs, FGR32Opnd:$rhs)), + (FCOPYSIGN_D FGR64Opnd:$lhs, (FCVT_D_S FGR32Opnd:$rhs))>; + +let PrintMethod = "printFCCOperand",EncoderMethod = "getFCMPEncoding" in + def condcode : Operand; + +class CEQS_FT : + InstForm<(outs), (ins RC:$fj, RC:$fk, condcode:$cond), + !strconcat("fcmp.$cond.", typestr, "\t$$fcc0, $fj, $fk"), + [(OpNode RC:$fj, RC:$fk, imm:$cond)], FrmFR, + !strconcat("fcmp.$cond.", typestr)>, HARDFLOAT { + let Defs = [FCC0, FCC1, FCC2, FCC3, FCC4, FCC5, FCC6, FCC7]; + let isCodeGenOnly = 1; + let hasFCCRegOperand = 1; +} + +def FCMP_S32 : CEQS_FT<"s", FGR32, LoongArchFPCmp>, CEQS_FM<0b01> { + bits<3> cd = 0; +} +def FCMP_D64 : CEQS_FT<"d", FGR64, LoongArchFPCmp>, CEQS_FM<0b10>{ + bits<3> cd = 0; +} + + +//multiclass FCmp_Pats2 { +// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; +// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; +// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), +// (!cast("SFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), +// (NOROp +// (!cast("SFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), +// ZEROReg)>; +// +// def : LoongArchPat<(seteq VT:$lhs, VT:$rhs), +// 
(!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setgt VT:$lhs, VT:$rhs), +// (!cast("DFCMP_CLE_"#NAME) VT:$rhs, VT:$lhs)>; +// def : LoongArchPat<(setge VT:$lhs, VT:$rhs), +// (!cast("DFCMP_CLT_"#NAME) VT:$rhs, VT:$lhs)>; +// def : LoongArchPat<(setlt VT:$lhs, VT:$rhs), +// (!cast("DFCMP_CLT_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setle VT:$lhs, VT:$rhs), +// (!cast("DFCMP_CLE_"#NAME) VT:$lhs, VT:$rhs)>; +// def : LoongArchPat<(setne VT:$lhs, VT:$rhs), +// (NOROp +// (!cast("DFCMP_CEQ_"#NAME) VT:$lhs, VT:$rhs), +// ZEROReg)>; +// } +// +//defm S : FCmp_Pats2; +//defm D : FCmp_Pats2; + +let usesCustomInserter = 1 in { + class Select_Pseudo : + LoongArchPseudo<(outs RC:$dst), (ins GPR32Opnd:$cond, RC:$T, RC:$F), + [(set RC:$dst, (select GPR32Opnd:$cond, RC:$T, RC:$F))]>; + + class SelectFP_Pseudo_T : + LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), + [(set RC:$dst, (LoongArchCMovFP_T RC:$T, FCFROpnd:$cond, RC:$F))]>; + + class SelectFP_Pseudo_F : + LoongArchPseudo<(outs RC:$dst), (ins FCFROpnd:$cond, RC:$T, RC:$F), + [(set RC:$dst, (LoongArchCMovFP_F RC:$T, FCFROpnd:$cond, RC:$F))]>; +} + +def PseudoSELECT_I : Select_Pseudo; +def PseudoSELECT_I64 : Select_Pseudo; +def PseudoSELECT_S : Select_Pseudo; +def PseudoSELECT_D64 : Select_Pseudo; + +def PseudoSELECTFP_T_I : SelectFP_Pseudo_T; +def PseudoSELECTFP_T_I64 : SelectFP_Pseudo_T; + +def PseudoSELECTFP_F_I : SelectFP_Pseudo_F; +def PseudoSELECTFP_F_I64 : SelectFP_Pseudo_F; + +class ABSS_FT : + InstForm<(outs DstRC:$fd), (ins SrcRC:$fj), !strconcat(opstr, "\t$fd, $fj"), + [(set DstRC:$fd, (OpNode SrcRC:$fj))], FrmFR, opstr>; + +def TRUNC_W_D : ABSS_FT<"ftintrz.w.d", FGR32Opnd, FGR64Opnd>, R2F<0b1010100010>; + +def FTINTRZ_L_S : ABSS_FT<"ftintrz.l.s", FGR64Opnd, FGR32Opnd>, R2F<0b1010101001>; + +def : LoongArchPat<(LoongArchTruncIntFP FGR64Opnd:$src), + (TRUNC_W_D FGR64Opnd:$src)>; + +def : LoongArchPat<(LoongArchTruncIntFP FGR32Opnd:$src), + (FTINTRZ_L_S FGR32Opnd:$src)>; + +def : Pat<(fcanonicalize FGR32Opnd:$src), (FMAX_S $src, $src)>; +def : Pat<(fcanonicalize FGR64Opnd:$src), (FMAX_D $src, $src)>; + +def : LoongArchPat<(i64 (sext (i32 (bitconvert FGR32Opnd:$src)))), + (MOVFR2GR_DS FGR32Opnd:$src)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td new file mode 100644 index 000000000000..8e255f857819 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td @@ -0,0 +1,448 @@ +//===- LoongArchLASXInstrFormats.td - LoongArch LASX Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +class LASXInst : InstLA<(outs), (ins), "", [], FrmOther>, + EXT_LASX { +} + +class LASXCBranch : LASXInst { +} + +class LASXSpecial : LASXInst { +} + +class LASXPseudo pattern>: + LoongArchPseudo { + let Predicates = [HasLASX]; +} + +class LASX_3R op>: LASXInst { + bits<5> xk; + bits<5> xj; + bits<5> xd; + + let Inst{31-15} = op; + let Inst{14-10} = xk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_4R op>: LASXInst { + bits<5> xa; + bits<5> xk; + bits<5> xj; + bits<5> xd; + + let Inst{31-20} = op; + let Inst{19-15} = xa; + let Inst{14-10} = xk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_XVFCMP op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<5> xk; + bits<5> cond; + + let Inst{31-20} = op; + let Inst{19-15} = cond; + let Inst{14-10} = xk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I12_S op>: LASXInst { + bits<5> xd; + bits<17> addr; + + let Inst{31-22} = op; + let Inst{21-10} = addr{11-0}; + let Inst{9-5} = addr{16-12}; + let Inst{4-0} = xd; +} + +class LASX_SI12_S op>: LASXInst { + bits<5> xd; + bits<17> addr; + + let Inst{31-22} = op; + let Inst{21-10} = addr{11-0}; + let Inst{9-5} = addr{16-12}; + let Inst{4-0} = xd; +} + +class LASX_SI11_S op>: LASXInst { + bits<5> xd; + bits<16> addr; + + let Inst{31-21} = op; + let Inst{20-10} = addr{10-0}; + let Inst{9-5} = addr{15-11}; + let Inst{4-0} = xd; +} + +class LASX_SI10_S op>: LASXInst { + bits<5> xd; + bits<15> addr; + + let Inst{31-20} = op; + let Inst{19-10} = addr{9-0}; + let Inst{9-5} = addr{14-10}; + let Inst{4-0} = xd; +} + +class LASX_SI9_S op>: LASXInst { + bits<5> xd; + bits<14> addr; + + let Inst{31-19} = op; + let Inst{18-10} = addr{8-0}; + let Inst{9-5} = addr{13-9}; + let Inst{4-0} = xd; +} + +class LASX_SI8_idx5 op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<8> si8; + bits<5> idx; + + let Inst{31-23} = op; + let Inst{22-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_SI8_idx2 op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<8> si8; + bits<2> idx; + + let Inst{31-20} = op; + let Inst{19-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_SI8_idx3 op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<8> si8; + bits<3> idx; + + let Inst{31-21} = op; + let Inst{20-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_SI8_idx4 op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<8> si8; + bits<4> idx; + + let Inst{31-22} = op; + let Inst{21-18} = idx; + let Inst{17-10} = si8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_3R_2GP op>: LASXInst { + bits<5> rk; + bits<5> rj; + bits<5> xd; + + let Inst{31-15} = op; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_3R_1GP op>: LASXInst { + bits<5> rk; + bits<5> xj; + bits<5> xd; + + let Inst{31-15} = op; + let Inst{14-10} = rk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I5 op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<5> si5; + + let Inst{31-15} = op; + let Inst{14-10} = si5; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I5_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<5> ui5; + + let Inst{31-15} = op; + let Inst{14-10} = ui5; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I5_mode_U op>: LASXInst { + bits<5> xd; + bits<5> mode; + bits<5> ui5; + + let Inst{31-15} = op; + let Inst{14-10} = ui5; + let 
Inst{9-5} = mode; + let Inst{4-0} = xd; +} + +class LASX_2R op>: LASXInst { + bits<5> xj; + bits<5> xd; + + let Inst{31-10} = op; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_SET op>: LASXInst { + bits<5> xj; + bits<3> cd; + + let Inst{31-10} = op; + let Inst{9-5} = xj; + let Inst{4-3} = 0b00; + let Inst{2-0} = cd; +} + +class LASX_2R_1GP op>: LASXInst { + bits<5> rj; + bits<5> xd; + + let Inst{31-10} = op; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_I3_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<3> ui3; + + let Inst{31-13} = op; + let Inst{12-10} = ui3; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I4_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<4> ui4; + + let Inst{31-14} = op; + let Inst{13-10} = ui4; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I6_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<6> ui6; + + let Inst{31-16} = op; + let Inst{15-10} = ui6; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I2_R_U op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<2> ui2; + + let Inst{31-12} = op; + let Inst{11-10} = ui2; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_I3_R_U op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<3> ui3; + + let Inst{31-13} = op; + let Inst{12-10} = ui3; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_ELM_COPY_U3 op>: LASXInst { + bits<5> rd; + bits<5> xj; + bits<3> ui3; + + let Inst{31-13} = op; + let Inst{12-10} = ui3; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +class LASX_ELM_COPY_U2 op>: LASXInst { + bits<5> rd; + bits<5> xj; + bits<2> ui2; + + let Inst{31-12} = op; + let Inst{11-10} = ui2; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +class LASX_I1_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<1> ui1; + + let Inst{31-11} = op; + let Inst{10} = ui1; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I2_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<2> ui2; + + let Inst{31-12} = op; + let Inst{11-10} = ui2; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_I7_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<7> ui7; + + let Inst{31-17} = op; + let Inst{16-10} = ui7; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +class LASX_1R_I13 op>: LASXInst { + bits<13> i13; + bits<5> xd; + + let Inst{31-18} = op; + let Inst{17-5} = i13; + let Inst{4-0} = xd; +} + +class LASX_I8_U op>: LASXInst { + bits<5> xd; + bits<5> xj; + bits<8> ui8; + + let Inst{31-18} = op; + let Inst{17-10} = ui8; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +class LASX_I1_R_U op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<1> ui1; + + let Inst{31-11} = op; + let Inst{10} = ui1; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_I4_R_U op>: LASXInst { + bits<5> xd; + bits<5> rj; + bits<4> ui4; + + let Inst{31-14} = op; + let Inst{13-10} = ui4; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +class LASX_ELM_COPY_B op>: LASXInst { + bits<5> rd; + bits<5> xj; + bits<4> ui4; + + let Inst{31-14} = op; + let Inst{13-10} = ui4; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +class LASX_ELM_COPY_D op>: LASXInst { + bits<5> rd; + bits<5> xj; + bits<1> ui1; + + let Inst{31-11} = op; + let Inst{10} = ui1; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +class LASX_Addr_SI8_idx1 op>: LASXInst { + bits<5> xd; + bits<13> addr; + bits<1> idx; + + let Inst{31-19} = op; + let Inst{18-11} = addr{7-0}; 
+  let Inst{10} = idx;
+  let Inst{9-5} = addr{12-8};
+  let Inst{4-0} = xd;
+}
+
+class LASX_1R_I13_I10<bits<17> op>: LASXInst {
+  bits<10> i10;
+  bits<5> xd;
+
+  let Inst{31-15} = op;
+  let Inst{14-5} = i10;
+  let Inst{4-0} = xd;
+}
+
+
+
+
+
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
new file mode 100644
index 000000000000..e26d3769f4d5
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -0,0 +1,5499 @@
+//===- LoongArchLASXInstrInfo.td - Loongson LASX instructions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Loongson LASX instructions.
+//
+//===----------------------------------------------------------------------===//
+def SDT_XVPERMI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
+                                       SDTCisSameAs<0, 1>,
+                                       SDTCisVT<2, i32>]>;
+def SDT_XVSHFI : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>,
+                                      SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+                                      SDTCisVT<3, i32>]>;
+def SDT_XVBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+
+def SDT_INSVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                     SDTCisSameAs<1, 2>,
+                                     SDTCisVT<3, i32>]>;
+
+def SDT_XVPICKVE : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                        SDTCisSameAs<1, 2>,
+                                        SDTCisVT<3, i32>]>;
+
+def SDT_XVSHUF4I : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>,
+                                        SDTCisSameAs<0, 1>,
+                                        SDTCisSameAs<0, 2>,
+                                        SDTCisVT<3, i32>]>;
+
+def LoongArchXVSHUFI : SDNode<"LoongArchISD::XVSHFI", SDT_XVSHFI>;
+
+def LoongArchXVSELI : SDNode<"LoongArchISD::XVSELI", SDT_XVSHFI>;
+
+def LoongArchXVPERMI : SDNode<"LoongArchISD::XVPERMI", SDT_XVPERMI>;
+
+def LoongArchXVBROADCAST : SDNode<"LoongArchISD::XVBROADCAST", SDT_XVBROADCAST>;
+
+def LoongArchINSVE : SDNode<"LoongArchISD::INSVE", SDT_INSVE>;
+
+def LoongArchXVSHUF4I : SDNode<"LoongArchISD::XVSHUF4I", SDT_XVSHUF4I>;
+
+def LoongArchXVPICKVE : SDNode<"LoongArchISD::XVPICKVE", SDT_INSVE>;
+
+def xvbroadcast_v32i8 : PatFrag<(ops node:$v1),
+                                (v32i8 (LoongArchXVBROADCAST node:$v1))>;
+def xvbroadcast_v16i16 : PatFrag<(ops node:$v1),
+                                 (v16i16 (LoongArchXVBROADCAST node:$v1))>;
+def xvbroadcast_v8i32 : PatFrag<(ops node:$v1),
+                                (v8i32 (LoongArchXVBROADCAST node:$v1))>;
+def xvbroadcast_v4i64 : PatFrag<(ops node:$v1),
+                                (v4i64 (LoongArchXVBROADCAST node:$v1))>;
+
+
+def vfseteq_v8f32 : vfsetcc_type;
+def vfseteq_v4f64 : vfsetcc_type;
+def vfsetge_v8f32 : vfsetcc_type;
+def vfsetge_v4f64 : vfsetcc_type;
+def vfsetgt_v8f32 : vfsetcc_type;
+def vfsetgt_v4f64 : vfsetcc_type;
+def vfsetle_v8f32 : vfsetcc_type;
+def vfsetle_v4f64 : vfsetcc_type;
+def vfsetlt_v8f32 : vfsetcc_type;
+def vfsetlt_v4f64 : vfsetcc_type;
+def vfsetne_v8f32 : vfsetcc_type;
+def vfsetne_v4f64 : vfsetcc_type;
+def vfsetoeq_v8f32 : vfsetcc_type;
+def vfsetoeq_v4f64 : vfsetcc_type;
+def vfsetoge_v8f32 : vfsetcc_type;
+def vfsetoge_v4f64 : vfsetcc_type;
+def vfsetogt_v8f32 : vfsetcc_type;
+def vfsetogt_v4f64 : vfsetcc_type;
+def vfsetole_v8f32 : vfsetcc_type;
+def vfsetole_v4f64 : vfsetcc_type;
+def vfsetolt_v8f32 : vfsetcc_type;
+def vfsetolt_v4f64 : vfsetcc_type;
+def vfsetone_v8f32 : vfsetcc_type;
+def vfsetone_v4f64 : vfsetcc_type;
+def vfsetord_v8f32 : vfsetcc_type;
+def vfsetord_v4f64 : vfsetcc_type;
+def vfsetun_v8f32 : vfsetcc_type;
+def vfsetun_v4f64 : vfsetcc_type;
+def
vfsetueq_v8f32 : vfsetcc_type; +def vfsetueq_v4f64 : vfsetcc_type; +def vfsetuge_v8f32 : vfsetcc_type; +def vfsetuge_v4f64 : vfsetcc_type; +def vfsetugt_v8f32 : vfsetcc_type; +def vfsetugt_v4f64 : vfsetcc_type; +def vfsetule_v8f32 : vfsetcc_type; +def vfsetule_v4f64 : vfsetcc_type; +def vfsetult_v8f32 : vfsetcc_type; +def vfsetult_v4f64 : vfsetcc_type; +def vfsetune_v8f32 : vfsetcc_type; +def vfsetune_v4f64 : vfsetcc_type; + +def xvsplati8 : PatFrag<(ops node:$e0), + (v32i8 (build_vector + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def xvsplati16 : PatFrag<(ops node:$e0), + (v16i16 (build_vector + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def xvsplati32 : PatFrag<(ops node:$e0), + (v8i32 (build_vector + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def xvsplati64 : PatFrag<(ops node:$e0), + (v4i64 (build_vector + node:$e0, node:$e0, node:$e0, node:$e0))>; +def xvsplatf32 : PatFrag<(ops node:$e0), + (v8f32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; +def xvsplatf64 : PatFrag<(ops node:$e0), + (v4f64 (build_vector node:$e0, node:$e0))>; + +def xvsplati8_uimm3 : SplatComplexPattern; +def xvsplati16_uimm4 : SplatComplexPattern; + +def xvsplati64_uimm6 : SplatComplexPattern; + +def xvsplati8_simm5 : SplatComplexPattern; +def xvsplati16_simm5 : SplatComplexPattern; +def xvsplati32_simm5 : SplatComplexPattern; +def xvsplati64_simm5 : SplatComplexPattern; + +def xvsplat_imm_eq_1 : PatLeaf<(build_vector), [{ + APInt Imm; + EVT EltTy = N->getValueType(0).getVectorElementType(); + return selectVSplat(N, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; +}]>; + +def xvsplati64_imm_eq_1 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{ + APInt Imm; + SDNode *BV = N->getOperand(0).getNode(); + EVT EltTy = N->getValueType(0).getVectorElementType(); + + return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; +}]>; + +def xvbitclr_b : PatFrag<(ops node:$xk, node:$xa), + (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), + immAllOnesV))>; +def xvbitclr_h : PatFrag<(ops node:$xk, node:$xa), + (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), + immAllOnesV))>; +def xvbitclr_w : PatFrag<(ops node:$xk, node:$xa), + (and node:$xk, (xor (shl vsplat_imm_eq_1, node:$xa), + immAllOnesV))>; +def xvbitclr_d : PatFrag<(ops node:$xk, node:$xa), + (and node:$xk, (xor (shl (v4i64 vsplati64_imm_eq_1), + node:$xa), + (bitconvert (v8i32 immAllOnesV))))>; + + + +def xvsplati8_uimm5 : SplatComplexPattern; +def xvsplati16_uimm5 : SplatComplexPattern; +def xvsplati32_uimm5 : SplatComplexPattern; +def xvsplati64_uimm5 : SplatComplexPattern; +def xvsplati8_uimm8 : SplatComplexPattern; +def xvsplati16_uimm8 : SplatComplexPattern; +def xvsplati32_uimm8 : SplatComplexPattern; +def xvsplati64_uimm8 : SplatComplexPattern; + + + +def xvsplati8_uimm4 : SplatComplexPattern; +def xvsplati16_uimm3 : SplatComplexPattern; +def xvsplati32_uimm2 : SplatComplexPattern; +def xvsplati64_uimm1 : SplatComplexPattern; + + +// Patterns. 
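
Editor's note: the xvbitclr_* PatFrags above all match the same lane-wise bit-clear idiom, xk & ~(1 << xa), differing only in element type so the selector can pick xvbitclr.{b,h,w,d} at each width. A minimal scalar model of one 8-bit lane follows, assuming the usual per-lane wrap of the shift amount; the function name is illustrative and not part of the patch:

    #include <cstdint>
    #include <cstdio>

    // One v32i8 lane of the xvbitclr.b idiom: clear bit (xa mod 8) of xk.
    static uint8_t xvbitclr_b_lane(uint8_t xk, uint8_t xa) {
      return xk & static_cast<uint8_t>(~(1u << (xa & 7)));
    }

    int main() {
      std::printf("%#x\n", xvbitclr_b_lane(0xFF, 3)); // prints 0xf7
      return 0;
    }
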
+class LASXPat pred = [HasLASX]> : + Pat, Requires; + +class LASX_4RF { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk, ROXA:$xa); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk, $xa"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk, ROXA:$xa))]; +} + +class LASX_3RF { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; +} + +class LASX_3R_SETCC_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, ROXK:$xk, CC)))]; +} + +class LASX_LD { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); + list Pattern = [(set ROXD:$xd, (TyNode (OpNode Addr:$addr)))]; + string DecoderMethod = "DecodeLASX256Mem"; +} + +class LASX_ST { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); + list Pattern = [(OpNode (TyNode ROXD:$xd), Addr:$addr)]; + string DecoderMethod = "DecodeLASX256Mem"; +} + +class LASX_I8_U5_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm5:$idx); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt5:$idx)]; + string DecoderMethod = "DecodeLASX256memstl"; +} + +class LASX_I8_U2_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; + string DecoderMethod = "DecodeLASX256memstl"; +} + +class LASX_I8_U3_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; + string DecoderMethod = "DecodeLASX256memstl"; +} + +class LASX_I8_U4_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; + string DecoderMethod = "DecodeLASX256memstl"; +} + +class LASX_SDX_LA { + dag OutOperandList = (outs); + dag InOperandList = (ins ROXD:$xd, PtrRC:$rj, RORK:$rk); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); + list Pattern = [(OpNode ROXD:$xd, iPTR:$rj, RORK:$rk)]; +} + +class LASX_3R_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; +} + +class LASX_LDX_LA { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins PtrRC:$rj, RORK:$rk); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $rk"); + list Pattern = [(set ROXD:$xd, (OpNode iPTR:$rj, RORK:$rk))]; +} + +class LASX_3R_4R_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); + string 
AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, + ROXK:$xk))]; + string Constraints = "$xd = $xd_in"; +} + + +class LASX_3R_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, GPR32Opnd:$rk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, GPR32Opnd:$rk))]; +} + + +class LASX_3R_VREPLVE_DESC_BASE_N { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, GPR64Opnd:$rk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $rk"); + list Pattern = []; +} + + +class LASX_VEC_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; +} + + + +class LASX_3RF_DESC_BASE : + LASX_3R_DESC_BASE; + + +class LASX_3R_DESC_BASE1 { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]; +} + +class LASX_3RF_DESC_BASE1 : + LASX_3R_DESC_BASE1; + + + +class LASX_3R_VSHF_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ROXK:$xk); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $xk"); + list Pattern = [(set ROXD:$xd, (LoongArchVSHF ROXD:$xd_in, ROXJ:$xj, + ROXK:$xk))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_I5_SETCC_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); + list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$si5, CC)))]; +} + +class LASX_I5_SETCC_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; +} + + +class LASX_I5_U_SETCC_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (VT (vsetcc ROXJ:$xj, SplatImm:$ui5, CC)))]; +} + +class LASX_I5_U_SETCC_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; +} + +class LASX_VEC_PSEUDO_BASE : + LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xj, ROXK:$xk), + [(set ROXD:$xd, (OpNode ROXJ:$xj, ROXK:$xk))]>; + + +class LASX_I5_U_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))]; +} + + +class LASX_I5_U_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; +} + +class LASX_U5_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, 
$ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))]; +} + +class LASX_U5N_DESC_BASE : + LASX_U5_DESC_BASE; + +class LASX_U5_4R_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_2R_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; +} + +class LASX_SET_DESC_BASE { + dag OutOperandList = (outs FCFROpnd:$cd); + dag InOperandList = (ins ROXD:$xj); + string AsmString = !strconcat(instr_asm, "\t$cd, $xj"); + list Pattern = []; +} + +class LASX_2RF_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj))]; +} + +class LASX_I5_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$si5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$si5))]; +} + +class LASX_I5_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$si5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $si5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$si5))]; +} + + +class LASX_2R_REPL_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROS:$rj); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj"); + list Pattern = [(set ROXD:$xd, (VT (OpNode ROS:$rj)))]; +} + +class LASX_XVEXTEND_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); + list Pattern = [(set ROXD:$xd, (DTy (OpNode (STy ROXJ:$xj))))]; +} + +class LASX_RORI_U3_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; +} + +class LASX_RORI_U4_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; +} + +class LASX_RORI_U5_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; +} + +class LASX_RORI_U6_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; +} + +class LASX_BIT_3_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; +} + +class LASX_BIT_4_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); + string AsmString = 
!strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; +} + +class LASX_BIT_5_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; +} + +class LASX_BIT_6_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui6))]; +} + +class LASX_BIT_2_4O_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui2))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_BIT_2_4ON : + LASX_BIT_2_4O_DESC_BASE; + +class LASX_BIT_3_4O_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui3))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_BIT_3_4ON : + LASX_BIT_3_4O_DESC_BASE; + +class LASX_INSERT_U3_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui3"); + list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui3)))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_INSERT_U2_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROS:$rj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$xd, $rj, $ui2"); + list Pattern = [(set ROXD:$xd, (VTy (insertelt (VTy ROXD:$xd_in), ROS:$rj, Imm:$ui2)))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_COPY_U2_DESC_BASE { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui2"); + list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui2))]; +} + +class LASX_COPY_U3_DESC_BASE { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$rd, $xj, $ui3"); + list Pattern = [(set ROD:$rd, (OpNode (VecTy ROXJ:$xj), Imm:$ui3))]; +} + +class LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))]; +} + +class LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))]; +} + +class LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm2:$ui2); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui2"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt2:$ui2))]; +} + +class LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm1:$ui1); + string AsmString = 
!strconcat(instr_asm, "\t$xd, $xj, $ui1"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt1:$ui1))]; +} + +class LASX_XVBROADCAST_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj"); + list Pattern = [(set ROXD:$xd, (OpNode (TyNode ROXJ:$xj)))]; +} + +class LASX_2R_U3_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm3:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt3:$ui3))]; +} + +class LASX_2R_U4_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt4:$ui4))]; +} + +class LASX_2R_U5_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt5:$ui5))]; +} + +class LASX_2R_U6_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm6:$ui6); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt6:$ui6))]; +} + +class LASX_BIT_U3_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui3))]; +} + +class LASX_BIT_U4_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui4); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui4))]; +} + +class LASX_BIT_U5_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui5))]; +} + +class LASX_BIT_U6_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui6); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui6))]; +} + +class LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui3"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui3))]; +} + +class LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui4))]; +} + +class LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, Imm:$ui5))]; +} + +class LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); + list Pattern = [(set ROXD:$xd, 
(OpNode ROXJ:$xj, Imm:$ui6))]; +} + +class LASX_U4_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in,ROXJ:$xj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in,ROXJ:$xj, Imm:$ui4))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_N4_U5_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui5))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_U6_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, Imm:$ui6))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_D_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_2R_3R_U4_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui4"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt4:$ui4))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_2R_3R_U5_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui5"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt5:$ui5))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_2R_3R_U6_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm6:$ui6); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui6"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt6:$ui6))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_2R_3R_U7_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm7:$ui7); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui7"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt7:$ui7))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_2R_3R_U8_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_2R_3R_U8_SELECT { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, vsplat_uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, xvsplati8_uimm8:$ui8, ROXJ:$xj))]; + string Constraints = "$xd = $xd_in"; +} + +class LASX_I8_O4_SHF_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); + list Pattern = [(set ROXD:$xd, (OpNode ROXD:$xd_in, ROXJ:$xj, immZExt8:$ui8))]; + 
string Constraints = "$xd = $xd_in"; +} + +class LASX_I8_SHF_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))]; +} + +class LASX_2R_U8_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, immZExt8:$ui8))]; +} + +class LASX_I13_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins immOp:$i13); + string AsmString = !strconcat(instr_asm, "\t$xd, $i13"); + list Pattern = [(set ROXD:$xd, (OpNode (Ty simm13:$i13)))]; + string DecoderMethod = "DecodeLASX256Mem13"; +} + +class LASX_I13_DESC_BASE_10 { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ImmOp:$i10); + string AsmString = !strconcat(instr_asm, "\t$xd, $i10"); + bit hasSideEffects = 0; + string DecoderMethod = "DecodeLASX256Mem10"; + list Pattern = [(set ROXD:$xd, (OpNode Imm:$i10))]; + } + +class LASX_BIT_U8_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXJ:$xj, SplatImm.OpClass:$ui8); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); + list Pattern = [(set ROXD:$xd, (OpNode ROXJ:$xj, SplatImm:$ui8))]; +} + +class LASX_2RN_3R_U8_DESC_BASE { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins ROXD:$xd_in, ROXJ:$xj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$xd, $xj, $ui8"); + list Pattern = []; + string Constraints = "$xd = $xd_in"; +} + + +//encoding + +def XVFMADD_S : LASX_4R<0b000010100001>, + LASX_4RF<"xvfmadd.s", int_loongarch_lasx_xvfmadd_s, LASX256WOpnd>; + +def XVFMADD_D : LASX_4R<0b000010100010>, + LASX_4RF<"xvfmadd.d", int_loongarch_lasx_xvfmadd_d, LASX256DOpnd>; + + +def XVFMSUB_S : LASX_4R<0b000010100101>, + LASX_4RF<"xvfmsub.s", int_loongarch_lasx_xvfmsub_s, LASX256WOpnd>; + +def XVFMSUB_D : LASX_4R<0b000010100110>, + LASX_4RF<"xvfmsub.d", int_loongarch_lasx_xvfmsub_d, LASX256DOpnd>; + + +def XVFNMADD_S : LASX_4R<0b000010101001>, + LASX_4RF<"xvfnmadd.s", int_loongarch_lasx_xvfnmadd_s, LASX256WOpnd>; + +def XVFNMADD_D : LASX_4R<0b000010101010>, + LASX_4RF<"xvfnmadd.d", int_loongarch_lasx_xvfnmadd_d, LASX256DOpnd>; + + +def XVFNMSUB_S : LASX_4R<0b000010101101>, + LASX_4RF<"xvfnmsub.s", int_loongarch_lasx_xvfnmsub_s, LASX256WOpnd>; + +def XVFNMSUB_D : LASX_4R<0b000010101110>, + LASX_4RF<"xvfnmsub.d", int_loongarch_lasx_xvfnmsub_d, LASX256DOpnd>; + + +// xvfmadd: xj * xk + xa +def : LASXPat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa), + (XVFMADD_D $xj, $xk, $xa)>; + +def : LASXPat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa), + (XVFMADD_S $xj, $xk, $xa)>; + + +// xvfmsub: xj * xk - xa +def : LASXPat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)), + (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + +def : LASXPat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)), + (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; + + +// xvfnmadd: -(xj * xk + xa) +def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)), + (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + +def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)), + (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>; + +// xvfnmsub: -(xj * xk - xa) +def : LASXPat<(fma (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), + (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>; + +def : LASXPat<(fma (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa), + (XVFNMSUB_S 
v8f32:$xj, v8f32:$xk, v8f32:$xa)>; + + +def XVFCMP_CAF_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.caf.s", int_loongarch_lasx_xvfcmp_caf_s, LASX256WOpnd>{ + bits<5> cond=0x0; + } + +def XVFCMP_CAF_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.caf.d", int_loongarch_lasx_xvfcmp_caf_d, LASX256DOpnd>{ + bits<5> cond=0x0; + } + +def XVFCMP_COR_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.cor.s", vfsetord_v8f32, LASX256WOpnd>{ + bits<5> cond=0x14; + } + +def XVFCMP_COR_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.cor.d", vfsetord_v4f64, LASX256DOpnd>{ + bits<5> cond=0x14; + } + +def XVFCMP_CUN_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.cun.s", vfsetun_v8f32, LASX256WOpnd>{ + bits<5> cond=0x8; + } + +def XVFCMP_CUN_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.cun.d", vfsetun_v4f64, LASX256DOpnd>{ + bits<5> cond=0x8; + } + +def XVFCMP_CUNE_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.cune.s", vfsetune_v8f32, LASX256WOpnd>{ + bits<5> cond=0x18; + } + +def XVFCMP_CUNE_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.cune.d", vfsetune_v4f64, LASX256DOpnd>{ + bits<5> cond=0x18; + } + +def XVFCMP_CUEQ_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.cueq.s", vfsetueq_v8f32, LASX256WOpnd>{ + bits<5> cond=0xc; + } + +def XVFCMP_CUEQ_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.cueq.d", vfsetueq_v4f64, LASX256DOpnd>{ + bits<5> cond=0xc; + } + +def XVFCMP_CEQ_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.ceq.s", vfsetoeq_v8f32, LASX256WOpnd>{ + bits<5> cond=0x4; + } + +def XVFCMP_CEQ_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.ceq.d", vfsetoeq_v4f64, LASX256DOpnd>{ + bits<5> cond=0x4; + } + +def XVFCMP_CNE_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.cne.s", vfsetone_v8f32, LASX256WOpnd>{ + bits<5> cond=0x10; + } + +def XVFCMP_CNE_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.cne.d", vfsetone_v4f64, LASX256DOpnd>{ + bits<5> cond=0x10; + } + +def XVFCMP_CLT_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.clt.s", vfsetolt_v8f32, LASX256WOpnd>{ + bits<5> cond=0x2; + } + +def XVFCMP_CLT_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.clt.d", vfsetolt_v4f64, LASX256DOpnd>{ + bits<5> cond=0x2; + } + +def XVFCMP_CULT_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.cult.s", vfsetult_v8f32, LASX256WOpnd>{ + bits<5> cond=0xa; + } + +def XVFCMP_CULT_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.cult.d", vfsetult_v4f64, LASX256DOpnd>{ + bits<5> cond=0xa; + } + +def XVFCMP_CLE_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.cle.s", vfsetole_v8f32, LASX256WOpnd>{ + bits<5> cond=0x6; + } + +def XVFCMP_CLE_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.cle.d", vfsetole_v4f64, LASX256DOpnd>{ + bits<5> cond=0x6; + } + +def XVFCMP_CULE_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.cule.s", vfsetule_v8f32, LASX256WOpnd>{ + bits<5> cond=0xe; + } + +def XVFCMP_CULE_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.cule.d", vfsetule_v4f64, LASX256DOpnd>{ + bits<5> cond=0xe; + } + +def XVFCMP_SAF_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.saf.s", int_loongarch_lasx_xvfcmp_saf_s, LASX256WOpnd>{ + bits<5> cond=0x1; + } + +def XVFCMP_SAF_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.saf.d", int_loongarch_lasx_xvfcmp_saf_d, LASX256DOpnd>{ + bits<5> cond=0x1; + } + +def XVFCMP_SOR_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.sor.s", int_loongarch_lasx_xvfcmp_sor_s, LASX256WOpnd>{ + bits<5> cond=0x15; + } + +def XVFCMP_SOR_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.sor.d", 
int_loongarch_lasx_xvfcmp_sor_d, LASX256DOpnd>{ + bits<5> cond=0x15; + } + +def XVFCMP_SUN_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.sun.s", int_loongarch_lasx_xvfcmp_sun_s, LASX256WOpnd>{ + bits<5> cond=0x9; + } + +def XVFCMP_SUN_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.sun.d", int_loongarch_lasx_xvfcmp_sun_d, LASX256DOpnd>{ + bits<5> cond=0x9; + } + +def XVFCMP_SUNE_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.sune.s", int_loongarch_lasx_xvfcmp_sune_s, LASX256WOpnd>{ + bits<5> cond=0x19; + } + +def XVFCMP_SUNE_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.sune.d", int_loongarch_lasx_xvfcmp_sune_d, LASX256DOpnd>{ + bits<5> cond=0x19; + } + +def XVFCMP_SUEQ_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.sueq.s", int_loongarch_lasx_xvfcmp_sueq_s, LASX256WOpnd>{ + bits<5> cond=0xd; + } + +def XVFCMP_SUEQ_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.sueq.d", int_loongarch_lasx_xvfcmp_sueq_d, LASX256DOpnd>{ + bits<5> cond=0xd; + } + +def XVFCMP_SEQ_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.seq.s", int_loongarch_lasx_xvfcmp_seq_s, LASX256WOpnd>{ + bits<5> cond=0x5; + } + +def XVFCMP_SEQ_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.seq.d", int_loongarch_lasx_xvfcmp_seq_d, LASX256DOpnd>{ + bits<5> cond=0x5; + } + +def XVFCMP_SNE_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.sne.s", int_loongarch_lasx_xvfcmp_sne_s, LASX256WOpnd>{ + bits<5> cond=0x11; + } + +def XVFCMP_SNE_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.sne.d", int_loongarch_lasx_xvfcmp_sne_d, LASX256DOpnd>{ + bits<5> cond=0x11; + } + +def XVFCMP_SLT_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.slt.s", int_loongarch_lasx_xvfcmp_slt_s, LASX256WOpnd>{ + bits<5> cond=0x3; + } + +def XVFCMP_SLT_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.slt.d", int_loongarch_lasx_xvfcmp_slt_d, LASX256DOpnd>{ + bits<5> cond=0x3; + } + +def XVFCMP_SULT_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.sult.s", int_loongarch_lasx_xvfcmp_sult_s, LASX256WOpnd>{ + bits<5> cond=0xb; + } + +def XVFCMP_SULT_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.sult.d", int_loongarch_lasx_xvfcmp_sult_d, LASX256DOpnd>{ + bits<5> cond=0xb; + } + +def XVFCMP_SLE_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.sle.s", int_loongarch_lasx_xvfcmp_sle_s, LASX256WOpnd>{ + bits<5> cond=0x7; + } + +def XVFCMP_SLE_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.sle.d", int_loongarch_lasx_xvfcmp_sle_d, LASX256DOpnd>{ + bits<5> cond=0x7; + } + +def XVFCMP_SULE_S : LASX_XVFCMP<0b000011001001>, + LASX_3RF<"xvfcmp.sule.s", int_loongarch_lasx_xvfcmp_sule_s, LASX256WOpnd>{ + bits<5> cond=0xf; + } + +def XVFCMP_SULE_D : LASX_XVFCMP<0b000011001010>, + LASX_3RF<"xvfcmp.sule.d", int_loongarch_lasx_xvfcmp_sule_d, LASX256DOpnd>{ + bits<5> cond=0xf; + } + + +def XVBITSEL_V : LASX_4R<0b000011010010>, + LASX_4RF<"xvbitsel.v", int_loongarch_lasx_xvbitsel_v, LASX256BOpnd>; + +class LASX_BSEL_PSEUDO_BASE : + LASXPseudo<(outs RO:$xd), (ins RO:$xd_in, RO:$xs, RO:$xt), + [(set RO:$xd, (Ty (vselect RO:$xd_in, RO:$xt, RO:$xs)))]>, + PseudoInstExpansion<(XVBITSEL_V LASX256BOpnd:$xd, LASX256BOpnd:$xs, + LASX256BOpnd:$xt, LASX256BOpnd:$xd_in)> { + let Constraints = "$xd_in = $xd"; +} + +def XBSEL_B_PSEUDO : LASX_BSEL_PSEUDO_BASE; +def XBSEL_H_PSEUDO : LASX_BSEL_PSEUDO_BASE; +def XBSEL_W_PSEUDO : LASX_BSEL_PSEUDO_BASE; +def XBSEL_D_PSEUDO : LASX_BSEL_PSEUDO_BASE; +def XBSEL_FW_PSEUDO : LASX_BSEL_PSEUDO_BASE; +def XBSEL_FD_PSEUDO : LASX_BSEL_PSEUDO_BASE; + + + +def XVSHUF_B : LASX_4R<0b000011010110>, + 
+ + + +def XVSHUF_B : LASX_4R<0b000011010110>, + LASX_4RF<"xvshuf.b", int_loongarch_lasx_xvshuf_b, LASX256BOpnd>; + + +def XVLD : LASX_I12_S<0b0010110010>, + LASX_LD<"xvld", load, v32i8, LASX256BOpnd, mem>; + +def XVST : LASX_I12_S<0b0010110011>, + LASX_ST<"xvst", store, v32i8, LASX256BOpnd, mem_simm12>; + + +class LASX_LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + ValueType TyNode, RegisterOperand ROXD, + Operand MemOpnd = mem_simm12, + ComplexPattern Addr = addrimm12> { + dag OutOperandList = (outs ROXD:$xd); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$xd, $addr"); + list<dag> Pattern = [(set ROXD:$xd, (OpNode (TyNode (load Addr:$addr))))]; + string DecoderMethod = "DecodeLASX256memlsl"; +} + + +def XVLDREPL_B : LASX_SI12_S<0b0011001010>, + LASX_LD_DESC_BASE<"xvldrepl.b", xvbroadcast_v32i8, v32i8, LASX256BOpnd>; + +def XVLDREPL_H : LASX_SI11_S<0b00110010010>, + LASX_LD_DESC_BASE<"xvldrepl.h", xvbroadcast_v16i16, v16i16, LASX256HOpnd, mem_simm11_lsl1, addrimm11lsl1>; + +def XVLDREPL_W : LASX_SI10_S<0b001100100010>, + LASX_LD_DESC_BASE<"xvldrepl.w", xvbroadcast_v8i32, v8i32, LASX256WOpnd, mem_simm10_lsl2, addrimm10lsl2>; + +def XVLDREPL_D : LASX_SI9_S<0b0011001000010>, + LASX_LD_DESC_BASE<"xvldrepl.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd, mem_simm9_lsl3, addrimm9lsl3>; + + +def XVSTELM_B : LASX_SI8_idx5<0b001100111>, + LASX_I8_U5_DESC_BASE<"xvstelm.b", int_loongarch_lasx_xvstelm_b, simm8_32, immSExt8, LASX256BOpnd, GPR32Opnd>; + +def XVSTELM_H : LASX_SI8_idx4<0b0011001101>, + LASX_I8_U4_DESC_BASE<"xvstelm.h", int_loongarch_lasx_xvstelm_h, immSExt8_1_O, immSExt8, LASX256HOpnd, GPR32Opnd>; + +def XVSTELM_W : LASX_SI8_idx3<0b00110011001>, + LASX_I8_U3_DESC_BASE<"xvstelm.w", int_loongarch_lasx_xvstelm_w, immSExt8_2_O, immSExt8, LASX256WOpnd, GPR32Opnd>; + +def XVSTELM_D : LASX_SI8_idx2<0b001100110001>, + LASX_I8_U2_DESC_BASE<"xvstelm.d", int_loongarch_lasx_xvstelm_d, immSExt8_3_O, immSExt8, LASX256DOpnd, GPR32Opnd>; + +let mayLoad = 1, canFoldAsLoad = 1 in { + def XVLDX : LASX_3R_2GP<0b00111000010010000>, + LASX_LDX_LA<"xvldx", int_loongarch_lasx_xvldx, GPR64Opnd, LASX256BOpnd>; +} + +let mayStore = 1 in { + def XVSTX : LASX_3R_2GP<0b00111000010011000>, + LASX_SDX_LA<"xvstx", int_loongarch_lasx_xvstx, GPR64Opnd, LASX256BOpnd>; +} + + +def XVSEQ_B : LASX_3R<0b01110100000000000>, IsCommutable, + LASX_3R_SETCC_DESC_BASE<"xvseq.b", SETEQ, v32i8, LASX256BOpnd>; + +def XVSEQ_H : LASX_3R<0b01110100000000001>, IsCommutable, + LASX_3R_SETCC_DESC_BASE<"xvseq.h", SETEQ, v16i16, LASX256HOpnd>; + +def XVSEQ_W : LASX_3R<0b01110100000000010>, IsCommutable, + LASX_3R_SETCC_DESC_BASE<"xvseq.w", SETEQ, v8i32, LASX256WOpnd>; + +def XVSEQ_D : LASX_3R<0b01110100000000011>, IsCommutable, + LASX_3R_SETCC_DESC_BASE<"xvseq.d", SETEQ, v4i64, LASX256DOpnd>; + + +def XVSLE_B : LASX_3R<0b01110100000000100>, + LASX_3R_SETCC_DESC_BASE<"xvsle.b", SETLE, v32i8, LASX256BOpnd>; + +def XVSLE_H : LASX_3R<0b01110100000000101>, + LASX_3R_SETCC_DESC_BASE<"xvsle.h", SETLE, v16i16, LASX256HOpnd>; + +def XVSLE_W : LASX_3R<0b01110100000000110>, + LASX_3R_SETCC_DESC_BASE<"xvsle.w", SETLE, v8i32, LASX256WOpnd>; + +def XVSLE_D : LASX_3R<0b01110100000000111>, + LASX_3R_SETCC_DESC_BASE<"xvsle.d", SETLE, v4i64, LASX256DOpnd>; + + +def XVSLE_BU : LASX_3R<0b01110100000001000>, + LASX_3R_SETCC_DESC_BASE<"xvsle.bu", SETULE, v32i8, LASX256BOpnd>; + +def XVSLE_HU : LASX_3R<0b01110100000001001>, + LASX_3R_SETCC_DESC_BASE<"xvsle.hu", SETULE, v16i16, LASX256HOpnd>; + +def XVSLE_WU : LASX_3R<0b01110100000001010>, + LASX_3R_SETCC_DESC_BASE<"xvsle.wu", SETULE, v8i32, LASX256WOpnd>; + +def XVSLE_DU : LASX_3R<0b01110100000001011>, + LASX_3R_SETCC_DESC_BASE<"xvsle.du", SETULE, v4i64, LASX256DOpnd>;
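The xvldrepl variants scale their signed immediate by the element size (simm11<<1 for .h, simm10<<2 for .w, simm9<<3 for .d, per the mem_simm*_lsl* operands), so every width keeps roughly the same ±2 KB byte reach as the simm12 .b form while giving up encoding bits. A hedged C sketch of the load-and-splat shape these patterns describe, assuming GCC/Clang vector extensions; the typedef and whether instruction selection actually folds this form are assumptions, not guarantees from the patch:

typedef short v16i16 __attribute__((vector_size(32)));

/* Load one halfword and broadcast it to all 16 lanes; with a byte offset
   of 6 (= 3 << 1) the address is encodable by xvldrepl.h's simm11<<1. */
v16i16 splat_load(const short *p) {
  short s = p[3];
  return (v16i16){s, s, s, s, s, s, s, s,
                  s, s, s, s, s, s, s, s};
}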
+ + +def XVSLT_B : LASX_3R<0b01110100000001100>, + LASX_3R_SETCC_DESC_BASE<"xvslt.b", SETLT, v32i8, LASX256BOpnd>; + +def XVSLT_H : LASX_3R<0b01110100000001101>, + LASX_3R_SETCC_DESC_BASE<"xvslt.h", SETLT, v16i16, LASX256HOpnd>; + +def XVSLT_W : LASX_3R<0b01110100000001110>, + LASX_3R_SETCC_DESC_BASE<"xvslt.w", SETLT, v8i32, LASX256WOpnd>; + +def XVSLT_D : LASX_3R<0b01110100000001111>, + LASX_3R_SETCC_DESC_BASE<"xvslt.d", SETLT, v4i64, LASX256DOpnd>; + + +def XVSLT_BU : LASX_3R<0b01110100000010000>, + LASX_3R_SETCC_DESC_BASE<"xvslt.bu", SETULT, v32i8, LASX256BOpnd>; + +def XVSLT_HU : LASX_3R<0b01110100000010001>, + LASX_3R_SETCC_DESC_BASE<"xvslt.hu", SETULT, v16i16, LASX256HOpnd>; + +def XVSLT_WU : LASX_3R<0b01110100000010010>, + LASX_3R_SETCC_DESC_BASE<"xvslt.wu", SETULT, v8i32, LASX256WOpnd>; + +def XVSLT_DU : LASX_3R<0b01110100000010011>, + LASX_3R_SETCC_DESC_BASE<"xvslt.du", SETULT, v4i64, LASX256DOpnd>; + + +def XVADD_B : LASX_3R<0b01110100000010100>, IsCommutable, + LASX_3R_DESC_BASE<"xvadd.b", add, LASX256BOpnd>; + +def XVADD_H : LASX_3R<0b01110100000010101>, IsCommutable, + LASX_3R_DESC_BASE<"xvadd.h", add, LASX256HOpnd>; + +def XVADD_W : LASX_3R<0b01110100000010110>, IsCommutable, + LASX_3R_DESC_BASE<"xvadd.w", add, LASX256WOpnd>; + +def XVADD_D : LASX_3R<0b01110100000010111>, IsCommutable, + LASX_3R_DESC_BASE<"xvadd.d", add, LASX256DOpnd>; + + +def XVSUB_B : LASX_3R<0b01110100000011000>, + LASX_3R_DESC_BASE<"xvsub.b", sub, LASX256BOpnd>; + +def XVSUB_H : LASX_3R<0b01110100000011001>, + LASX_3R_DESC_BASE<"xvsub.h", sub, LASX256HOpnd>; + +def XVSUB_W : LASX_3R<0b01110100000011010>, + LASX_3R_DESC_BASE<"xvsub.w", sub, LASX256WOpnd>; + +def XVSUB_D : LASX_3R<0b01110100000011011>, + LASX_3R_DESC_BASE<"xvsub.d", sub, LASX256DOpnd>; + + +def XVADDWEV_H_B : LASX_3R<0b01110100000111100>, + LASX_3R_DESC_BASE<"xvaddwev.h.b", int_loongarch_lasx_xvaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVADDWEV_W_H : LASX_3R<0b01110100000111101>, + LASX_3R_DESC_BASE<"xvaddwev.w.h", int_loongarch_lasx_xvaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVADDWEV_D_W : LASX_3R<0b01110100000111110>, + LASX_3R_DESC_BASE<"xvaddwev.d.w", int_loongarch_lasx_xvaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVADDWEV_Q_D : LASX_3R<0b01110100000111111>, + LASX_3R_DESC_BASE<"xvaddwev.q.d", int_loongarch_lasx_xvaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSUBWEV_H_B : LASX_3R<0b01110100001000000>, + LASX_3R_DESC_BASE<"xvsubwev.h.b", int_loongarch_lasx_xvsubwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVSUBWEV_W_H : LASX_3R<0b01110100001000001>, + LASX_3R_DESC_BASE<"xvsubwev.w.h", int_loongarch_lasx_xvsubwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSUBWEV_D_W : LASX_3R<0b01110100001000010>, + LASX_3R_DESC_BASE<"xvsubwev.d.w", int_loongarch_lasx_xvsubwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVSUBWEV_Q_D : LASX_3R<0b01110100001000011>, + LASX_3R_DESC_BASE<"xvsubwev.q.d", int_loongarch_lasx_xvsubwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVADDWOD_H_B : LASX_3R<0b01110100001000100>, + LASX_3R_DESC_BASE<"xvaddwod.h.b", int_loongarch_lasx_xvaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVADDWOD_W_H : LASX_3R<0b01110100001000101>, + LASX_3R_DESC_BASE<"xvaddwod.w.h", int_loongarch_lasx_xvaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVADDWOD_D_W : LASX_3R<0b01110100001000110>, + LASX_3R_DESC_BASE<"xvaddwod.d.w", int_loongarch_lasx_xvaddwod_d_w, LASX256DOpnd, 
LASX256WOpnd, LASX256WOpnd> ; + +def XVADDWOD_Q_D : LASX_3R<0b01110100001000111>, + LASX_3R_DESC_BASE<"xvaddwod.q.d", int_loongarch_lasx_xvaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSUBWOD_H_B : LASX_3R<0b01110100001001000>, + LASX_3R_DESC_BASE<"xvsubwod.h.b", int_loongarch_lasx_xvsubwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVSUBWOD_W_H : LASX_3R<0b01110100001001001>, + LASX_3R_DESC_BASE<"xvsubwod.w.h", int_loongarch_lasx_xvsubwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSUBWOD_D_W : LASX_3R<0b01110100001001010>, + LASX_3R_DESC_BASE<"xvsubwod.d.w", int_loongarch_lasx_xvsubwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVSUBWOD_Q_D : LASX_3R<0b01110100001001011>, + LASX_3R_DESC_BASE<"xvsubwod.q.d", int_loongarch_lasx_xvsubwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVADDWEV_H_BU : LASX_3R<0b01110100001011100>, + LASX_3R_DESC_BASE<"xvaddwev.h.bu", int_loongarch_lasx_xvaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVADDWEV_W_HU : LASX_3R<0b01110100001011101>, + LASX_3R_DESC_BASE<"xvaddwev.w.hu", int_loongarch_lasx_xvaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVADDWEV_D_WU : LASX_3R<0b01110100001011110>, + LASX_3R_DESC_BASE<"xvaddwev.d.wu", int_loongarch_lasx_xvaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVADDWEV_Q_DU : LASX_3R<0b01110100001011111>, + LASX_3R_DESC_BASE<"xvaddwev.q.du", int_loongarch_lasx_xvaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSUBWEV_H_BU : LASX_3R<0b01110100001100000>, + LASX_3R_DESC_BASE<"xvsubwev.h.bu", int_loongarch_lasx_xvsubwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVSUBWEV_W_HU : LASX_3R<0b01110100001100001>, + LASX_3R_DESC_BASE<"xvsubwev.w.hu", int_loongarch_lasx_xvsubwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSUBWEV_D_WU : LASX_3R<0b01110100001100010>, + LASX_3R_DESC_BASE<"xvsubwev.d.wu", int_loongarch_lasx_xvsubwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVSUBWEV_Q_DU : LASX_3R<0b01110100001100011>, + LASX_3R_DESC_BASE<"xvsubwev.q.du", int_loongarch_lasx_xvsubwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVADDWOD_H_BU : LASX_3R<0b01110100001100100>, + LASX_3R_DESC_BASE<"xvaddwod.h.bu", int_loongarch_lasx_xvaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVADDWOD_W_HU : LASX_3R<0b01110100001100101>, + LASX_3R_DESC_BASE<"xvaddwod.w.hu", int_loongarch_lasx_xvaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVADDWOD_D_WU : LASX_3R<0b01110100001100110>, + LASX_3R_DESC_BASE<"xvaddwod.d.wu", int_loongarch_lasx_xvaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVADDWOD_Q_DU : LASX_3R<0b01110100001100111>, + LASX_3R_DESC_BASE<"xvaddwod.q.du", int_loongarch_lasx_xvaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSUBWOD_H_BU : LASX_3R<0b01110100001101000>, + LASX_3R_DESC_BASE<"xvsubwod.h.bu", int_loongarch_lasx_xvsubwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVSUBWOD_W_HU : LASX_3R<0b01110100001101001>, + LASX_3R_DESC_BASE<"xvsubwod.w.hu", int_loongarch_lasx_xvsubwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSUBWOD_D_WU : LASX_3R<0b01110100001101010>, + LASX_3R_DESC_BASE<"xvsubwod.d.wu", int_loongarch_lasx_xvsubwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVSUBWOD_Q_DU : LASX_3R<0b01110100001101011>, + LASX_3R_DESC_BASE<"xvsubwod.q.du", int_loongarch_lasx_xvsubwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def 
XVADDWEV_H_BU_B : LASX_3R<0b01110100001111100>, + LASX_3R_DESC_BASE<"xvaddwev.h.bu.b", int_loongarch_lasx_xvaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVADDWEV_W_HU_H : LASX_3R<0b01110100001111101>, + LASX_3R_DESC_BASE<"xvaddwev.w.hu.h", int_loongarch_lasx_xvaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVADDWEV_D_WU_W : LASX_3R<0b01110100001111110>, + LASX_3R_DESC_BASE<"xvaddwev.d.wu.w", int_loongarch_lasx_xvaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVADDWEV_Q_DU_D : LASX_3R<0b01110100001111111>, + LASX_3R_DESC_BASE<"xvaddwev.q.du.d", int_loongarch_lasx_xvaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVADDWOD_H_BU_B : LASX_3R<0b01110100010000000>, + LASX_3R_DESC_BASE<"xvaddwod.h.bu.b", int_loongarch_lasx_xvaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVADDWOD_W_HU_H : LASX_3R<0b01110100010000001>, + LASX_3R_DESC_BASE<"xvaddwod.w.hu.h", int_loongarch_lasx_xvaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVADDWOD_D_WU_W : LASX_3R<0b01110100010000010>, + LASX_3R_DESC_BASE<"xvaddwod.d.wu.w", int_loongarch_lasx_xvaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVADDWOD_Q_DU_D : LASX_3R<0b01110100010000011>, + LASX_3R_DESC_BASE<"xvaddwod.q.du.d", int_loongarch_lasx_xvaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSADD_B : LASX_3R<0b01110100010001100>, IsCommutable, + LASX_3R_DESC_BASE<"xvsadd.b", saddsat, LASX256BOpnd>; + +def XVSADD_H : LASX_3R<0b01110100010001101>, IsCommutable, + LASX_3R_DESC_BASE<"xvsadd.h", saddsat, LASX256HOpnd>; + +def XVSADD_W : LASX_3R<0b01110100010001110>, IsCommutable, + LASX_3R_DESC_BASE<"xvsadd.w", saddsat, LASX256WOpnd>; + +def XVSADD_D : LASX_3R<0b01110100010001111>, IsCommutable, + LASX_3R_DESC_BASE<"xvsadd.d", saddsat, LASX256DOpnd>; + + +def XVSSUB_B : LASX_3R<0b01110100010010000>, + LASX_3R_DESC_BASE<"xvssub.b", ssubsat, LASX256BOpnd>; + +def XVSSUB_H : LASX_3R<0b01110100010010001>, + LASX_3R_DESC_BASE<"xvssub.h", ssubsat, LASX256HOpnd>; + +def XVSSUB_W : LASX_3R<0b01110100010010010>, + LASX_3R_DESC_BASE<"xvssub.w", ssubsat, LASX256WOpnd>; + +def XVSSUB_D : LASX_3R<0b01110100010010011>, + LASX_3R_DESC_BASE<"xvssub.d", ssubsat, LASX256DOpnd>; + + +def XVSADD_BU : LASX_3R<0b01110100010010100>, IsCommutable, + LASX_3R_DESC_BASE<"xvsadd.bu", uaddsat, LASX256BOpnd>; + +def XVSADD_HU : LASX_3R<0b01110100010010101>, IsCommutable, + LASX_3R_DESC_BASE<"xvsadd.hu", uaddsat, LASX256HOpnd>; + +def XVSADD_WU : LASX_3R<0b01110100010010110>, IsCommutable, + LASX_3R_DESC_BASE<"xvsadd.wu", uaddsat, LASX256WOpnd>; + +def XVSADD_DU : LASX_3R<0b01110100010010111>, IsCommutable, + LASX_3R_DESC_BASE<"xvsadd.du", uaddsat, LASX256DOpnd>; + + +def XVSSUB_BU : LASX_3R<0b01110100010011000>, + LASX_3R_DESC_BASE<"xvssub.bu", usubsat, LASX256BOpnd>; + +def XVSSUB_HU : LASX_3R<0b01110100010011001>, + LASX_3R_DESC_BASE<"xvssub.hu", usubsat, LASX256HOpnd>; + +def XVSSUB_WU : LASX_3R<0b01110100010011010>, + LASX_3R_DESC_BASE<"xvssub.wu", usubsat, LASX256WOpnd>; + +def XVSSUB_DU : LASX_3R<0b01110100010011011>, + LASX_3R_DESC_BASE<"xvssub.du", usubsat, LASX256DOpnd>; + + +def XVHADDW_H_B : LASX_3R<0b01110100010101000>, + LASX_3R_DESC_BASE<"xvhaddw.h.b", int_loongarch_lasx_xvhaddw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVHADDW_W_H : LASX_3R<0b01110100010101001>, + LASX_3R_DESC_BASE<"xvhaddw.w.h", int_loongarch_lasx_xvhaddw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVHADDW_D_W : 
LASX_3R<0b01110100010101010>, + LASX_3R_DESC_BASE<"xvhaddw.d.w", int_loongarch_lasx_xvhaddw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVHADDW_Q_D : LASX_3R<0b01110100010101011>, + LASX_3R_DESC_BASE<"xvhaddw.q.d", int_loongarch_lasx_xvhaddw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + + +def XVHSUBW_H_B : LASX_3R<0b01110100010101100>, + LASX_3R_DESC_BASE<"xvhsubw.h.b", int_loongarch_lasx_xvhsubw_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVHSUBW_W_H : LASX_3R<0b01110100010101101>, + LASX_3R_DESC_BASE<"xvhsubw.w.h", int_loongarch_lasx_xvhsubw_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVHSUBW_D_W : LASX_3R<0b01110100010101110>, + LASX_3R_DESC_BASE<"xvhsubw.d.w", int_loongarch_lasx_xvhsubw_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVHSUBW_Q_D : LASX_3R<0b01110100010101111>, + LASX_3R_DESC_BASE<"xvhsubw.q.d", int_loongarch_lasx_xvhsubw_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVHADDW_HU_BU : LASX_3R<0b01110100010110000>, + LASX_3R_DESC_BASE<"xvhaddw.hu.bu", int_loongarch_lasx_xvhaddw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVHADDW_WU_HU : LASX_3R<0b01110100010110001>, + LASX_3R_DESC_BASE<"xvhaddw.wu.hu", int_loongarch_lasx_xvhaddw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVHADDW_DU_WU : LASX_3R<0b01110100010110010>, + LASX_3R_DESC_BASE<"xvhaddw.du.wu", int_loongarch_lasx_xvhaddw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVHADDW_QU_DU : LASX_3R<0b01110100010110011>, + LASX_3R_DESC_BASE<"xvhaddw.qu.du", int_loongarch_lasx_xvhaddw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + + +def XVHSUBW_HU_BU : LASX_3R<0b01110100010110100>, + LASX_3R_DESC_BASE<"xvhsubw.hu.bu", int_loongarch_lasx_xvhsubw_hu_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVHSUBW_WU_HU : LASX_3R<0b01110100010110101>, + LASX_3R_DESC_BASE<"xvhsubw.wu.hu", int_loongarch_lasx_xvhsubw_wu_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVHSUBW_DU_WU : LASX_3R<0b01110100010110110>, + LASX_3R_DESC_BASE<"xvhsubw.du.wu", int_loongarch_lasx_xvhsubw_du_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVHSUBW_QU_DU : LASX_3R<0b01110100010110111>, + LASX_3R_DESC_BASE<"xvhsubw.qu.du", int_loongarch_lasx_xvhsubw_qu_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVADDA_B : LASX_3R<0b01110100010111000>, IsCommutable, + LASX_3R_DESC_BASE<"xvadda.b", int_loongarch_lasx_xvadda_b, LASX256BOpnd>; + +def XVADDA_H : LASX_3R<0b01110100010111001>, IsCommutable, + LASX_3R_DESC_BASE<"xvadda.h", int_loongarch_lasx_xvadda_h, LASX256HOpnd>; + +def XVADDA_W : LASX_3R<0b01110100010111010>, IsCommutable, + LASX_3R_DESC_BASE<"xvadda.w", int_loongarch_lasx_xvadda_w, LASX256WOpnd>; + +def XVADDA_D : LASX_3R<0b01110100010111011>, IsCommutable, + LASX_3R_DESC_BASE<"xvadda.d", int_loongarch_lasx_xvadda_d, LASX256DOpnd>; + + +def XVABSD_B : LASX_3R<0b01110100011000000>, + LASX_3R_DESC_BASE<"xvabsd.b", int_loongarch_lasx_xvabsd_b, LASX256BOpnd>; + +def XVABSD_H : LASX_3R<0b01110100011000001>, + LASX_3R_DESC_BASE<"xvabsd.h", int_loongarch_lasx_xvabsd_h, LASX256HOpnd>; + +def XVABSD_W : LASX_3R<0b01110100011000010>, + LASX_3R_DESC_BASE<"xvabsd.w", int_loongarch_lasx_xvabsd_w, LASX256WOpnd>; + +def XVABSD_D : LASX_3R<0b01110100011000011>, + LASX_3R_DESC_BASE<"xvabsd.d", int_loongarch_lasx_xvabsd_d, LASX256DOpnd>; + + +def XVABSD_BU : LASX_3R<0b01110100011000100>, + LASX_3R_DESC_BASE<"xvabsd.bu", int_loongarch_lasx_xvabsd_bu, LASX256BOpnd>; + +def XVABSD_HU : LASX_3R<0b01110100011000101>, + LASX_3R_DESC_BASE<"xvabsd.hu", 
int_loongarch_lasx_xvabsd_hu, LASX256HOpnd>; + +def XVABSD_WU : LASX_3R<0b01110100011000110>, + LASX_3R_DESC_BASE<"xvabsd.wu", int_loongarch_lasx_xvabsd_wu, LASX256WOpnd>; + +def XVABSD_DU : LASX_3R<0b01110100011000111>, + LASX_3R_DESC_BASE<"xvabsd.du", int_loongarch_lasx_xvabsd_du, LASX256DOpnd>; + + +def XVAVG_B : LASX_3R<0b01110100011001000>, IsCommutable, + LASX_3R_DESC_BASE<"xvavg.b", int_loongarch_lasx_xvavg_b, LASX256BOpnd>; + +def XVAVG_H : LASX_3R<0b01110100011001001>, IsCommutable, + LASX_3R_DESC_BASE<"xvavg.h", int_loongarch_lasx_xvavg_h, LASX256HOpnd>; + +def XVAVG_W : LASX_3R<0b01110100011001010>, IsCommutable, + LASX_3R_DESC_BASE<"xvavg.w", int_loongarch_lasx_xvavg_w, LASX256WOpnd>; + +def XVAVG_D : LASX_3R<0b01110100011001011>, IsCommutable, + LASX_3R_DESC_BASE<"xvavg.d", int_loongarch_lasx_xvavg_d, LASX256DOpnd>; + + +def XVAVG_BU : LASX_3R<0b01110100011001100>, IsCommutable, + LASX_3R_DESC_BASE<"xvavg.bu", int_loongarch_lasx_xvavg_bu, LASX256BOpnd>; + +def XVAVG_HU : LASX_3R<0b01110100011001101>, IsCommutable, + LASX_3R_DESC_BASE<"xvavg.hu", int_loongarch_lasx_xvavg_hu, LASX256HOpnd>; + +def XVAVG_WU : LASX_3R<0b01110100011001110>, IsCommutable, + LASX_3R_DESC_BASE<"xvavg.wu", int_loongarch_lasx_xvavg_wu, LASX256WOpnd>; + +def XVAVG_DU : LASX_3R<0b01110100011001111>, IsCommutable, + LASX_3R_DESC_BASE<"xvavg.du", int_loongarch_lasx_xvavg_du, LASX256DOpnd>; + + +def XVAVGR_B : LASX_3R<0b01110100011010000>, IsCommutable, + LASX_3R_DESC_BASE<"xvavgr.b", int_loongarch_lasx_xvavgr_b, LASX256BOpnd>; + +def XVAVGR_H : LASX_3R<0b01110100011010001>, IsCommutable, + LASX_3R_DESC_BASE<"xvavgr.h", int_loongarch_lasx_xvavgr_h, LASX256HOpnd>; + +def XVAVGR_W : LASX_3R<0b01110100011010010>, IsCommutable, + LASX_3R_DESC_BASE<"xvavgr.w", int_loongarch_lasx_xvavgr_w, LASX256WOpnd>; + +def XVAVGR_D : LASX_3R<0b01110100011010011>, IsCommutable, + LASX_3R_DESC_BASE<"xvavgr.d", int_loongarch_lasx_xvavgr_d, LASX256DOpnd>; + + +def XVAVGR_BU : LASX_3R<0b01110100011010100>, IsCommutable, + LASX_3R_DESC_BASE<"xvavgr.bu", int_loongarch_lasx_xvavgr_bu, LASX256BOpnd>; + +def XVAVGR_HU : LASX_3R<0b01110100011010101>, IsCommutable, + LASX_3R_DESC_BASE<"xvavgr.hu", int_loongarch_lasx_xvavgr_hu, LASX256HOpnd>; + +def XVAVGR_WU : LASX_3R<0b01110100011010110>, IsCommutable, + LASX_3R_DESC_BASE<"xvavgr.wu", int_loongarch_lasx_xvavgr_wu, LASX256WOpnd>; + +def XVAVGR_DU : LASX_3R<0b01110100011010111>, IsCommutable, + LASX_3R_DESC_BASE<"xvavgr.du", int_loongarch_lasx_xvavgr_du, LASX256DOpnd>; + + +def XVMAX_B : LASX_3R<0b01110100011100000>, + LASX_3R_DESC_BASE<"xvmax.b", smax, LASX256BOpnd>; + +def XVMAX_H : LASX_3R<0b01110100011100001>, + LASX_3R_DESC_BASE<"xvmax.h", smax, LASX256HOpnd>; + +def XVMAX_W : LASX_3R<0b01110100011100010>, + LASX_3R_DESC_BASE<"xvmax.w", smax, LASX256WOpnd>; + +def XVMAX_D : LASX_3R<0b01110100011100011>, + LASX_3R_DESC_BASE<"xvmax.d", smax, LASX256DOpnd>; + + +def XVMIN_B : LASX_3R<0b01110100011100100>, + LASX_3R_DESC_BASE<"xvmin.b", smin, LASX256BOpnd>; + +def XVMIN_H : LASX_3R<0b01110100011100101>, + LASX_3R_DESC_BASE<"xvmin.h", smin, LASX256HOpnd>; + +def XVMIN_W : LASX_3R<0b01110100011100110>, + LASX_3R_DESC_BASE<"xvmin.w", smin, LASX256WOpnd>; + +def XVMIN_D : LASX_3R<0b01110100011100111>, + LASX_3R_DESC_BASE<"xvmin.d", smin, LASX256DOpnd>; + + +def XVMAX_BU : LASX_3R<0b01110100011101000>, + LASX_3R_DESC_BASE<"xvmax.bu", umax, LASX256BOpnd>; + +def XVMAX_HU : LASX_3R<0b01110100011101001>, + LASX_3R_DESC_BASE<"xvmax.hu", umax, LASX256HOpnd>; + +def XVMAX_WU : 
LASX_3R<0b01110100011101010>, + LASX_3R_DESC_BASE<"xvmax.wu", umax, LASX256WOpnd>; + +def XVMAX_DU : LASX_3R<0b01110100011101011>, + LASX_3R_DESC_BASE<"xvmax.du", umax, LASX256DOpnd>; + + +def XVMIN_BU : LASX_3R<0b01110100011101100>, + LASX_3R_DESC_BASE<"xvmin.bu", umin, LASX256BOpnd>; + +def XVMIN_HU : LASX_3R<0b01110100011101101>, + LASX_3R_DESC_BASE<"xvmin.hu", umin, LASX256HOpnd>; + +def XVMIN_WU : LASX_3R<0b01110100011101110>, + LASX_3R_DESC_BASE<"xvmin.wu", umin, LASX256WOpnd>; + +def XVMIN_DU : LASX_3R<0b01110100011101111>, + LASX_3R_DESC_BASE<"xvmin.du", umin, LASX256DOpnd>; + + +def XVMUL_B : LASX_3R<0b01110100100001000>, + LASX_3R_DESC_BASE<"xvmul.b", mul, LASX256BOpnd>, IsCommutable; + +def XVMUL_H : LASX_3R<0b01110100100001001>, + LASX_3R_DESC_BASE<"xvmul.h", mul, LASX256HOpnd>, IsCommutable; + +def XVMUL_W : LASX_3R<0b01110100100001010>, + LASX_3R_DESC_BASE<"xvmul.w", mul, LASX256WOpnd>, IsCommutable; + +def XVMUL_D : LASX_3R<0b01110100100001011>, + LASX_3R_DESC_BASE<"xvmul.d", mul, LASX256DOpnd>, IsCommutable; + + +def XVMUH_B : LASX_3R<0b01110100100001100>, + LASX_3R_DESC_BASE<"xvmuh.b", int_loongarch_lasx_xvmuh_b, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMUH_H : LASX_3R<0b01110100100001101>, + LASX_3R_DESC_BASE<"xvmuh.h", int_loongarch_lasx_xvmuh_h, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMUH_W : LASX_3R<0b01110100100001110>, + LASX_3R_DESC_BASE<"xvmuh.w", int_loongarch_lasx_xvmuh_w, LASX256WOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVMUH_D : LASX_3R<0b01110100100001111>, + LASX_3R_DESC_BASE<"xvmuh.d", int_loongarch_lasx_xvmuh_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMUH_BU : LASX_3R<0b01110100100010000>, + LASX_3R_DESC_BASE<"xvmuh.bu", int_loongarch_lasx_xvmuh_bu, LASX256BOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMUH_HU : LASX_3R<0b01110100100010001>, + LASX_3R_DESC_BASE<"xvmuh.hu", int_loongarch_lasx_xvmuh_hu, LASX256HOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMUH_WU : LASX_3R<0b01110100100010010>, + LASX_3R_DESC_BASE<"xvmuh.wu", int_loongarch_lasx_xvmuh_wu, LASX256WOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVMUH_DU : LASX_3R<0b01110100100010011>, + LASX_3R_DESC_BASE<"xvmuh.du", int_loongarch_lasx_xvmuh_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMULWEV_H_B : LASX_3R<0b01110100100100000>, + LASX_3R_DESC_BASE<"xvmulwev.h.b", int_loongarch_lasx_xvmulwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMULWEV_W_H : LASX_3R<0b01110100100100001>, + LASX_3R_DESC_BASE<"xvmulwev.w.h", int_loongarch_lasx_xvmulwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMULWEV_D_W : LASX_3R<0b01110100100100010>, + LASX_3R_DESC_BASE<"xvmulwev.d.w", int_loongarch_lasx_xvmulwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVMULWEV_Q_D : LASX_3R<0b01110100100100011>, + LASX_3R_DESC_BASE<"xvmulwev.q.d", int_loongarch_lasx_xvmulwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMULWOD_H_B : LASX_3R<0b01110100100100100>, + LASX_3R_DESC_BASE<"xvmulwod.h.b", int_loongarch_lasx_xvmulwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMULWOD_W_H : LASX_3R<0b01110100100100101>, + LASX_3R_DESC_BASE<"xvmulwod.w.h", int_loongarch_lasx_xvmulwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMULWOD_D_W : LASX_3R<0b01110100100100110>, + LASX_3R_DESC_BASE<"xvmulwod.d.w", int_loongarch_lasx_xvmulwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVMULWOD_Q_D : LASX_3R<0b01110100100100111>, + LASX_3R_DESC_BASE<"xvmulwod.q.d", int_loongarch_lasx_xvmulwod_q_d, LASX256DOpnd, LASX256DOpnd, 
LASX256DOpnd>; + + +def XVMULWEV_H_BU : LASX_3R<0b01110100100110000>, + LASX_3R_DESC_BASE<"xvmulwev.h.bu", int_loongarch_lasx_xvmulwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMULWEV_W_HU : LASX_3R<0b01110100100110001>, + LASX_3R_DESC_BASE<"xvmulwev.w.hu", int_loongarch_lasx_xvmulwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMULWEV_D_WU : LASX_3R<0b01110100100110010>, + LASX_3R_DESC_BASE<"xvmulwev.d.wu", int_loongarch_lasx_xvmulwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVMULWEV_Q_DU : LASX_3R<0b01110100100110011>, + LASX_3R_DESC_BASE<"xvmulwev.q.du", int_loongarch_lasx_xvmulwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMULWOD_H_BU : LASX_3R<0b01110100100110100>, + LASX_3R_DESC_BASE<"xvmulwod.h.bu", int_loongarch_lasx_xvmulwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMULWOD_W_HU : LASX_3R<0b01110100100110101>, + LASX_3R_DESC_BASE<"xvmulwod.w.hu", int_loongarch_lasx_xvmulwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMULWOD_D_WU : LASX_3R<0b01110100100110110>, + LASX_3R_DESC_BASE<"xvmulwod.d.wu", int_loongarch_lasx_xvmulwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVMULWOD_Q_DU : LASX_3R<0b01110100100110111>, + LASX_3R_DESC_BASE<"xvmulwod.q.du", int_loongarch_lasx_xvmulwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMULWEV_H_BU_B : LASX_3R<0b01110100101000000>, + LASX_3R_DESC_BASE<"xvmulwev.h.bu.b", int_loongarch_lasx_xvmulwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMULWEV_W_HU_H : LASX_3R<0b01110100101000001>, + LASX_3R_DESC_BASE<"xvmulwev.w.hu.h", int_loongarch_lasx_xvmulwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMULWEV_D_WU_W : LASX_3R<0b01110100101000010>, + LASX_3R_DESC_BASE<"xvmulwev.d.wu.w", int_loongarch_lasx_xvmulwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVMULWEV_Q_DU_D : LASX_3R<0b01110100101000011>, + LASX_3R_DESC_BASE<"xvmulwev.q.du.d", int_loongarch_lasx_xvmulwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMULWOD_H_BU_B : LASX_3R<0b01110100101000100>, + LASX_3R_DESC_BASE<"xvmulwod.h.bu.b", int_loongarch_lasx_xvmulwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMULWOD_W_HU_H : LASX_3R<0b01110100101000101>, + LASX_3R_DESC_BASE<"xvmulwod.w.hu.h", int_loongarch_lasx_xvmulwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMULWOD_D_WU_W : LASX_3R<0b01110100101000110>, + LASX_3R_DESC_BASE<"xvmulwod.d.wu.w", int_loongarch_lasx_xvmulwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd> ; + +def XVMULWOD_Q_DU_D : LASX_3R<0b01110100101000111>, + LASX_3R_DESC_BASE<"xvmulwod.q.du.d", int_loongarch_lasx_xvmulwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMADD_B : LASX_3R<0b01110100101010000>, + LASX_3R_4R_DESC_BASE<"xvmadd.b", muladd, LASX256BOpnd>; + +def XVMADD_H : LASX_3R<0b01110100101010001>, + LASX_3R_4R_DESC_BASE<"xvmadd.h", muladd, LASX256HOpnd>; + +def XVMADD_W : LASX_3R<0b01110100101010010>, + LASX_3R_4R_DESC_BASE<"xvmadd.w", muladd, LASX256WOpnd>; + +def XVMADD_D : LASX_3R<0b01110100101010011>, + LASX_3R_4R_DESC_BASE<"xvmadd.d", muladd, LASX256DOpnd>; + + +def XVMSUB_B : LASX_3R<0b01110100101010100>, + LASX_3R_4R_DESC_BASE<"xvmsub.b", mulsub, LASX256BOpnd>; + +def XVMSUB_H : LASX_3R<0b01110100101010101>, + LASX_3R_4R_DESC_BASE<"xvmsub.h", mulsub, LASX256HOpnd>; + +def XVMSUB_W : LASX_3R<0b01110100101010110>, + LASX_3R_4R_DESC_BASE<"xvmsub.w", mulsub, LASX256WOpnd>; + +def XVMSUB_D : LASX_3R<0b01110100101010111>, + 
LASX_3R_4R_DESC_BASE<"xvmsub.d", mulsub, LASX256DOpnd>; + + +def XVMADDWEV_H_B : LASX_3R<0b01110100101011000>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.h.b", int_loongarch_lasx_xvmaddwev_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMADDWEV_W_H : LASX_3R<0b01110100101011001>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.w.h", int_loongarch_lasx_xvmaddwev_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMADDWEV_D_W : LASX_3R<0b01110100101011010>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.d.w", int_loongarch_lasx_xvmaddwev_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVMADDWEV_Q_D : LASX_3R<0b01110100101011011>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.q.d", int_loongarch_lasx_xvmaddwev_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMADDWOD_H_B : LASX_3R<0b01110100101011100>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.h.b", int_loongarch_lasx_xvmaddwod_h_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMADDWOD_W_H : LASX_3R<0b01110100101011101>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.w.h", int_loongarch_lasx_xvmaddwod_w_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMADDWOD_D_W : LASX_3R<0b01110100101011110>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.d.w", int_loongarch_lasx_xvmaddwod_d_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVMADDWOD_Q_D : LASX_3R<0b01110100101011111>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.q.d", int_loongarch_lasx_xvmaddwod_q_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMADDWEV_H_BU : LASX_3R<0b01110100101101000>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu", int_loongarch_lasx_xvmaddwev_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMADDWEV_W_HU : LASX_3R<0b01110100101101001>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu", int_loongarch_lasx_xvmaddwev_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMADDWEV_D_WU : LASX_3R<0b01110100101101010>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu", int_loongarch_lasx_xvmaddwev_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVMADDWEV_Q_DU : LASX_3R<0b01110100101101011>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du", int_loongarch_lasx_xvmaddwev_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMADDWOD_H_BU : LASX_3R<0b01110100101101100>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu", int_loongarch_lasx_xvmaddwod_h_bu, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMADDWOD_W_HU : LASX_3R<0b01110100101101101>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu", int_loongarch_lasx_xvmaddwod_w_hu, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMADDWOD_D_WU : LASX_3R<0b01110100101101110>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu", int_loongarch_lasx_xvmaddwod_d_wu, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVMADDWOD_Q_DU : LASX_3R<0b01110100101101111>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du", int_loongarch_lasx_xvmaddwod_q_du, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMADDWEV_H_BU_B : LASX_3R<0b01110100101111000>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.h.bu.b", int_loongarch_lasx_xvmaddwev_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMADDWEV_W_HU_H : LASX_3R<0b01110100101111001>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.w.hu.h", int_loongarch_lasx_xvmaddwev_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMADDWEV_D_WU_W : LASX_3R<0b01110100101111010>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.d.wu.w", int_loongarch_lasx_xvmaddwev_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVMADDWEV_Q_DU_D : LASX_3R<0b01110100101111011>, + LASX_3R_4R_DESC_BASE<"xvmaddwev.q.du.d", int_loongarch_lasx_xvmaddwev_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVMADDWOD_H_BU_B : 
LASX_3R<0b01110100101111100>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.h.bu.b", int_loongarch_lasx_xvmaddwod_h_bu_b, LASX256HOpnd, LASX256BOpnd, LASX256BOpnd>; + +def XVMADDWOD_W_HU_H : LASX_3R<0b01110100101111101>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.w.hu.h", int_loongarch_lasx_xvmaddwod_w_hu_h, LASX256WOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVMADDWOD_D_WU_W : LASX_3R<0b01110100101111110>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.d.wu.w", int_loongarch_lasx_xvmaddwod_d_wu_w, LASX256DOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVMADDWOD_Q_DU_D : LASX_3R<0b01110100101111111>, + LASX_3R_4R_DESC_BASE<"xvmaddwod.q.du.d", int_loongarch_lasx_xvmaddwod_q_du_d, LASX256DOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVDIV_B : LASX_3R<0b01110100111000000>, + LASX_3R_DESC_BASE<"xvdiv.b", sdiv, LASX256BOpnd>; + +def XVDIV_H : LASX_3R<0b01110100111000001>, + LASX_3R_DESC_BASE<"xvdiv.h", sdiv, LASX256HOpnd>; + +def XVDIV_W : LASX_3R<0b01110100111000010>, + LASX_3R_DESC_BASE<"xvdiv.w", sdiv, LASX256WOpnd>; + +def XVDIV_D : LASX_3R<0b01110100111000011>, + LASX_3R_DESC_BASE<"xvdiv.d", sdiv, LASX256DOpnd>; + + +def XVMOD_B : LASX_3R<0b01110100111000100>, + LASX_3R_DESC_BASE<"xvmod.b", srem, LASX256BOpnd>; + +def XVMOD_H : LASX_3R<0b01110100111000101>, + LASX_3R_DESC_BASE<"xvmod.h", srem, LASX256HOpnd>; + +def XVMOD_W : LASX_3R<0b01110100111000110>, + LASX_3R_DESC_BASE<"xvmod.w", srem, LASX256WOpnd>; + +def XVMOD_D : LASX_3R<0b01110100111000111>, + LASX_3R_DESC_BASE<"xvmod.d", srem, LASX256DOpnd>; + + +def XVDIV_BU : LASX_3R<0b01110100111001000>, + LASX_3R_DESC_BASE<"xvdiv.bu", udiv, LASX256BOpnd>; + +def XVDIV_HU : LASX_3R<0b01110100111001001>, + LASX_3R_DESC_BASE<"xvdiv.hu", udiv, LASX256HOpnd>; + +def XVDIV_WU : LASX_3R<0b01110100111001010>, + LASX_3R_DESC_BASE<"xvdiv.wu", udiv, LASX256WOpnd>; + +def XVDIV_DU : LASX_3R<0b01110100111001011>, + LASX_3R_DESC_BASE<"xvdiv.du", udiv, LASX256DOpnd>; + + +def XVMOD_BU : LASX_3R<0b01110100111001100>, + LASX_3R_DESC_BASE<"xvmod.bu", urem, LASX256BOpnd>; + +def XVMOD_HU : LASX_3R<0b01110100111001101>, + LASX_3R_DESC_BASE<"xvmod.hu", urem, LASX256HOpnd>; + +def XVMOD_WU : LASX_3R<0b01110100111001110>, + LASX_3R_DESC_BASE<"xvmod.wu", urem, LASX256WOpnd>; + +def XVMOD_DU : LASX_3R<0b01110100111001111>, + LASX_3R_DESC_BASE<"xvmod.du", urem, LASX256DOpnd>; + + +def XVSLL_B : LASX_3R<0b01110100111010000>, + LASX_3R_DESC_BASE<"xvsll.b", shl, LASX256BOpnd>; + +def XVSLL_H : LASX_3R<0b01110100111010001>, + LASX_3R_DESC_BASE<"xvsll.h", shl, LASX256HOpnd>; + +def XVSLL_W : LASX_3R<0b01110100111010010>, + LASX_3R_DESC_BASE<"xvsll.w", shl, LASX256WOpnd>; + +def XVSLL_D : LASX_3R<0b01110100111010011>, + LASX_3R_DESC_BASE<"xvsll.d", shl, LASX256DOpnd>; + + +def XVSRL_B : LASX_3R<0b01110100111010100>, + LASX_3R_DESC_BASE<"xvsrl.b", srl, LASX256BOpnd>; + +def XVSRL_H : LASX_3R<0b01110100111010101>, + LASX_3R_DESC_BASE<"xvsrl.h", srl, LASX256HOpnd>; + +def XVSRL_W : LASX_3R<0b01110100111010110>, + LASX_3R_DESC_BASE<"xvsrl.w", srl, LASX256WOpnd>; + +def XVSRL_D : LASX_3R<0b01110100111010111>, + LASX_3R_DESC_BASE<"xvsrl.d", srl, LASX256DOpnd>; + + +def XVSRA_B : LASX_3R<0b01110100111011000>, + LASX_3R_DESC_BASE<"xvsra.b", sra, LASX256BOpnd>; + +def XVSRA_H : LASX_3R<0b01110100111011001>, + LASX_3R_DESC_BASE<"xvsra.h", sra, LASX256HOpnd>; + +def XVSRA_W : LASX_3R<0b01110100111011010>, + LASX_3R_DESC_BASE<"xvsra.w", sra, LASX256WOpnd>; + +def XVSRA_D : LASX_3R<0b01110100111011011>, + LASX_3R_DESC_BASE<"xvsra.d", sra, LASX256DOpnd>; + + +def XVROTR_B : LASX_3R<0b01110100111011100>, + 
LASX_3R_DESC_BASE<"xvrotr.b", int_loongarch_lasx_xvrotr_b, LASX256BOpnd>; + +def XVROTR_H : LASX_3R<0b01110100111011101>, + LASX_3R_DESC_BASE<"xvrotr.h", int_loongarch_lasx_xvrotr_h, LASX256HOpnd>; + +def XVROTR_W : LASX_3R<0b01110100111011110>, + LASX_3R_DESC_BASE<"xvrotr.w", int_loongarch_lasx_xvrotr_w, LASX256WOpnd>; + +def XVROTR_D : LASX_3R<0b01110100111011111>, + LASX_3R_DESC_BASE<"xvrotr.d", int_loongarch_lasx_xvrotr_d, LASX256DOpnd>; + + +def XVSRLR_B : LASX_3R<0b01110100111100000>, + LASX_3R_DESC_BASE<"xvsrlr.b", int_loongarch_lasx_xvsrlr_b, LASX256BOpnd>; + +def XVSRLR_H : LASX_3R<0b01110100111100001>, + LASX_3R_DESC_BASE<"xvsrlr.h", int_loongarch_lasx_xvsrlr_h, LASX256HOpnd>; + +def XVSRLR_W : LASX_3R<0b01110100111100010>, + LASX_3R_DESC_BASE<"xvsrlr.w", int_loongarch_lasx_xvsrlr_w, LASX256WOpnd>; + +def XVSRLR_D : LASX_3R<0b01110100111100011>, + LASX_3R_DESC_BASE<"xvsrlr.d", int_loongarch_lasx_xvsrlr_d, LASX256DOpnd>; + + +def XVSRAR_B : LASX_3R<0b01110100111100100>, + LASX_3R_DESC_BASE<"xvsrar.b", int_loongarch_lasx_xvsrar_b, LASX256BOpnd>; + +def XVSRAR_H : LASX_3R<0b01110100111100101>, + LASX_3R_DESC_BASE<"xvsrar.h", int_loongarch_lasx_xvsrar_h, LASX256HOpnd>; + +def XVSRAR_W : LASX_3R<0b01110100111100110>, + LASX_3R_DESC_BASE<"xvsrar.w", int_loongarch_lasx_xvsrar_w, LASX256WOpnd>; + +def XVSRAR_D : LASX_3R<0b01110100111100111>, + LASX_3R_DESC_BASE<"xvsrar.d", int_loongarch_lasx_xvsrar_d, LASX256DOpnd>; + + +def XVSRLN_B_H : LASX_3R<0b01110100111101001>, + LASX_3R_DESC_BASE<"xvsrln.b.h", int_loongarch_lasx_xvsrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSRLN_H_W : LASX_3R<0b01110100111101010>, + LASX_3R_DESC_BASE<"xvsrln.h.w", int_loongarch_lasx_xvsrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSRLN_W_D : LASX_3R<0b01110100111101011>, + LASX_3R_DESC_BASE<"xvsrln.w.d", int_loongarch_lasx_xvsrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSRAN_B_H : LASX_3R<0b01110100111101101>, + LASX_3R_DESC_BASE<"xvsran.b.h", int_loongarch_lasx_xvsran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSRAN_H_W : LASX_3R<0b01110100111101110>, + LASX_3R_DESC_BASE<"xvsran.h.w", int_loongarch_lasx_xvsran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSRAN_W_D : LASX_3R<0b01110100111101111>, + LASX_3R_DESC_BASE<"xvsran.w.d", int_loongarch_lasx_xvsran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSRLRN_B_H : LASX_3R<0b01110100111110001>, + LASX_3R_DESC_BASE<"xvsrlrn.b.h", int_loongarch_lasx_xvsrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSRLRN_H_W : LASX_3R<0b01110100111110010>, + LASX_3R_DESC_BASE<"xvsrlrn.h.w", int_loongarch_lasx_xvsrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSRLRN_W_D : LASX_3R<0b01110100111110011>, + LASX_3R_DESC_BASE<"xvsrlrn.w.d", int_loongarch_lasx_xvsrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSRARN_B_H : LASX_3R<0b01110100111110101>, + LASX_3R_DESC_BASE<"xvsrarn.b.h", int_loongarch_lasx_xvsrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSRARN_H_W : LASX_3R<0b01110100111110110>, + LASX_3R_DESC_BASE<"xvsrarn.h.w", int_loongarch_lasx_xvsrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSRARN_W_D : LASX_3R<0b01110100111110111>, + LASX_3R_DESC_BASE<"xvsrarn.w.d", int_loongarch_lasx_xvsrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRLN_B_H : LASX_3R<0b01110100111111001>, + LASX_3R_DESC_BASE<"xvssrln.b.h", int_loongarch_lasx_xvssrln_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRLN_H_W : 
LASX_3R<0b01110100111111010>, + LASX_3R_DESC_BASE<"xvssrln.h.w", int_loongarch_lasx_xvssrln_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSSRLN_W_D : LASX_3R<0b01110100111111011>, + LASX_3R_DESC_BASE<"xvssrln.w.d", int_loongarch_lasx_xvssrln_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRAN_B_H : LASX_3R<0b01110100111111101>, + LASX_3R_DESC_BASE<"xvssran.b.h", int_loongarch_lasx_xvssran_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRAN_H_W : LASX_3R<0b01110100111111110>, + LASX_3R_DESC_BASE<"xvssran.h.w", int_loongarch_lasx_xvssran_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSSRAN_W_D : LASX_3R<0b01110100111111111>, + LASX_3R_DESC_BASE<"xvssran.w.d", int_loongarch_lasx_xvssran_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRLRN_B_H : LASX_3R<0b01110101000000001>, + LASX_3R_DESC_BASE<"xvssrlrn.b.h", int_loongarch_lasx_xvssrlrn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRLRN_H_W : LASX_3R<0b01110101000000010>, + LASX_3R_DESC_BASE<"xvssrlrn.h.w", int_loongarch_lasx_xvssrlrn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSSRLRN_W_D : LASX_3R<0b01110101000000011>, + LASX_3R_DESC_BASE<"xvssrlrn.w.d", int_loongarch_lasx_xvssrlrn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRARN_B_H : LASX_3R<0b01110101000000101>, + LASX_3R_DESC_BASE<"xvssrarn.b.h", int_loongarch_lasx_xvssrarn_b_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRARN_H_W : LASX_3R<0b01110101000000110>, + LASX_3R_DESC_BASE<"xvssrarn.h.w", int_loongarch_lasx_xvssrarn_h_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSSRARN_W_D : LASX_3R<0b01110101000000111>, + LASX_3R_DESC_BASE<"xvssrarn.w.d", int_loongarch_lasx_xvssrarn_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRLN_BU_H : LASX_3R<0b01110101000001001>, + LASX_3R_DESC_BASE<"xvssrln.bu.h", int_loongarch_lasx_xvssrln_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRLN_HU_W : LASX_3R<0b01110101000001010>, + LASX_3R_DESC_BASE<"xvssrln.hu.w", int_loongarch_lasx_xvssrln_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSSRLN_WU_D : LASX_3R<0b01110101000001011>, + LASX_3R_DESC_BASE<"xvssrln.wu.d", int_loongarch_lasx_xvssrln_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRAN_BU_H : LASX_3R<0b01110101000001101>, + LASX_3R_DESC_BASE<"xvssran.bu.h", int_loongarch_lasx_xvssran_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRAN_HU_W : LASX_3R<0b01110101000001110>, + LASX_3R_DESC_BASE<"xvssran.hu.w", int_loongarch_lasx_xvssran_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSSRAN_WU_D : LASX_3R<0b01110101000001111>, + LASX_3R_DESC_BASE<"xvssran.wu.d", int_loongarch_lasx_xvssran_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRLRN_BU_H : LASX_3R<0b01110101000010001>, + LASX_3R_DESC_BASE<"xvssrlrn.bu.h", int_loongarch_lasx_xvssrlrn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRLRN_HU_W : LASX_3R<0b01110101000010010>, + LASX_3R_DESC_BASE<"xvssrlrn.hu.w", int_loongarch_lasx_xvssrlrn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSSRLRN_WU_D : LASX_3R<0b01110101000010011>, + LASX_3R_DESC_BASE<"xvssrlrn.wu.d", int_loongarch_lasx_xvssrlrn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSSRARN_BU_H : LASX_3R<0b01110101000010101>, + LASX_3R_DESC_BASE<"xvssrarn.bu.h", int_loongarch_lasx_xvssrarn_bu_h, LASX256BOpnd, LASX256HOpnd, LASX256HOpnd>; + +def XVSSRARN_HU_W : LASX_3R<0b01110101000010110>, + LASX_3R_DESC_BASE<"xvssrarn.hu.w", 
int_loongarch_lasx_xvssrarn_hu_w, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVSSRARN_WU_D : LASX_3R<0b01110101000010111>, + LASX_3R_DESC_BASE<"xvssrarn.wu.d", int_loongarch_lasx_xvssrarn_wu_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVBITCLR_B : LASX_3R<0b01110101000011000>, + LASX_3R_DESC_BASE<"xvbitclr.b", xvbitclr_b, LASX256BOpnd>; + +def XVBITCLR_H : LASX_3R<0b01110101000011001>, + LASX_3R_DESC_BASE<"xvbitclr.h", xvbitclr_h, LASX256HOpnd>; + +def XVBITCLR_W : LASX_3R<0b01110101000011010>, + LASX_3R_DESC_BASE<"xvbitclr.w", xvbitclr_w, LASX256WOpnd>; + +def XVBITCLR_D : LASX_3R<0b01110101000011011>, + LASX_3R_DESC_BASE<"xvbitclr.d", xvbitclr_d, LASX256DOpnd>; + + +def XVBITSET_B : LASX_3R<0b01110101000011100>, + LASX_3R_DESC_BASE<"xvbitset.b", int_loongarch_lasx_xvbitset_b, LASX256BOpnd>; + +def XVBITSET_H : LASX_3R<0b01110101000011101>, + LASX_3R_DESC_BASE<"xvbitset.h", int_loongarch_lasx_xvbitset_h, LASX256HOpnd>; + +def XVBITSET_W : LASX_3R<0b01110101000011110>, + LASX_3R_DESC_BASE<"xvbitset.w", int_loongarch_lasx_xvbitset_w, LASX256WOpnd>; + +def XVBITSET_D : LASX_3R<0b01110101000011111>, + LASX_3R_DESC_BASE<"xvbitset.d", int_loongarch_lasx_xvbitset_d, LASX256DOpnd>; + + +def XVBITREV_B : LASX_3R<0b01110101000100000>, + LASX_3R_DESC_BASE<"xvbitrev.b", int_loongarch_lasx_xvbitrev_b, LASX256BOpnd>; + +def XVBITREV_H : LASX_3R<0b01110101000100001>, + LASX_3R_DESC_BASE<"xvbitrev.h", int_loongarch_lasx_xvbitrev_h, LASX256HOpnd>; + +def XVBITREV_W : LASX_3R<0b01110101000100010>, + LASX_3R_DESC_BASE<"xvbitrev.w", int_loongarch_lasx_xvbitrev_w, LASX256WOpnd>; + +def XVBITREV_D : LASX_3R<0b01110101000100011>, + LASX_3R_DESC_BASE<"xvbitrev.d", int_loongarch_lasx_xvbitrev_d, LASX256DOpnd>; + + +def XVPACKEV_B : LASX_3R<0b01110101000101100>, + LASX_3R_DESC_BASE<"xvpackev.b", LoongArchVPACKEV, LASX256BOpnd>; + +def XVPACKEV_H : LASX_3R<0b01110101000101101>, + LASX_3R_DESC_BASE<"xvpackev.h", LoongArchVPACKEV, LASX256HOpnd>; + +def XVPACKEV_W : LASX_3R<0b01110101000101110>, + LASX_3R_DESC_BASE<"xvpackev.w", LoongArchVPACKEV, LASX256WOpnd>; + +def XVPACKEV_D : LASX_3R<0b01110101000101111>, + LASX_3R_DESC_BASE<"xvpackev.d", LoongArchVPACKEV, LASX256DOpnd>; + + +def XVPACKOD_B : LASX_3R<0b01110101000110000>, + LASX_3R_DESC_BASE<"xvpackod.b", LoongArchVPACKOD, LASX256BOpnd>; + +def XVPACKOD_H : LASX_3R<0b01110101000110001>, + LASX_3R_DESC_BASE<"xvpackod.h", LoongArchVPACKOD, LASX256HOpnd>; + +def XVPACKOD_W : LASX_3R<0b01110101000110010>, + LASX_3R_DESC_BASE<"xvpackod.w", LoongArchVPACKOD, LASX256WOpnd>; + +def XVPACKOD_D : LASX_3R<0b01110101000110011>, + LASX_3R_DESC_BASE<"xvpackod.d", LoongArchVPACKOD, LASX256DOpnd>; + + +def XVILVL_B : LASX_3R<0b01110101000110100>, + LASX_3R_DESC_BASE<"xvilvl.b", LoongArchVILVL, LASX256BOpnd>; + +def XVILVL_H : LASX_3R<0b01110101000110101>, + LASX_3R_DESC_BASE<"xvilvl.h", LoongArchVILVL, LASX256HOpnd>; + +def XVILVL_W : LASX_3R<0b01110101000110110>, + LASX_3R_DESC_BASE<"xvilvl.w", LoongArchVILVL, LASX256WOpnd>; + +def XVILVL_D : LASX_3R<0b01110101000110111>, + LASX_3R_DESC_BASE<"xvilvl.d", LoongArchVILVL, LASX256DOpnd>; + + +def XVILVH_B : LASX_3R<0b01110101000111000>, + LASX_3R_DESC_BASE<"xvilvh.b", LoongArchVILVH, LASX256BOpnd>; + +def XVILVH_H : LASX_3R<0b01110101000111001>, + LASX_3R_DESC_BASE<"xvilvh.h", LoongArchVILVH, LASX256HOpnd>; + +def XVILVH_W : LASX_3R<0b01110101000111010>, + LASX_3R_DESC_BASE<"xvilvh.w", LoongArchVILVH, LASX256WOpnd>; + +def XVILVH_D : LASX_3R<0b01110101000111011>, + LASX_3R_DESC_BASE<"xvilvh.d", LoongArchVILVH, 
LASX256DOpnd>; + + +def XVPICKEV_B : LASX_3R<0b01110101000111100>, + LASX_3R_DESC_BASE<"xvpickev.b", LoongArchVPICKEV, LASX256BOpnd>; + +def XVPICKEV_H : LASX_3R<0b01110101000111101>, + LASX_3R_DESC_BASE<"xvpickev.h", LoongArchVPICKEV, LASX256HOpnd>; + +def XVPICKEV_W : LASX_3R<0b01110101000111110>, + LASX_3R_DESC_BASE<"xvpickev.w", LoongArchVPICKEV, LASX256WOpnd>; + +def XVPICKEV_D : LASX_3R<0b01110101000111111>, + LASX_3R_DESC_BASE<"xvpickev.d", LoongArchVPICKEV, LASX256DOpnd>; + + +def XVPICKOD_B : LASX_3R<0b01110101001000000>, + LASX_3R_DESC_BASE<"xvpickod.b", LoongArchVPICKOD, LASX256BOpnd>; + +def XVPICKOD_H : LASX_3R<0b01110101001000001>, + LASX_3R_DESC_BASE<"xvpickod.h", LoongArchVPICKOD, LASX256HOpnd>; + +def XVPICKOD_W : LASX_3R<0b01110101001000010>, + LASX_3R_DESC_BASE<"xvpickod.w", LoongArchVPICKOD, LASX256WOpnd>; + +def XVPICKOD_D : LASX_3R<0b01110101001000011>, + LASX_3R_DESC_BASE<"xvpickod.d", LoongArchVPICKOD, LASX256DOpnd>; + + +def XVREPLVE_B : LASX_3R_1GP<0b01110101001000100>, + LASX_3R_VREPLVE_DESC_BASE<"xvreplve.b", int_loongarch_lasx_xvreplve_b, LASX256BOpnd>; + +def XVREPLVE_H : LASX_3R_1GP<0b01110101001000101>, + LASX_3R_VREPLVE_DESC_BASE<"xvreplve.h", int_loongarch_lasx_xvreplve_h, LASX256HOpnd>; + +def XVREPLVE_W : LASX_3R_1GP<0b01110101001000110>, + LASX_3R_VREPLVE_DESC_BASE<"xvreplve.w", int_loongarch_lasx_xvreplve_w, LASX256WOpnd>; + +def XVREPLVE_D : LASX_3R_1GP<0b01110101001000111>, + LASX_3R_VREPLVE_DESC_BASE<"xvreplve.d", int_loongarch_lasx_xvreplve_d, LASX256DOpnd>; + + +def XVAND_V : LASX_3R<0b01110101001001100>, + LASX_VEC_DESC_BASE<"xvand.v", and, LASX256BOpnd>; +class XAND_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<and, v16i16, LASX256HOpnd>; +class XAND_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<and, v8i32, LASX256WOpnd>; +class XAND_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<and, v4i64, LASX256DOpnd>; + +def XAND_V_H_PSEUDO : XAND_V_H_PSEUDO_DESC, + PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def XAND_V_W_PSEUDO : XAND_V_W_PSEUDO_DESC, + PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def XAND_V_D_PSEUDO : XAND_V_D_PSEUDO_DESC, + PseudoInstExpansion<(XVAND_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; + + +def XVOR_V : LASX_3R<0b01110101001001101>, + LASX_VEC_DESC_BASE<"xvor.v", or, LASX256BOpnd>; +class X_OR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<or, v16i16, LASX256HOpnd>; +class X_OR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<or, v8i32, LASX256WOpnd>; +class X_OR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<or, v4i64, LASX256DOpnd>; + +def X_OR_V_H_PSEUDO : X_OR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def X_OR_V_W_PSEUDO : X_OR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def X_OR_V_D_PSEUDO : X_OR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(XVOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; + + +def XVXOR_V : LASX_3R<0b01110101001001110>, + LASX_VEC_DESC_BASE<"xvxor.v", xor, LASX256BOpnd>; +class XXOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<xor, v16i16, LASX256HOpnd>; +class XXOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<xor, v8i32, LASX256WOpnd>; +class XXOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<xor, v4i64, LASX256DOpnd>; + +def XXOR_V_H_PSEUDO : XXOR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def XXOR_V_W_PSEUDO : XXOR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def XXOR_V_D_PSEUDO : XXOR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(XVXOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>;
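XVAND_V, XVOR_V and XVXOR_V operate on untyped 256-bit registers; the typed PSEUDO_DESC wrappers above let instruction selection match and/or/xor directly on v16i16, v8i32 and v4i64 without inserting bitcasts, and each pseudo then expands to the one byte-wide .v instruction. A rough C analogue of why a single byte-wise op serves every width, assuming GCC/Clang vector extensions (typedefs illustrative, not from the patch):

typedef long long v4i64 __attribute__((vector_size(32)));
typedef char v32i8 __attribute__((vector_size(32)));

/* Bitwise AND yields identical bytes whether the register is viewed as
   4 x i64 or 32 x i8, so the typed pseudos can all expand to XVAND_V. */
v4i64 and_d(v4i64 a, v4i64 b) {
  return (v4i64)((v32i8)a & (v32i8)b);
}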
+ + +def XVNOR_V : LASX_3R<0b01110101001001111>, + LASX_VEC_DESC_BASE<"xvnor.v", LoongArchVNOR, LASX256BOpnd>; + +class XNOR_V_H_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<LoongArchVNOR, v16i16, LASX256HOpnd>; +class XNOR_V_W_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<LoongArchVNOR, v8i32, LASX256WOpnd>; +class XNOR_V_D_PSEUDO_DESC : LASX_VEC_PSEUDO_BASE<LoongArchVNOR, v4i64, LASX256DOpnd>; + +def XNOR_V_H_PSEUDO : XNOR_V_H_PSEUDO_DESC, + PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def XNOR_V_W_PSEUDO : XNOR_V_W_PSEUDO_DESC, + PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; +def XNOR_V_D_PSEUDO : XNOR_V_D_PSEUDO_DESC, + PseudoInstExpansion<(XVNOR_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; + + +def XVANDN_V : LASX_3R<0b01110101001010000>, + LASX_3R_DESC_BASE<"xvandn.v", int_loongarch_lasx_xvandn_v, LASX256BOpnd>; + + +class LASX_ANDN_PSEUDO_BASE<RegisterOperand RO> : + LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), + []>, + PseudoInstExpansion<(XVANDN_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; + +def XVANDN_H_PSEUDO : LASX_ANDN_PSEUDO_BASE<LASX256HOpnd>; +def XVANDN_W_PSEUDO : LASX_ANDN_PSEUDO_BASE<LASX256WOpnd>; +def XVANDN_D_PSEUDO : LASX_ANDN_PSEUDO_BASE<LASX256DOpnd>; + + +def XVORN_V : LASX_3R<0b01110101001010001>, + LASX_3R_DESC_BASE<"xvorn.v", int_loongarch_lasx_xvorn_v, LASX256BOpnd>; + + +class LASX_ORN_PSEUDO_BASE<RegisterOperand RO> : + LASXPseudo<(outs RO:$xd), (ins RO:$xj, RO:$xk), + []>, + PseudoInstExpansion<(XVORN_V LASX256BOpnd:$xd, + LASX256BOpnd:$xj, + LASX256BOpnd:$xk)>; + +def XVORN_H_PSEUDO : LASX_ORN_PSEUDO_BASE<LASX256HOpnd>; +def XVORN_W_PSEUDO : LASX_ORN_PSEUDO_BASE<LASX256WOpnd>; +def XVORN_D_PSEUDO : LASX_ORN_PSEUDO_BASE<LASX256DOpnd>; + + +def XVFRSTP_B : LASX_3R<0b01110101001010110>, + LASX_3R_4R_DESC_BASE<"xvfrstp.b", int_loongarch_lasx_xvfrstp_b, LASX256BOpnd>; + +def XVFRSTP_H : LASX_3R<0b01110101001010111>, + LASX_3R_4R_DESC_BASE<"xvfrstp.h", int_loongarch_lasx_xvfrstp_h, LASX256HOpnd>; + + +def XVADD_Q : LASX_3R<0b01110101001011010>, IsCommutable, + LASX_3R_DESC_BASE<"xvadd.q", int_loongarch_lasx_xvadd_q, LASX256DOpnd>; + +def XVSUB_Q : LASX_3R<0b01110101001011011>, + LASX_3R_DESC_BASE<"xvsub.q", int_loongarch_lasx_xvsub_q, LASX256DOpnd>; + + +def XVSIGNCOV_B : LASX_3R<0b01110101001011100>, + LASX_3R_DESC_BASE<"xvsigncov.b", int_loongarch_lasx_xvsigncov_b, LASX256BOpnd>; + +def XVSIGNCOV_H : LASX_3R<0b01110101001011101>, + LASX_3R_DESC_BASE<"xvsigncov.h", int_loongarch_lasx_xvsigncov_h, LASX256HOpnd>; + +def XVSIGNCOV_W : LASX_3R<0b01110101001011110>, + LASX_3R_DESC_BASE<"xvsigncov.w", int_loongarch_lasx_xvsigncov_w, LASX256WOpnd>; + +def XVSIGNCOV_D : LASX_3R<0b01110101001011111>, + LASX_3R_DESC_BASE<"xvsigncov.d", int_loongarch_lasx_xvsigncov_d, LASX256DOpnd>; + + +def XVFADD_S : LASX_3R<0b01110101001100001>, IsCommutable, + LASX_3RF_DESC_BASE<"xvfadd.s", fadd, LASX256WOpnd>; + +def XVFADD_D : LASX_3R<0b01110101001100010>, IsCommutable, + LASX_3RF_DESC_BASE<"xvfadd.d", fadd, LASX256DOpnd>; + + +def XVFSUB_S : LASX_3R<0b01110101001100101>, + LASX_3RF_DESC_BASE<"xvfsub.s", fsub, LASX256WOpnd>; + +def XVFSUB_D : LASX_3R<0b01110101001100110>, + LASX_3RF_DESC_BASE<"xvfsub.d", fsub, LASX256DOpnd>; + + +def XVFMUL_S : LASX_3R<0b01110101001110001>, + LASX_3RF_DESC_BASE<"xvfmul.s", fmul, LASX256WOpnd>; + +def XVFMUL_D : LASX_3R<0b01110101001110010>, + LASX_3RF_DESC_BASE<"xvfmul.d", fmul, LASX256DOpnd>; + + +def XVFDIV_S : LASX_3R<0b01110101001110101>, + LASX_3RF_DESC_BASE<"xvfdiv.s", fdiv, LASX256WOpnd>; + +def XVFDIV_D : LASX_3R<0b01110101001110110>, + LASX_3RF_DESC_BASE<"xvfdiv.d", fdiv, LASX256DOpnd>; + + +def XVFMAX_S : LASX_3R<0b01110101001111001>, + LASX_3RF_DESC_BASE<"xvfmax.s",
int_loongarch_lasx_xvfmax_s, LASX256WOpnd>; + +def XVFMAX_D : LASX_3R<0b01110101001111010>, + LASX_3RF_DESC_BASE<"xvfmax.d", int_loongarch_lasx_xvfmax_d, LASX256DOpnd>; + + +def XVFMIN_S : LASX_3R<0b01110101001111101>, + LASX_3RF_DESC_BASE<"xvfmin.s", int_loongarch_lasx_xvfmin_s, LASX256WOpnd>; + +def XVFMIN_D : LASX_3R<0b01110101001111110>, + LASX_3RF_DESC_BASE<"xvfmin.d", int_loongarch_lasx_xvfmin_d, LASX256DOpnd>; + + +def XVFMAXA_S : LASX_3R<0b01110101010000001>, + LASX_3RF_DESC_BASE<"xvfmaxa.s", int_loongarch_lasx_xvfmaxa_s, LASX256WOpnd>; + +def XVFMAXA_D : LASX_3R<0b01110101010000010>, + LASX_3RF_DESC_BASE<"xvfmaxa.d", int_loongarch_lasx_xvfmaxa_d, LASX256DOpnd>; + + +def XVFMINA_S : LASX_3R<0b01110101010000101>, + LASX_3RF_DESC_BASE<"xvfmina.s", int_loongarch_lasx_xvfmina_s, LASX256WOpnd>; + +def XVFMINA_D : LASX_3R<0b01110101010000110>, + LASX_3RF_DESC_BASE<"xvfmina.d", int_loongarch_lasx_xvfmina_d, LASX256DOpnd>; + + +def XVFCVT_H_S : LASX_3R<0b01110101010001100>, + LASX_3RF_DESC_BASE<"xvfcvt.h.s", int_loongarch_lasx_xvfcvt_h_s, LASX256HOpnd, LASX256WOpnd, LASX256WOpnd>; + +def XVFCVT_S_D : LASX_3R<0b01110101010001101>, + LASX_3RF_DESC_BASE1<"xvfcvt.s.d", int_loongarch_lasx_xvfcvt_s_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVFFINT_S_L : LASX_3R<0b01110101010010000>, + LASX_3RF_DESC_BASE<"xvffint.s.l", int_loongarch_lasx_xvffint_s_l, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + +def XVFTINT_W_D : LASX_3R<0b01110101010010011>, + LASX_3RF_DESC_BASE<"xvftint.w.d", int_loongarch_lasx_xvftint_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVFTINTRM_W_D : LASX_3R<0b01110101010010100>, + LASX_3RF_DESC_BASE<"xvftintrm.w.d", int_loongarch_lasx_xvftintrm_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + +def XVFTINTRP_W_D : LASX_3R<0b01110101010010101>, + LASX_3RF_DESC_BASE<"xvftintrp.w.d", int_loongarch_lasx_xvftintrp_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + +def XVFTINTRZ_W_D : LASX_3R<0b01110101010010110>, + LASX_3RF_DESC_BASE<"xvftintrz.w.d", int_loongarch_lasx_xvftintrz_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + +def XVFTINTRNE_W_D : LASX_3R<0b01110101010010111>, + LASX_3RF_DESC_BASE<"xvftintrne.w.d", int_loongarch_lasx_xvftintrne_w_d, LASX256WOpnd, LASX256DOpnd, LASX256DOpnd>; + + +def XVSHUF_H : LASX_3R<0b01110101011110101>, + LASX_3R_VSHF_DESC_BASE<"xvshuf.h", LASX256HOpnd>; + +def XVSHUF_W : LASX_3R<0b01110101011110110>, + LASX_3R_VSHF_DESC_BASE<"xvshuf.w", LASX256WOpnd>; + +def XVSHUF_D : LASX_3R<0b01110101011110111>, + LASX_3R_VSHF_DESC_BASE<"xvshuf.d", LASX256DOpnd>; + + +def XVPERM_W : LASX_3R<0b01110101011111010>, + LASX_3R_DESC_BASE<"xvperm.w", int_loongarch_lasx_xvperm_w, LASX256WOpnd>; + + +def XVSEQI_B : LASX_I5<0b01110110100000000>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.b", int_loongarch_lasx_xvseqi_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVSEQI_H : LASX_I5<0b01110110100000001>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.h", int_loongarch_lasx_xvseqi_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVSEQI_W : LASX_I5<0b01110110100000010>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.w", int_loongarch_lasx_xvseqi_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVSEQI_D : LASX_I5<0b01110110100000011>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvseqi.d", int_loongarch_lasx_xvseqi_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVSLEI_B : LASX_I5<0b01110110100000100>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.b", int_loongarch_lasx_xvslei_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVSLEI_H : LASX_I5<0b01110110100000101>, + 
LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.h", int_loongarch_lasx_xvslei_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVSLEI_W : LASX_I5<0b01110110100000110>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.w", int_loongarch_lasx_xvslei_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVSLEI_D : LASX_I5<0b01110110100000111>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslei.d", int_loongarch_lasx_xvslei_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVSLEI_BU : LASX_I5_U<0b01110110100001000>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.bu", int_loongarch_lasx_xvslei_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVSLEI_HU : LASX_I5_U<0b01110110100001001>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.hu", int_loongarch_lasx_xvslei_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVSLEI_WU : LASX_I5_U<0b01110110100001010>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.wu", int_loongarch_lasx_xvslei_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVSLEI_DU : LASX_I5_U<0b01110110100001011>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslei.du", int_loongarch_lasx_xvslei_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVSLTI_B : LASX_I5<0b01110110100001100>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.b", int_loongarch_lasx_xvslti_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVSLTI_H : LASX_I5<0b01110110100001101>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.h", int_loongarch_lasx_xvslti_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVSLTI_W : LASX_I5<0b01110110100001110>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.w", int_loongarch_lasx_xvslti_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVSLTI_D : LASX_I5<0b01110110100001111>, + LASX_I5_SETCC_DESC_BASE_Intrinsic<"xvslti.d", int_loongarch_lasx_xvslti_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVSLTI_BU : LASX_I5_U<0b01110110100010000>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.bu", int_loongarch_lasx_xvslti_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVSLTI_HU : LASX_I5_U<0b01110110100010001>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.hu", int_loongarch_lasx_xvslti_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVSLTI_WU : LASX_I5_U<0b01110110100010010>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.wu", int_loongarch_lasx_xvslti_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVSLTI_DU : LASX_I5_U<0b01110110100010011>, + LASX_I5_U_SETCC_DESC_BASE_Intrinsic<"xvslti.du", int_loongarch_lasx_xvslti_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVADDI_BU : LASX_I5_U<0b01110110100010100>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.bu", int_loongarch_lasx_xvaddi_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVADDI_HU : LASX_I5_U<0b01110110100010101>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.hu", int_loongarch_lasx_xvaddi_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVADDI_WU : LASX_I5_U<0b01110110100010110>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.wu", int_loongarch_lasx_xvaddi_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVADDI_DU : LASX_I5_U<0b01110110100010111>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvaddi.du", int_loongarch_lasx_xvaddi_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVSUBI_BU : LASX_I5_U<0b01110110100011000>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.bu", int_loongarch_lasx_xvsubi_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVSUBI_HU : LASX_I5_U<0b01110110100011001>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.hu", int_loongarch_lasx_xvsubi_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVSUBI_WU : LASX_I5_U<0b01110110100011010>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.wu", int_loongarch_lasx_xvsubi_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVSUBI_DU : LASX_I5_U<0b01110110100011011>, + 
LASX_I5_U_DESC_BASE_Intrinsic<"xvsubi.du", int_loongarch_lasx_xvsubi_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVBSLL_V : LASX_I5_U<0b01110110100011100>, + LASX_U5_DESC_BASE<"xvbsll.v", int_loongarch_lasx_xvbsll_v, LASX256BOpnd>; + +def XVBSRL_V : LASX_I5_U<0b01110110100011101>, + LASX_U5_DESC_BASE<"xvbsrl.v", int_loongarch_lasx_xvbsrl_v, LASX256BOpnd>; + + +def XVMAXI_B : LASX_I5<0b01110110100100000>, + LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.b", int_loongarch_lasx_xvmaxi_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVMAXI_H : LASX_I5<0b01110110100100001>, + LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.h", int_loongarch_lasx_xvmaxi_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVMAXI_W : LASX_I5<0b01110110100100010>, + LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.w", int_loongarch_lasx_xvmaxi_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVMAXI_D : LASX_I5<0b01110110100100011>, + LASX_I5_DESC_BASE_Intrinsic<"xvmaxi.d", int_loongarch_lasx_xvmaxi_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVMINI_B : LASX_I5<0b01110110100100100>, + LASX_I5_DESC_BASE_Intrinsic<"xvmini.b", int_loongarch_lasx_xvmini_b, simm5_32, immSExt5, LASX256BOpnd>; + +def XVMINI_H : LASX_I5<0b01110110100100101>, + LASX_I5_DESC_BASE_Intrinsic<"xvmini.h", int_loongarch_lasx_xvmini_h, simm5_32, immSExt5, LASX256HOpnd>; + +def XVMINI_W : LASX_I5<0b01110110100100110>, + LASX_I5_DESC_BASE_Intrinsic<"xvmini.w", int_loongarch_lasx_xvmini_w, simm5_32, immSExt5, LASX256WOpnd>; + +def XVMINI_D : LASX_I5<0b01110110100100111>, + LASX_I5_DESC_BASE_Intrinsic<"xvmini.d", int_loongarch_lasx_xvmini_d, simm5_32, immSExt5, LASX256DOpnd>; + + +def XVMAXI_BU : LASX_I5_U<0b01110110100101000>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.bu", int_loongarch_lasx_xvmaxi_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVMAXI_HU : LASX_I5_U<0b01110110100101001>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.hu", int_loongarch_lasx_xvmaxi_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVMAXI_WU : LASX_I5_U<0b01110110100101010>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.wu", int_loongarch_lasx_xvmaxi_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVMAXI_DU : LASX_I5_U<0b01110110100101011>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmaxi.du", int_loongarch_lasx_xvmaxi_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVMINI_BU : LASX_I5_U<0b01110110100101100>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.bu", int_loongarch_lasx_xvmini_bu, uimm5, immZExt5, LASX256BOpnd>; + +def XVMINI_HU : LASX_I5_U<0b01110110100101101>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.hu", int_loongarch_lasx_xvmini_hu, uimm5, immZExt5, LASX256HOpnd>; + +def XVMINI_WU : LASX_I5_U<0b01110110100101110>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.wu", int_loongarch_lasx_xvmini_wu, uimm5, immZExt5, LASX256WOpnd>; + +def XVMINI_DU : LASX_I5_U<0b01110110100101111>, + LASX_I5_U_DESC_BASE_Intrinsic<"xvmini.du", int_loongarch_lasx_xvmini_du, uimm5, immZExt5, LASX256DOpnd>; + + +def XVFRSTPI_B : LASX_I5_U<0b01110110100110100>, + LASX_U5_4R_DESC_BASE<"xvfrstpi.b", int_loongarch_lasx_xvfrstpi_b, LASX256BOpnd>; + +def XVFRSTPI_H : LASX_I5_U<0b01110110100110101>, + LASX_U5_4R_DESC_BASE<"xvfrstpi.h", int_loongarch_lasx_xvfrstpi_h, LASX256HOpnd>; + + +def XVCLO_B : LASX_2R<0b0111011010011100000000>, + LASX_2R_DESC_BASE<"xvclo.b", int_loongarch_lasx_xvclo_b, LASX256BOpnd>; + +def XVCLO_H : LASX_2R<0b0111011010011100000001>, + LASX_2R_DESC_BASE<"xvclo.h", int_loongarch_lasx_xvclo_h, LASX256HOpnd>; + +def XVCLO_W : LASX_2R<0b0111011010011100000010>, + LASX_2R_DESC_BASE<"xvclo.w", int_loongarch_lasx_xvclo_w, LASX256WOpnd>; + +def XVCLO_D : 
+def XVCLO_D : LASX_2R<0b0111011010011100000011>,
+    LASX_2R_DESC_BASE<"xvclo.d", int_loongarch_lasx_xvclo_d, LASX256DOpnd>;
+
+def XVCLZ_B : LASX_2R<0b0111011010011100000100>,
+    LASX_2R_DESC_BASE<"xvclz.b", ctlz, LASX256BOpnd>;
+
+def XVCLZ_H : LASX_2R<0b0111011010011100000101>,
+    LASX_2R_DESC_BASE<"xvclz.h", ctlz, LASX256HOpnd>;
+
+def XVCLZ_W : LASX_2R<0b0111011010011100000110>,
+    LASX_2R_DESC_BASE<"xvclz.w", ctlz, LASX256WOpnd>;
+
+def XVCLZ_D : LASX_2R<0b0111011010011100000111>,
+    LASX_2R_DESC_BASE<"xvclz.d", ctlz, LASX256DOpnd>;
+
+def XVPCNT_B : LASX_2R<0b0111011010011100001000>,
+    LASX_2R_DESC_BASE<"xvpcnt.b", ctpop, LASX256BOpnd>;
+
+def XVPCNT_H : LASX_2R<0b0111011010011100001001>,
+    LASX_2R_DESC_BASE<"xvpcnt.h", ctpop, LASX256HOpnd>;
+
+def XVPCNT_W : LASX_2R<0b0111011010011100001010>,
+    LASX_2R_DESC_BASE<"xvpcnt.w", ctpop, LASX256WOpnd>;
+
+def XVPCNT_D : LASX_2R<0b0111011010011100001011>,
+    LASX_2R_DESC_BASE<"xvpcnt.d", ctpop, LASX256DOpnd>;
+
+def XVNEG_B : LASX_2R<0b0111011010011100001100>,
+    LASX_2R_DESC_BASE<"xvneg.b", int_loongarch_lasx_xvneg_b, LASX256BOpnd>;
+
+def XVNEG_H : LASX_2R<0b0111011010011100001101>,
+    LASX_2R_DESC_BASE<"xvneg.h", int_loongarch_lasx_xvneg_h, LASX256HOpnd>;
+
+def XVNEG_W : LASX_2R<0b0111011010011100001110>,
+    LASX_2R_DESC_BASE<"xvneg.w", int_loongarch_lasx_xvneg_w, LASX256WOpnd>;
+
+def XVNEG_D : LASX_2R<0b0111011010011100001111>,
+    LASX_2R_DESC_BASE<"xvneg.d", int_loongarch_lasx_xvneg_d, LASX256DOpnd>;
+
+def XVMSKLTZ_B : LASX_2R<0b0111011010011100010000>,
+    LASX_2R_DESC_BASE<"xvmskltz.b", int_loongarch_lasx_xvmskltz_b, LASX256BOpnd>;
+
+def XVMSKLTZ_H : LASX_2R<0b0111011010011100010001>,
+    LASX_2R_DESC_BASE<"xvmskltz.h", int_loongarch_lasx_xvmskltz_h, LASX256HOpnd>;
+
+def XVMSKLTZ_W : LASX_2R<0b0111011010011100010010>,
+    LASX_2R_DESC_BASE<"xvmskltz.w", int_loongarch_lasx_xvmskltz_w, LASX256WOpnd>;
+
+def XVMSKLTZ_D : LASX_2R<0b0111011010011100010011>,
+    LASX_2R_DESC_BASE<"xvmskltz.d", int_loongarch_lasx_xvmskltz_d, LASX256DOpnd>;
+
+def XVMSKGEZ_B : LASX_2R<0b0111011010011100010100>,
+    LASX_2R_DESC_BASE<"xvmskgez.b", int_loongarch_lasx_xvmskgez_b, LASX256BOpnd>;
+
+def XVMSKNZ_B : LASX_2R<0b0111011010011100011000>,
+    LASX_2R_DESC_BASE<"xvmsknz.b", int_loongarch_lasx_xvmsknz_b, LASX256BOpnd>;
+
+def XVSETEQZ_V : LASX_SET<0b0111011010011100100110>,
+    LASX_SET_DESC_BASE<"xvseteqz.v", LASX256BOpnd>;
+
+def XVSETNEZ_V : LASX_SET<0b0111011010011100100111>,
+    LASX_SET_DESC_BASE<"xvsetnez.v", LASX256BOpnd>;
+
+def XVSETANYEQZ_B : LASX_SET<0b0111011010011100101000>,
+    LASX_SET_DESC_BASE<"xvsetanyeqz.b", LASX256BOpnd>;
+
+def XVSETANYEQZ_H : LASX_SET<0b0111011010011100101001>,
+    LASX_SET_DESC_BASE<"xvsetanyeqz.h", LASX256HOpnd>;
+
+def XVSETANYEQZ_W : LASX_SET<0b0111011010011100101010>,
+    LASX_SET_DESC_BASE<"xvsetanyeqz.w", LASX256WOpnd>;
+
+def XVSETANYEQZ_D : LASX_SET<0b0111011010011100101011>,
+    LASX_SET_DESC_BASE<"xvsetanyeqz.d", LASX256DOpnd>;
+
+def XVSETALLNEZ_B : LASX_SET<0b0111011010011100101100>,
+    LASX_SET_DESC_BASE<"xvsetallnez.b", LASX256BOpnd>;
+
+def XVSETALLNEZ_H : LASX_SET<0b0111011010011100101101>,
+    LASX_SET_DESC_BASE<"xvsetallnez.h", LASX256HOpnd>;
+
+def XVSETALLNEZ_W : LASX_SET<0b0111011010011100101110>,
+    LASX_SET_DESC_BASE<"xvsetallnez.w", LASX256WOpnd>;
+
+def XVSETALLNEZ_D : LASX_SET<0b0111011010011100101111>,
+    LASX_SET_DESC_BASE<"xvsetallnez.d", LASX256DOpnd>;
+
+class LASX_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
+                                    RegisterOperand RCWS> :
+    LoongArchPseudo<(outs GPR32:$dst),
+                    (ins RCWS:$xj),
+                    [(set GPR32:$dst, (OpNode (TyNode RCWS:$xj)))]> {
+  bit usesCustomInserter = 1;
+}
+
+def XSNZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+def XSNZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+def XSNZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+def XSNZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+def XSNZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+
+def XSZ_B_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+def XSZ_H_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+def XSZ_W_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+def XSZ_D_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+def XSZ_V_PSEUDO : LASX_CBRANCH_PSEUDO_DESC_BASE;
+
+def XVFLOGB_S : LASX_2R<0b0111011010011100110001>,
+    LASX_2RF_DESC_BASE<"xvflogb.s", int_loongarch_lasx_xvflogb_s, LASX256WOpnd>;
+
+def XVFLOGB_D : LASX_2R<0b0111011010011100110010>,
+    LASX_2RF_DESC_BASE<"xvflogb.d", int_loongarch_lasx_xvflogb_d, LASX256DOpnd>;
+
+def XVFCLASS_S : LASX_2R<0b0111011010011100110101>,
+    LASX_2RF_DESC_BASE<"xvfclass.s", int_loongarch_lasx_xvfclass_s, LASX256WOpnd>;
+
+def XVFCLASS_D : LASX_2R<0b0111011010011100110110>,
+    LASX_2RF_DESC_BASE<"xvfclass.d", int_loongarch_lasx_xvfclass_d, LASX256DOpnd>;
+
+def XVFSQRT_S : LASX_2R<0b0111011010011100111001>,
+    LASX_2RF_DESC_BASE<"xvfsqrt.s", fsqrt, LASX256WOpnd>;
+
+def XVFSQRT_D : LASX_2R<0b0111011010011100111010>,
+    LASX_2RF_DESC_BASE<"xvfsqrt.d", fsqrt, LASX256DOpnd>;
+
+def XVFRECIP_S : LASX_2R<0b0111011010011100111101>,
+    LASX_2RF_DESC_BASE<"xvfrecip.s", int_loongarch_lasx_xvfrecip_s, LASX256WOpnd>;
+
+def XVFRECIP_D : LASX_2R<0b0111011010011100111110>,
+    LASX_2RF_DESC_BASE<"xvfrecip.d", int_loongarch_lasx_xvfrecip_d, LASX256DOpnd>;
+
+def XVFRSQRT_S : LASX_2R<0b0111011010011101000001>,
+    LASX_2RF_DESC_BASE<"xvfrsqrt.s", int_loongarch_lasx_xvfrsqrt_s, LASX256WOpnd>;
+
+def XVFRSQRT_D : LASX_2R<0b0111011010011101000010>,
+    LASX_2RF_DESC_BASE<"xvfrsqrt.d", int_loongarch_lasx_xvfrsqrt_d, LASX256DOpnd>;
+
+def XVFRINT_S : LASX_2R<0b0111011010011101001101>,
+    LASX_2RF_DESC_BASE<"xvfrint.s", frint, LASX256WOpnd>;
+
+def XVFRINT_D : LASX_2R<0b0111011010011101001110>,
+    LASX_2RF_DESC_BASE<"xvfrint.d", frint, LASX256DOpnd>;
+
+def XVFRINTRM_S : LASX_2R<0b0111011010011101010001>,
+    LASX_2RF_DESC_BASE<"xvfrintrm.s", int_loongarch_lasx_xvfrintrm_s, LASX256WOpnd>;
+
+def XVFRINTRM_D : LASX_2R<0b0111011010011101010010>,
+    LASX_2RF_DESC_BASE<"xvfrintrm.d", int_loongarch_lasx_xvfrintrm_d, LASX256DOpnd>;
+
+def XVFRINTRP_S : LASX_2R<0b0111011010011101010101>,
+    LASX_2RF_DESC_BASE<"xvfrintrp.s", int_loongarch_lasx_xvfrintrp_s, LASX256WOpnd>;
+
+def XVFRINTRP_D : LASX_2R<0b0111011010011101010110>,
+    LASX_2RF_DESC_BASE<"xvfrintrp.d", int_loongarch_lasx_xvfrintrp_d, LASX256DOpnd>;
+
+def XVFRINTRZ_S : LASX_2R<0b0111011010011101011001>,
+    LASX_2RF_DESC_BASE<"xvfrintrz.s", int_loongarch_lasx_xvfrintrz_s, LASX256WOpnd>;
+
+def XVFRINTRZ_D : LASX_2R<0b0111011010011101011010>,
+    LASX_2RF_DESC_BASE<"xvfrintrz.d", int_loongarch_lasx_xvfrintrz_d, LASX256DOpnd>;
+
+def XVFRINTRNE_S : LASX_2R<0b0111011010011101011101>,
+    LASX_2RF_DESC_BASE<"xvfrintrne.s", int_loongarch_lasx_xvfrintrne_s, LASX256WOpnd>;
+
+def XVFRINTRNE_D : LASX_2R<0b0111011010011101011110>,
+    LASX_2RF_DESC_BASE<"xvfrintrne.d", int_loongarch_lasx_xvfrintrne_d, LASX256DOpnd>;
+
+def XVFCVTL_S_H : LASX_2R<0b0111011010011101111010>,
+    LASX_2RF_DESC_BASE<"xvfcvtl.s.h", int_loongarch_lasx_xvfcvtl_s_h, LASX256WOpnd, LASX256HOpnd>;
+
+def XVFCVTH_S_H : LASX_2R<0b0111011010011101111011>,
+    LASX_2RF_DESC_BASE<"xvfcvth.s.h", int_loongarch_lasx_xvfcvth_s_h, LASX256WOpnd, LASX256HOpnd>;
+
+def XVFCVTL_D_S : LASX_2R<0b0111011010011101111100>,
+    LASX_2RF_DESC_BASE<"xvfcvtl.d.s", int_loongarch_lasx_xvfcvtl_d_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFCVTH_D_S : LASX_2R<0b0111011010011101111101>,
+    LASX_2RF_DESC_BASE<"xvfcvth.d.s", int_loongarch_lasx_xvfcvth_d_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFFINT_S_W : LASX_2R<0b0111011010011110000000>,
+    LASX_2RF_DESC_BASE<"xvffint.s.w", sint_to_fp, LASX256WOpnd>;
+
+def XVFFINT_S_WU : LASX_2R<0b0111011010011110000001>,
+    LASX_2RF_DESC_BASE<"xvffint.s.wu", uint_to_fp, LASX256WOpnd>;
+
+def XVFFINT_D_L : LASX_2R<0b0111011010011110000010>,
+    LASX_2RF_DESC_BASE<"xvffint.d.l", sint_to_fp, LASX256DOpnd>;
+
+def XVFFINT_D_LU : LASX_2R<0b0111011010011110000011>,
+    LASX_2RF_DESC_BASE<"xvffint.d.lu", uint_to_fp, LASX256DOpnd>;
+
+def XVFFINTL_D_W : LASX_2R<0b0111011010011110000100>,
+    LASX_2RF_DESC_BASE<"xvffintl.d.w", int_loongarch_lasx_xvffintl_d_w, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFFINTH_D_W : LASX_2R<0b0111011010011110000101>,
+    LASX_2RF_DESC_BASE<"xvffinth.d.w", int_loongarch_lasx_xvffinth_d_w, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINT_W_S : LASX_2R<0b0111011010011110001100>,
+    LASX_2RF_DESC_BASE<"xvftint.w.s", int_loongarch_lasx_xvftint_w_s, LASX256WOpnd>;
+
+def XVFTINT_L_D : LASX_2R<0b0111011010011110001101>,
+    LASX_2RF_DESC_BASE<"xvftint.l.d", int_loongarch_lasx_xvftint_l_d, LASX256DOpnd>;
+
+def XVFTINTRM_W_S : LASX_2R<0b0111011010011110001110>,
+    LASX_2RF_DESC_BASE<"xvftintrm.w.s", int_loongarch_lasx_xvftintrm_w_s, LASX256WOpnd>;
+
+def XVFTINTRM_L_D : LASX_2R<0b0111011010011110001111>,
+    LASX_2RF_DESC_BASE<"xvftintrm.l.d", int_loongarch_lasx_xvftintrm_l_d, LASX256DOpnd>;
+
+def XVFTINTRP_W_S : LASX_2R<0b0111011010011110010000>,
+    LASX_2RF_DESC_BASE<"xvftintrp.w.s", int_loongarch_lasx_xvftintrp_w_s, LASX256WOpnd>;
+
+def XVFTINTRP_L_D : LASX_2R<0b0111011010011110010001>,
+    LASX_2RF_DESC_BASE<"xvftintrp.l.d", int_loongarch_lasx_xvftintrp_l_d, LASX256DOpnd>;
+
+def XVFTINTRZ_W_S : LASX_2R<0b0111011010011110010010>,
+    LASX_2RF_DESC_BASE<"xvftintrz.w.s", fp_to_sint, LASX256WOpnd>;
+
+def XVFTINTRZ_L_D : LASX_2R<0b0111011010011110010011>,
+    LASX_2RF_DESC_BASE<"xvftintrz.l.d", fp_to_sint, LASX256DOpnd>;
+
+def XVFTINTRNE_W_S : LASX_2R<0b0111011010011110010100>,
+    LASX_2RF_DESC_BASE<"xvftintrne.w.s", int_loongarch_lasx_xvftintrne_w_s, LASX256WOpnd>;
+
+def XVFTINTRNE_L_D : LASX_2R<0b0111011010011110010101>,
+    LASX_2RF_DESC_BASE<"xvftintrne.l.d", int_loongarch_lasx_xvftintrne_l_d, LASX256DOpnd>;
+
+def XVFTINT_WU_S : LASX_2R<0b0111011010011110010110>,
+    LASX_2RF_DESC_BASE<"xvftint.wu.s", int_loongarch_lasx_xvftint_wu_s, LASX256WOpnd>;
+
+def XVFTINT_LU_D : LASX_2R<0b0111011010011110010111>,
+    LASX_2RF_DESC_BASE<"xvftint.lu.d", int_loongarch_lasx_xvftint_lu_d, LASX256DOpnd>;
+
+def XVFTINTRZ_WU_S : LASX_2R<0b0111011010011110011100>,
+    LASX_2RF_DESC_BASE<"xvftintrz.wu.s", fp_to_uint, LASX256WOpnd>;
+
+def XVFTINTRZ_LU_D : LASX_2R<0b0111011010011110011101>,
+    LASX_2RF_DESC_BASE<"xvftintrz.lu.d", fp_to_uint, LASX256DOpnd>;
+
+def XVFTINTL_L_S : LASX_2R<0b0111011010011110100000>,
+    LASX_2RF_DESC_BASE<"xvftintl.l.s", int_loongarch_lasx_xvftintl_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINTH_L_S : LASX_2R<0b0111011010011110100001>,
+    LASX_2RF_DESC_BASE<"xvftinth.l.s", int_loongarch_lasx_xvftinth_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINTRML_L_S : LASX_2R<0b0111011010011110100010>,
+    LASX_2RF_DESC_BASE<"xvftintrml.l.s", int_loongarch_lasx_xvftintrml_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINTRMH_L_S : LASX_2R<0b0111011010011110100011>,
+    LASX_2RF_DESC_BASE<"xvftintrmh.l.s", int_loongarch_lasx_xvftintrmh_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINTRPL_L_S : LASX_2R<0b0111011010011110100100>,
+    LASX_2RF_DESC_BASE<"xvftintrpl.l.s", int_loongarch_lasx_xvftintrpl_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINTRPH_L_S : LASX_2R<0b0111011010011110100101>,
+    LASX_2RF_DESC_BASE<"xvftintrph.l.s", int_loongarch_lasx_xvftintrph_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINTRZL_L_S : LASX_2R<0b0111011010011110100110>,
+    LASX_2RF_DESC_BASE<"xvftintrzl.l.s", int_loongarch_lasx_xvftintrzl_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINTRZH_L_S : LASX_2R<0b0111011010011110100111>,
+    LASX_2RF_DESC_BASE<"xvftintrzh.l.s", int_loongarch_lasx_xvftintrzh_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINTRNEL_L_S : LASX_2R<0b0111011010011110101000>,
+    LASX_2RF_DESC_BASE<"xvftintrnel.l.s", int_loongarch_lasx_xvftintrnel_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVFTINTRNEH_L_S : LASX_2R<0b0111011010011110101001>,
+    LASX_2RF_DESC_BASE<"xvftintrneh.l.s", int_loongarch_lasx_xvftintrneh_l_s, LASX256DOpnd, LASX256WOpnd>;
+
+def XVEXTH_H_B : LASX_2R<0b0111011010011110111000>,
+    LASX_2R_DESC_BASE<"xvexth.h.b", int_loongarch_lasx_xvexth_h_b, LASX256HOpnd, LASX256BOpnd>;
+
+def XVEXTH_W_H : LASX_2R<0b0111011010011110111001>,
+    LASX_2R_DESC_BASE<"xvexth.w.h", int_loongarch_lasx_xvexth_w_h, LASX256WOpnd, LASX256HOpnd>;
+
+def XVEXTH_D_W : LASX_2R<0b0111011010011110111010>,
+    LASX_2R_DESC_BASE<"xvexth.d.w", int_loongarch_lasx_xvexth_d_w, LASX256DOpnd, LASX256WOpnd>;
+
+def XVEXTH_Q_D : LASX_2R<0b0111011010011110111011>,
+    LASX_2R_DESC_BASE<"xvexth.q.d", int_loongarch_lasx_xvexth_q_d, LASX256DOpnd, LASX256DOpnd>;
+
+def XVEXTH_HU_BU : LASX_2R<0b0111011010011110111100>,
+    LASX_2R_DESC_BASE<"xvexth.hu.bu", int_loongarch_lasx_xvexth_hu_bu, LASX256HOpnd, LASX256BOpnd>;
+
+def XVEXTH_WU_HU : LASX_2R<0b0111011010011110111101>,
+    LASX_2R_DESC_BASE<"xvexth.wu.hu", int_loongarch_lasx_xvexth_wu_hu, LASX256WOpnd, LASX256HOpnd>;
+
+def XVEXTH_DU_WU : LASX_2R<0b0111011010011110111110>,
+    LASX_2R_DESC_BASE<"xvexth.du.wu", int_loongarch_lasx_xvexth_du_wu, LASX256DOpnd, LASX256WOpnd>;
+
+def XVEXTH_QU_DU : LASX_2R<0b0111011010011110111111>,
+    LASX_2R_DESC_BASE<"xvexth.qu.du", int_loongarch_lasx_xvexth_qu_du, LASX256DOpnd, LASX256DOpnd>;
+
+def XVREPLGR2VR_B : LASX_2R_1GP<0b0111011010011111000000>,
+    LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.b", v32i8, xvsplati8, LASX256BOpnd, GPR32Opnd>;
+
+def XVREPLGR2VR_H : LASX_2R_1GP<0b0111011010011111000001>,
+    LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.h", v16i16, xvsplati16, LASX256HOpnd, GPR32Opnd>;
+
+def XVREPLGR2VR_W : LASX_2R_1GP<0b0111011010011111000010>,
+    LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.w", v8i32, xvsplati32, LASX256WOpnd, GPR32Opnd>;
+
+def XVREPLGR2VR_D : LASX_2R_1GP<0b0111011010011111000011>,
+    LASX_2R_REPL_DESC_BASE<"xvreplgr2vr.d", v4i64, xvsplati64, LASX256DOpnd, GPR64Opnd>;
+
+def VEXT2XV_H_B : LASX_2R<0b0111011010011111000100>,
+    LASX_XVEXTEND_DESC_BASE<"vext2xv.h.b", int_loongarch_lasx_vext2xv_h_b, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>;
+
+def VEXT2XV_W_B : LASX_2R<0b0111011010011111000101>,
+    LASX_XVEXTEND_DESC_BASE<"vext2xv.w.b", int_loongarch_lasx_vext2xv_w_b, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>;
+
+def VEXT2XV_D_B : LASX_2R<0b0111011010011111000110>,
+    LASX_XVEXTEND_DESC_BASE<"vext2xv.d.b", int_loongarch_lasx_vext2xv_d_b, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd>;
+
LASX_XVEXTEND_DESC_BASE<"vext2xv.w.h", int_loongarch_lasx_vext2xv_w_h, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; + +def VEXT2XV_D_H : LASX_2R<0b0111011010011111001000>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.d.h", int_loongarch_lasx_vext2xv_d_h, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; + +def VEXT2XV_D_W : LASX_2R<0b0111011010011111001001>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.d.w", int_loongarch_lasx_vext2xv_d_w, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; + + +def VEXT2XV_HU_BU : LASX_2R<0b0111011010011111001010>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.hu.bu", int_loongarch_lasx_vext2xv_hu_bu, v32i8, v16i16, LASX256BOpnd, LASX256HOpnd>; + +def VEXT2XV_WU_BU : LASX_2R<0b0111011010011111001011>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.bu", int_loongarch_lasx_vext2xv_wu_bu, v32i8, v8i32, LASX256BOpnd, LASX256WOpnd>; + +def VEXT2XV_DU_BU : LASX_2R<0b0111011010011111001100>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.du.bu", int_loongarch_lasx_vext2xv_du_bu, v32i8, v4i64, LASX256BOpnd, LASX256DOpnd> ; + +def VEXT2XV_WU_HU : LASX_2R<0b0111011010011111001101>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.wu.hu", int_loongarch_lasx_vext2xv_wu_hu, v16i16, v8i32, LASX256HOpnd, LASX256WOpnd>; + +def VEXT2XV_DU_HU : LASX_2R<0b0111011010011111001110>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.du.hu", int_loongarch_lasx_vext2xv_du_hu, v16i16, v4i64, LASX256HOpnd, LASX256DOpnd> ; + +def VEXT2XV_DU_WU : LASX_2R<0b0111011010011111001111>, + LASX_XVEXTEND_DESC_BASE<"vext2xv.du.wu", int_loongarch_lasx_vext2xv_du_wu, v8i32, v4i64, LASX256WOpnd, LASX256DOpnd>; + + +def XVHSELI_D : LASX_I5_U<0b01110110100111111>, + LASX_U5N_DESC_BASE<"xvhseli.d", LASX256DOpnd>; + + +def XVROTRI_B : LASX_I3_U<0b0111011010100000001>, + LASX_RORI_U3_DESC_BASE_Intrinsic<"xvrotri.b", int_loongarch_lasx_xvrotri_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVROTRI_H : LASX_I4_U<0b011101101010000001>, + LASX_RORI_U4_DESC_BASE_Intrinsic<"xvrotri.h", int_loongarch_lasx_xvrotri_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVROTRI_W : LASX_I5_U<0b01110110101000001>, + LASX_RORI_U5_DESC_BASE_Intrinsic<"xvrotri.w", int_loongarch_lasx_xvrotri_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVROTRI_D : LASX_I6_U<0b0111011010100001>, + LASX_RORI_U6_DESC_BASE_Intrinsic<"xvrotri.d", int_loongarch_lasx_xvrotri_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVSRLRI_B : LASX_I3_U<0b0111011010100100001>, + LASX_BIT_3_DESC_BASE<"xvsrlri.b", int_loongarch_lasx_xvsrlri_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVSRLRI_H : LASX_I4_U<0b011101101010010001>, + LASX_BIT_4_DESC_BASE<"xvsrlri.h", int_loongarch_lasx_xvsrlri_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVSRLRI_W : LASX_I5_U<0b01110110101001001>, + LASX_BIT_5_DESC_BASE<"xvsrlri.w", int_loongarch_lasx_xvsrlri_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVSRLRI_D : LASX_I6_U<0b0111011010100101>, + LASX_BIT_6_DESC_BASE<"xvsrlri.d", int_loongarch_lasx_xvsrlri_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVSRARI_B : LASX_I3_U<0b0111011010101000001>, + LASX_BIT_3_DESC_BASE<"xvsrari.b", int_loongarch_lasx_xvsrari_b, uimm3, immZExt3, LASX256BOpnd>; + +def XVSRARI_H : LASX_I4_U<0b011101101010100001>, + LASX_BIT_4_DESC_BASE<"xvsrari.h", int_loongarch_lasx_xvsrari_h, uimm4, immZExt4, LASX256HOpnd>; + +def XVSRARI_W : LASX_I5_U<0b01110110101010001>, + LASX_BIT_5_DESC_BASE<"xvsrari.w", int_loongarch_lasx_xvsrari_w, uimm5, immZExt5, LASX256WOpnd>; + +def XVSRARI_D : LASX_I6_U<0b0111011010101001>, + LASX_BIT_6_DESC_BASE<"xvsrari.d", int_loongarch_lasx_xvsrari_d, uimm6, immZExt6, LASX256DOpnd>; + + +def XVINSGR2VR_W : LASX_I3_R_U<0b0111011011101011110>, + 
LASX_INSERT_U3_DESC_BASE<"xvinsgr2vr.w", v8i32, uimm3_ptr, immZExt3Ptr, LASX256WOpnd, GPR32Opnd>; + +def XVINSGR2VR_D : LASX_I2_R_U<0b01110110111010111110>, + LASX_INSERT_U2_DESC_BASE<"xvinsgr2vr.d", v4i64, uimm2_ptr, immZExt2Ptr, LASX256DOpnd, GPR64Opnd>; + + +def XVPICKVE2GR_W : LASX_ELM_COPY_U3<0b0111011011101111110>, + LASX_COPY_U3_DESC_BASE<"xvpickve2gr.w", vextract_sext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; + +def XVPICKVE2GR_D : LASX_ELM_COPY_U2<0b01110110111011111110>, + LASX_COPY_U2_DESC_BASE<"xvpickve2gr.d", vextract_sext_i64, v4i64, uimm2_ptr, immZExt2Ptr, GPR64Opnd, LASX256DOpnd>; + + +def XVPICKVE2GR_WU : LASX_ELM_COPY_U3<0b0111011011110011110>, + LASX_COPY_U3_DESC_BASE<"xvpickve2gr.wu", vextract_zext_i32, v8i32, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LASX256WOpnd>; + +def XVPICKVE2GR_DU : LASX_ELM_COPY_U2<0b01110110111100111110>, + LASX_COPY_U2_DESC_BASE<"xvpickve2gr.du", vextract_zext_i64, v4i64, uimm2_ptr, immZExt2Ptr, GPR64Opnd, LASX256DOpnd>; + + +def XVREPL128VEI_B : LASX_I4_U<0b011101101111011110>, + LASX_ELM_U4_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.b", int_loongarch_lasx_xvrepl128vei_b, LASX256BOpnd>; + +def XVREPL128VEI_H : LASX_I3_U<0b0111011011110111110>, + LASX_ELM_U3_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.h", int_loongarch_lasx_xvrepl128vei_h, LASX256HOpnd>; + +def XVREPL128VEI_W : LASX_I2_U<0b01110110111101111110>, + LASX_ELM_U2_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.w", int_loongarch_lasx_xvrepl128vei_w, LASX256WOpnd>; + +def XVREPL128VEI_D : LASX_I1_U<0b011101101111011111110>, + LASX_ELM_U1_VREPLVE_DESC_BASE_Intrinsic<"xvrepl128vei.d", int_loongarch_lasx_xvrepl128vei_d, LASX256DOpnd>; + + +def XVINSVE0_W : LASX_I3_U<0b0111011011111111110>, + LASX_BIT_3_4O_DESC_BASE<"xvinsve0.w", int_loongarch_lasx_xvinsve0_w, uimm3, immZExt3, LASX256WOpnd>; + +def XVINSVE0_D : LASX_I2_U<0b01110110111111111110>, + LASX_BIT_2_4O_DESC_BASE<"xvinsve0.d", int_loongarch_lasx_xvinsve0_d, uimm2, immZExt2, LASX256DOpnd>; + + +def XVPICKVE_W : LASX_I3_U<0b0111011100000011110>, + LASX_BIT_3_4ON<"xvpickve.w", uimm3, immZExt3, LASX256WOpnd>; + +def XVPICKVE_D : LASX_I2_U<0b01110111000000111110>, + LASX_BIT_2_4ON<"xvpickve.d", uimm2, immZExt2, LASX256DOpnd>; + + +def XVREPLVE0_B : LASX_2R<0b0111011100000111000000>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.b", int_loongarch_lasx_xvreplve0_b, v32i8, LASX256BOpnd>; + +def XVREPLVE0_H : LASX_2R<0b0111011100000111100000>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.h", int_loongarch_lasx_xvreplve0_h, v16i16, LASX256HOpnd>; + +def XVREPLVE0_W : LASX_2R<0b0111011100000111110000>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.w", int_loongarch_lasx_xvreplve0_w, v8i32, LASX256WOpnd> ; + +def XVREPLVE0_D : LASX_2R<0b0111011100000111111000>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.d", xvbroadcast_v4i64, v4i64, LASX256DOpnd>; + +def XVREPLVE0_Q : LASX_2R<0b0111011100000111111100>, + LASX_XVBROADCAST_DESC_BASE<"xvreplve0.q", int_loongarch_lasx_xvreplve0_q, v32i8, LASX256BOpnd>; + + +def XVSLLWIL_H_B : LASX_I3_U<0b0111011100001000001>, + LASX_2R_U3_DESC_BASE<"xvsllwil.h.b", int_loongarch_lasx_xvsllwil_h_b, LASX256HOpnd, LASX256BOpnd>; + +def XVSLLWIL_W_H : LASX_I4_U<0b011101110000100001>, + LASX_2R_U4_DESC_BASE<"xvsllwil.w.h", int_loongarch_lasx_xvsllwil_w_h, LASX256WOpnd, LASX256HOpnd>; + +def XVSLLWIL_D_W : LASX_I5_U<0b01110111000010001>, + LASX_2R_U5_DESC_BASE<"xvsllwil.d.w", int_loongarch_lasx_xvsllwil_d_w, LASX256DOpnd, LASX256WOpnd> ; + + +def XVEXTL_Q_D : LASX_2R<0b0111011100001001000000>, + LASX_2R_DESC_BASE<"xvextl.q.d", 
+def XVEXTL_Q_D : LASX_2R<0b0111011100001001000000>,
+    LASX_2R_DESC_BASE<"xvextl.q.d", int_loongarch_lasx_xvextl_q_d, LASX256DOpnd, LASX256DOpnd>;
+
+def XVSLLWIL_HU_BU : LASX_I3_U<0b0111011100001100001>,
+    LASX_2R_U3_DESC_BASE<"xvsllwil.hu.bu", int_loongarch_lasx_xvsllwil_hu_bu, LASX256HOpnd, LASX256BOpnd>;
+
+def XVSLLWIL_WU_HU : LASX_I4_U<0b011101110000110001>,
+    LASX_2R_U4_DESC_BASE<"xvsllwil.wu.hu", int_loongarch_lasx_xvsllwil_wu_hu, LASX256WOpnd, LASX256HOpnd>;
+
+def XVSLLWIL_DU_WU : LASX_I5_U<0b01110111000011001>,
+    LASX_2R_U5_DESC_BASE<"xvsllwil.du.wu", int_loongarch_lasx_xvsllwil_du_wu, LASX256DOpnd, LASX256WOpnd>;
+
+def XVEXTL_QU_DU : LASX_2R<0b0111011100001101000000>,
+    LASX_2R_DESC_BASE<"xvextl.qu.du", int_loongarch_lasx_xvextl_qu_du, LASX256DOpnd, LASX256DOpnd>;
+
+def XVBITCLRI_B : LASX_I3_U<0b0111011100010000001>,
+    LASX_2R_U3_DESC_BASE<"xvbitclri.b", int_loongarch_lasx_xvbitclri_b, LASX256BOpnd, LASX256BOpnd>;
+
+def XVBITCLRI_H : LASX_I4_U<0b011101110001000001>,
+    LASX_2R_U4_DESC_BASE<"xvbitclri.h", int_loongarch_lasx_xvbitclri_h, LASX256HOpnd, LASX256HOpnd>;
+
+def XVBITCLRI_W : LASX_I5_U<0b01110111000100001>,
+    LASX_2R_U5_DESC_BASE<"xvbitclri.w", int_loongarch_lasx_xvbitclri_w, LASX256WOpnd, LASX256WOpnd>;
+
+def XVBITCLRI_D : LASX_I6_U<0b0111011100010001>,
+    LASX_2R_U6_DESC_BASE<"xvbitclri.d", int_loongarch_lasx_xvbitclri_d, LASX256DOpnd, LASX256DOpnd>;
+
+def XVBITSETI_B : LASX_I3_U<0b0111011100010100001>,
+    LASX_2R_U3_DESC_BASE<"xvbitseti.b", int_loongarch_lasx_xvbitseti_b, LASX256BOpnd, LASX256BOpnd>;
+
+def XVBITSETI_H : LASX_I4_U<0b011101110001010001>,
+    LASX_2R_U4_DESC_BASE<"xvbitseti.h", int_loongarch_lasx_xvbitseti_h, LASX256HOpnd, LASX256HOpnd>;
+
+def XVBITSETI_W : LASX_I5_U<0b01110111000101001>,
+    LASX_2R_U5_DESC_BASE<"xvbitseti.w", int_loongarch_lasx_xvbitseti_w, LASX256WOpnd, LASX256WOpnd>;
+
+def XVBITSETI_D : LASX_I6_U<0b0111011100010101>,
+    LASX_2R_U6_DESC_BASE<"xvbitseti.d", int_loongarch_lasx_xvbitseti_d, LASX256DOpnd, LASX256DOpnd>;
+
+def XVBITREVI_B : LASX_I3_U<0b0111011100011000001>,
+    LASX_2R_U3_DESC_BASE<"xvbitrevi.b", int_loongarch_lasx_xvbitrevi_b, LASX256BOpnd, LASX256BOpnd>;
+
+def XVBITREVI_H : LASX_I4_U<0b011101110001100001>,
+    LASX_2R_U4_DESC_BASE<"xvbitrevi.h", int_loongarch_lasx_xvbitrevi_h, LASX256HOpnd, LASX256HOpnd>;
+
+def XVBITREVI_W : LASX_I5_U<0b01110111000110001>,
+    LASX_2R_U5_DESC_BASE<"xvbitrevi.w", int_loongarch_lasx_xvbitrevi_w, LASX256WOpnd, LASX256WOpnd>;
+
+def XVBITREVI_D : LASX_I6_U<0b0111011100011001>,
+    LASX_2R_U6_DESC_BASE<"xvbitrevi.d", int_loongarch_lasx_xvbitrevi_d, LASX256DOpnd, LASX256DOpnd>;
+
+def XVSAT_B : LASX_I3_U<0b0111011100100100001>,
+    LASX_BIT_3_DESC_BASE<"xvsat.b", int_loongarch_lasx_xvsat_b, uimm3, immZExt3, LASX256BOpnd>;
+
+def XVSAT_H : LASX_I4_U<0b011101110010010001>,
+    LASX_BIT_4_DESC_BASE<"xvsat.h", int_loongarch_lasx_xvsat_h, uimm4, immZExt4, LASX256HOpnd>;
+
+def XVSAT_W : LASX_I5_U<0b01110111001001001>,
+    LASX_BIT_5_DESC_BASE<"xvsat.w", int_loongarch_lasx_xvsat_w, uimm5, immZExt5, LASX256WOpnd>;
+
+def XVSAT_D : LASX_I6_U<0b0111011100100101>,
+    LASX_BIT_6_DESC_BASE<"xvsat.d", int_loongarch_lasx_xvsat_d, uimm6, immZExt6, LASX256DOpnd>;
+
+def XVSAT_BU : LASX_I3_U<0b0111011100101000001>,
+    LASX_BIT_3_DESC_BASE<"xvsat.bu", int_loongarch_lasx_xvsat_bu, uimm3, immZExt3, LASX256BOpnd>;
+
+def XVSAT_HU : LASX_I4_U<0b011101110010100001>,
+    LASX_BIT_4_DESC_BASE<"xvsat.hu", int_loongarch_lasx_xvsat_hu, uimm4, immZExt4, LASX256HOpnd>;
+
+def XVSAT_WU : LASX_I5_U<0b01110111001010001>,
+    LASX_BIT_5_DESC_BASE<"xvsat.wu", int_loongarch_lasx_xvsat_wu, uimm5, immZExt5, LASX256WOpnd>;
+
+def XVSAT_DU : LASX_I6_U<0b0111011100101001>,
+    LASX_BIT_6_DESC_BASE<"xvsat.du", int_loongarch_lasx_xvsat_du, uimm6, immZExt6, LASX256DOpnd>;
+
+def XVSLLI_B : LASX_I3_U<0b0111011100101100001>,
+    LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvslli.b", int_loongarch_lasx_xvslli_b, uimm3, immZExt3, LASX256BOpnd>;
+
+def XVSLLI_H : LASX_I4_U<0b011101110010110001>,
+    LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvslli.h", int_loongarch_lasx_xvslli_h, uimm4, immZExt4, LASX256HOpnd>;
+
+def XVSLLI_W : LASX_I5_U<0b01110111001011001>,
+    LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvslli.w", int_loongarch_lasx_xvslli_w, uimm5, immZExt5, LASX256WOpnd>;
+
+def XVSLLI_D : LASX_I6_U<0b0111011100101101>,
+    LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvslli.d", int_loongarch_lasx_xvslli_d, uimm6, immZExt6, LASX256DOpnd>;
+
+def XVSRLI_B : LASX_I3_U<0b0111011100110000001>,
+    LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.b", int_loongarch_lasx_xvsrli_b, uimm3, immZExt3, LASX256BOpnd>;
+
+def XVSRLI_H : LASX_I4_U<0b011101110011000001>,
+    LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.h", int_loongarch_lasx_xvsrli_h, uimm4, immZExt4, LASX256HOpnd>;
+
+def XVSRLI_W : LASX_I5_U<0b01110111001100001>,
+    LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.w", int_loongarch_lasx_xvsrli_w, uimm5, immZExt5, LASX256WOpnd>;
+
+def XVSRLI_D : LASX_I6_U<0b0111011100110001>,
+    LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrli.d", int_loongarch_lasx_xvsrli_d, uimm6, immZExt6, LASX256DOpnd>;
+
+def XVSRAI_B : LASX_I3_U<0b0111011100110100001>,
+    LASX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.b", int_loongarch_lasx_xvsrai_b, uimm3, immZExt3, LASX256BOpnd>;
+
+def XVSRAI_H : LASX_I4_U<0b011101110011010001>,
+    LASX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.h", int_loongarch_lasx_xvsrai_h, uimm4, immZExt4, LASX256HOpnd>;
+
+def XVSRAI_W : LASX_I5_U<0b01110111001101001>,
+    LASX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.w", int_loongarch_lasx_xvsrai_w, uimm5, immZExt5, LASX256WOpnd>;
+
+def XVSRAI_D : LASX_I6_U<0b0111011100110101>,
+    LASX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"xvsrai.d", int_loongarch_lasx_xvsrai_d, uimm6, immZExt6, LASX256DOpnd>;
+
+def XVSRLNI_B_H : LASX_I4_U<0b011101110100000001>,
+    LASX_U4_DESC_BASE<"xvsrlni.b.h", int_loongarch_lasx_xvsrlni_b_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSRLNI_H_W : LASX_I5_U<0b01110111010000001>,
+    LASX_N4_U5_DESC_BASE<"xvsrlni.h.w", int_loongarch_lasx_xvsrlni_h_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSRLNI_W_D : LASX_I6_U<0b0111011101000001>,
+    LASX_U6_DESC_BASE<"xvsrlni.w.d", int_loongarch_lasx_xvsrlni_w_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSRLNI_D_Q : LASX_I7_U<0b011101110100001>,
+    LASX_D_DESC_BASE<"xvsrlni.d.q", int_loongarch_lasx_xvsrlni_d_q, LASX256DOpnd>;
+
+def XVSRLRNI_B_H : LASX_I4_U<0b011101110100010001>,
+    LASX_U4_DESC_BASE<"xvsrlrni.b.h", int_loongarch_lasx_xvsrlrni_b_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSRLRNI_H_W : LASX_I5_U<0b01110111010001001>,
+    LASX_N4_U5_DESC_BASE<"xvsrlrni.h.w", int_loongarch_lasx_xvsrlrni_h_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSRLRNI_W_D : LASX_I6_U<0b0111011101000101>,
+    LASX_U6_DESC_BASE<"xvsrlrni.w.d", int_loongarch_lasx_xvsrlrni_w_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSRLRNI_D_Q : LASX_I7_U<0b011101110100011>,
+    LASX_D_DESC_BASE<"xvsrlrni.d.q", int_loongarch_lasx_xvsrlrni_d_q, LASX256DOpnd>;
+
+def XVSSRLNI_B_H : LASX_I4_U<0b011101110100100001>,
+    LASX_U4_DESC_BASE<"xvssrlni.b.h", int_loongarch_lasx_xvssrlni_b_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSSRLNI_H_W : LASX_I5_U<0b01110111010010001>,
+    LASX_N4_U5_DESC_BASE<"xvssrlni.h.w", int_loongarch_lasx_xvssrlni_h_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSSRLNI_W_D : LASX_I6_U<0b0111011101001001>,
+    LASX_U6_DESC_BASE<"xvssrlni.w.d", int_loongarch_lasx_xvssrlni_w_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSSRLNI_D_Q : LASX_I7_U<0b011101110100101>,
+    LASX_D_DESC_BASE<"xvssrlni.d.q", int_loongarch_lasx_xvssrlni_d_q, LASX256DOpnd>;
+
+def XVSSRLNI_BU_H : LASX_I4_U<0b011101110100110001>,
+    LASX_U4_DESC_BASE<"xvssrlni.bu.h", int_loongarch_lasx_xvssrlni_bu_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSSRLNI_HU_W : LASX_I5_U<0b01110111010011001>,
+    LASX_N4_U5_DESC_BASE<"xvssrlni.hu.w", int_loongarch_lasx_xvssrlni_hu_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSSRLNI_WU_D : LASX_I6_U<0b0111011101001101>,
+    LASX_U6_DESC_BASE<"xvssrlni.wu.d", int_loongarch_lasx_xvssrlni_wu_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSSRLNI_DU_Q : LASX_I7_U<0b011101110100111>,
+    LASX_D_DESC_BASE<"xvssrlni.du.q", int_loongarch_lasx_xvssrlni_du_q, LASX256DOpnd>;
+
+def XVSSRLRNI_B_H : LASX_I4_U<0b011101110101000001>,
+    LASX_2R_3R_U4_DESC_BASE<"xvssrlrni.b.h", int_loongarch_lasx_xvssrlrni_b_h, LASX256BOpnd, LASX256BOpnd>;
+
+def XVSSRLRNI_H_W : LASX_I5_U<0b01110111010100001>,
+    LASX_2R_3R_U5_DESC_BASE<"xvssrlrni.h.w", int_loongarch_lasx_xvssrlrni_h_w, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSSRLRNI_W_D : LASX_I6_U<0b0111011101010001>,
+    LASX_2R_3R_U6_DESC_BASE<"xvssrlrni.w.d", int_loongarch_lasx_xvssrlrni_w_d, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSSRLRNI_D_Q : LASX_I7_U<0b011101110101001>,
+    LASX_2R_3R_U7_DESC_BASE<"xvssrlrni.d.q", int_loongarch_lasx_xvssrlrni_d_q, LASX256DOpnd, LASX256DOpnd>;
+
+def XVSSRLRNI_BU_H : LASX_I4_U<0b011101110101010001>,
+    LASX_U4_DESC_BASE<"xvssrlrni.bu.h", int_loongarch_lasx_xvssrlrni_bu_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSSRLRNI_HU_W : LASX_I5_U<0b01110111010101001>,
+    LASX_N4_U5_DESC_BASE<"xvssrlrni.hu.w", int_loongarch_lasx_xvssrlrni_hu_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSSRLRNI_WU_D : LASX_I6_U<0b0111011101010101>,
+    LASX_U6_DESC_BASE<"xvssrlrni.wu.d", int_loongarch_lasx_xvssrlrni_wu_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSSRLRNI_DU_Q : LASX_I7_U<0b011101110101011>,
+    LASX_D_DESC_BASE<"xvssrlrni.du.q", int_loongarch_lasx_xvssrlrni_du_q, LASX256DOpnd>;
+
+def XVSRANI_B_H : LASX_I4_U<0b011101110101100001>,
+    LASX_2R_3R_U4_DESC_BASE<"xvsrani.b.h", int_loongarch_lasx_xvsrani_b_h, LASX256BOpnd, LASX256BOpnd>;
+
+def XVSRANI_H_W : LASX_I5_U<0b01110111010110001>,
+    LASX_2R_3R_U5_DESC_BASE<"xvsrani.h.w", int_loongarch_lasx_xvsrani_h_w, LASX256HOpnd, LASX256HOpnd>;
+
+def XVSRANI_W_D : LASX_I6_U<0b0111011101011001>,
+    LASX_2R_3R_U6_DESC_BASE<"xvsrani.w.d", int_loongarch_lasx_xvsrani_w_d, LASX256WOpnd, LASX256WOpnd>;
+
+def XVSRANI_D_Q : LASX_I7_U<0b011101110101101>,
+    LASX_2R_3R_U7_DESC_BASE<"xvsrani.d.q", int_loongarch_lasx_xvsrani_d_q, LASX256DOpnd, LASX256DOpnd>;
+
+def XVSRARNI_B_H : LASX_I4_U<0b011101110101110001>,
+    LASX_U4_DESC_BASE<"xvsrarni.b.h", int_loongarch_lasx_xvsrarni_b_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSRARNI_H_W : LASX_I5_U<0b01110111010111001>,
+    LASX_N4_U5_DESC_BASE<"xvsrarni.h.w", int_loongarch_lasx_xvsrarni_h_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSRARNI_W_D : LASX_I6_U<0b0111011101011101>,
+    LASX_U6_DESC_BASE<"xvsrarni.w.d", int_loongarch_lasx_xvsrarni_w_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSRARNI_D_Q : LASX_I7_U<0b011101110101111>,
+    LASX_D_DESC_BASE<"xvsrarni.d.q", int_loongarch_lasx_xvsrarni_d_q, LASX256DOpnd>;
+
+def XVSSRANI_B_H : LASX_I4_U<0b011101110110000001>,
+    LASX_U4_DESC_BASE<"xvssrani.b.h", int_loongarch_lasx_xvssrani_b_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSSRANI_H_W : LASX_I5_U<0b01110111011000001>,
+    LASX_N4_U5_DESC_BASE<"xvssrani.h.w", int_loongarch_lasx_xvssrani_h_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSSRANI_W_D : LASX_I6_U<0b0111011101100001>,
+    LASX_U6_DESC_BASE<"xvssrani.w.d", int_loongarch_lasx_xvssrani_w_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSSRANI_D_Q : LASX_I7_U<0b011101110110001>,
+    LASX_D_DESC_BASE<"xvssrani.d.q", int_loongarch_lasx_xvssrani_d_q, LASX256DOpnd>;
+
+def XVSSRANI_BU_H : LASX_I4_U<0b011101110110010001>,
+    LASX_U4_DESC_BASE<"xvssrani.bu.h", int_loongarch_lasx_xvssrani_bu_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSSRANI_HU_W : LASX_I5_U<0b01110111011001001>,
+    LASX_N4_U5_DESC_BASE<"xvssrani.hu.w", int_loongarch_lasx_xvssrani_hu_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSSRANI_WU_D : LASX_I6_U<0b0111011101100101>,
+    LASX_U6_DESC_BASE<"xvssrani.wu.d", int_loongarch_lasx_xvssrani_wu_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSSRANI_DU_Q : LASX_I7_U<0b011101110110011>,
+    LASX_D_DESC_BASE<"xvssrani.du.q", int_loongarch_lasx_xvssrani_du_q, LASX256DOpnd>;
+
+def XVSSRARNI_B_H : LASX_I4_U<0b011101110110100001>,
+    LASX_U4_DESC_BASE<"xvssrarni.b.h", int_loongarch_lasx_xvssrarni_b_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSSRARNI_H_W : LASX_I5_U<0b01110111011010001>,
+    LASX_N4_U5_DESC_BASE<"xvssrarni.h.w", int_loongarch_lasx_xvssrarni_h_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSSRARNI_W_D : LASX_I6_U<0b0111011101101001>,
+    LASX_U6_DESC_BASE<"xvssrarni.w.d", int_loongarch_lasx_xvssrarni_w_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSSRARNI_D_Q : LASX_I7_U<0b011101110110101>,
+    LASX_D_DESC_BASE<"xvssrarni.d.q", int_loongarch_lasx_xvssrarni_d_q, LASX256DOpnd>;
+
+def XVSSRARNI_BU_H : LASX_I4_U<0b011101110110110001>,
+    LASX_U4_DESC_BASE<"xvssrarni.bu.h", int_loongarch_lasx_xvssrarni_bu_h, uimm4, immZExt4, LASX256BOpnd>;
+
+def XVSSRARNI_HU_W : LASX_I5_U<0b01110111011011001>,
+    LASX_N4_U5_DESC_BASE<"xvssrarni.hu.w", int_loongarch_lasx_xvssrarni_hu_w, uimm5, immZExt5, LASX256HOpnd>;
+
+def XVSSRARNI_WU_D : LASX_I6_U<0b0111011101101101>,
+    LASX_U6_DESC_BASE<"xvssrarni.wu.d", int_loongarch_lasx_xvssrarni_wu_d, uimm6, immZExt6, LASX256WOpnd>;
+
+def XVSSRARNI_DU_Q : LASX_I7_U<0b011101110110111>,
+    LASX_D_DESC_BASE<"xvssrarni.du.q", int_loongarch_lasx_xvssrarni_du_q, LASX256DOpnd>;
+
+def XVEXTRINS_B : LASX_I8_U<0b01110111100011>,
+    LASX_2R_3R_U8_DESC_BASE<"xvextrins.b", int_loongarch_lasx_xvextrins_b, LASX256BOpnd, LASX256BOpnd>;
+
+def XVEXTRINS_H : LASX_I8_U<0b01110111100010>,
+    LASX_2R_3R_U8_DESC_BASE<"xvextrins.h", int_loongarch_lasx_xvextrins_h, LASX256HOpnd, LASX256HOpnd>;
+
+def XVEXTRINS_W : LASX_I8_U<0b01110111100001>,
+    LASX_2R_3R_U8_DESC_BASE<"xvextrins.w", int_loongarch_lasx_xvextrins_w, LASX256WOpnd, LASX256WOpnd>;
+
+def XVEXTRINS_D : LASX_I8_U<0b01110111100000>,
+    LASX_2R_3R_U8_DESC_BASE<"xvextrins.d", int_loongarch_lasx_xvextrins_d, LASX256DOpnd, LASX256DOpnd>;
+
+def XVSHUF4I_B : LASX_I8_U<0b01110111100100>,
+    LASX_I8_SHF_DESC_BASE<"xvshuf4i.b", int_loongarch_lasx_xvshuf4i_b, LASX256BOpnd>;
+
+def XVSHUF4I_H : LASX_I8_U<0b01110111100101>,
+    LASX_I8_SHF_DESC_BASE<"xvshuf4i.h", int_loongarch_lasx_xvshuf4i_h, LASX256HOpnd>;
+
+def XVSHUF4I_W : LASX_I8_U<0b01110111100110>,
+    LASX_I8_SHF_DESC_BASE<"xvshuf4i.w", int_loongarch_lasx_xvshuf4i_w, LASX256WOpnd>;
+
LASX_I8_O4_SHF_DESC_BASE<"xvshuf4i.d", int_loongarch_lasx_xvshuf4i_d, LASX256DOpnd>; + + +def XVBITSELI_B : LASX_I8_U<0b01110111110001>, + LASX_2R_3R_U8_DESC_BASE<"xvbitseli.b", int_loongarch_lasx_xvbitseli_b, LASX256BOpnd, LASX256BOpnd>; + + +def XVANDI_B : LASX_I8_U<0b01110111110100>, + LASX_2R_U8_DESC_BASE<"xvandi.b", int_loongarch_lasx_xvandi_b, LASX256BOpnd, LASX256BOpnd>; + + +def XVORI_B : LASX_I8_U<0b01110111110101>, + LASX_2R_U8_DESC_BASE<"xvori.b", int_loongarch_lasx_xvori_b, LASX256BOpnd, LASX256BOpnd>; + + +def XVXORI_B : LASX_I8_U<0b01110111110110>, + LASX_2R_U8_DESC_BASE<"xvxori.b", int_loongarch_lasx_xvxori_b, LASX256BOpnd, LASX256BOpnd>; + + +def XVNORI_B : LASX_I8_U<0b01110111110111>, + LASX_2R_U8_DESC_BASE<"xvnori.b", int_loongarch_lasx_xvnori_b, LASX256BOpnd, LASX256BOpnd>; + + +def XVLDI : LASX_1R_I13<0b01110111111000>, + LASX_I13_DESC_BASE<"xvldi", int_loongarch_lasx_xvldi, i32, simm13Op, LASX256DOpnd>; + + +def XVLDI_B : LASX_1R_I13_I10<0b01110111111000000>, + LASX_I13_DESC_BASE_10<"xvldi", int_loongarch_lasx_xvrepli_b, simm10, immZExt10, LASX256BOpnd>; + +def XVLDI_H : LASX_1R_I13_I10<0b01110111111000001>, + LASX_I13_DESC_BASE_10<"xvldi", int_loongarch_lasx_xvrepli_h, simm10, immZExt10, LASX256HOpnd>; + +def XVLDI_W : LASX_1R_I13_I10<0b01110111111000010>, + LASX_I13_DESC_BASE_10<"xvldi", int_loongarch_lasx_xvrepli_w, simm10, immZExt10, LASX256WOpnd>; + +def XVLDI_D : LASX_1R_I13_I10<0b01110111111000011>, + LASX_I13_DESC_BASE_10<"xvldi", int_loongarch_lasx_xvrepli_d, simm10, immZExt10, LASX256DOpnd>; + + +def XVPERMI_W : LASX_I8_U<0b01110111111001>, + LASX_2R_3R_U8_DESC_BASE<"xvpermi.w", int_loongarch_lasx_xvpermi_w, LASX256WOpnd, LASX256WOpnd>; + +def XVPERMI_D : LASX_I8_U<0b01110111111010>, + LASX_2R_U8_DESC_BASE<"xvpermi.d", int_loongarch_lasx_xvpermi_d, LASX256DOpnd, LASX256DOpnd>; + +def XVPERMI_Q : LASX_I8_U<0b01110111111011>, + LASX_2R_3R_U8_DESC_BASE<"xvpermi.q", int_loongarch_lasx_xvpermi_q, LASX256BOpnd, LASX256BOpnd>; + + +//Pat + +class LASXBitconvertPat preds = [HasLASX]> : + LASXPat<(DstVT (bitconvert SrcVT:$src)), + (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>; + +// These are endian-independent because the element size doesnt change +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; + +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; + +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; + +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; + +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; + +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; + +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; +def : LASXBitconvertPat; + +class LASX_XINSERT_PSEUDO_BASE : + LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ImmOp:$n, ROFS:$fs), + [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, Imm:$n))]> { + bit usesCustomInserter = 1; + string Constraints = "$xd = $xd_in"; +} + +class XINSERT_H_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; + +class XINSERT_H64_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; + +def XINSERT_H_PSEUDO : XINSERT_H_PSEUDO_DESC; +def XINSERT_H64_PSEUDO : XINSERT_H64_PSEUDO_DESC; + +class XINSERT_B_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE; +def XINSERT_B_PSEUDO : 
+def XINSERT_B_PSEUDO : XINSERT_B_PSEUDO_DESC;
+
+class LASX_COPY_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
+                            Operand ImmOp, PatLeaf Imm,
+                            RegisterClass RCD, RegisterOperand RCWS> :
+    LASXPseudo<(outs RCD:$xd), (ins RCWS:$xj, ImmOp:$n),
+               [(set RCD:$xd, (OpNode (VecTy RCWS:$xj), Imm:$n))]> {
+  bit usesCustomInserter = 1;
+}
+
+class XCOPY_FW_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE;
+class XCOPY_FD_PSEUDO_DESC : LASX_COPY_PSEUDO_BASE;
+def XCOPY_FW_PSEUDO : XCOPY_FW_PSEUDO_DESC;
+def XCOPY_FD_PSEUDO : XCOPY_FD_PSEUDO_DESC;
+
+class LASX_XINSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+                                    RegisterOperand ROXD, RegisterOperand ROFS,
+                                    RegisterOperand ROIdx> :
+    LASXPseudo<(outs ROXD:$xd), (ins ROXD:$xd_in, ROIdx:$n, ROFS:$fs),
+               [(set ROXD:$xd, (OpNode (Ty ROXD:$xd_in), ROFS:$fs, ROIdx:$n))]> {
+  bit usesCustomInserter = 1;
+  string Constraints = "$xd = $xd_in";
+}
+
+class XINSERT_FW_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE;
+def XINSERT_FW_PSEUDO : XINSERT_FW_PSEUDO_DESC;
+
+class XINSERT_FW_VIDX_PSEUDO_DESC :
+    LASX_XINSERT_VIDX_PSEUDO_BASE;
+class XINSERT_FW_VIDX64_PSEUDO_DESC :
+    LASX_XINSERT_VIDX_PSEUDO_BASE;
+
+def XINSERT_FW_VIDX_PSEUDO : XINSERT_FW_VIDX_PSEUDO_DESC;
+def XINSERT_FW_VIDX64_PSEUDO : XINSERT_FW_VIDX64_PSEUDO_DESC;
+
+class XINSERT_B_VIDX64_PSEUDO_DESC :
+    LASX_XINSERT_VIDX_PSEUDO_BASE;
+
+def XINSERT_B_VIDX64_PSEUDO : XINSERT_B_VIDX64_PSEUDO_DESC;
+
+class XINSERT_B_VIDX_PSEUDO_DESC :
+    LASX_XINSERT_VIDX_PSEUDO_BASE;
+
+def XINSERT_B_VIDX_PSEUDO : XINSERT_B_VIDX_PSEUDO_DESC;
+
+class XINSERTPostRA<RegisterClass RC, RegisterClass RD, RegisterClass RE> :
+    LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+def XINSERT_B_VIDX_PSEUDO_POSTRA : XINSERTPostRA;
+def XINSERT_B_VIDX64_PSEUDO_POSTRA : XINSERTPostRA;
+def XINSERT_FW_VIDX_PSEUDO_POSTRA : XINSERTPostRA;
+def XINSERT_FW_VIDX64_PSEUDO_POSTRA : XINSERTPostRA;
+
+class XINSERT_FD_PSEUDO_DESC : LASX_XINSERT_PSEUDO_BASE;
+
+def XINSERT_FD_PSEUDO : XINSERT_FD_PSEUDO_DESC;
+
+class LASX_2R_FILL_PSEUDO_BASE<SDPatternOperator OpNode, RegisterClass RCWD,
+                               RegisterClass RCWS> :
+    LASXPseudo<(outs RCWD:$xd), (ins RCWS:$fs),
+               [(set RCWD:$xd, (OpNode RCWS:$fs))]> {
+  let usesCustomInserter = 1;
+}
+
+class XFILL_FW_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE;
+class XFILL_FD_PSEUDO_DESC : LASX_2R_FILL_PSEUDO_BASE;
+def XFILL_FW_PSEUDO : XFILL_FW_PSEUDO_DESC;
+def XFILL_FD_PSEUDO : XFILL_FD_PSEUDO_DESC;
+
+class LASX_CONCAT_VECTORS_PSEUDO_BASE<ValueType Ty, ValueType SubTy,
+                                      RegisterOperand ROXD,
+                                      RegisterOperand ROXJ,
+                                      RegisterOperand ROXK> :
+    LASXPseudo<(outs ROXD:$xd), (ins ROXJ:$xs, ROXK:$xt),
+               [(set ROXD:$xd, (Ty (concat_vectors (SubTy ROXJ:$xs),
+                                                   (SubTy ROXK:$xt))))]> {
+  bit usesCustomInserter = 1;
+}
+
+class CONCAT_VECTORS_B_PSEUDO_DESC :
+    LASX_CONCAT_VECTORS_PSEUDO_BASE;
+class CONCAT_VECTORS_H_PSEUDO_DESC :
+    LASX_CONCAT_VECTORS_PSEUDO_BASE;
+class CONCAT_VECTORS_W_PSEUDO_DESC :
+    LASX_CONCAT_VECTORS_PSEUDO_BASE;
+class CONCAT_VECTORS_D_PSEUDO_DESC :
+    LASX_CONCAT_VECTORS_PSEUDO_BASE;
+
+class CONCAT_VECTORS_FW_PSEUDO_DESC :
+    LASX_CONCAT_VECTORS_PSEUDO_BASE;
+class CONCAT_VECTORS_FD_PSEUDO_DESC :
+    LASX_CONCAT_VECTORS_PSEUDO_BASE;
+
+def CONCAT_VECTORS_B_PSEUDO : CONCAT_VECTORS_B_PSEUDO_DESC;
+def CONCAT_VECTORS_H_PSEUDO : CONCAT_VECTORS_H_PSEUDO_DESC;
+def CONCAT_VECTORS_W_PSEUDO : CONCAT_VECTORS_W_PSEUDO_DESC;
+def CONCAT_VECTORS_D_PSEUDO : CONCAT_VECTORS_D_PSEUDO_DESC;
+def CONCAT_VECTORS_FW_PSEUDO : CONCAT_VECTORS_FW_PSEUDO_DESC;
+def CONCAT_VECTORS_FD_PSEUDO : CONCAT_VECTORS_FD_PSEUDO_DESC;
+
+class LASX_COPY_GPR_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
+                                RegisterOperand ROXD, RegisterOperand ROFS,
+                                RegisterOperand ROIdx> :
+    LASXPseudo<(outs ROXD:$xd), (ins ROFS:$xj, ROIdx:$n),
+               [(set ROXD:$xd, (OpNode (VecTy ROFS:$xj), ROIdx:$n))]> {
+  bit usesCustomInserter = 1;
+}
+
+class XCOPY_FW_GPR_PSEUDO_DESC : LASX_COPY_GPR_PSEUDO_BASE;
+def XCOPY_FW_GPR_PSEUDO : XCOPY_FW_GPR_PSEUDO_DESC;
+
+let isCodeGenOnly = 1 in {
+
+def XVLD_H : LASX_I12_S<0b0010110010>,
+    LASX_LD<"xvld", load, v16i16, LASX256HOpnd, mem>;
+
+def XVLD_W : LASX_I12_S<0b0010110010>,
+    LASX_LD<"xvld", load, v8i32, LASX256WOpnd, mem>;
+
+def XVLD_D : LASX_I12_S<0b0010110010>,
+    LASX_LD<"xvld", load, v4i64, LASX256DOpnd, mem>;
+
+def XVST_H : LASX_I12_S<0b0010110011>,
+    LASX_ST<"xvst", store, v16i16, LASX256HOpnd, mem_simm12>;
+
+def XVST_W : LASX_I12_S<0b0010110011>,
+    LASX_ST<"xvst", store, v8i32, LASX256WOpnd, mem_simm12>;
+
+def XVST_D : LASX_I12_S<0b0010110011>,
+    LASX_ST<"xvst", store, v4i64, LASX256DOpnd, mem_simm12>;
+
+def XVREPLVE_W_N : LASX_3R_1GP<0b01110101001000110>,
+    LASX_3R_VREPLVE_DESC_BASE_N<"xvreplve.w", LASX256WOpnd>;
+
+def XVANDI_B_N : LASX_I8_U<0b01110111110100>,
+    LASX_BIT_U8_DESC_BASE<"xvandi.b", and, xvsplati8_uimm8, LASX256BOpnd>;
+
+def XVXORI_B_N : LASX_I8_U<0b01110111110110>,
+    LASX_BIT_U8_DESC_BASE<"xvxori.b", xor, xvsplati8_uimm8, LASX256BOpnd>;
+
+def XVSRAI_B_N : LASX_I3_U<0b0111011100110100001>,
+    LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrai.b", sra, xvsplati8_uimm3, LASX256BOpnd>;
+
+def XVSRAI_H_N : LASX_I4_U<0b011101110011010001>,
+    LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrai.h", sra, xvsplati16_uimm4, LASX256HOpnd>;
+
+def XVSRAI_W_N : LASX_I5_U<0b01110111001101001>,
+    LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrai.w", sra, xvsplati32_uimm5, LASX256WOpnd>;
+
+def XVSRAI_D_N : LASX_I6_U<0b0111011100110101>,
+    LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrai.d", sra, xvsplati64_uimm6, LASX256DOpnd>;
+
+def XVSLLI_B_N : LASX_I3_U<0b0111011100101100001>,
+    LASX_BIT_U3_VREPLVE_DESC_BASE<"xvslli.b", shl, xvsplati8_uimm3, LASX256BOpnd>;
+
+def XVSLLI_H_N : LASX_I4_U<0b011101110010110001>,
+    LASX_BIT_U4_VREPLVE_DESC_BASE<"xvslli.h", shl, xvsplati16_uimm4, LASX256HOpnd>;
+
+def XVSLLI_W_N : LASX_I5_U<0b01110111001011001>,
+    LASX_BIT_U5_VREPLVE_DESC_BASE<"xvslli.w", shl, xvsplati32_uimm5, LASX256WOpnd>;
+
+def XVSLLI_D_N : LASX_I6_U<0b0111011100101101>,
+    LASX_BIT_U6_VREPLVE_DESC_BASE<"xvslli.d", shl, xvsplati64_uimm6, LASX256DOpnd>;
+
+def XVSRLI_B_N : LASX_I3_U<0b0111011100110000001>,
+    LASX_BIT_U3_VREPLVE_DESC_BASE<"xvsrli.b", srl, xvsplati8_uimm3, LASX256BOpnd>;
+
+def XVSRLI_H_N : LASX_I4_U<0b011101110011000001>,
+    LASX_BIT_U4_VREPLVE_DESC_BASE<"xvsrli.h", srl, xvsplati16_uimm4, LASX256HOpnd>;
+
+def XVSRLI_W_N : LASX_I5_U<0b01110111001100001>,
+    LASX_BIT_U5_VREPLVE_DESC_BASE<"xvsrli.w", srl, xvsplati32_uimm5, LASX256WOpnd>;
+
+def XVSRLI_D_N : LASX_I6_U<0b0111011100110001>,
+    LASX_BIT_U6_VREPLVE_DESC_BASE<"xvsrli.d", srl, xvsplati64_uimm6, LASX256DOpnd>;
+
+def XVMAXI_B_N : LASX_I5<0b01110110100100000>,
+    LASX_I5_DESC_BASE<"xvmaxi.b", smax, xvsplati8_simm5, LASX256BOpnd>;
+
+def XVMAXI_H_N : LASX_I5<0b01110110100100001>,
+    LASX_I5_DESC_BASE<"xvmaxi.h", smax, xvsplati16_simm5, LASX256HOpnd>;
+
+def XVMAXI_W_N : LASX_I5<0b01110110100100010>,
+    LASX_I5_DESC_BASE<"xvmaxi.w", smax, xvsplati32_simm5, LASX256WOpnd>;
+
+def XVMAXI_D_N : LASX_I5<0b01110110100100011>,
+    LASX_I5_DESC_BASE<"xvmaxi.d", smax, xvsplati64_simm5, LASX256DOpnd>;
+
+def XVMINI_B_N : LASX_I5<0b01110110100100100>,
+    LASX_I5_DESC_BASE<"xvmini.b", smin, xvsplati8_simm5, LASX256BOpnd>;
+
+def XVMINI_H_N : LASX_I5<0b01110110100100101>,
+    LASX_I5_DESC_BASE<"xvmini.h", smin, xvsplati16_simm5, LASX256HOpnd>;
+
+def XVMINI_W_N : LASX_I5<0b01110110100100110>,
+    LASX_I5_DESC_BASE<"xvmini.w", smin, xvsplati32_simm5, LASX256WOpnd>;
+
+def XVMINI_D_N : LASX_I5<0b01110110100100111>,
+    LASX_I5_DESC_BASE<"xvmini.d", smin, xvsplati64_simm5, LASX256DOpnd>;
+
LASX_I5_U_DESC_BASE<"xvmaxi.bu", umax, xvsplati8_uimm5, LASX256BOpnd>; + +def XVMAXI_HU_N : LASX_I5_U<0b01110110100101001>, + LASX_I5_U_DESC_BASE<"xvmaxi.hu", umax, xvsplati16_uimm5, LASX256HOpnd>; + +def XVMAXI_WU_N : LASX_I5_U<0b01110110100101010>, + LASX_I5_U_DESC_BASE<"xvmaxi.wu", umax, xvsplati32_uimm5, LASX256WOpnd>; + +def XVMAXI_DU_N : LASX_I5_U<0b01110110100101011>, + LASX_I5_U_DESC_BASE<"xvmaxi.du", umax, xvsplati64_uimm5, LASX256DOpnd>; + + +def XVMINI_BU_N : LASX_I5_U<0b01110110100101100>, + LASX_I5_U_DESC_BASE<"xvmini.bu", umin, xvsplati8_uimm5, LASX256BOpnd>; + +def XVMINI_HU_N : LASX_I5_U<0b01110110100101101>, + LASX_I5_U_DESC_BASE<"xvmini.hu", umin, xvsplati16_uimm5, LASX256HOpnd>; + +def XVMINI_WU_N : LASX_I5_U<0b01110110100101110>, + LASX_I5_U_DESC_BASE<"xvmini.wu", umin, xvsplati32_uimm5, LASX256WOpnd>; + +def XVMINI_DU_N : LASX_I5_U<0b01110110100101111>, + LASX_I5_U_DESC_BASE<"xvmini.du", umin, xvsplati64_uimm5, LASX256DOpnd>; + + +def XVSEQI_B_N : LASX_I5<0b01110110100000000>, + LASX_I5_SETCC_DESC_BASE<"xvseqi.b", SETEQ, v32i8, xvsplati8_simm5, LASX256BOpnd>; + +def XVSEQI_H_N : LASX_I5<0b01110110100000001>, + LASX_I5_SETCC_DESC_BASE<"xvseqi.h", SETEQ, v16i16, xvsplati16_simm5, LASX256HOpnd>; + +def XVSEQI_W_N : LASX_I5<0b01110110100000010>, + LASX_I5_SETCC_DESC_BASE<"xvseqi.w", SETEQ, v8i32, xvsplati32_simm5, LASX256WOpnd>; + +def XVSEQI_D_N : LASX_I5<0b01110110100000011>, + LASX_I5_SETCC_DESC_BASE<"xvseqi.d", SETEQ, v4i64, xvsplati64_simm5, LASX256DOpnd>; + + +def XVSLEI_B_N : LASX_I5<0b01110110100000100>, + LASX_I5_SETCC_DESC_BASE<"xvslei.b", SETLE, v32i8, xvsplati8_simm5, LASX256BOpnd>; + +def XVSLEI_H_N : LASX_I5<0b01110110100000101>, + LASX_I5_SETCC_DESC_BASE<"xvslei.h", SETLE, v16i16, xvsplati16_simm5, LASX256HOpnd>; + +def XVSLEI_W_N : LASX_I5<0b01110110100000110>, + LASX_I5_SETCC_DESC_BASE<"xvslei.w", SETLE, v8i32, xvsplati32_simm5, LASX256WOpnd>; + +def XVSLEI_D_N : LASX_I5<0b01110110100000111>, + LASX_I5_SETCC_DESC_BASE<"xvslei.d", SETLE, v4i64, xvsplati64_simm5, LASX256DOpnd>; + + +def XVSLEI_BU_N : LASX_I5_U<0b01110110100001000>, + LASX_I5_U_SETCC_DESC_BASE<"xvslei.bu", SETULE, v32i8, xvsplati8_uimm5, LASX256BOpnd>; + +def XVSLEI_HU_N : LASX_I5_U<0b01110110100001001>, + LASX_I5_U_SETCC_DESC_BASE<"xvslei.hu", SETULE, v16i16, xvsplati16_uimm5, LASX256HOpnd>; + +def XVSLEI_WU_N : LASX_I5_U<0b01110110100001010>, + LASX_I5_U_SETCC_DESC_BASE<"xvslei.wu", SETULE, v8i32, xvsplati32_uimm5, LASX256WOpnd>; + +def XVSLEI_DU_N : LASX_I5_U<0b01110110100001011>, + LASX_I5_U_SETCC_DESC_BASE<"xvslei.du", SETULE, v4i64, xvsplati64_uimm5, LASX256DOpnd>; + + +def XVSLTI_B_N : LASX_I5<0b01110110100001100>, + LASX_I5_SETCC_DESC_BASE<"xvslti.b", SETLT, v32i8, xvsplati8_simm5, LASX256BOpnd>; + +def XVSLTI_H_N : LASX_I5<0b01110110100001101>, + LASX_I5_SETCC_DESC_BASE<"xvslti.h", SETLT, v16i16, xvsplati16_simm5, LASX256HOpnd>; + +def XVSLTI_W_N : LASX_I5<0b01110110100001110>, + LASX_I5_SETCC_DESC_BASE<"xvslti.w", SETLT, v8i32, xvsplati32_simm5, LASX256WOpnd>; + +def XVSLTI_D_N : LASX_I5<0b01110110100001111>, + LASX_I5_SETCC_DESC_BASE<"xvslti.d", SETLT, v4i64, xvsplati64_simm5, LASX256DOpnd>; + + +def XVSLTI_BU_N : LASX_I5_U<0b01110110100010000>, + LASX_I5_U_SETCC_DESC_BASE<"xvslti.bu", SETULT, v32i8, xvsplati8_uimm5, LASX256BOpnd>; + +def XVSLTI_HU_N : LASX_I5_U<0b01110110100010001>, + LASX_I5_U_SETCC_DESC_BASE<"xvslti.hu", SETULT, v16i16, xvsplati16_uimm5, LASX256HOpnd>; + +def XVSLTI_WU_N : LASX_I5_U<0b01110110100010010>, + LASX_I5_U_SETCC_DESC_BASE<"xvslti.wu", SETULT, v8i32, 
+def XVSLTI_WU_N : LASX_I5_U<0b01110110100010010>,
+    LASX_I5_U_SETCC_DESC_BASE<"xvslti.wu", SETULT, v8i32, xvsplati32_uimm5, LASX256WOpnd>;
+
+def XVSLTI_DU_N : LASX_I5_U<0b01110110100010011>,
+    LASX_I5_U_SETCC_DESC_BASE<"xvslti.du", SETULT, v4i64, xvsplati64_uimm5, LASX256DOpnd>;
+
+def XVADDI_BU_N : LASX_I5_U<0b01110110100010100>,
+    LASX_I5_U_DESC_BASE<"xvaddi.bu", add, xvsplati8_uimm5, LASX256BOpnd>;
+
+def XVADDI_HU_N : LASX_I5_U<0b01110110100010101>,
+    LASX_I5_U_DESC_BASE<"xvaddi.hu", add, xvsplati16_uimm5, LASX256HOpnd>;
+
+def XVADDI_WU_N : LASX_I5_U<0b01110110100010110>,
+    LASX_I5_U_DESC_BASE<"xvaddi.wu", add, xvsplati32_uimm5, LASX256WOpnd>;
+
+def XVADDI_DU_N : LASX_I5_U<0b01110110100010111>,
+    LASX_I5_U_DESC_BASE<"xvaddi.du", add, xvsplati64_uimm5, LASX256DOpnd>;
+
+def XVSUBI_BU_N : LASX_I5_U<0b01110110100011000>,
+    LASX_I5_U_DESC_BASE<"xvsubi.bu", sub, xvsplati8_uimm5, LASX256BOpnd>;
+
+def XVSUBI_HU_N : LASX_I5_U<0b01110110100011001>,
+    LASX_I5_U_DESC_BASE<"xvsubi.hu", sub, xvsplati16_uimm5, LASX256HOpnd>;
+
+def XVSUBI_WU_N : LASX_I5_U<0b01110110100011010>,
+    LASX_I5_U_DESC_BASE<"xvsubi.wu", sub, xvsplati32_uimm5, LASX256WOpnd>;
+
+def XVSUBI_DU_N : LASX_I5_U<0b01110110100011011>,
+    LASX_I5_U_DESC_BASE<"xvsubi.du", sub, xvsplati64_uimm5, LASX256DOpnd>;
+
+def XVPERMI_QH : LASX_I8_U<0b01110111111011>,
+    LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256HOpnd, LASX256HOpnd>;
+
+def XVPERMI_QW : LASX_I8_U<0b01110111111011>,
+    LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256WOpnd, LASX256WOpnd>;
+
+def XVPERMI_QD : LASX_I8_U<0b01110111111011>,
+    LASX_2RN_3R_U8_DESC_BASE<"xvpermi.q", LASX256DOpnd, LASX256DOpnd>;
+
+def XVBITSELI_B_N : LASX_I8_U<0b01110111110001>,
+    LASX_2R_3R_U8_SELECT<"xvbitseli.b", vselect, LASX256BOpnd, LASX256BOpnd>;
+
+}
+
+def : LASXPat<(v8f32 (load addrimm12:$addr)), (XVLD_W addrimm12:$addr)>;
+def : LASXPat<(v4f64 (load addrimm12:$addr)), (XVLD_D addrimm12:$addr)>;
+
+def XVST_FW : LASXPat<(store (v8f32 LASX256W:$xj), addrimm12:$addr),
+                      (XVST_W LASX256W:$xj, addrimm12:$addr)>;
+def XVST_FD : LASXPat<(store (v4f64 LASX256D:$xj), addrimm12:$addr),
+                      (XVST_D LASX256D:$xj, addrimm12:$addr)>;
+
+def XVNEG_FW : LASXPat<(fneg (v8f32 LASX256W:$xj)),
+                       (XVBITREVI_W LASX256W:$xj, 31)>;
+def XVNEG_FD : LASXPat<(fneg (v4f64 LASX256D:$xj)),
+                       (XVBITREVI_D LASX256D:$xj, 63)>;
+
+def : LASXPat<(v4i64 (LoongArchVABSD v4i64:$xj, v4i64:$xk, (i32 0))),
+              (v4i64 (XVABSD_D $xj, $xk))>;
+
+def : LASXPat<(v8i32 (LoongArchVABSD v8i32:$xj, v8i32:$xk, (i32 0))),
+              (v8i32 (XVABSD_W $xj, $xk))>;
+
+def : LASXPat<(v16i16 (LoongArchVABSD v16i16:$xj, v16i16:$xk, (i32 0))),
+              (v16i16 (XVABSD_H $xj, $xk))>;
+
+def : LASXPat<(v32i8 (LoongArchVABSD v32i8:$xj, v32i8:$xk, (i32 0))),
+              (v32i8 (XVABSD_B $xj, $xk))>;
+
+def : LASXPat<(v4i64 (LoongArchUVABSD v4i64:$xj, v4i64:$xk, (i32 0))),
+              (v4i64 (XVABSD_DU $xj, $xk))>;
+
+def : LASXPat<(v8i32 (LoongArchUVABSD v8i32:$xj, v8i32:$xk, (i32 0))),
+              (v8i32 (XVABSD_WU $xj, $xk))>;
+
+def : LASXPat<(v16i16 (LoongArchUVABSD v16i16:$xj, v16i16:$xk, (i32 0))),
+              (v16i16 (XVABSD_HU $xj, $xk))>;
+
+def : LASXPat<(v32i8 (LoongArchUVABSD v32i8:$xj, v32i8:$xk, (i32 0))),
+              (v32i8 (XVABSD_BU $xj, $xk))>;
+
+def : LASXPat<(or v32i8:$vj, (shl vsplat_imm_eq_1, v32i8:$vk)),
+              (XVBITSET_B v32i8:$vj, v32i8:$vk)>;
+def : LASXPat<(or v16i16:$vj, (shl vsplat_imm_eq_1, v16i16:$vk)),
+              (XVBITSET_H v16i16:$vj, v16i16:$vk)>;
+def : LASXPat<(or v8i32:$vj, (shl vsplat_imm_eq_1, v8i32:$vk)),
+              (XVBITSET_W v8i32:$vj, v8i32:$vk)>;
+def : LASXPat<(or v4i64:$vj, (shl vsplat_imm_eq_1, v4i64:$vk)),
+              (XVBITSET_D v4i64:$vj, v4i64:$vk)>;
+
+def : LASXPat<(xor v32i8:$vj, (shl xvsplat_imm_eq_1, v32i8:$vk)),
+              (XVBITREV_B v32i8:$vj, v32i8:$vk)>;
+def : LASXPat<(xor v16i16:$vj, (shl xvsplat_imm_eq_1, v16i16:$vk)),
+              (XVBITREV_H v16i16:$vj, v16i16:$vk)>;
+def : LASXPat<(xor v8i32:$vj, (shl xvsplat_imm_eq_1, v8i32:$vk)),
+              (XVBITREV_W v8i32:$vj, v8i32:$vk)>;
+def : LASXPat<(xor v4i64:$vj, (shl (v4i64 xvsplati64_imm_eq_1), v4i64:$vk)),
+              (XVBITREV_D v4i64:$vj, v4i64:$vk)>;
+
+def : LASXPat<(and v32i8:$vj, (xor (shl vsplat_imm_eq_1, v32i8:$vk), immAllOnesV)),
+              (XVBITCLR_B v32i8:$vj, v32i8:$vk)>;
+def : LASXPat<(and v16i16:$vj, (xor (shl vsplat_imm_eq_1, v16i16:$vk), immAllOnesV)),
+              (XVBITCLR_H v16i16:$vj, v16i16:$vk)>;
+def : LASXPat<(and v8i32:$vj, (xor (shl vsplat_imm_eq_1, v8i32:$vk), immAllOnesV)),
+              (XVBITCLR_W v8i32:$vj, v8i32:$vk)>;
+def : LASXPat<(and v4i64:$vj, (xor (shl (v4i64 vsplati64_imm_eq_1), v4i64:$vk),
+                                   (bitconvert (v8i32 immAllOnesV)))),
+              (XVBITCLR_D v4i64:$vj, v4i64:$vk)>;
+
+def xvsplati64_imm_eq_63 : PatLeaf<(bitconvert (v8i32 (build_vector))), [{
+  APInt Imm;
+  SDNode *BV = N->getOperand(0).getNode();
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
+}]>;
+
+def xvsplati8imm7 : PatFrag<(ops node:$wt),
+                            (and node:$wt, (xvsplati8 immi32Cst7))>;
+def xvsplati16imm15 : PatFrag<(ops node:$wt),
+                              (and node:$wt, (xvsplati16 immi32Cst15))>;
+def xvsplati32imm31 : PatFrag<(ops node:$wt),
+                              (and node:$wt, (xvsplati32 immi32Cst31))>;
+def xvsplati64imm63 : PatFrag<(ops node:$wt),
+                              (and node:$wt, xvsplati64_imm_eq_63)>;
+
+class LASXShiftPat<SDNode Node, ValueType VT, Instruction Insn, dag Vec> :
+    LASXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))),
+            (VT (Insn VT:$vs, VT:$vt))>;
+
+class LASXBitPat<SDNode Node, ValueType VT, Instruction Insn, PatFrag Frag> :
+    LASXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))),
+            (VT (Insn VT:$vs, VT:$vt))>;
+
+multiclass LASXShiftPats<SDNode Node, string Insn> {
+  def : LASXShiftPat<Node, v32i8, !cast<Instruction>(Insn#_B),
+                     (xvsplati8 immi32Cst7)>;
+  def : LASXShiftPat<Node, v16i16, !cast<Instruction>(Insn#_H),
+                     (xvsplati16 immi32Cst15)>;
+  def : LASXShiftPat<Node, v8i32, !cast<Instruction>(Insn#_W),
+                     (xvsplati32 immi32Cst31)>;
+  def : LASXPat<(v4i64 (Node v4i64:$vs, (v4i64 (and v4i64:$vt,
+                                                xvsplati64_imm_eq_63)))),
+                (v4i64 (!cast<Instruction>(Insn#_D) v4i64:$vs, v4i64:$vt))>;
+}
+
+multiclass LASXBitPats<SDNode Node, string Insn> {
+  def : LASXBitPat<Node, v32i8, !cast<Instruction>(Insn#_B), xvsplati8imm7>;
+  def : LASXBitPat<Node, v16i16, !cast<Instruction>(Insn#_H), xvsplati16imm15>;
+  def : LASXBitPat<Node, v8i32, !cast<Instruction>(Insn#_W), xvsplati32imm31>;
+  def : LASXPat<(Node v4i64:$vs, (shl (v4i64 xvsplati64_imm_eq_1),
+                                      (xvsplati64imm63 v4i64:$vt))),
+                (v4i64 (!cast<Instruction>(Insn#_D) v4i64:$vs, v4i64:$vt))>;
+}
+
+defm : LASXShiftPats<shl, "XVSLL">;
+defm : LASXShiftPats<srl, "XVSRL">;
+defm : LASXShiftPats<sra, "XVSRA">;
+defm : LASXBitPats<xor, "XVBITREV">;
+defm : LASXBitPats<or, "XVBITSET">;
+
+def : LASXPat<(and v32i8:$vs, (xor (shl xvsplat_imm_eq_1,
+                                        (xvsplati8imm7 v32i8:$vt)),
+                                   immAllOnesV)),
+              (v32i8 (XVBITCLR_B v32i8:$vs, v32i8:$vt))>;
+def : LASXPat<(and v16i16:$vs, (xor (shl xvsplat_imm_eq_1,
+                                         (xvsplati16imm15 v16i16:$vt)),
+                                    immAllOnesV)),
+              (v16i16 (XVBITCLR_H v16i16:$vs, v16i16:$vt))>;
+def : LASXPat<(and v8i32:$vs, (xor (shl xvsplat_imm_eq_1,
+                                        (xvsplati32imm31 v8i32:$vt)),
+                                   immAllOnesV)),
+              (v8i32 (XVBITCLR_W v8i32:$vs, v8i32:$vt))>;
+def : LASXPat<(and v4i64:$vs, (xor (shl (v4i64 xvsplati64_imm_eq_1),
+                                        (xvsplati64imm63 v4i64:$vt)),
+                                   (bitconvert (v8i32 immAllOnesV)))),
+              (v4i64 (XVBITCLR_D v4i64:$vs, v4i64:$vt))>;
+
+def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1),
+                                         (f32 fpimm1), (f32 fpimm1), (f32 fpimm1),
+                                         (f32 fpimm1), (f32 fpimm1))), v8f32:$v),
+              (XVFRECIP_S v8f32:$v)>;
+
fpimm1), (f64 fpimm1))), v4f64:$v), + (XVFRECIP_D v4f64:$v)>; + +def : LASXPat<(fdiv (v8f32 fpimm1), v8f32:$v), + (XVFRECIP_S v8f32:$v)>; + +def : LASXPat<(fdiv (v4f64 fpimm1), v4f64:$v), + (XVFRECIP_D v4f64:$v)>; + + +def : LASXPat<(fdiv (v8f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), + (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), (fsqrt v8f32:$v)), + (XVFRSQRT_S v8f32:$v)>; + +def : LASXPat<(fdiv (v4f64 (build_vector (f64 fpimm1), (f64 fpimm1), (f64 fpimm1), (f64 fpimm1))), (fsqrt v4f64:$v)), + (XVFRSQRT_D v4f64:$v)>; + +def : LASXPat<(fdiv (v8f32 fpimm1), (fsqrt v8f32:$v)), + (XVFRSQRT_S v8f32:$v)>; + +def : LASXPat<(fdiv (v4f64 fpimm1), (fsqrt v4f64:$v)), + (XVFRSQRT_D v4f64:$v)>; + + +def : LASXPat <(extract_subvector v4f64:$vec, (i32 0)), + (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v8f32:$vec, (i32 0)), + (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v4i64:$vec, (i32 0)), + (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v8i32:$vec, (i32 0)), + (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v16i16:$vec, (i32 0)), + (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v32i8:$vec, (i32 0)), + (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>; + + + +def : LASXPat <(extract_subvector v4f64:$vec, (i64 0)), + (v2f64 (EXTRACT_SUBREG v4f64:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v8f32:$vec, (i64 0)), + (v4f32 (EXTRACT_SUBREG v8f32:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v4i64:$vec, (i64 0)), + (v2i64 (EXTRACT_SUBREG v4i64:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v8i32:$vec, (i64 0)), + (v4i32 (EXTRACT_SUBREG v8i32:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v16i16:$vec, (i64 0)), + (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub_128))>; + +def : LASXPat <(extract_subvector v32i8:$vec, (i64 0)), + (v16i8 (EXTRACT_SUBREG v32i8:$vec, sub_128))>; + + +def : LASXPat <(extract_subvector v4i64:$vec, (i32 2)), + (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>; + +def : LASXPat <(extract_subvector v8i32:$vec, (i32 4)), + (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>; + +def : LASXPat <(extract_subvector v16i16:$vec, (i32 8)), + (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>; + +def : LASXPat <(extract_subvector v32i8:$vec, (i32 16)), + (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>; + + +def : LASXPat <(extract_subvector v4i64:$vec, (i64 2)), + (v2i64 (EXTRACT_SUBREG (v4i64 (XVPERMI_QD v4i64:$vec, v4i64:$vec, (i32 1))), sub_128))>; + +def : LASXPat <(extract_subvector v8i32:$vec, (i64 4)), + (v4i32 (EXTRACT_SUBREG (v8i32 (XVPERMI_QW v8i32:$vec, v8i32:$vec, (i32 1))), sub_128))>; + +def : LASXPat <(extract_subvector v16i16:$vec, (i64 8)), + (v8i16 (EXTRACT_SUBREG (v16i16 (XVPERMI_QH v16i16:$vec, v16i16:$vec, (i32 1))), sub_128))>; + +def : LASXPat <(extract_subvector v32i8:$vec, (i64 16)), + (v16i8 (EXTRACT_SUBREG (v32i8 (XVPERMI_Q v32i8:$vec, v32i8:$vec, (i32 1))), sub_128))>; + + +def : LASXPat<(abs v4i64:$v), + (XVMAX_D v4i64:$v, (XVNEG_D v4i64:$v))>; + +def : LASXPat<(abs v8i32:$v), + (XVMAX_W v8i32:$v, (XVNEG_W v8i32:$v))>; + +def : LASXPat<(abs v16i16:$v), + (XVMAX_H v16i16:$v, (XVNEG_H v16i16:$v))>; + +def : LASXPat<(abs v32i8:$v), + (XVMAX_B v32i8:$v, (XVNEG_B 
v32i8:$v))>; + + +def : LASXPat<(sub (v32i8 immAllZerosV), v32i8:$v), + (XVNEG_B v32i8:$v)>; + +def : LASXPat<(sub (v16i16 immAllZerosV), v16i16:$v), + (XVNEG_H v16i16:$v)>; + +def : LASXPat<(sub (v8i32 immAllZerosV), v8i32:$v), + (XVNEG_W v8i32:$v)>; + +def : LASXPat<(sub (v4i64 immAllZerosV), v4i64:$v), + (XVNEG_D v4i64:$v)>; + + + +def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; + + +def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; + +def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i64 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; + + +def : LASXPat<(insert_subvector + (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i32 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i32 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 0)), + (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), LSX128D:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 0)), + (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 0)), + (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$src, sub_128)>; + +def : LASXPat<(insert_subvector + (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 0)), + (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$src, sub_128)>; + + +def : LASXPat<(insert_subvector + (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i32 2)), + (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector (v8i32 immAllZerosV), + (v4i32 LSX128W:$src), (i32 4)), + (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i32 8)), + (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i32 16)), + (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), + (v32i8 (INSERT_SUBREG 
(v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), (i32 32))>; + + +def : LASXPat<(insert_subvector + (v4i64 immAllZerosV), (v2i64 LSX128D:$src), (i64 2)), + (XVPERMI_QD (v4i64 (XVREPLGR2VR_D ZERO_64)), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v8i32 immAllZerosV), (v4i32 LSX128W:$src), (i64 4)), + (XVPERMI_QW (v8i32 (XVREPLGR2VR_W ZERO)), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v16i16 immAllZerosV), (v8i16 LSX128H:$src), (i64 8)), + (XVPERMI_QH (v16i16 (XVREPLGR2VR_H ZERO)), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector + (v32i8 immAllZerosV), (v16i8 LSX128B:$src), (i64 16)), + (XVPERMI_Q (v32i8 (XVREPLGR2VR_B ZERO)), + (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), (i32 32))>; + + +def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i32 2)), + (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i32 4)), + (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i32 8)), + (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i32 16)), + (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), + (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), (i32 32))>; + + +def : LASXPat<(insert_subvector undef, (v2i64 LSX128D:$src), (i64 2)), + (XVPERMI_QD (v4i64 (IMPLICIT_DEF)), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v4i32 LSX128W:$src), (i64 4)), + (XVPERMI_QW (v8i32 (IMPLICIT_DEF)), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v8i16 LSX128H:$src), (i64 8)), + (XVPERMI_QH (v16i16 (IMPLICIT_DEF)), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector undef, (v16i8 LSX128B:$src), (i64 16)), + (XVPERMI_Q (v32i8 (IMPLICIT_DEF)), + (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), (i32 32))>; + + +def : LASXPat<(sra + (v32i8 (add + (v32i8 (add LASX256B:$a, LASX256B:$b)), + (v32i8 (srl + (v32i8 (add LASX256B:$a, LASX256B:$b)), + (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7), + (i32 7),(i32 7),(i32 7),(i32 7)) + ) + ) + ) + ) + ), + (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (XVAVG_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; + +def : LASXPat<(sra + (v16i16 (add + (v16i16 (add LASX256H:$a, LASX256H:$b)), + (v16i16 (srl + (v16i16 (add LASX256H:$a, LASX256H:$b)), + (v16i16 (build_vector (i32 
15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15), + (i32 15),(i32 15),(i32 15),(i32 15)) + ) + ) + ) + ) + ), + (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (XVAVG_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; + +def : LASXPat<(sra + (v8i32 (add + (v8i32 (add LASX256W:$a, LASX256W:$b)), + (v8i32 (srl + (v8i32 (add LASX256W:$a, LASX256W:$b)), + (v8i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31), + (i32 31),(i32 31),(i32 31),(i32 31)) + ) + ) + ) + ) + ), + (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1) + ))), + (XVAVG_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; + +def : LASXPat<(sra + (v4i64 (add + (v4i64 (add LASX256D:$a, LASX256D:$b)), + (v4i64 (srl + (v4i64 (add LASX256D:$a, LASX256D:$b)), + (v4i64 (build_vector (i64 63),(i64 63),(i64 63),(i64 63))) + ) + ) + ) + ), + (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), + (XVAVG_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; + + + +def : LASXPat<(srl + (v32i8 (add LASX256B:$a, LASX256B:$b)), + (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (XVAVG_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; + +def : LASXPat<(srl + (v16i16 (add LASX256H:$a, LASX256H:$b)), + (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (XVAVG_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; + +def : LASXPat<(srl + (v8i32 (add LASX256W:$a, LASX256W:$b)), + (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), + (i32 1),(i32 1),(i32 1),(i32 1)) + ) + ), + (XVAVG_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; + +def : LASXPat<(srl + (v4i64 (add LASX256D:$a, LASX256D:$b)), + (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) + ) + ), + (XVAVG_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; + + + + + + +def : LASXPat<(mulhs LASX256D:$a, LASX256D:$b), + (XVMUH_D LASX256D:$a, LASX256D:$b)>; + +def : LASXPat<(mulhs LASX256W:$a, LASX256W:$b), + (XVMUH_W LASX256W:$a, LASX256W:$b)>; + +def : LASXPat<(mulhs LASX256H:$a, LASX256H:$b), + (XVMUH_H LASX256H:$a, LASX256H:$b)>; + +def : LASXPat<(mulhs LASX256B:$a, LASX256B:$b), + (XVMUH_B LASX256B:$a, LASX256B:$b)>; + + +def : LASXPat<(mulhu LASX256D:$a, LASX256D:$b), + (XVMUH_DU LASX256D:$a, LASX256D:$b)>; + +def : LASXPat<(mulhu LASX256W:$a, LASX256W:$b), + (XVMUH_WU LASX256W:$a, LASX256W:$b)>; + +def : LASXPat<(mulhu LASX256H:$a, LASX256H:$b), + (XVMUH_HU LASX256H:$a, LASX256H:$b)>; + +def : LASXPat<(mulhu LASX256B:$a, LASX256B:$b), + (XVMUH_BU LASX256B:$a, LASX256B:$b)>; + + +def : LASXPat<(LoongArchINSVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), + (XVINSVE0_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; + +def : LASXPat<(LoongArchINSVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), + (XVINSVE0_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; + + +def : LASXPat<(LoongArchXVPICKVE (v8i32 (bitconvert (v32i8 (build_vector + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 
0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0) + )))), (v8i32 LASX256W:$b), uimm3:$ui3), + (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, uimm3:$ui3)>; + +def : LASXPat<(LoongArchXVPICKVE (v4i64 (bitconvert (v32i8 (build_vector + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0) + )))), (v4i64 LASX256D:$b), uimm2:$ui2), + (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; + + +def : LASXPat<(LoongArchXVPICKVE (v8i32 (build_vector + (i32 0),(i32 0),(i32 0),(i32 0), + (i32 0),(i32 0),(i32 0),(i32 0) + )), (v8i32 LASX256W:$b), uimm3:$ui3), + (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$b, uimm3:$ui3)>; + +def : LASXPat<(LoongArchXVPICKVE (v4i64 (build_vector + (i64 0),(i64 0),(i64 0),(i64 0) + )), (v4i64 LASX256D:$b), uimm2:$ui2), + (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$b, uimm2:$ui2)>; + + +def : LASXPat<(LoongArchXVPICKVE (v8i32 LASX256W:$a), (v8i32 LASX256W:$b), uimm3:$ui3), + (XVPICKVE_W LASX256W:$a, LASX256W:$b, uimm3:$ui3)>; + +def : LASXPat<(LoongArchXVPICKVE (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm2:$ui2), + (XVPICKVE_D LASX256D:$a, LASX256D:$b, uimm2:$ui2)>; + + +def : LASXPat<(LoongArchXVSHUF4I (v4i64 LASX256D:$a), (v4i64 LASX256D:$b), uimm8_32:$ui8), + (XVSHUF4I_D LASX256D:$a, LASX256D:$b, uimm8_32:$ui8)>; + +def : LASXPat<(LoongArchXVPERMI (v4i64 LASX256D:$a), uimm8_32:$ui8), + (XVPERMI_D LASX256D:$a, uimm8_32:$ui8)>; + + + + +//===----------------------------------------------------------------------===// +// Intrinsics +//===----------------------------------------------------------------------===// + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_COR_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cor_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_COR_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CUN_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cun_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CUN_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CUNE_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cune_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CUNE_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CUEQ_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cueq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CUEQ_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CEQ_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_ceq_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CEQ_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CNE_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cne_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CNE_D LASX256D:$xj, LASX256D:$xk)>; + +def : 
LASXPat<(int_loongarch_lasx_xvfcmp_clt_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CLT_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_clt_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CLT_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CULT_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cult_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CULT_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CLE_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cle_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CLE_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfcmp_cule_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFCMP_CULE_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfcmp_cule_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFCMP_CULE_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvseq_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSEQ_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvseq_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSEQ_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvseq_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSEQ_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvseq_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSEQ_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsle_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLE_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLE_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSLE_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLE_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsle_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLE_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLE_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSLE_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsle_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLE_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvslt_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLT_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLT_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSLT_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLT_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvslt_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLT_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLT_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_wu (v8i32 LASX256W:$xj), (v8i32 
LASX256W:$xk)), + (XVSLT_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvslt_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLT_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVADD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVADD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVADD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVADD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsub_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSUB_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSUB_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSUB_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSUB_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmax_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMAX_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMAX_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMAX_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMAX_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmin_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMIN_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMIN_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMIN_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMIN_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmin_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMIN_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMIN_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMIN_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmin_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMIN_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmul_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMUL_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmul_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMUL_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmul_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMUL_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmul_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMUL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvdiv_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVDIV_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVDIV_HU 
LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVDIV_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVDIV_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsll_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSLL_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsll_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSLL_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsll_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSLL_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsll_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSLL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsrl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSRL_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsrl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSRL_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsrl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSRL_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsrl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSRL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsra_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSRA_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsra_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSRA_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsra_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSRA_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsra_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSRA_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfadd_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFADD_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfadd_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFADD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfsub_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFSUB_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfsub_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFSUB_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfmul_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFMUL_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfmul_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFMUL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfdiv_s (v8f32 LASX256W:$xj), (v8f32 LASX256W:$xk)), + (XVFDIV_S LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvfdiv_d (v4f64 LASX256D:$xj), (v4f64 LASX256D:$xk)), + (XVFDIV_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfsqrt_s (v8f32 LASX256W:$xj)), + (XVFSQRT_S LASX256W:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvfsqrt_d (v4f64 LASX256D:$xj)), + (XVFSQRT_D LASX256D:$xj)>; + +def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_w (v8i32 LASX256W:$xj))), + (XVFFINT_S_W (v8i32 LASX256W:$xj))>; +def : LASXPat<(v8f32 (int_loongarch_lasx_xvffint_s_wu (v8i32 LASX256W:$xj))), + (XVFFINT_S_WU (v8i32 LASX256W:$xj))>; + +def : LASXPat<(v4f64 (int_loongarch_lasx_xvffint_d_l (v4i64 LASX256D:$xj))), + (XVFFINT_D_L (v4i64 LASX256D:$xj))>; +def : LASXPat<(v4f64 (int_loongarch_lasx_xvffint_d_lu (v4i64 LASX256D:$xj))), + 
(XVFFINT_D_LU (v4i64 LASX256D:$xj))>; + +def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_b GPR32Opnd:$rj), + (XVREPLGR2VR_B GPR32Opnd:$rj)>; +def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_h GPR32Opnd:$rj), + (XVREPLGR2VR_H GPR32Opnd:$rj)>; +def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_w GPR32Opnd:$rj), + (XVREPLGR2VR_W GPR32Opnd:$rj)>; +def : LASXPat<(int_loongarch_lasx_xvreplgr2vr_d GPR64Opnd:$rj), + (XVREPLGR2VR_D GPR64Opnd:$rj)>; + +def : LASXPat<(int_loongarch_lasx_xvpickve2gr_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)), + (XVPICKVE2GR_W LASX256W:$xj, uimm3:$ui3)>; +def : LASXPat<(int_loongarch_lasx_xvpickve2gr_d (v4i64 LASX256D:$xj), (immZExt2:$ui2)), + (XVPICKVE2GR_D LASX256D:$xj, uimm2:$ui2)>; + +def : LASXPat<(int_loongarch_lasx_xvpickve2gr_wu (v8i32 LASX256W:$xj), (immZExt3:$ui3)), + (XVPICKVE2GR_WU LASX256W:$xj, uimm3:$ui3)>; +def : LASXPat<(int_loongarch_lasx_xvpickve2gr_du (v4i64 LASX256D:$xj), (immZExt2:$ui2)), + (XVPICKVE2GR_DU LASX256D:$xj, uimm2:$ui2)>; + +def : LASXPat<(int_loongarch_lasx_xvreplve0_d (v4i64 LASX256D:$xj)), + (XVREPLVE0_D (v4i64 LASX256D:$xj))>; + +def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_w (v8i32 LASX256W:$xj), GPR32Opnd:$rj, (immZExt3:$ui3)), + (XVINSGR2VR_W LASX256W:$xj, GPR32Opnd:$rj, uimm3:$ui3)>; +def : LASXPat<(int_loongarch_lasx_xvinsgr2vr_d (v4i64 LASX256D:$xj), GPR64Opnd:$rj, (immZExt2:$ui2)), + (XVINSGR2VR_D LASX256D:$xj, GPR64Opnd:$rj, uimm2:$ui2)>; + +def : LASXPat<(int_loongarch_lasx_xvpickve_w (v8i32 LASX256W:$xj), (immZExt3:$ui3)), + (XVPICKVE_W (v8i32 (IMPLICIT_DEF)), LASX256W:$xj, uimm3:$ui3)>; +def : LASXPat<(int_loongarch_lasx_xvpickve_d (v4i64 LASX256D:$xj), (immZExt2:$ui2)), + (XVPICKVE_D (v4i64 (IMPLICIT_DEF)), LASX256D:$xj, uimm2:$ui2)>; + +def : LASXPat<(int_loongarch_lasx_xvpickve_w_f (v8f32 LASX256W:$xj), (immZExt3:$ui3)), + (XVPICKVE_W (v8f32 (IMPLICIT_DEF)), LASX256W:$xj, uimm3:$ui3)>; +def : LASXPat<(int_loongarch_lasx_xvpickve_d_f (v4f64 LASX256D:$xj), (immZExt2:$ui2)), + (XVPICKVE_D (v4f64 (IMPLICIT_DEF)), LASX256D:$xj, uimm2:$ui2)>; + +def : LASXPat<(int_loongarch_lasx_xvdiv_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVDIV_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVDIV_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVDIV_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvdiv_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVDIV_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmod_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMOD_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMOD_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMOD_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMOD_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMOD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMOD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMOD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmod_d (v4i64 LASX256D:$xj), (v4i64 
LASX256D:$xk)), + (XVMOD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmax_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMAX_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMAX_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMAX_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmax_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMAX_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvfrint_s (v8f32 LASX256W:$xj)), + (XVFRINT_S LASX256W:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvfrint_d (v4f64 LASX256D:$xj)), + (XVFRINT_D LASX256D:$xj)>; + +def : LASXPat<(int_loongarch_lasx_xvpackod_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVPACKOD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVPACKOD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVPACKOD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVPACKOD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvpackev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVPACKEV_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVPACKEV_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVPACKEV_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpackev_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVPACKEV_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvilvh_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVILVH_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvh_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVILVH_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvh_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVILVH_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvh_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVILVH_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvilvl_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVILVL_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvl_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVILVL_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvl_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVILVL_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvilvl_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVILVL_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvpickev_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVPICKEV_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickev_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVPICKEV_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickev_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVPICKEV_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickev_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVPICKEV_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvpickod_b (v32i8 LASX256B:$xj), (v32i8 
LASX256B:$xk)), + (XVPICKOD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickod_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVPICKOD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickod_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVPICKOD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvpickod_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVPICKOD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsadd_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSADD_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSADD_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSADD_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSADD_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvssub_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSSUB_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSSUB_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSSUB_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSSUB_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvsadd_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSADD_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSADD_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSADD_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvsadd_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSADD_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvssub_bu (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVSSUB_BU LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_hu (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVSSUB_HU LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_wu (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVSSUB_WU LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvssub_du (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVSSUB_DU LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmadd_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMADD_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmadd_h (v16i16 LASX256H:$xd_in), (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMADD_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmadd_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMADD_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmadd_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMADD_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvmsub_b (v32i8 LASX256B:$xd_in), (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVMSUB_B LASX256B:$xd_in, LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmsub_h (v16i16 LASX256H:$xd_in), (v16i16 
LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVMSUB_H LASX256H:$xd_in, LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmsub_w (v8i32 LASX256W:$xd_in), (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVMSUB_W LASX256W:$xd_in, LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvmsub_d (v4i64 LASX256D:$xd_in), (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVMSUB_D LASX256D:$xd_in, LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_wu_s (v8f32 LASX256W:$xj))), + (XVFTINTRZ_WU_S (v8f32 LASX256W:$xj))>; +def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_lu_d (v4f64 LASX256D:$xj))), + (XVFTINTRZ_LU_D (v4f64 LASX256D:$xj))>; + +def : LASXPat<(v8i32 (int_loongarch_lasx_xvftintrz_w_s (v8f32 LASX256W:$xj))), + (XVFTINTRZ_W_S (v8f32 LASX256W:$xj))>; +def : LASXPat<(v4i64 (int_loongarch_lasx_xvftintrz_l_d (v4f64 LASX256D:$xj))), + (XVFTINTRZ_L_D (v4f64 LASX256D:$xj))>; + +def : LASXPat<(int_loongarch_lasx_xvbitclr_b (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk)), + (XVBITCLR_B LASX256B:$xj, LASX256B:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvbitclr_h (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk)), + (XVBITCLR_H LASX256H:$xj, LASX256H:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvbitclr_w (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk)), + (XVBITCLR_W LASX256W:$xj, LASX256W:$xk)>; +def : LASXPat<(int_loongarch_lasx_xvbitclr_d (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk)), + (XVBITCLR_D LASX256D:$xj, LASX256D:$xk)>; + +def : LASXPat<(int_loongarch_lasx_xvclz_b (v32i8 LASX256B:$xj)), + (XVCLZ_B LASX256B:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvclz_h (v16i16 LASX256H:$xj)), + (XVCLZ_H LASX256H:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvclz_w (v8i32 LASX256W:$xj)), + (XVCLZ_W LASX256W:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvclz_d (v4i64 LASX256D:$xj)), + (XVCLZ_D LASX256D:$xj)>; + +def : LASXPat<(int_loongarch_lasx_xvpcnt_b (v32i8 LASX256B:$xj)), + (XVPCNT_B LASX256B:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvpcnt_h (v16i16 LASX256H:$xj)), + (XVPCNT_H LASX256H:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvpcnt_w (v8i32 LASX256W:$xj)), + (XVPCNT_W LASX256W:$xj)>; +def : LASXPat<(int_loongarch_lasx_xvpcnt_d (v4i64 LASX256D:$xj)), + (XVPCNT_D LASX256D:$xj)>; + + +def : LASXPat<(v32i8 (load (add iPTR:$xj, iPTR:$xk))), + (XVLDX PtrRC:$xj, PtrRC:$xk)>; + +def : LASXPat<(store (v32i8 LASX256B:$xd), (add iPTR:$xj, iPTR:$xk)), + (XVSTX LASX256B:$xd, PtrRC:$xj, PtrRC:$xk)>; + + +def : LASXPat<(v4i64 (sext_invec (v8i32 LASX256W:$xj))), + (VEXT2XV_D_W LASX256W:$xj)>; +def : LASXPat<(v8i32 (sext_invec (v16i16 LASX256H:$xj))), + (VEXT2XV_W_H LASX256H:$xj)>; +def : LASXPat<(v16i16 (sext_invec (v32i8 LASX256B:$xj))), + (VEXT2XV_H_B LASX256B:$xj)>; + + +def : LASXPat<(v4i64 (zext_invec (v8i32 LASX256W:$xj))), + (VEXT2XV_DU_WU LASX256W:$xj)>; +def : LASXPat<(v8i32 (zext_invec (v16i16 LASX256H:$xj))), + (VEXT2XV_WU_HU LASX256H:$xj)>; +def : LASXPat<(v16i16 (zext_invec (v32i8 LASX256B:$xj))), + (VEXT2XV_HU_BU LASX256B:$xj)>; + + +def : LASXPat<(v4i64 (sext_invec (v16i16 LASX256H:$xj))), + (VEXT2XV_D_H LASX256H:$xj)>; +def : LASXPat<(v4i64 (sext_invec (v32i8 LASX256B:$xj))), + (VEXT2XV_D_B LASX256B:$xj)>; +def : LASXPat<(v8i32 (sext_invec (v32i8 LASX256B:$xj))), + (VEXT2XV_W_B LASX256B:$xj)>; + + +def : LASXPat<(v4i64 (zext_invec (v16i16 LASX256H:$xj))), + (VEXT2XV_DU_HU LASX256H:$xj)>; +def : LASXPat<(v4i64 (zext_invec (v32i8 LASX256B:$xj))), + (VEXT2XV_DU_BU LASX256B:$xj)>; +def : LASXPat<(v8i32 (zext_invec (v32i8 LASX256B:$xj))), + (VEXT2XV_WU_BU LASX256B:$xj)>; + 
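The sext_invec/zext_invec patterns above give the selector direct lowerings for in-register widening: the nodes map straight onto the vext2xv family instead of an unpack-and-shift sequence, and the defs that follow do the same for widening a full 128-bit LSX value into a 256-bit LASX register. As a rough illustration (a sketch, not part of this patch; it assumes a LoongArch target with LASX enabled and an auto-vectorizing build), a plain widening copy is the kind of source that produces those nodes:

  #include <stdint.h>

  /* Each i16 lane is sign-extended to i32.  With LASX enabled, the
     vectorizer forms v8i32 sign-extension nodes from v8i16 lanes,
     which the patterns above can select to vext2xv.w.h. */
  void widen_s16_to_s32(int32_t *dst, const int16_t *src, int n) {
    for (int i = 0; i < n; ++i)
      dst[i] = src[i];
  }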
+
+def : LASXPat<(v16i16 (sext (v16i8 LSX128B:$vj))),
+              (VEXT2XV_H_B
+              (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>;
+
+def : LASXPat<(v8i32 (sext (v8i16 LSX128H:$vj))),
+              (VEXT2XV_W_H
+              (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>;
+
+def : LASXPat<(v4i64 (sext (v4i32 LSX128W:$vj))),
+              (VEXT2XV_D_W
+              (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>;
+
+def : LASXPat<(v16i16 (zext (v16i8 LSX128B:$vj))),
+              (VEXT2XV_HU_BU
+              (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), LSX128B:$vj, sub_128))>;
+
+def : LASXPat<(v8i32 (zext (v8i16 LSX128H:$vj))),
+              (VEXT2XV_WU_HU
+              (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), LSX128H:$vj, sub_128))>;
+
+def : LASXPat<(v4i64 (zext (v4i32 LSX128W:$vj))),
+              (VEXT2XV_DU_WU
+              (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), LSX128W:$vj, sub_128))>;
+
+
+def : LASXPat<(xor
+              (v16i16 LASX256H:$xj), (xvsplati16 imm_mask)
+              ),
+              (XNOR_V_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xj))>;
+
+def : LASXPat<(xor
+              (v8i32 LASX256W:$xj), (xvsplati32 imm_mask)
+              ),
+              (XNOR_V_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xj))>;
+
+def : LASXPat<(xor
+              (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64)
+              ),
+              (XNOR_V_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xj))>;
+
+
+def : LASXPat<(and
+              (v32i8 (xor (v32i8 LASX256B:$xj), (xvsplati8 imm_mask))),
+              (v32i8 LASX256B:$xk)
+              ),
+              (XVANDN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>;
+
+def : LASXPat<(and
+              (v16i16 (xor (v16i16 LASX256H:$xj), (xvsplati16 imm_mask))),
+              (v16i16 LASX256H:$xk)
+              ),
+              (XVANDN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>;
+
+def : LASXPat<(and
+              (v8i32 (xor (v8i32 LASX256W:$xj), (xvsplati32 imm_mask))),
+              (v8i32 LASX256W:$xk)
+              ),
+              (XVANDN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>;
+
+def : LASXPat<(and
+              (v4i64 (xor (v4i64 LASX256D:$xj), (xvsplati64 imm_mask_64))),
+              (v4i64 LASX256D:$xk)
+              ),
+              (XVANDN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>;
+
+
+def : LASXPat<(or
+              (v32i8 LASX256B:$xj),
+              (v32i8 (xor (v32i8 LASX256B:$xk), (xvsplati8 imm_mask)))
+              ),
+              (XVORN_V (v32i8 LASX256B:$xj), (v32i8 LASX256B:$xk))>;
+
+def : LASXPat<(or
+              (v16i16 LASX256H:$xj),
+              (v16i16 (xor (v16i16 LASX256H:$xk), (xvsplati16 imm_mask)))
+              ),
+              (XVORN_H_PSEUDO (v16i16 LASX256H:$xj), (v16i16 LASX256H:$xk))>;
+
+def : LASXPat<(or
+              (v8i32 LASX256W:$xj),
+              (v8i32 (xor (v8i32 LASX256W:$xk), (xvsplati32 imm_mask)))
+              ),
+              (XVORN_W_PSEUDO (v8i32 LASX256W:$xj), (v8i32 LASX256W:$xk))>;
+
+def : LASXPat<(or
+              (v4i64 LASX256D:$xj),
+              (v4i64 (xor (v4i64 LASX256D:$xk), (xvsplati64 imm_mask_64)))
+              ),
+              (XVORN_D_PSEUDO (v4i64 LASX256D:$xj), (v4i64 LASX256D:$xk))>;
+
+
+def : LASXPat<(add (v4i64 (abs LASX256D:$a)), (v4i64 (abs LASX256D:$b))),
+              (XVADDA_D (v4i64 LASX256D:$a),(v4i64 LASX256D:$b))>;
+
+def : LASXPat<(add (v8i32 (abs LASX256W:$a)), (v8i32 (abs LASX256W:$b))),
+              (XVADDA_W (v8i32 LASX256W:$a),(v8i32 LASX256W:$b))>;
+
+def : LASXPat<(add (v16i16 (abs LASX256H:$a)), (v16i16 (abs LASX256H:$b))),
+              (XVADDA_H (v16i16
LASX256H:$a),(v16i16 LASX256H:$b))>; + +def : LASXPat<(add (v32i8 (abs LASX256B:$a)), (v32i8 (abs LASX256B:$b))), + (XVADDA_B (v32i8 LASX256B:$a),(v32i8 LASX256B:$b))>; + + +def : LASXPat<(and v32i8:$xj, (xor (shl xvsplat_imm_eq_1, v32i8:$xk), + (xvsplati8 imm_mask))), + (XVBITCLR_B v32i8:$xj, v32i8:$xk)>; + +def : LASXPat<(and v16i16:$xj, (xor (shl xvsplat_imm_eq_1, v16i16:$xk), + (xvsplati16 imm_mask))), + (XVBITCLR_H v16i16:$xj, v16i16:$xk)>; + +def : LASXPat<(and v8i32:$xj, (xor (shl xvsplat_imm_eq_1, v8i32:$xk), + (xvsplati32 imm_mask))), + (XVBITCLR_W v8i32:$xj, v8i32:$xk)>; + +def : LASXPat<(and v4i64:$xj, (xor (shl xvsplat_imm_eq_1, v4i64:$xk), + (xvsplati64 imm_mask_64))), + (XVBITCLR_D v4i64:$xj, v4i64:$xk)>; + + +def : LASXPat<(insert_subvector (v32i8 LASX256B:$dst), + (v16i8 LSX128B:$src), (i64 0)), + (XVPERMI_Q (v32i8 LASX256B:$dst), + (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), + (i32 48))>; + +def : LASXPat<(insert_subvector (v16i16 LASX256H:$dst), + (v8i16 LSX128H:$src), (i64 0)), + (XVPERMI_QH (v16i16 LASX256H:$dst), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), + (i32 48))>; + +def : LASXPat<(insert_subvector (v8i32 LASX256W:$dst), + (v4i32 LSX128W:$src), (i64 0)), + (XVPERMI_QW (v8i32 LASX256W:$dst), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), + (i32 48))>; + +def : LASXPat<(insert_subvector (v4i64 LASX256D:$dst), + (v2i64 LSX128D:$src), (i64 0)), + (XVPERMI_QD (v4i64 LASX256D:$dst), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), + (i32 48))>; + +def : LASXPat<(insert_subvector (v4i64 LASX256D:$dst), + (v2i64 LSX128D:$src), (i64 2)), + (XVPERMI_QD (v4i64 LASX256D:$dst), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector (v8i32 LASX256W:$dst), + (v4i32 LSX128W:$src), (i64 4)), + (XVPERMI_QW (v8i32 LASX256W:$dst), + (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + LSX128W:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector (v16i16 LASX256H:$dst), + (v8i16 LSX128H:$src), (i64 8)), + (XVPERMI_QH (v16i16 LASX256H:$dst), + (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), + LSX128H:$src, sub_128)), (i32 32))>; + +def : LASXPat<(insert_subvector (v32i8 LASX256B:$dst), + (v16i8 LSX128B:$src), (i64 16)), + (XVPERMI_Q (v32i8 LASX256B:$dst), + (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), + LSX128B:$src, sub_128)), (i32 32))>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td new file mode 100644 index 000000000000..50df4d724c54 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td @@ -0,0 +1,449 @@ +//===- LoongArchLSXInstrFormats.td - LoongArch LSX Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+class LSXInst : InstLA<(outs), (ins), "", [], FrmOther>,
+                EXT_LSX {
+}
+
+class LSXCBranch : LSXInst {
+}
+
+class LSXSpecial : LSXInst {
+}
+
+class LSXPseudo<dag outs, dag ins, list<dag> pattern>:
+      LoongArchPseudo<outs, ins, pattern> {
+  let Predicates = [HasLSX];
+}
+
+class LSX_3R<bits<17> op>: LSXInst {
+  bits<5> vk;
+  bits<5> vj;
+  bits<5> vd;
+
+  let Inst{31-15} = op;
+  let Inst{14-10} = vk;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_3R_1GP<bits<17> op>: LSXInst {
+  bits<5> rk;
+  bits<5> vj;
+  bits<5> vd;
+
+  let Inst{31-15} = op;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I5<bits<17> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<5> si5;
+
+  let Inst{31-15} = op;
+  let Inst{14-10} = si5;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I5_U<bits<17> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<5> ui5;
+
+  let Inst{31-15} = op;
+  let Inst{14-10} = ui5;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_2R<bits<22> op>: LSXInst {
+  bits<5> vj;
+  bits<5> vd;
+
+  let Inst{31-10} = op;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_2R_1GP<bits<22> op>: LSXInst {
+  bits<5> rj;
+  bits<5> vd;
+
+  let Inst{31-10} = op;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I1_U<bits<21> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<1> ui1;
+
+  let Inst{31-11} = op;
+  let Inst{10} = ui1;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I2_U<bits<20> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<2> ui2;
+
+  let Inst{31-12} = op;
+  let Inst{11-10} = ui2;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I3_U<bits<19> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<3> ui3;
+
+  let Inst{31-13} = op;
+  let Inst{12-10} = ui3;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I4_U<bits<18> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<4> ui4;
+
+  let Inst{31-14} = op;
+  let Inst{13-10} = ui4;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I6_U<bits<16> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<6> ui6;
+
+  let Inst{31-16} = op;
+  let Inst{15-10} = ui6;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I1_R_U<bits<21> op>: LSXInst {
+  bits<5> vd;
+  bits<5> rj;
+  bits<1> ui1;
+
+  let Inst{31-11} = op;
+  let Inst{10} = ui1;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I2_R_U<bits<20> op>: LSXInst {
+  bits<5> vd;
+  bits<5> rj;
+  bits<2> ui2;
+
+  let Inst{31-12} = op;
+  let Inst{11-10} = ui2;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I3_R_U<bits<19> op>: LSXInst {
+  bits<5> vd;
+  bits<5> rj;
+  bits<3> ui3;
+
+  let Inst{31-13} = op;
+  let Inst{12-10} = ui3;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I4_R_U<bits<18> op>: LSXInst {
+  bits<5> vd;
+  bits<5> rj;
+  bits<4> ui4;
+
+  let Inst{31-14} = op;
+  let Inst{13-10} = ui4;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_ELM_COPY_B<bits<18> op>: LSXInst {
+  bits<5> rd;
+  bits<5> vj;
+  bits<4> ui4;
+
+  let Inst{31-14} = op;
+  let Inst{13-10} = ui4;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = rd;
+}
+
+class LSX_ELM_COPY_H<bits<19> op>: LSXInst {
+  bits<5> rd;
+  bits<5> vj;
+  bits<3> ui3;
+
+  let Inst{31-13} = op;
+  let Inst{12-10} = ui3;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = rd;
+}
+
+class LSX_ELM_COPY_W<bits<20> op>: LSXInst {
+  bits<5> rd;
+  bits<5> vj;
+  bits<2> ui2;
+
+  let Inst{31-12} = op;
+  let Inst{11-10} = ui2;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = rd;
+}
+
+class LSX_ELM_COPY_D<bits<21> op>: LSXInst {
+  bits<5> rd;
+  bits<5> vj;
+  bits<1> ui1;
+
+  let Inst{31-11} = op;
+  let Inst{10} = ui1;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = rd;
+}
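Each of these format classes only fixes how the 32-bit instruction word is sliced into fields; as the immediate grows, the opcode field shrinks to make room. As a cross-check (an illustrative sketch, not code from this patch; the helper name is invented), the LSX_ELM_COPY_B layout above packs as op[31:14], ui4[13:10], vj[9:5], rd[4:0]:

  #include <stdint.h>

  /* Pack an LSX_ELM_COPY_B-shaped word: 18-bit opcode, 4-bit lane
     index, 5-bit source vector register, 5-bit destination GPR. */
  static inline uint32_t encode_elm_copy_b(uint32_t op, uint32_t ui4,
                                           uint32_t vj, uint32_t rd) {
    return ((op & 0x3FFFFu) << 14) | ((ui4 & 0xFu) << 10) |
           ((vj & 0x1Fu) << 5) | (rd & 0x1Fu);
  }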
+
+class LSX_I8_U<bits<14> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<8> ui8;
+
+  let Inst{31-18} = op;
+  let Inst{17-10} = ui8;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I7_U<bits<15> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<7> ui7;
+
+  let Inst{31-17} = op;
+  let Inst{16-10} = ui7;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I12_S<bits<10> op>: LSXInst {
+  bits<5> vd;
+// bits<5> rj;
+// bits<12> si12;
+  bits<17> addr;
+
+  let Inst{31-22} = op;
+  let Inst{21-10} = addr{11-0};
+  let Inst{9-5} = addr{16-12};
+  let Inst{4-0} = vd;
+}
+
+class LSX_SI12_S<bits<10> op>: LSXInst {
+  bits<5> vd;
+  bits<17> addr;
+
+  let Inst{31-22} = op;
+  let Inst{21-10} = addr{11-0};
+  let Inst{9-5} = addr{16-12};
+  let Inst{4-0} = vd;
+}
+
+class LSX_SI11_S<bits<11> op>: LSXInst {
+  bits<5> vd;
+  bits<16> addr;
+
+  let Inst{31-21} = op;
+  let Inst{20-10} = addr{10-0};
+  let Inst{9-5} = addr{15-11};
+  let Inst{4-0} = vd;
+}
+
+class LSX_SI10_S<bits<12> op>: LSXInst {
+  bits<5> vd;
+  bits<15> addr;
+
+  let Inst{31-20} = op;
+  let Inst{19-10} = addr{9-0};
+  let Inst{9-5} = addr{14-10};
+  let Inst{4-0} = vd;
+}
+
+class LSX_SI9_S<bits<13> op>: LSXInst {
+  bits<5> vd;
+  bits<14> addr;
+
+  let Inst{31-19} = op;
+  let Inst{18-10} = addr{8-0};
+  let Inst{9-5} = addr{13-9};
+  let Inst{4-0} = vd;
+}
+
+class LSX_SET<bits<22> op>: LSXInst {
+  bits<5> vj;
+  bits<3> cd;
+
+  let Inst{31-10} = op;
+  let Inst{9-5} = vj;
+  let Inst{4-3} = 0b00;
+  let Inst{2-0} = cd;
+}
+
+class LSX_VR4MUL<bits<12> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<5> vk;
+  bits<5> va;
+
+  let Inst{31-20} = op;
+  let Inst{19-15} = va;
+  let Inst{14-10} = vk;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_VFCMP<bits<12> op>: LSXInst {
+  bits<5> vd;
+  bits<5> vj;
+  bits<5> vk;
+  bits<5> cond;
+
+  let Inst{31-20} = op;
+  let Inst{19-15} = cond;
+  let Inst{14-10} = vk;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_Addr_SI8_idx1<bits<13> op>: LSXInst {
+  bits<5> vd;
+  bits<13> addr;
+  bits<1> idx;
+
+  let Inst{31-19} = op;
+  let Inst{18-11} = addr{7-0};
+  let Inst{10} = idx;
+  let Inst{9-5} = addr{12-8};
+  let Inst{4-0} = vd;
+}
+
+class LSX_SI8_idx1<bits<13> op>: LSXInst {
+  bits<5> vd;
+  bits<5> rj;
+  bits<8> si8;
+  bits<1> idx;
+
+  let Inst{31-19} = op;
+  let Inst{18} = idx;
+  let Inst{17-10} = si8;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_SI8_idx2<bits<12> op>: LSXInst {
+  bits<5> vd;
+  bits<5> rj;
+  bits<8> si8;
+  bits<2> idx;
+
+  let Inst{31-20} = op;
+  let Inst{19-18} = idx;
+  let Inst{17-10} = si8;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_SI8_idx3<bits<11> op>: LSXInst {
+  bits<5> vd;
+  bits<5> rj;
+  bits<8> si8;
+  bits<3> idx;
+
+  let Inst{31-21} = op;
+  let Inst{20-18} = idx;
+  let Inst{17-10} = si8;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_SI8_idx4<bits<10> op>: LSXInst {
+  bits<5> vd;
+  bits<5> rj;
+  bits<8> si8;
+  bits<4> idx;
+
+  let Inst{31-22} = op;
+  let Inst{21-18} = idx;
+  let Inst{17-10} = si8;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_3R_2GP<bits<17> op>: LSXInst {
+  bits<5> rk;
+  bits<5> rj;
+  bits<5> vd;
+
+  let Inst{31-15} = op;
+  let Inst{14-10} = rk;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+class LSX_I5_mode_U<bits<17> op>: LSXInst {
+  bits<5> vd;
+  bits<5> mode;
+  bits<5> ui5;
+
+  let Inst{31-15} = op;
+  let Inst{14-10} = ui5;
+  let Inst{9-5} = mode;
+  let Inst{4-0} = vd;
+}
+
+class LSX_1R_I13<bits<14> op>: LSXInst {
+  bits<13> i13;
+  bits<5> vd;
+
+  let Inst{31-18} = op;
+  let Inst{17-5} = i13;
+  let Inst{4-0} = vd;
+}
+
+class LSX_1R_I13_I10<bits<17> op>: LSXInst {
+  bits<10> i10;
+  bits<5> vd;
+
+  let Inst{31-15} = op;
+  let Inst{14-5} = i10;
+  let Inst{4-0} = vd;
+}
+
+
+
+
+
+
+
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
new file mode 100644
index 000000000000..cac609c8cc6f
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -0,0 +1,5747 @@
+//===- LoongArchLSXInstrInfo.td - LSX instructions -*- tablegen ------------*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes LoongArch LSX instructions.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>,
+                                      SDTCisInt<1>,
+                                      SDTCisSameAs<1, 2>,
+                                      SDTCisVT<3, OtherVT>]>;
+def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>,
+                                       SDTCisFP<1>,
+                                       SDTCisSameAs<1, 2>,
+                                       SDTCisVT<3, OtherVT>]>;
+def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>,
+                                    SDTCisInt<1>, SDTCisVec<1>,
+                                    SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
+def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+                                   SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>;
+def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+                                   SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
+def SDTVABSD : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                    SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+
+def SDT_VBROADCAST : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def LoongArchVBROADCAST : SDNode<"LoongArchISD::VBROADCAST", SDT_VBROADCAST>;
+
+def LoongArchVAllNonZero : SDNode<"LoongArchISD::VALL_NONZERO", SDT_LoongArchVecCond>;
+def LoongArchVAnyNonZero : SDNode<"LoongArchISD::VANY_NONZERO", SDT_LoongArchVecCond>;
+def LoongArchVAllZero : SDNode<"LoongArchISD::VALL_ZERO", SDT_LoongArchVecCond>;
+def LoongArchVAnyZero : SDNode<"LoongArchISD::VANY_ZERO", SDT_LoongArchVecCond>;
+def LoongArchVNOR : SDNode<"LoongArchISD::VNOR", SDTIntBinOp,
+                           [SDNPCommutative, SDNPAssociative]>;
+def LoongArchVSHF : SDNode<"LoongArchISD::VSHF", SDT_VSHF>;
+def LoongArchSHF : SDNode<"LoongArchISD::SHF", SDT_SHF>;
+def LoongArchVPACKEV : SDNode<"LoongArchISD::VPACKEV", SDT_ILV>;
+def LoongArchVPACKOD : SDNode<"LoongArchISD::VPACKOD", SDT_ILV>;
+def LoongArchVILVH : SDNode<"LoongArchISD::VILVH", SDT_ILV>;
+def LoongArchVILVL : SDNode<"LoongArchISD::VILVL", SDT_ILV>;
+def LoongArchVPICKEV : SDNode<"LoongArchISD::VPICKEV", SDT_ILV>;
+def LoongArchVPICKOD : SDNode<"LoongArchISD::VPICKOD", SDT_ILV>;
+def LoongArchVABSD : SDNode<"LoongArchISD::VABSD", SDTVABSD>;
+def LoongArchUVABSD : SDNode<"LoongArchISD::UVABSD", SDTVABSD>;
+
+def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>;
+def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>;
+
+def LoongArchVExtractSExt : SDNode<"LoongArchISD::VEXTRACT_SEXT_ELT",
+                                   SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>;
+def LoongArchVExtractZExt : SDNode<"LoongArchISD::VEXTRACT_ZEXT_ELT",
+                                   SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>;
+
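The immediate leaves that follow only range-check constant operands; isUInt<N> and isInt<N> come from llvm/Support/MathExtras.h. In C terms (an illustrative sketch, not part of the patch), the checks reduce to:

  #include <stdbool.h>
  #include <stdint.h>

  /* Equivalent of isUInt<N>(Imm): value fits in N unsigned bits
     (valid for 0 < n < 64). */
  static inline bool is_uint_n(uint64_t x, unsigned n) {
    return x < (UINT64_C(1) << n);
  }

  /* Equivalent of isInt<N>(Imm): value fits in N signed bits
     (valid for 0 < n < 64). */
  static inline bool is_int_n(int64_t x, unsigned n) {
    return x >= -(INT64_C(1) << (n - 1)) && x < (INT64_C(1) << (n - 1));
  }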
+def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>;
+def immZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}]>;
+def immSExt12_l : ImmLeaf<i32, [{return isInt<12>(Imm);}]>;
+def immSExt11Ptr : ImmLeaf<iPTR, [{return isInt<11>(Imm);}]>;
+
+def immSExt11_1 : ImmLeaf<i32, [{return isInt<11>(Imm<<1);}]>;
+def immSExt10Ptr : ImmLeaf<iPTR, [{return isInt<10>(Imm);}]>;
+def immSExt10_2 : ImmLeaf<i32, [{return isInt<10>(Imm<<2);}]>;
+def immSExt9Ptr : ImmLeaf<iPTR, [{return isInt<9>(Imm);}]>;
+def immSExt9_3 : ImmLeaf<i32, [{return isInt<9>(Imm<<3);}]>;
+def immSExt8 : ImmLeaf<i32, [{return isInt<8>(Imm);}]>;
+def immSExt5 : ImmLeaf<i32, [{return isInt<5>(Imm);}]>;
+def immSExt8_1 : ImmLeaf<i32, [{return isInt<8>(Imm<<1);}]>;
+def immSExt8_2 : ImmLeaf<i32, [{return isInt<8>(Imm<<2);}]>;
+def immSExt8_3 : ImmLeaf<i32, [{return isInt<8>(Imm<<3);}]>;
+
+def addrimm10 : ComplexPattern<iPTR, 2, "selectIntAddrSImm10", [frameindex]>;
+def addrimm10lsl2 : ComplexPattern<iPTR, 2, "selectIntAddrSImm10Lsl2",
+                                   [frameindex]>;
+def addrimm9lsl3 : ComplexPattern<iPTR, 2, "selectIntAddrSImm9Lsl3",
+                                  [frameindex]>;
+def addrimm11lsl1 : ComplexPattern<iPTR, 2, "selectIntAddrSImm11Lsl1",
+                                   [frameindex]>;
+
+
+class SimmLslAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [],
+                             int Shift = 0> : AsmOperandClass {
+  let Name = "Simm" # Bits # "_Lsl" # Shift;
+  let RenderMethod = "addImmOperands";
+  let PredicateMethod = "isScaledSImm<" # Bits # ", " # Shift # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "SImm" # Bits # "_Lsl" # Shift;
+}
+
+def Simm11Lsl1AsmOperand
+    : SimmLslAsmOperandClass<11, [], 1>;
+
+def immSExt11_1_O : Operand<i32> {
+  let EncoderMethod = "getSImm11Lsl1Encoding";
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>";
+  let ParserMatchClass = Simm11Lsl1AsmOperand;
+}
+
+def Simm10Lsl2AsmOperand
+    : SimmLslAsmOperandClass<10, [], 2>;
+
+def immSExt10_2_O : Operand<i32> {
+  let EncoderMethod = "getSImm10Lsl2Encoding";
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>";
+  let ParserMatchClass = Simm10Lsl2AsmOperand;
+}
+
+def Simm9Lsl3AsmOperand
+    : SimmLslAsmOperandClass<9, [], 3>;
+
+def immSExt9_3_O : Operand<i32> {
+  let EncoderMethod = "getSImm9Lsl3Encoding";
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>";
+  let ParserMatchClass = Simm9Lsl3AsmOperand;
+}
+
+def Simm8Lsl3AsmOperand
+    : SimmLslAsmOperandClass<8, [], 3>;
+
+def immSExt8_3_O : Operand<i32> {
+  let EncoderMethod = "getSImm8Lsl3Encoding";
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<8>";
+  let ParserMatchClass = Simm8Lsl3AsmOperand;
+}
+
+def Simm8Lsl2AsmOperand
+    : SimmLslAsmOperandClass<8, [], 2>;
+
+def immSExt8_2_O : Operand<i32> {
+  let EncoderMethod = "getSImm8Lsl2Encoding";
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<4>";
+  let ParserMatchClass = Simm8Lsl2AsmOperand;
+}
+
+def Simm8Lsl1AsmOperand
+    : SimmLslAsmOperandClass<8, [], 1>;
+
+def immSExt8_1_O : Operand<i32> {
+  let EncoderMethod = "getSImm8Lsl1Encoding";
+  let DecoderMethod = "DecodeSImmWithOffsetAndScale<2>";
+  let ParserMatchClass = Simm8Lsl1AsmOperand;
+}
+
+
+class ConstantSImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [],
+                                  int Offset = 0> : AsmOperandClass {
+  let Name = "ConstantSImm" # Bits # "_" # Offset;
+  let RenderMethod = "addConstantSImmOperands<" # Bits # ", " # Offset # ">";
+  let PredicateMethod = "isConstantSImm<" # Bits # ", " # Offset # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "SImm" # Bits # "_" # Offset;
+}
+
+class ConstantUImmRangeAsmOperandClass<int Bottom, int Top,
+                                       list<AsmOperandClass> Supers = []>
+    : AsmOperandClass {
+  let Name = "ConstantUImmRange" # Bottom # "_" # Top;
+  let RenderMethod = "addImmOperands";
+  let PredicateMethod = "isConstantUImmRange<" # Bottom # ", " # Top # ">";
+  let SuperClasses = Supers;
+  let DiagnosticType = "UImmRange" # Bottom # "_" # Top;
+}
+
+def SImm16RelaxedAsmOperandClass
+    : SImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]> {
+  let Name = "SImm16_Relaxed";
+  let PredicateMethod = "isAnyImm<16>";
+  let DiagnosticType = "SImm16_Relaxed";
+}
+
+def ConstantSImm11Lsl1AsmOperandClass : AsmOperandClass {
+  let Name = "SImm11Lsl1";
+  let RenderMethod = "addImmOperands";
+  let
PredicateMethod = "isScaledSImm<11, 1>"; + let SuperClasses = [SImm12Operand]; + let DiagnosticType = "SImm11_Lsl1"; +} + +def ConstantSImm9Lsl3AsmOperandClass : AsmOperandClass { + let Name = "SImm9Lsl3"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<9, 3>"; + let SuperClasses = [SImm12Operand]; + let DiagnosticType = "SImm9_Lsl3"; +} + +def ConstantSImm10Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 2>"; + let SuperClasses = [SImm12Operand]; + let DiagnosticType = "SImm10_Lsl2"; +} +def ConstantSImm11AsmOperandClass + : ConstantSImmAsmOperandClass<11, [ConstantSImm10Lsl2AsmOperandClass]>; +def ConstantSImm10Lsl1AsmOperandClass : AsmOperandClass { + let Name = "SImm10Lsl1"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<10, 1>"; + let SuperClasses = [ConstantSImm11AsmOperandClass]; + let DiagnosticType = "SImm10_Lsl1"; +} +def ConstantUImm10AsmOperandClass + : ConstantUImmAsmOperandClass<10, [ConstantSImm10Lsl1AsmOperandClass]>; +def ConstantSImm10AsmOperandClass + : ConstantSImmAsmOperandClass<10, [ConstantUImm10AsmOperandClass]>; +def ConstantSImm9AsmOperandClass + : ConstantSImmAsmOperandClass<9, [ConstantSImm10AsmOperandClass]>; +def ConstantSImm7Lsl2AsmOperandClass : AsmOperandClass { + let Name = "SImm7Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledSImm<7, 2>"; + let SuperClasses = [ConstantSImm9AsmOperandClass]; + let DiagnosticType = "SImm7_Lsl2"; +} +def ConstantUImm8AsmOperandClass + : ConstantUImmAsmOperandClass<8, [ConstantSImm7Lsl2AsmOperandClass]>; +def ConstantUImm7Sub1AsmOperandClass + : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass], -1> { + // Specify the names since the -1 offset causes invalid identifiers otherwise. 
+  let Name = "UImm7_N1";
+  let DiagnosticType = "UImm7_N1";
+}
+def ConstantUImm7AsmOperandClass
+    : ConstantUImmAsmOperandClass<7, [ConstantUImm7Sub1AsmOperandClass]>;
+def ConstantUImm6Lsl2AsmOperandClass : AsmOperandClass {
+  let Name = "UImm6Lsl2";
+  let RenderMethod = "addImmOperands";
+  let PredicateMethod = "isScaledUImm<6, 2>";
+  let SuperClasses = [ConstantUImm7AsmOperandClass];
+  let DiagnosticType = "UImm6_Lsl2";
+}
+def ConstantUImm6AsmOperandClass
+    : ConstantUImmAsmOperandClass<6, [ConstantUImm6Lsl2AsmOperandClass]>;
+def ConstantSImm6AsmOperandClass
+    : ConstantSImmAsmOperandClass<6, [ConstantUImm6AsmOperandClass]>;
+def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass {
+  let Name = "UImm5Lsl2";
+  let RenderMethod = "addImmOperands";
+  let PredicateMethod = "isScaledUImm<5, 2>";
+  let SuperClasses = [ConstantSImm6AsmOperandClass];
+  let DiagnosticType = "UImm5_Lsl2";
+}
+def ConstantUImm5_Range2_64AsmOperandClass
+    : ConstantUImmRangeAsmOperandClass<2, 64, [ConstantUImm5Lsl2AsmOperandClass]>;
+def ConstantUImm5Plus33AsmOperandClass
+    : ConstantUImmAsmOperandClass<5, [ConstantUImm5_Range2_64AsmOperandClass],
+                                  33>;
+def ConstantUImm5ReportUImm6AsmOperandClass
+    : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus33AsmOperandClass]> {
+  let Name = "ConstantUImm5_0_Report_UImm6";
+  let DiagnosticType = "UImm5_0_Report_UImm6";
+}
+def ConstantUImm5Plus32AsmOperandClass
+    : ConstantUImmAsmOperandClass<
+          5, [ConstantUImm5ReportUImm6AsmOperandClass], 32>;
+def ConstantUImm5Plus32NormalizeAsmOperandClass
+    : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus32AsmOperandClass], 32> {
+  let Name = "ConstantUImm5_32_Norm";
+  // We must also subtract 32 when we render the operand.
+  let RenderMethod = "addConstantUImmOperands<5, 32, -32>";
+}
+def ConstantUImm5Plus1ReportUImm6AsmOperandClass
+    : ConstantUImmAsmOperandClass<
+          5, [ConstantUImm5Plus32NormalizeAsmOperandClass], 1> {
+  let Name = "ConstantUImm5_Plus1_Report_UImm6";
+}
+def ConstantUImm5Plus1AsmOperandClass
+    : ConstantUImmAsmOperandClass<
+          5, [ConstantUImm5Plus1ReportUImm6AsmOperandClass], 1>;
+def ConstantUImm5AsmOperandClass
+    : ConstantUImmAsmOperandClass<5, [ConstantUImm5Plus1AsmOperandClass]>;
+def ConstantSImm5AsmOperandClass
+    : ConstantSImmAsmOperandClass<5, [ConstantUImm5AsmOperandClass]>;
+def ConstantUImm4AsmOperandClass
+    : ConstantUImmAsmOperandClass<4, [ConstantSImm5AsmOperandClass]>;
+def ConstantSImm4AsmOperandClass
+    : ConstantSImmAsmOperandClass<4, [ConstantUImm4AsmOperandClass]>;
+def ConstantUImm3AsmOperandClass
+    : ConstantUImmAsmOperandClass<3, [ConstantSImm4AsmOperandClass]>;
+def ConstantUImm2AsmOperandClass
+    : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>;
+def ConstantUImm1AsmOperandClass
+    : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>;
+def ConstantImmzAsmOperandClass : AsmOperandClass {
+  let Name = "ConstantImmz";
+  let RenderMethod = "addConstantUImmOperands<1>";
+  let PredicateMethod = "isConstantImmz";
+  let SuperClasses = [ConstantUImm1AsmOperandClass];
+  let DiagnosticType = "Immz";
+}
+
+foreach I = {1, 2, 3, 4, 5, 6, 8} in
+  def vsplat_uimm # I : Operand<vAny> {
+    let PrintMethod = "printUImm<" # I # ">";
+    let ParserMatchClass =
+        !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass");
+  }
+
+foreach I = {5, 10} in
+  def vsplat_simm # I : Operand<vAny> {
+    let ParserMatchClass =
+        !cast<AsmOperandClass>("ConstantSImm" # I # "AsmOperandClass");
+  }
+
+foreach I = {1, 4, 7, 8, 10, 20, 26} in
+  def uimm # I : Operand<i32> {
+    let PrintMethod = "printUImm<" # I # ">";
+    let ParserMatchClass =
+        !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass");
+  }
+
+foreach I = {1, 2, 3, 4, 5, 6, 7, 8} in
+  def uimm # I # _ptr : Operand<iPTR> {
+    let PrintMethod = "printUImm<" # I # ">";
+    let ParserMatchClass =
+        !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass");
+  }
+
+
+def addrimm12 : ComplexPattern<iPTR, 2, "selectIntAddrSImm12", [frameindex]>;
+
+
+def LoongArchMemSimm12AsmOperand : AsmOperandClass {
+  let Name = "MemOffsetSimm12";
+  let SuperClasses = [LoongArchMemAsmOperand];
+  let RenderMethod = "addMemOperands";
+  let ParserMethod = "parseMemOperand";
+  let PredicateMethod = "isMemWithSimmOffset<12>";
+  let DiagnosticType = "MemSImm12";
+}
+
+def mem_simm12 : mem_generic {
+  let MIOperandInfo = (ops ptr_rc, simm12);
+  let EncoderMethod = "getMemEncoding";
+  let ParserMatchClass = LoongArchMemSimm12AsmOperand;
+}
+
+foreach I = {4, 6, 9, 10, 11} in
+  def simm # I : Operand<i32> {
+    let DecoderMethod = "DecodeSImmWithOffsetAndScale<" # I # ">";
+    let ParserMatchClass =
+        !cast<AsmOperandClass>("ConstantSImm" # I # "AsmOperandClass");
+  }
+
+def LoongArchMemSimm9AsmOperand : AsmOperandClass {
+  let Name = "MemOffsetSimm9";
+  let SuperClasses = [LoongArchMemAsmOperand];
+  let RenderMethod = "addMemOperands";
+  let ParserMethod = "parseMemOperand";
+  let PredicateMethod = "isMemWithSimmOffset<9>";
+  let DiagnosticType = "MemSImm9";
+}
+
+def LoongArchMemSimm10AsmOperand : AsmOperandClass {
+  let Name = "MemOffsetSimm10";
+  let SuperClasses = [LoongArchMemAsmOperand];
+  let RenderMethod = "addMemOperands";
+  let ParserMethod = "parseMemOperand";
+  let PredicateMethod = "isMemWithSimmOffset<10>";
+  let DiagnosticType = "MemSImm10";
+}
+
+def LoongArchMemSimm11AsmOperand : AsmOperandClass {
+  let Name = "MemOffsetSimm11";
+  let SuperClasses = [LoongArchMemAsmOperand];
+  let RenderMethod = "addMemOperands";
+  let ParserMethod = "parseMemOperand";
+  let PredicateMethod = "isMemWithSimmOffset<11>";
+  let DiagnosticType = "MemSImm11";
+}
+
+def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
+
+def simm10Op : Operand<i64> {
+  let DecoderMethod = "DecodeSIMM10";
+}
+
+def simm13Op : Operand<i64> {
+  let DecoderMethod = "DecodeSIMM13";
+}
+
+def LoongArchMemSimm10Lsl2AsmOperand : AsmOperandClass {
+  let Name = "MemOffsetSimm10_2";
+  let SuperClasses = [LoongArchMemAsmOperand];
+  let RenderMethod = "addMemOperands";
+  let ParserMethod = "parseMemOperand";
+  let PredicateMethod = "isMemWithSimmOffset<10, 2>";
+  let DiagnosticType = "MemSImm10Lsl2";
+}
+
+
+def simm10_lsl2 : Operand<i32> {
+//  let DecoderMethod = "DecodeSImmWithOffsetAndScale<10, 2>";
+  let ParserMatchClass =
+      !cast<AsmOperandClass>("ConstantSImm10Lsl2AsmOperandClass");
+}
+
+def mem_simm10_lsl2 : mem_generic {
+  let MIOperandInfo = (ops ptr_rc, !cast<Operand>("simm10_lsl2"));
+  let EncoderMethod = "getMemEncoding10l2";
+  let ParserMatchClass =
+      !cast<AsmOperandClass>("LoongArchMemSimm10Lsl2AsmOperand");
+}
+
+
+def LoongArchMemSimm11Lsl1AsmOperand : AsmOperandClass {
+  let Name = "MemOffsetSimm11_1";
+  let SuperClasses = [LoongArchMemAsmOperand];
+  let RenderMethod = "addMemOperands";
+  let ParserMethod = "parseMemOperand";
+  let PredicateMethod = "isMemWithSimmOffset<11, 1>";
+  let DiagnosticType = "MemSImm11Lsl1";
+}
+
+
+def simm11_lsl1 : Operand<i32> {
+  // let DecoderMethod = "DecodeSImmWithOffsetAndScale<11, 1>";
+  let ParserMatchClass =
+      !cast<AsmOperandClass>("ConstantSImm11Lsl1AsmOperandClass");
+}
+
+def mem_simm11_lsl1 : mem_generic {
+  let MIOperandInfo = (ops ptr_rc, !cast<Operand>("simm11_lsl1"));
+  let EncoderMethod = "getMemEncoding11l1";
+  let ParserMatchClass =
+      !cast<AsmOperandClass>("LoongArchMemSimm11Lsl1AsmOperand");
+}
+
+def LoongArchMemSimm9Lsl3AsmOperand : AsmOperandClass {
+  let Name = "MemOffsetSimm9_3";
+  let SuperClasses = [LoongArchMemAsmOperand];
+  let RenderMethod = "addMemOperands";
+  let ParserMethod = "parseMemOperand";
+  let PredicateMethod = "isMemWithSimmOffset<9, 3>";
+  let DiagnosticType = "MemSImm9Lsl3";
+}
+
+
+def simm9_lsl3 : Operand<i32> {
+  // let DecoderMethod = "DecodeSImmWithOffsetAndScale<9, 3>";
+  let ParserMatchClass =
+      !cast<AsmOperandClass>("ConstantSImm9Lsl3AsmOperandClass");
+}
+
+def mem_simm9_lsl3 : mem_generic {
+  let MIOperandInfo = (ops ptr_rc, !cast<Operand>("simm9_lsl3"));
+  let EncoderMethod = "getMemEncoding9l3";
+  let ParserMatchClass =
+      !cast<AsmOperandClass>("LoongArchMemSimm9Lsl3AsmOperand");
+}
+
+
+
+
+// Operands
+
+def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>;
+
+// Pattern fragments
+def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx),
+                               (LoongArchVExtractSExt node:$vec, node:$idx, i8)>;
+def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx),
+                                (LoongArchVExtractSExt node:$vec, node:$idx, i16)>;
+def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx),
+                                (LoongArchVExtractSExt node:$vec, node:$idx, i32)>;
+def vextract_sext_i64 : PatFrag<(ops node:$vec, node:$idx),
+                                (LoongArchVExtractSExt node:$vec, node:$idx, i64)>;
+
+def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx),
+                               (LoongArchVExtractZExt node:$vec, node:$idx, i8)>;
+def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx),
+                                (LoongArchVExtractZExt node:$vec, node:$idx, i16)>;
+def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx),
+                                (LoongArchVExtractZExt node:$vec, node:$idx, i32)>;
+def vextract_zext_i64 : PatFrag<(ops node:$vec, node:$idx),
+                                (LoongArchVExtractZExt node:$vec, node:$idx, i64)>;
+
+def vldrepl_v16i8 : PatFrag<(ops node:$v1),
+                            (v16i8 (LoongArchVBROADCAST node:$v1))>;
+def vldrepl_v8i16 : PatFrag<(ops node:$v1),
+                            (v8i16 (LoongArchVBROADCAST node:$v1))>;
+def vldrepl_v4i32 : PatFrag<(ops node:$v1),
+                            (v4i32 (LoongArchVBROADCAST node:$v1))>;
+def vldrepl_v2i64 : PatFrag<(ops node:$v1),
+                            (v2i64 (LoongArchVBROADCAST node:$v1))>;
+
+def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+                            (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>;
+def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+                            (v8i16 (vector_insert node:$vec, node:$val, node:$idx))>;
+def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+                            (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>;
+def vinsert_v2i64 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+                            (v2i64 (vector_insert node:$vec, node:$val, node:$idx))>;
+
+class vfsetcc_type<ValueType ResTy, ValueType OpTy, CondCode CC> :
+  PatFrag<(ops node:$lhs, node:$rhs),
+          (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>;
+
+// ISD::SETFALSE cannot occur
+def vfseteq_v4f32 : vfsetcc_type<v4i32, v4f32, SETEQ>;
+def vfseteq_v2f64 : vfsetcc_type<v2i64, v2f64, SETEQ>;
+def vfsetge_v4f32 : vfsetcc_type<v4i32, v4f32, SETGE>;
+def vfsetge_v2f64 : vfsetcc_type<v2i64, v2f64, SETGE>;
+def vfsetgt_v4f32 : vfsetcc_type<v4i32, v4f32, SETGT>;
+def vfsetgt_v2f64 : vfsetcc_type<v2i64, v2f64, SETGT>;
+def vfsetle_v4f32 : vfsetcc_type<v4i32, v4f32, SETLE>;
+def vfsetle_v2f64 : vfsetcc_type<v2i64, v2f64, SETLE>;
+def vfsetlt_v4f32 : vfsetcc_type<v4i32, v4f32, SETLT>;
+def vfsetlt_v2f64 : vfsetcc_type<v2i64, v2f64, SETLT>;
+def vfsetne_v4f32 : vfsetcc_type<v4i32, v4f32, SETNE>;
+def vfsetne_v2f64 : vfsetcc_type<v2i64, v2f64, SETNE>;
+def vfsetoeq_v4f32 : vfsetcc_type<v4i32, v4f32, SETOEQ>;
+def vfsetoeq_v2f64 : vfsetcc_type<v2i64, v2f64, SETOEQ>;
+def vfsetoge_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGE>;
+def vfsetoge_v2f64 : vfsetcc_type<v2i64, v2f64, SETOGE>;
+def vfsetogt_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGT>;
+def vfsetogt_v2f64 : vfsetcc_type<v2i64, v2f64, SETOGT>;
+def vfsetole_v4f32 : vfsetcc_type<v4i32, v4f32, SETOLE>;
+def vfsetole_v2f64 : vfsetcc_type<v2i64, v2f64, SETOLE>;
+def vfsetolt_v4f32 : vfsetcc_type<v4i32, v4f32, SETOLT>;
+def vfsetolt_v2f64 : vfsetcc_type<v2i64, v2f64, SETOLT>;
+def vfsetone_v4f32 : vfsetcc_type<v4i32, v4f32, SETONE>;
+def vfsetone_v2f64 : vfsetcc_type<v2i64, v2f64, SETONE>;
+def vfsetord_v4f32 : vfsetcc_type<v4i32, v4f32, SETO>;
+def vfsetord_v2f64 : vfsetcc_type<v2i64, v2f64, SETO>;
+def vfsetun_v4f32 : vfsetcc_type<v4i32, v4f32, SETUO>;
+def vfsetun_v2f64 : vfsetcc_type<v2i64, v2f64, SETUO>;
+def vfsetueq_v4f32 : vfsetcc_type<v4i32, v4f32, SETUEQ>;
+def vfsetueq_v2f64 : vfsetcc_type<v2i64, v2f64, SETUEQ>;
+def vfsetuge_v4f32 : vfsetcc_type<v4i32, v4f32, SETUGE>;
+def vfsetuge_v2f64 : vfsetcc_type<v2i64, v2f64, SETUGE>;
+def vfsetugt_v4f32 : vfsetcc_type<v4i32, v4f32, SETUGT>;
+def vfsetugt_v2f64 : vfsetcc_type<v2i64, v2f64, SETUGT>;
+def vfsetule_v4f32 : vfsetcc_type<v4i32, v4f32, SETULE>;
+def vfsetule_v2f64 : vfsetcc_type<v2i64, v2f64, SETULE>;
+def vfsetult_v4f32 : vfsetcc_type<v4i32, v4f32, SETULT>;
+def vfsetult_v2f64 : vfsetcc_type<v2i64, v2f64, SETULT>;
+def vfsetune_v4f32 : vfsetcc_type<v4i32, v4f32, SETUNE>;
+def vfsetune_v2f64 : vfsetcc_type<v2i64, v2f64, SETUNE>;
+
+
+
+// ISD::SETTRUE cannot occur
+// ISD::SETFALSE2 cannot occur
+// ISD::SETTRUE2 cannot occur
+
+class vsetcc_type<ValueType ResTy, CondCode CC> :
+  PatFrag<(ops node:$lhs, node:$rhs),
+          (ResTy (vsetcc node:$lhs, node:$rhs, CC))>;
+
+def vseteq_v16i8 : vsetcc_type<v16i8, SETEQ>;
+def vseteq_v8i16 : vsetcc_type<v8i16, SETEQ>;
+def vseteq_v4i32 : vsetcc_type<v4i32, SETEQ>;
+def vseteq_v2i64 : vsetcc_type<v2i64, SETEQ>;
+def vsetle_v16i8 : vsetcc_type<v16i8, SETLE>;
+def vsetle_v8i16 : vsetcc_type<v8i16, SETLE>;
+def vsetle_v4i32 : vsetcc_type<v4i32, SETLE>;
+def vsetle_v2i64 : vsetcc_type<v2i64, SETLE>;
+def vsetlt_v16i8 : vsetcc_type<v16i8, SETLT>;
+def vsetlt_v8i16 : vsetcc_type<v8i16, SETLT>;
+def vsetlt_v4i32 : vsetcc_type<v4i32, SETLT>;
+def vsetlt_v2i64 : vsetcc_type<v2i64, SETLT>;
+def vsetule_v16i8 : vsetcc_type<v16i8, SETULE>;
+def vsetule_v8i16 : vsetcc_type<v8i16, SETULE>;
+def vsetule_v4i32 : vsetcc_type<v4i32, SETULE>;
+def vsetule_v2i64 : vsetcc_type<v2i64, SETULE>;
+def vsetult_v16i8 : vsetcc_type<v16i8, SETULT>;
+def vsetult_v8i16 : vsetcc_type<v8i16, SETULT>;
+def vsetult_v4i32 : vsetcc_type<v4i32, SETULT>;
+def vsetult_v2i64 : vsetcc_type<v2i64, SETULT>;
+
+def vsplati8 : PatFrag<(ops node:$e0),
+                       (v16i8 (build_vector node:$e0, node:$e0,
+                                            node:$e0, node:$e0,
+                                            node:$e0, node:$e0,
+                                            node:$e0, node:$e0,
+                                            node:$e0, node:$e0,
+                                            node:$e0, node:$e0,
+                                            node:$e0, node:$e0,
+                                            node:$e0, node:$e0))>;
+def vsplati16 : PatFrag<(ops node:$e0),
+                        (v8i16 (build_vector node:$e0, node:$e0,
+                                             node:$e0, node:$e0,
+                                             node:$e0, node:$e0,
+                                             node:$e0, node:$e0))>;
+def vsplati32 : PatFrag<(ops node:$e0),
+                        (v4i32 (build_vector node:$e0, node:$e0,
+                                             node:$e0, node:$e0))>;
+
+def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
+  APInt Imm;
+  SDNode *BV = N->getOperand(0).getNode();
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
+}]>;
+
+def vsplati64 : PatFrag<(ops node:$e0),
+                        (v2i64 (build_vector node:$e0, node:$e0))>;
+
+def vsplati64_splat_d : PatFrag<(ops node:$e0),
+                                (v2i64 (bitconvert
+                                        (v4i32 (and
+                                                (v4i32 (build_vector node:$e0,
+                                                                     node:$e0,
+                                                                     node:$e0,
+                                                                     node:$e0)),
+                                                vsplati64_imm_eq_1))))>;
+
+def vsplatf32 : PatFrag<(ops node:$e0),
+                        (v4f32 (build_vector node:$e0, node:$e0,
+                                             node:$e0, node:$e0))>;
+def vsplatf64 : PatFrag<(ops node:$e0),
+                        (v2f64 (build_vector node:$e0, node:$e0))>;
+
+def vsplati8_elt : PatFrag<(ops node:$v, node:$i),
+                           (LoongArchVSHF (vsplati8 node:$i), node:$v, node:$v)>;
+def vsplati16_elt : PatFrag<(ops node:$v, node:$i),
+                            (LoongArchVSHF (vsplati16 node:$i), node:$v, node:$v)>;
+def vsplati32_elt : PatFrag<(ops node:$v, node:$i),
+                            (LoongArchVSHF (vsplati32 node:$i), node:$v, node:$v)>;
+def vsplati64_elt : PatFrag<(ops node:$v, node:$i),
+                            (LoongArchVSHF (vsplati64_splat_d node:$i),
+                                           node:$v, node:$v)>;
+
+class SplatPatLeaf<Operand opclass, dag frag, code pred = [{}],
+                   SDNodeXForm xform = NOOP_SDNodeXForm>
+    : PatLeaf<frag, pred, xform> {
+  Operand OpClass = opclass;
+}
+
+class SplatComplexPattern<Operand opclass, ValueType ty, int numops, string fn,
+                          list<SDNode> roots = [],
+                          list<SDNodeProperty> props = []> :
+    ComplexPattern<ty, numops, fn, roots, props> {
+  Operand OpClass = opclass;
+}
+
+def vsplati8_uimm3 : SplatComplexPattern<vsplat_uimm3, v16i8, 1,
+                                         "selectVSplatUimm3",
+                                         [build_vector, bitconvert]>;
+
+def vsplati8_uimm4 : SplatComplexPattern<vsplat_uimm4, v16i8, 1,
+                                         "selectVSplatUimm4",
+                                         [build_vector, bitconvert]>;
+
+def vsplati8_uimm5 : SplatComplexPattern<vsplat_uimm5, v16i8, 1,
+                                         "selectVSplatUimm5",
+                                         [build_vector, bitconvert]>;
+
+def vsplati8_uimm8 : SplatComplexPattern<vsplat_uimm8, v16i8, 1,
+                                         "selectVSplatUimm8",
+                                         [build_vector, bitconvert]>;
+
+def vsplati8_simm5 : SplatComplexPattern<vsplat_simm5, v16i8, 1,
+                                         "selectVSplatSimm5",
+                                         [build_vector, bitconvert]>;
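+
+// Illustrative note: each SplatComplexPattern here matches a build_vector
+// whose lanes all hold the same constant and routes that constant to the C++
+// matcher named by its "fn" string (the selectVSplat* names follow the MIPS
+// MSA scheme this file mirrors, and are an assumption in this reconstruction),
+// so an immediate-form pattern such as a hypothetical
+//   (smax node:$vj, vsplati8_simm5:$si5)
+// can fold a splatted operand into an si5 immediate field.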
+def vsplati16_uimm3 : SplatComplexPattern<vsplat_uimm3, v8i16, 1,
+                                          "selectVSplatUimm3",
+                                          [build_vector, bitconvert]>;
+
+def vsplati16_uimm4 : SplatComplexPattern<vsplat_uimm4, v8i16, 1,
+                                          "selectVSplatUimm4",
+                                          [build_vector, bitconvert]>;
+
+def vsplati16_uimm5 : SplatComplexPattern<vsplat_uimm5, v8i16, 1,
+                                          "selectVSplatUimm5",
+                                          [build_vector, bitconvert]>;
+
+def vsplati16_simm5 : SplatComplexPattern<vsplat_simm5, v8i16, 1,
+                                          "selectVSplatSimm5",
+                                          [build_vector, bitconvert]>;
+
+def vsplati32_uimm2 : SplatComplexPattern<vsplat_uimm2, v4i32, 1,
+                                          "selectVSplatUimm2",
+                                          [build_vector, bitconvert]>;
+
+def vsplati32_uimm5 : SplatComplexPattern<vsplat_uimm5, v4i32, 1,
+                                          "selectVSplatUimm5",
+                                          [build_vector, bitconvert]>;
+
+def vsplati32_simm5 : SplatComplexPattern<vsplat_simm5, v4i32, 1,
+                                          "selectVSplatSimm5",
+                                          [build_vector, bitconvert]>;
+
+def vsplati64_uimm1 : SplatComplexPattern<vsplat_uimm1, v2i64, 1,
+                                          "selectVSplatUimm1",
+                                          [build_vector, bitconvert]>;
+
+def vsplati64_uimm5 : SplatComplexPattern<vsplat_uimm5, v2i64, 1,
+                                          "selectVSplatUimm5",
+                                          [build_vector, bitconvert]>;
+
+def vsplati64_uimm6 : SplatComplexPattern<vsplat_uimm6, v2i64, 1,
+                                          "selectVSplatUimm6",
+                                          [build_vector, bitconvert]>;
+
+def vsplati64_simm5 : SplatComplexPattern<vsplat_simm5, v2i64, 1,
+                                          "selectVSplatSimm5",
+                                          [build_vector, bitconvert]>;
+
+
+// Any build_vector that is a constant splat with a value that equals 1
+// FIXME: These should be a ComplexPattern but we can't use them because the
+// ISel generator requires the uses to have a name, but providing a name
+// causes other errors ("used in pattern but not operand list")
+def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
+}]>;
+
+def vbitclr_b : PatFrag<(ops node:$vj, node:$vk),
+                        (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk),
+                                            immAllOnesV))>;
+def vbitclr_h : PatFrag<(ops node:$vj, node:$vk),
+                        (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk),
+                                            immAllOnesV))>;
+def vbitclr_w : PatFrag<(ops node:$vj, node:$vk),
+                        (and node:$vj, (xor (shl vsplat_imm_eq_1, node:$vk),
+                                            immAllOnesV))>;
+def vbitclr_d : PatFrag<(ops node:$vj, node:$vk),
+                        (and node:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1),
+                                                 node:$vk),
+                                            (bitconvert (v4i32 immAllOnesV))))>;
+
+def vbneg_b : PatFrag<(ops node:$vj, node:$vk),
+                      (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>;
+def vbneg_h : PatFrag<(ops node:$vj, node:$vk),
+                      (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>;
+def vbneg_w : PatFrag<(ops node:$vj, node:$vk),
+                      (xor node:$vj, (shl vsplat_imm_eq_1, node:$vk))>;
+def vbneg_d : PatFrag<(ops node:$vj, node:$vk),
+                      (xor node:$vj, (shl (v2i64 vsplati64_imm_eq_1),
+                                          node:$vk))>;
+
+def vbset_b : PatFrag<(ops node:$vj, node:$vk),
+                      (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>;
+def vbset_h : PatFrag<(ops node:$vj, node:$vk),
+                      (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>;
+def vbset_w : PatFrag<(ops node:$vj, node:$vk),
+                      (or node:$vj, (shl vsplat_imm_eq_1, node:$vk))>;
+def vbset_d : PatFrag<(ops node:$vj, node:$vk),
+                      (or node:$vj, (shl (v2i64 vsplati64_imm_eq_1),
+                                         node:$vk))>;
+
+def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk),
+                     (add node:$vd, (mul node:$vj, node:$vk))>;
+
+def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk),
+                     (sub node:$vd, (mul node:$vj, node:$vk))>;
+
+class IsCommutable {
+  bit isCommutable = 1;
+}
+
+
+
+//class
+class LSX_3R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                       RegisterOperand ROVD, RegisterOperand ROVJ = ROVD,
+                       RegisterOperand ROVK = ROVD> {
+  dag OutOperandList = (outs ROVD:$vd);
+  dag InOperandList = (ins ROVJ:$vj, ROVK:$vk);
+  string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk");
+  list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))];
+}
+
+class LSX_3RN_DESC_BASE<string instr_asm, RegisterOperand ROVD,
+                        RegisterOperand ROVJ = ROVD,
+                        RegisterOperand ROVK = ROVD> :
+  LSX_3R_DESC_BASE<instr_asm, null_frag, ROVD, ROVJ, ROVK>;
+
+class LSX_3R_4R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          RegisterOperand ROVD, RegisterOperand ROVJ = ROVD,
+                          RegisterOperand ROVK = ROVD> {
+  dag OutOperandList = (outs ROVD:$vd);
+  dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk);
+  string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk");
+  list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj,
+                                       ROVK:$vk))];
+  string Constraints = "$vd = $vd_in";
+}
+
+class LSX_3R_VREPLVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                               RegisterOperand ROVD,
+                               RegisterOperand ROVJ = ROVD> {
+  dag OutOperandList = (outs ROVD:$vd);
+  dag InOperandList = (ins ROVJ:$vj, GPR32Opnd:$rk);
+  string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $rk");
+  list<dag> Pattern = [(set ROVD:$vd, (OpNode
ROVJ:$vj, GPR32Opnd:$rk))]; +} + +class LSX_VEC_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; +} + +class LSX_VEC_PSEUDO_BASE : + LSXPseudo<(outs ROVD:$vd), (ins ROVJ:$vj, ROVK:$vk), + [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]>; + +class LSX_3RF_DESC_BASE : + LSX_3R_DESC_BASE; + +class LSX_3RFN_DESC_BASE : + LSX_3R_DESC_BASE; + +class LSX_3R_DESC_BASE1 { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; +} + +class LSX_3RF_DESC_BASE1 : + LSX_3R_DESC_BASE1; + +class LSX_3R_VSHF_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$vd_in, ROVJ:$vj, + ROVK:$vk))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_3R_4R_VSHF_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVD:$va); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); + list Pattern = [(set ROVD:$vd, (LoongArchVSHF ROVD:$va, ROVJ:$vj, + ROVK:$vk))]; +} + +class LSX_I5_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$si5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$si5))]; +} + +class LSX_I5_U_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; +} + +class LSX_BIT_3_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui3))]; +} + +class LSX_BIT_3N_DESC_BASE : + LSX_BIT_3_DESC_BASE; + +class LSX_BIT_4_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui4))]; +} + +class LSX_BIT_4N_DESC_BASE : + LSX_BIT_4_DESC_BASE; + +class LSX_BIT_5_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; +} + +class LSX_BIT_5N_DESC_BASE : + LSX_BIT_5_DESC_BASE; + +class LSX_BIT_6_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui6))]; +} + +class LSX_BIT_6N_DESC_BASE : + LSX_BIT_6_DESC_BASE; + +class LSX_2R_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; +} + +class LSX_2RN_DESC_BASE : + LSX_2R_DESC_BASE; + +class LSX_2RF_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins 
ROVJ:$vj); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; +} + +class LSX_2RFN_DESC_BASE : + LSX_2R_DESC_BASE; + +class LSX_2RF_DESC_BASE_CVT { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj))]; +} + +class LSX_2RFN_DESC_BASE_CVT : + LSX_2RF_DESC_BASE_CVT; + +class LSX_2RF_DESC_BASE_tmp { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj"); + list Pattern = []; +} + +class LSX_2R_REPL_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROS:$rj); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj"); + list Pattern = [(set ROVD:$vd, (VT (OpNode ROS:$rj)))]; +} + +class LSX_INSERT_U4_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$rj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$rj, Imm:$ui4))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_INSERT_U4N_DESC_BASE : + LSX_INSERT_U4_DESC_BASE; + +class LSX_INSERT_U3_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui3"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui3))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_INSERT_U3N_DESC_BASE : + LSX_INSERT_U3_DESC_BASE; + +class LSX_INSERT_U2_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui2"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui2))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_INSERT_U2N_DESC_BASE : + LSX_INSERT_U2_DESC_BASE; + +class LSX_INSERT_U1_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROS:$rj, ImmOp:$ui1); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $ui1"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROS:$rj, Imm:$ui1))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_INSERT_U1N_DESC_BASE : + LSX_INSERT_U1_DESC_BASE; + +class LSX_PICK_U1_DESC_BASE { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui1); + string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui1"); + list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui1))]; +} + +class LSX_PICK_U2_DESC_BASE { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui2"); + list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui2))]; +} + +class LSX_PICK_U3_DESC_BASE { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui3"); + list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui3))]; +} + +class LSX_PICK_U4_DESC_BASE { + dag OutOperandList = (outs ROD:$rd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$rd, $vj, $ui4"); + list Pattern = [(set ROD:$rd, (OpNode (VecTy ROVJ:$vj), Imm:$ui4))]; +} + +class LSX_ELM_U3_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = 
(ins ROVJ:$vj, SplatImm.OpClass:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui3, ROVJ:$vj, + ROVJ:$vj))]; +} + +class LSX_ELM_U2_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui2); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); + list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui2, ROVJ:$vj, + ROVJ:$vj))]; +} + +class LSX_ELM_U1_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui1); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); + list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui1, ROVJ:$vj, + ROVJ:$vj))]; +} + +class LSX_ELM_U4_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (LoongArchVSHF SplatImm:$ui4, ROVJ:$vj, + ROVJ:$vj))]; +} + +class LSX_ELM_U4_SLD_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, + Imm:$ui4))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_ELM_U3_SLD_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, + Imm:$ui3))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_ELM_U2_SLD_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui2); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui2"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, + Imm:$ui2))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_ELM_U1_SLD_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui1); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui1"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, + Imm:$ui1))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIT_U3_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui3))]; +} + +class LSX_BIT_U4_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui4))]; +} + +class LSX_BIT_U5_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui5))]; +} + +class LSX_BIT_U6_VREPLVE_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui6))]; +} + +class LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic { + dag 
OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; +} + +class LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; +} + +class LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; +} + +class LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; +} + +class LSX_I8_SHF_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list Pattern = [(set ROVD:$vd, (LoongArchSHF immZExt8:$ui8, ROVJ:$vj))]; +} + +class LSX_I8_SHUF_DESC_BASE_D { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; + string Constraints = "$vd = $vd_in"; +} + +def LoongArchSelect : SDNode<"LoongArchISD::VSELECT" ,SDTSelect>; +def LoongArchVROR : SDNode<"LoongArchISD::VROR", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>, []>; +def LoongArchVRORI : SDNode<"LoongArchISD::VRORI", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, + SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>, []>; + +class LSX2_RORI_U3_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui3))]; +} + +class LSX2_RORI_U4_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui4))]; +} + +class LSX2_RORI_U5_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui5))]; +} + +class LSX2_RORI_U6_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (LoongArchVRORI ROVJ:$vj, Imm:$ui6))]; +} + +class LSX_BIND_U4_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui4))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIND_U4N_DESC_BASE : + LSX_BIND_U4_DESC_BASE; + +class LSX_BIND_U5_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, 
ROVJ:$vj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui5))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIND_U5N_DESC_BASE : + LSX_BIND_U5_DESC_BASE; + +class LSX_BIND_U6_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, ImmOp:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, Imm:$ui6))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIND_U6N_DESC_BASE : + LSX_BIND_U6_DESC_BASE; + +class LSX_BIND_U7_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_BIND_U7N_DESC_BASE : + LSX_BIND_U7_DESC_BASE; + + +class LD_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); + list Pattern = [(set ROVD:$vd, (TyNode (OpNode Addr:$addr)))]; + string DecoderMethod = "DecodeLSX128Mem"; +} + +class ST_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$vd, $addr"); + list Pattern = [(OpNode (TyNode ROVD:$vd), Addr:$addr)]; + string DecoderMethod = "DecodeLSX128Mem"; +} + +class LSX_VEC_ADDR_PSEUDO_BASE : + LSXPseudo<(outs), (ins ROVD:$vd, MemOpnd:$addr), + [(OpNode (TyNode ROVD:$vd), MemOpnd:$addr)]>; + + +class LSX_SET_DESC_BASE { + dag OutOperandList = (outs FCFROpnd:$cd); + dag InOperandList = (ins ROVD:$vj); + string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); + list Pattern = []; +} + +class LSX_SET_DESC_BASE_tmp { + dag OutOperandList = (outs FCFROpnd:$cd); + dag InOperandList = (ins ROVD:$vj); + string AsmString = !strconcat(instr_asm, "\t$cd, $vj"); + list Pattern = []; +} + +class LSX_VMul_Reg4 { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; +} + +class LSX_4RF { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk, ROVA:$va); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk, $va"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk, ROVA:$va))]; +} + + +class LSX_VFCMP_Reg3 { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ROVK:$vk); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $vk"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, ROVK:$vk))]; +} + +class LSX_I12_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, ImmOp:$si12); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si12"); + list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si12))]; +} + +class LSX_I11_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, ImmOp:$si11); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si11"); + list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si11))]; +} + +class LSX_I10_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, ImmOp:$si10); + string AsmString = 
!strconcat(instr_asm, "\t$vd, $rj, $si10"); + list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si10))]; +} + +class LSX_I9_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, ImmOp:$si9); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si9"); + list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, Imm:$si9))]; +} + + +class LSX_I8_U1_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm1:$idx); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt1:$idx)]; + string DecoderMethod = "DecodeLSX128memstl"; +} + + +class LSX_I8_U2_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm2:$idx); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt2:$idx)]; + string DecoderMethod = "DecodeLSX128memstl"; +} + +class LSX_I8_U3_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm3:$idx); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt3:$idx)]; + string DecoderMethod = "DecodeLSX128memstl"; +} + +class LSX_I8_U4_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, ImmOp:$si8, uimm4:$idx); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $si8, $idx"); + list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, Imm:$si8, immZExt4:$idx)]; + string DecoderMethod = "DecodeLSX128memstl"; +} + +class LSX_I5_U_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$ui5))]; +} + +class LSX_I5_DESC_BASE_Intrinsic { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, ImmOp:$si5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $si5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, Imm:$si5))]; +} + +class LSX_LDX_LA { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins PtrRC:$rj, RORK:$rk); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); + list Pattern = [(set ROVD:$vd, (OpNode iPTR:$rj, RORK:$rk))]; +} + +class LSX_SDX_LA { + dag OutOperandList = (outs); + dag InOperandList = (ins ROVD:$vd, PtrRC:$rj, RORK:$rk); + string AsmString = !strconcat(instr_asm, "\t$vd, $rj, $rk"); + list Pattern = [(OpNode ROVD:$vd, iPTR:$rj, RORK:$rk)]; +} + +class LSX_U5_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; +} + +class LSX_U5_4R_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_U3_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm3:$ui3); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui3"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt3:$ui3))]; +} + +class LSX_2R_U4_DESC_BASE { + dag 
OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt4:$ui4))]; +} + +class LSX_2R_U5_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt5:$ui5))]; +} + +class LSX_2R_U6_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm6:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt6:$ui6))]; +} + +class LSX_2R_3R_U4_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm4:$ui4); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui4"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt4:$ui4))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_U5_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm5:$ui5); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui5"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt5:$ui5))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_U6_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm6:$ui6); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui6"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt6:$ui6))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_U7_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm7:$ui7); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui7"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt7:$ui7))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_U8_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, ROVJ:$vj, immZExt8:$ui8))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_3R_SELECT { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVD:$vd_in, ROVJ:$vj, vsplat_uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list Pattern = [(set ROVD:$vd, (OpNode ROVD:$vd_in, vsplati8_uimm8:$ui8, ROVJ:$vj))]; + string Constraints = "$vd = $vd_in"; +} + +class LSX_2R_U8_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ROVJ:$vj, uimm8:$ui8); + string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8"); + list Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, immZExt8:$ui8))]; +} + +class LSX_I13_DESC_BASE { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins immOp:$i13); + string AsmString = !strconcat(instr_asm, "\t$vd, $i13"); + list Pattern = [(set ROVD:$vd, (OpNode (Ty simm13:$i13)))]; + string DecoderMethod = "DecodeLSX128Mem13"; +} + +class LSX_I13_DESC_BASE_10 { + dag OutOperandList = (outs ROVD:$vd); + dag InOperandList = (ins ImmOp:$i10); + string AsmString = !strconcat(instr_asm, "\t$vd, $i10"); + list Pattern = [(set ROVD:$vd, (OpNode Imm:$i10))]; + bit hasSideEffects = 0; + string DecoderMethod = "DecodeLSX128Mem10"; +} + +class 
LSX_BIT_U8_VREPLVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                                   SplatComplexPattern SplatImm,
+                                   RegisterOperand ROVD,
+                                   RegisterOperand ROVJ = ROVD> {
+  dag OutOperandList = (outs ROVD:$vd);
+  dag InOperandList = (ins ROVJ:$vj, SplatImm.OpClass:$ui8);
+  string AsmString = !strconcat(instr_asm, "\t$vd, $vj, $ui8");
+  list<dag> Pattern = [(set ROVD:$vd, (OpNode ROVJ:$vj, SplatImm:$ui8))];
+}
+
+
+class LSXPat<dag pattern, dag result, list<Predicate> pred = [HasLSX]> :
+  Pat<pattern, result>, Requires<pred>;
+
+// Instruction encoding.
+
+
+def VSADD_B : LSX_3R<0b01110000010001100>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vsadd.b", LSX128BOpnd>;
+
+def VSADD_H : LSX_3R<0b01110000010001101>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vsadd.h", LSX128HOpnd>;
+
+def VSADD_W : LSX_3R<0b01110000010001110>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vsadd.w", LSX128WOpnd>;
+
+def VSADD_D : LSX_3R<0b01110000010001111>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vsadd.d", LSX128DOpnd>;
+
+
+def VSSUB_B : LSX_3R<0b01110000010010000>,
+              LSX_3RN_DESC_BASE<"vssub.b", LSX128BOpnd>;
+
+def VSSUB_H : LSX_3R<0b01110000010010001>,
+              LSX_3RN_DESC_BASE<"vssub.h", LSX128HOpnd>;
+
+def VSSUB_W : LSX_3R<0b01110000010010010>,
+              LSX_3RN_DESC_BASE<"vssub.w", LSX128WOpnd>;
+
+def VSSUB_D : LSX_3R<0b01110000010010011>,
+              LSX_3RN_DESC_BASE<"vssub.d", LSX128DOpnd>;
+
+
+def VSADD_BU : LSX_3R<0b01110000010010100>, IsCommutable,
+               LSX_3RN_DESC_BASE<"vsadd.bu", LSX128BOpnd>;
+
+def VSADD_HU : LSX_3R<0b01110000010010101>, IsCommutable,
+               LSX_3RN_DESC_BASE<"vsadd.hu", LSX128HOpnd>;
+
+def VSADD_WU : LSX_3R<0b01110000010010110>, IsCommutable,
+               LSX_3RN_DESC_BASE<"vsadd.wu", LSX128WOpnd>;
+
+def VSADD_DU : LSX_3R<0b01110000010010111>, IsCommutable,
+               LSX_3RN_DESC_BASE<"vsadd.du", LSX128DOpnd>;
+
+
+def VSSUB_BU : LSX_3R<0b01110000010011000>,
+               LSX_3RN_DESC_BASE<"vssub.bu", LSX128BOpnd>;
+
+def VSSUB_HU : LSX_3R<0b01110000010011001>,
+               LSX_3RN_DESC_BASE<"vssub.hu", LSX128HOpnd>;
+
+def VSSUB_WU : LSX_3R<0b01110000010011010>,
+               LSX_3RN_DESC_BASE<"vssub.wu", LSX128WOpnd>;
+
+def VSSUB_DU : LSX_3R<0b01110000010011011>,
+               LSX_3RN_DESC_BASE<"vssub.du", LSX128DOpnd>;
+
+
+def VHADDW_H_B : LSX_3R<0b01110000010101000>,
+                 LSX_3RN_DESC_BASE<"vhaddw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VHADDW_W_H : LSX_3R<0b01110000010101001>,
+                 LSX_3RN_DESC_BASE<"vhaddw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VHADDW_D_W : LSX_3R<0b01110000010101010>,
+                 LSX_3RN_DESC_BASE<"vhaddw.d.w", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+
+def VHSUBW_H_B : LSX_3R<0b01110000010101100>,
+                 LSX_3RN_DESC_BASE<"vhsubw.h.b", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VHSUBW_W_H : LSX_3R<0b01110000010101101>,
+                 LSX_3RN_DESC_BASE<"vhsubw.w.h", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VHSUBW_D_W : LSX_3R<0b01110000010101110>,
+                 LSX_3RN_DESC_BASE<"vhsubw.d.w", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+
+def VHADDW_HU_BU : LSX_3R<0b01110000010110000>,
+                   LSX_3RN_DESC_BASE<"vhaddw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VHADDW_WU_HU : LSX_3R<0b01110000010110001>,
+                   LSX_3RN_DESC_BASE<"vhaddw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VHADDW_DU_WU : LSX_3R<0b01110000010110010>,
+                   LSX_3RN_DESC_BASE<"vhaddw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+
+def VHSUBW_HU_BU : LSX_3R<0b01110000010110100>,
+                   LSX_3RN_DESC_BASE<"vhsubw.hu.bu", LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VHSUBW_WU_HU : LSX_3R<0b01110000010110101>,
+                   LSX_3RN_DESC_BASE<"vhsubw.wu.hu", LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VHSUBW_DU_WU : LSX_3R<0b01110000010110110>,
+                   LSX_3RN_DESC_BASE<"vhsubw.du.wu", LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+
+def VADDA_B : LSX_3R<0b01110000010111000>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vadda.b",
+              LSX128BOpnd>;
+
+def VADDA_H : LSX_3R<0b01110000010111001>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vadda.h", LSX128HOpnd>;
+
+def VADDA_W : LSX_3R<0b01110000010111010>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vadda.w", LSX128WOpnd>;
+
+def VADDA_D : LSX_3R<0b01110000010111011>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vadda.d", LSX128DOpnd>;
+
+
+def VABSD_B : LSX_3R<0b01110000011000000>,
+              LSX_3RN_DESC_BASE<"vabsd.b", LSX128BOpnd>;
+
+def VABSD_H : LSX_3R<0b01110000011000001>,
+              LSX_3RN_DESC_BASE<"vabsd.h", LSX128HOpnd>;
+
+def VABSD_W : LSX_3R<0b01110000011000010>,
+              LSX_3RN_DESC_BASE<"vabsd.w", LSX128WOpnd>;
+
+def VABSD_D : LSX_3R<0b01110000011000011>,
+              LSX_3RN_DESC_BASE<"vabsd.d", LSX128DOpnd>;
+
+
+def VABSD_BU : LSX_3R<0b01110000011000100>,
+               LSX_3RN_DESC_BASE<"vabsd.bu", LSX128BOpnd>;
+
+def VABSD_HU : LSX_3R<0b01110000011000101>,
+               LSX_3RN_DESC_BASE<"vabsd.hu", LSX128HOpnd>;
+
+def VABSD_WU : LSX_3R<0b01110000011000110>,
+               LSX_3RN_DESC_BASE<"vabsd.wu", LSX128WOpnd>;
+
+def VABSD_DU : LSX_3R<0b01110000011000111>,
+               LSX_3RN_DESC_BASE<"vabsd.du", LSX128DOpnd>;
+
+
+def VAVG_B : LSX_3R<0b01110000011001000>, IsCommutable,
+             LSX_3RN_DESC_BASE<"vavg.b", LSX128BOpnd>;
+
+def VAVG_H : LSX_3R<0b01110000011001001>, IsCommutable,
+             LSX_3RN_DESC_BASE<"vavg.h", LSX128HOpnd>;
+
+def VAVG_W : LSX_3R<0b01110000011001010>, IsCommutable,
+             LSX_3RN_DESC_BASE<"vavg.w", LSX128WOpnd>;
+
+def VAVG_D : LSX_3R<0b01110000011001011>, IsCommutable,
+             LSX_3RN_DESC_BASE<"vavg.d", LSX128DOpnd>;
+
+
+def VAVG_BU : LSX_3R<0b01110000011001100>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vavg.bu", LSX128BOpnd>;
+
+def VAVG_HU : LSX_3R<0b01110000011001101>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vavg.hu", LSX128HOpnd>;
+
+def VAVG_WU : LSX_3R<0b01110000011001110>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vavg.wu", LSX128WOpnd>;
+
+def VAVG_DU : LSX_3R<0b01110000011001111>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vavg.du", LSX128DOpnd>;
+
+
+def VAVGR_B : LSX_3R<0b01110000011010000>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vavgr.b", LSX128BOpnd>;
+
+def VAVGR_H : LSX_3R<0b01110000011010001>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vavgr.h", LSX128HOpnd>;
+
+def VAVGR_W : LSX_3R<0b01110000011010010>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vavgr.w", LSX128WOpnd>;
+
+def VAVGR_D : LSX_3R<0b01110000011010011>, IsCommutable,
+              LSX_3RN_DESC_BASE<"vavgr.d", LSX128DOpnd>;
+
+
+def VAVGR_BU : LSX_3R<0b01110000011010100>, IsCommutable,
+               LSX_3RN_DESC_BASE<"vavgr.bu", LSX128BOpnd>;
+
+def VAVGR_HU : LSX_3R<0b01110000011010101>, IsCommutable,
+               LSX_3RN_DESC_BASE<"vavgr.hu", LSX128HOpnd>;
+
+def VAVGR_WU : LSX_3R<0b01110000011010110>, IsCommutable,
+               LSX_3RN_DESC_BASE<"vavgr.wu", LSX128WOpnd>;
+
+def VAVGR_DU : LSX_3R<0b01110000011010111>, IsCommutable,
+               LSX_3RN_DESC_BASE<"vavgr.du", LSX128DOpnd>;
+
+
+def VMAX_B : LSX_3R<0b01110000011100000>,
+             LSX_3R_DESC_BASE<"vmax.b", smax, LSX128BOpnd>;
+
+def VMAX_H : LSX_3R<0b01110000011100001>,
+             LSX_3R_DESC_BASE<"vmax.h", smax, LSX128HOpnd>;
+
+def VMAX_W : LSX_3R<0b01110000011100010>,
+             LSX_3R_DESC_BASE<"vmax.w", smax, LSX128WOpnd>;
+
+def VMAX_D : LSX_3R<0b01110000011100011>,
+             LSX_3R_DESC_BASE<"vmax.d", smax, LSX128DOpnd>;
+
+
+def VMIN_B : LSX_3R<0b01110000011100100>,
+             LSX_3R_DESC_BASE<"vmin.b", smin, LSX128BOpnd>;
+
+def VMIN_H : LSX_3R<0b01110000011100101>,
+             LSX_3R_DESC_BASE<"vmin.h", smin, LSX128HOpnd>;
+
+def VMIN_W : LSX_3R<0b01110000011100110>,
+             LSX_3R_DESC_BASE<"vmin.w", smin, LSX128WOpnd>;
+
+def VMIN_D : LSX_3R<0b01110000011100111>,
+             LSX_3R_DESC_BASE<"vmin.d", smin, LSX128DOpnd>;
+
+
+def VMAX_BU : LSX_3R<0b01110000011101000>,
+              LSX_3R_DESC_BASE<"vmax.bu", umax, LSX128BOpnd>;
+
+def VMAX_HU : LSX_3R<0b01110000011101001>,
+              LSX_3R_DESC_BASE<"vmax.hu", umax, LSX128HOpnd>;
+
+def VMAX_WU : LSX_3R<0b01110000011101010>,
+              LSX_3R_DESC_BASE<"vmax.wu", umax, LSX128WOpnd>;
+
+def VMAX_DU : LSX_3R<0b01110000011101011>,
+              LSX_3R_DESC_BASE<"vmax.du", umax, LSX128DOpnd>;
+
+
+def VMIN_BU : LSX_3R<0b01110000011101100>,
+              LSX_3R_DESC_BASE<"vmin.bu", umin, LSX128BOpnd>;
+
+def VMIN_HU : LSX_3R<0b01110000011101101>,
+              LSX_3R_DESC_BASE<"vmin.hu", umin, LSX128HOpnd>;
+
+def VMIN_WU : LSX_3R<0b01110000011101110>,
+              LSX_3R_DESC_BASE<"vmin.wu", umin, LSX128WOpnd>;
+
+def VMIN_DU : LSX_3R<0b01110000011101111>,
+              LSX_3R_DESC_BASE<"vmin.du", umin, LSX128DOpnd>;
+
+
+def VMUL_B : LSX_3R<0b01110000100001000>,
+             LSX_3R_DESC_BASE<"vmul.b", mul, LSX128BOpnd>;
+
+def VMUL_H : LSX_3R<0b01110000100001001>,
+             LSX_3R_DESC_BASE<"vmul.h", mul, LSX128HOpnd>;
+
+def VMUL_W : LSX_3R<0b01110000100001010>,
+             LSX_3R_DESC_BASE<"vmul.w", mul, LSX128WOpnd>;
+
+def VMUL_D : LSX_3R<0b01110000100001011>,
+             LSX_3R_DESC_BASE<"vmul.d", mul, LSX128DOpnd>;
+
+
+def VMADD_B : LSX_3R<0b01110000101010000>,
+              LSX_3R_4R_DESC_BASE<"vmadd.b", muladd, LSX128BOpnd>;
+
+def VMADD_H : LSX_3R<0b01110000101010001>,
+              LSX_3R_4R_DESC_BASE<"vmadd.h", muladd, LSX128HOpnd>;
+
+def VMADD_W : LSX_3R<0b01110000101010010>,
+              LSX_3R_4R_DESC_BASE<"vmadd.w", muladd, LSX128WOpnd>;
+
+def VMADD_D : LSX_3R<0b01110000101010011>,
+              LSX_3R_4R_DESC_BASE<"vmadd.d", muladd, LSX128DOpnd>;
+
+
+def VMSUB_B : LSX_3R<0b01110000101010100>,
+              LSX_3R_4R_DESC_BASE<"vmsub.b", mulsub, LSX128BOpnd>;
+
+def VMSUB_H : LSX_3R<0b01110000101010101>,
+              LSX_3R_4R_DESC_BASE<"vmsub.h", mulsub, LSX128HOpnd>;
+
+def VMSUB_W : LSX_3R<0b01110000101010110>,
+              LSX_3R_4R_DESC_BASE<"vmsub.w", mulsub, LSX128WOpnd>;
+
+def VMSUB_D : LSX_3R<0b01110000101010111>,
+              LSX_3R_4R_DESC_BASE<"vmsub.d", mulsub, LSX128DOpnd>;
+
+
+def VDIV_B : LSX_3R<0b01110000111000000>,
+             LSX_3R_DESC_BASE<"vdiv.b", sdiv, LSX128BOpnd>;
+
+def VDIV_H : LSX_3R<0b01110000111000001>,
+             LSX_3R_DESC_BASE<"vdiv.h", sdiv, LSX128HOpnd>;
+
+def VDIV_W : LSX_3R<0b01110000111000010>,
+             LSX_3R_DESC_BASE<"vdiv.w", sdiv, LSX128WOpnd>;
+
+def VDIV_D : LSX_3R<0b01110000111000011>,
+             LSX_3R_DESC_BASE<"vdiv.d", sdiv, LSX128DOpnd>;
+
+
+def VMOD_B : LSX_3R<0b01110000111000100>,
+             LSX_3R_DESC_BASE<"vmod.b", srem, LSX128BOpnd>;
+
+def VMOD_H : LSX_3R<0b01110000111000101>,
+             LSX_3R_DESC_BASE<"vmod.h", srem, LSX128HOpnd>;
+
+def VMOD_W : LSX_3R<0b01110000111000110>,
+             LSX_3R_DESC_BASE<"vmod.w", srem, LSX128WOpnd>;
+
+def VMOD_D : LSX_3R<0b01110000111000111>,
+             LSX_3R_DESC_BASE<"vmod.d", srem, LSX128DOpnd>;
+
+
+def VDIV_BU : LSX_3R<0b01110000111001000>,
+              LSX_3R_DESC_BASE<"vdiv.bu", udiv, LSX128BOpnd>;
+
+def VDIV_HU : LSX_3R<0b01110000111001001>,
+              LSX_3R_DESC_BASE<"vdiv.hu", udiv, LSX128HOpnd>;
+
+def VDIV_WU : LSX_3R<0b01110000111001010>,
+              LSX_3R_DESC_BASE<"vdiv.wu", udiv, LSX128WOpnd>;
+
+def VDIV_DU : LSX_3R<0b01110000111001011>,
+              LSX_3R_DESC_BASE<"vdiv.du", udiv, LSX128DOpnd>;
+
+
+def VMOD_BU : LSX_3R<0b01110000111001100>,
+              LSX_3R_DESC_BASE<"vmod.bu", urem, LSX128BOpnd>;
+
+def VMOD_HU : LSX_3R<0b01110000111001101>,
+              LSX_3R_DESC_BASE<"vmod.hu", urem, LSX128HOpnd>;
+
+def VMOD_WU : LSX_3R<0b01110000111001110>,
+              LSX_3R_DESC_BASE<"vmod.wu", urem, LSX128WOpnd>;
+
+def VMOD_DU : LSX_3R<0b01110000111001111>,
+              LSX_3R_DESC_BASE<"vmod.du", urem, LSX128DOpnd>;
+
+
+def VSLL_B : LSX_3R<0b01110000111010000>,
+             LSX_3R_DESC_BASE<"vsll.b", shl, LSX128BOpnd>;
+
+def VSLL_H : LSX_3R<0b01110000111010001>,
+             LSX_3R_DESC_BASE<"vsll.h", shl, LSX128HOpnd>;
+
+def VSLL_W : LSX_3R<0b01110000111010010>,
+             LSX_3R_DESC_BASE<"vsll.w", shl, LSX128WOpnd>;
+
+def VSLL_D : LSX_3R<0b01110000111010011>,
+             LSX_3R_DESC_BASE<"vsll.d", shl, LSX128DOpnd>;
+
+
+def VSRL_B : LSX_3R<0b01110000111010100>,
+             LSX_3R_DESC_BASE<"vsrl.b", srl, LSX128BOpnd>;
+
+def VSRL_H : LSX_3R<0b01110000111010101>,
+             LSX_3R_DESC_BASE<"vsrl.h", srl, LSX128HOpnd>;
+
+def VSRL_W : LSX_3R<0b01110000111010110>,
+             LSX_3R_DESC_BASE<"vsrl.w", srl, LSX128WOpnd>;
+
+def VSRL_D : LSX_3R<0b01110000111010111>,
+             LSX_3R_DESC_BASE<"vsrl.d", srl, LSX128DOpnd>;
+
+
+def VSRA_B : LSX_3R<0b01110000111011000>,
+             LSX_3R_DESC_BASE<"vsra.b", sra, LSX128BOpnd>;
+
+def VSRA_H : LSX_3R<0b01110000111011001>,
+             LSX_3R_DESC_BASE<"vsra.h", sra, LSX128HOpnd>;
+
+def VSRA_W : LSX_3R<0b01110000111011010>,
+             LSX_3R_DESC_BASE<"vsra.w", sra, LSX128WOpnd>;
+
+def VSRA_D : LSX_3R<0b01110000111011011>,
+             LSX_3R_DESC_BASE<"vsra.d", sra, LSX128DOpnd>;
+
+
+def VSRLR_B : LSX_3R<0b01110000111100000>,
+              LSX_3RN_DESC_BASE<"vsrlr.b", LSX128BOpnd>;
+
+def VSRLR_H : LSX_3R<0b01110000111100001>,
+              LSX_3RN_DESC_BASE<"vsrlr.h", LSX128HOpnd>;
+
+def VSRLR_W : LSX_3R<0b01110000111100010>,
+              LSX_3RN_DESC_BASE<"vsrlr.w", LSX128WOpnd>;
+
+def VSRLR_D : LSX_3R<0b01110000111100011>,
+              LSX_3RN_DESC_BASE<"vsrlr.d", LSX128DOpnd>;
+
+
+def VSRAR_B : LSX_3R<0b01110000111100100>,
+              LSX_3RN_DESC_BASE<"vsrar.b", LSX128BOpnd>;
+
+def VSRAR_H : LSX_3R<0b01110000111100101>,
+              LSX_3RN_DESC_BASE<"vsrar.h", LSX128HOpnd>;
+
+def VSRAR_W : LSX_3R<0b01110000111100110>,
+              LSX_3RN_DESC_BASE<"vsrar.w", LSX128WOpnd>;
+
+def VSRAR_D : LSX_3R<0b01110000111100111>,
+              LSX_3RN_DESC_BASE<"vsrar.d", LSX128DOpnd>;
+
+
+def VBITCLR_B : LSX_3R<0b01110001000011000>,
+                LSX_3R_DESC_BASE<"vbitclr.b", vbitclr_b, LSX128BOpnd>;
+
+def VBITCLR_H : LSX_3R<0b01110001000011001>,
+                LSX_3R_DESC_BASE<"vbitclr.h", vbitclr_h, LSX128HOpnd>;
+
+def VBITCLR_W : LSX_3R<0b01110001000011010>,
+                LSX_3R_DESC_BASE<"vbitclr.w", vbitclr_w, LSX128WOpnd>;
+
+def VBITCLR_D : LSX_3R<0b01110001000011011>,
+                LSX_3R_DESC_BASE<"vbitclr.d", vbitclr_d, LSX128DOpnd>;
+
+
+def VBITSET_B : LSX_3R<0b01110001000011100>,
+                LSX_3RN_DESC_BASE<"vbitset.b", LSX128BOpnd>;
+
+def VBITSET_H : LSX_3R<0b01110001000011101>,
+                LSX_3RN_DESC_BASE<"vbitset.h", LSX128HOpnd>;
+
+def VBITSET_W : LSX_3R<0b01110001000011110>,
+                LSX_3RN_DESC_BASE<"vbitset.w", LSX128WOpnd>;
+
+def VBITSET_D : LSX_3R<0b01110001000011111>,
+                LSX_3RN_DESC_BASE<"vbitset.d", LSX128DOpnd>;
+
+
+def VBITREV_B : LSX_3R<0b01110001000100000>,
+                LSX_3RN_DESC_BASE<"vbitrev.b", LSX128BOpnd>;
+
+def VBITREV_H : LSX_3R<0b01110001000100001>,
+                LSX_3RN_DESC_BASE<"vbitrev.h", LSX128HOpnd>;
+
+def VBITREV_W : LSX_3R<0b01110001000100010>,
+                LSX_3RN_DESC_BASE<"vbitrev.w", LSX128WOpnd>;
+
+def VBITREV_D : LSX_3R<0b01110001000100011>,
+                LSX_3RN_DESC_BASE<"vbitrev.d", LSX128DOpnd>;
+
+
+def VPACKEV_B : LSX_3R<0b01110001000101100>,
+                LSX_3R_DESC_BASE<"vpackev.b", LoongArchVPACKEV, LSX128BOpnd>;
+
+def VPACKEV_H : LSX_3R<0b01110001000101101>,
+                LSX_3R_DESC_BASE<"vpackev.h", LoongArchVPACKEV, LSX128HOpnd>;
+
+def VPACKEV_W : LSX_3R<0b01110001000101110>,
+                LSX_3R_DESC_BASE<"vpackev.w", LoongArchVPACKEV, LSX128WOpnd>;
+
+def VPACKEV_D : LSX_3R<0b01110001000101111>,
+                LSX_3R_DESC_BASE<"vpackev.d", LoongArchVPACKEV, LSX128DOpnd>;
+
+
+def VPACKOD_B : LSX_3R<0b01110001000110000>,
+                LSX_3R_DESC_BASE<"vpackod.b", LoongArchVPACKOD, LSX128BOpnd>;
+
+def VPACKOD_H : LSX_3R<0b01110001000110001>,
+                LSX_3R_DESC_BASE<"vpackod.h", LoongArchVPACKOD, LSX128HOpnd>;
+
+def VPACKOD_W : LSX_3R<0b01110001000110010>,
+                LSX_3R_DESC_BASE<"vpackod.w", LoongArchVPACKOD, LSX128WOpnd>;
+
+def VPACKOD_D : LSX_3R<0b01110001000110011>,
+                LSX_3R_DESC_BASE<"vpackod.d", LoongArchVPACKOD, LSX128DOpnd>;
+
+
+def VILVL_B : LSX_3R<0b01110001000110100>,
+              LSX_3R_DESC_BASE<"vilvl.b", LoongArchVILVL, LSX128BOpnd>;
+
+def VILVL_H : LSX_3R<0b01110001000110101>,
+              LSX_3R_DESC_BASE<"vilvl.h", LoongArchVILVL, LSX128HOpnd>;
+
+def VILVL_W : LSX_3R<0b01110001000110110>,
+              LSX_3R_DESC_BASE<"vilvl.w", LoongArchVILVL, LSX128WOpnd>;
+
+def VILVL_D : LSX_3R<0b01110001000110111>,
+              LSX_3R_DESC_BASE<"vilvl.d", LoongArchVILVL, LSX128DOpnd>;
+
+
+def VILVH_B : LSX_3R<0b01110001000111000>,
+              LSX_3R_DESC_BASE<"vilvh.b", LoongArchVILVH, LSX128BOpnd>;
+
+def VILVH_H : LSX_3R<0b01110001000111001>,
+              LSX_3R_DESC_BASE<"vilvh.h", LoongArchVILVH, LSX128HOpnd>;
+
+def VILVH_W : LSX_3R<0b01110001000111010>,
+              LSX_3R_DESC_BASE<"vilvh.w", LoongArchVILVH, LSX128WOpnd>;
+
+def VILVH_D : LSX_3R<0b01110001000111011>,
+              LSX_3R_DESC_BASE<"vilvh.d", LoongArchVILVH, LSX128DOpnd>;
+
+
+def VPICKEV_B : LSX_3R<0b01110001000111100>,
+                LSX_3R_DESC_BASE<"vpickev.b", LoongArchVPICKEV, LSX128BOpnd>;
+
+def VPICKEV_H : LSX_3R<0b01110001000111101>,
+                LSX_3R_DESC_BASE<"vpickev.h", LoongArchVPICKEV, LSX128HOpnd>;
+
+def VPICKEV_W : LSX_3R<0b01110001000111110>,
+                LSX_3R_DESC_BASE<"vpickev.w", LoongArchVPICKEV, LSX128WOpnd>;
+
+def VPICKEV_D : LSX_3R<0b01110001000111111>,
+                LSX_3R_DESC_BASE<"vpickev.d", LoongArchVPICKEV, LSX128DOpnd>;
+
+
+def VPICKOD_B : LSX_3R<0b01110001001000000>,
+                LSX_3R_DESC_BASE<"vpickod.b", LoongArchVPICKOD, LSX128BOpnd>;
+
+def VPICKOD_H : LSX_3R<0b01110001001000001>,
+                LSX_3R_DESC_BASE<"vpickod.h", LoongArchVPICKOD, LSX128HOpnd>;
+
+def VPICKOD_W : LSX_3R<0b01110001001000010>,
+                LSX_3R_DESC_BASE<"vpickod.w", LoongArchVPICKOD, LSX128WOpnd>;
+
+def VPICKOD_D : LSX_3R<0b01110001001000011>,
+                LSX_3R_DESC_BASE<"vpickod.d", LoongArchVPICKOD, LSX128DOpnd>;
+
+
+def VREPLVE_B : LSX_3R_1GP<0b01110001001000100>,
+                LSX_3R_VREPLVE_DESC_BASE<"vreplve.b", vsplati8_elt, LSX128BOpnd>;
+
+def VREPLVE_H : LSX_3R_1GP<0b01110001001000101>,
+                LSX_3R_VREPLVE_DESC_BASE<"vreplve.h", vsplati16_elt, LSX128HOpnd>;
+
+def VREPLVE_W : LSX_3R_1GP<0b01110001001000110>,
+                LSX_3R_VREPLVE_DESC_BASE<"vreplve.w", vsplati32_elt, LSX128WOpnd>;
+
+def VREPLVE_D : LSX_3R_1GP<0b01110001001000111>,
+                LSX_3R_VREPLVE_DESC_BASE<"vreplve.d", vsplati64_elt, LSX128DOpnd>;
+
+
+def VAND_V : LSX_3R<0b01110001001001100>,
+             LSX_VEC_DESC_BASE<"vand.v", and, LSX128BOpnd>;
+class AND_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<and, v8i16, LSX128HOpnd>;
+class AND_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<and, v4i32, LSX128WOpnd>;
+class AND_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<and, v2i64, LSX128DOpnd>;
+
+def AND_V_H_PSEUDO : AND_V_H_PSEUDO_DESC,
+                     PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd,
+                                                 LSX128BOpnd:$vj,
+                                                 LSX128BOpnd:$vk)>;
+def AND_V_W_PSEUDO : AND_V_W_PSEUDO_DESC,
+                     PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd,
+                                                 LSX128BOpnd:$vj,
+                                                 LSX128BOpnd:$vk)>;
+def AND_V_D_PSEUDO : AND_V_D_PSEUDO_DESC,
+                     PseudoInstExpansion<(VAND_V LSX128BOpnd:$vd,
+                                                 LSX128BOpnd:$vj,
+                                                 LSX128BOpnd:$vk)>;
+
+
+def VOR_V : LSX_3R<0b01110001001001101>,
+            LSX_VEC_DESC_BASE<"vor.v", or, LSX128BOpnd>;
+class OR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<or, v8i16, LSX128HOpnd>;
+class OR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<or, v4i32, LSX128WOpnd>;
+class OR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<or, v2i64, LSX128DOpnd>;
+
+def OR_V_H_PSEUDO : OR_V_H_PSEUDO_DESC,
+                    PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd,
+                                               LSX128BOpnd:$vj,
+                                               LSX128BOpnd:$vk)>;
+def OR_V_W_PSEUDO : OR_V_W_PSEUDO_DESC,
+                    PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd,
+                                               LSX128BOpnd:$vj,
+                                               LSX128BOpnd:$vk)>;
+def OR_V_D_PSEUDO : OR_V_D_PSEUDO_DESC,
+                    PseudoInstExpansion<(VOR_V LSX128BOpnd:$vd,
+                                               LSX128BOpnd:$vj,
+                                               LSX128BOpnd:$vk)>;
+
+
+def VXOR_V : LSX_3R<0b01110001001001110>,
+             LSX_VEC_DESC_BASE<"vxor.v", xor, LSX128BOpnd>;
+class XOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<xor, v8i16, LSX128HOpnd>;
+class XOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<xor, v4i32, LSX128WOpnd>;
+class XOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<xor, v2i64, LSX128DOpnd>;
+
+def XOR_V_H_PSEUDO : XOR_V_H_PSEUDO_DESC,
+                     PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd,
+                                                 LSX128BOpnd:$vj,
+                                                 LSX128BOpnd:$vk)>;
+def XOR_V_W_PSEUDO : XOR_V_W_PSEUDO_DESC,
+                     PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd,
+                                                 LSX128BOpnd:$vj,
+                                                 LSX128BOpnd:$vk)>;
+def XOR_V_D_PSEUDO : XOR_V_D_PSEUDO_DESC,
+                     PseudoInstExpansion<(VXOR_V LSX128BOpnd:$vd,
+                                                 LSX128BOpnd:$vj,
+                                                 LSX128BOpnd:$vk)>;
+
+
+def VNOR_V : LSX_3R<0b01110001001001111>,
+             LSX_VEC_DESC_BASE<"vnor.v", LoongArchVNOR, LSX128BOpnd>;
+class NOR_V_H_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<LoongArchVNOR, v8i16, LSX128HOpnd>;
+class NOR_V_W_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<LoongArchVNOR, v4i32, LSX128WOpnd>;
+class NOR_V_D_PSEUDO_DESC : LSX_VEC_PSEUDO_BASE<LoongArchVNOR, v2i64, LSX128DOpnd>;
+
+def NOR_V_H_PSEUDO : NOR_V_H_PSEUDO_DESC,
+                     PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd,
+                                                 LSX128BOpnd:$vj,
+                                                 LSX128BOpnd:$vk)>;
+def NOR_V_W_PSEUDO : NOR_V_W_PSEUDO_DESC,
+                     PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd,
+                                                 LSX128BOpnd:$vj,
+                                                 LSX128BOpnd:$vk)>;
+def NOR_V_D_PSEUDO : NOR_V_D_PSEUDO_DESC,
+                     PseudoInstExpansion<(VNOR_V LSX128BOpnd:$vd,
+                                                 LSX128BOpnd:$vj,
+                                                 LSX128BOpnd:$vk)>;
+
+
+def VFADD_S : LSX_3R<0b01110001001100001>, IsCommutable,
+              LSX_3RF_DESC_BASE<"vfadd.s", fadd, LSX128WOpnd>;
+
+def VFADD_D : LSX_3R<0b01110001001100010>, IsCommutable,
+              LSX_3RF_DESC_BASE<"vfadd.d", fadd, LSX128DOpnd>;
+
+
+def VFSUB_S : LSX_3R<0b01110001001100101>,
+              LSX_3RF_DESC_BASE<"vfsub.s", fsub, LSX128WOpnd>;
+
+def VFSUB_D : LSX_3R<0b01110001001100110>,
+              LSX_3RF_DESC_BASE<"vfsub.d", fsub, LSX128DOpnd>;
+
+
+def VFMUL_S : LSX_3R<0b01110001001110001>,
+              LSX_3RF_DESC_BASE<"vfmul.s", fmul, LSX128WOpnd>;
+
+def VFMUL_D : LSX_3R<0b01110001001110010>,
+              LSX_3RF_DESC_BASE<"vfmul.d", fmul, LSX128DOpnd>;
+
+
+def VFDIV_S : LSX_3R<0b01110001001110101>,
+              LSX_3RF_DESC_BASE<"vfdiv.s", fdiv, LSX128WOpnd>;
+
+def VFDIV_D : LSX_3R<0b01110001001110110>,
+              LSX_3RF_DESC_BASE<"vfdiv.d", fdiv, LSX128DOpnd>;
+
+
+def VFMAX_S : LSX_3R<0b01110001001111001>,
+              LSX_3RFN_DESC_BASE<"vfmax.s", LSX128WOpnd>;
+
+def VFMAX_D : LSX_3R<0b01110001001111010>,
+              LSX_3RFN_DESC_BASE<"vfmax.d", LSX128DOpnd>;
+
+
+def VFMIN_S : LSX_3R<0b01110001001111101>,
+              LSX_3RFN_DESC_BASE<"vfmin.s", LSX128WOpnd>;
+
+def VFMIN_D : LSX_3R<0b01110001001111110>,
+              LSX_3RFN_DESC_BASE<"vfmin.d", LSX128DOpnd>;
+
+
+def VFMAXA_S : LSX_3R<0b01110001010000001>,
+               LSX_3RFN_DESC_BASE<"vfmaxa.s", LSX128WOpnd>;
+
+def VFMAXA_D : LSX_3R<0b01110001010000010>,
+               LSX_3RFN_DESC_BASE<"vfmaxa.d", LSX128DOpnd>;
+
+
+def VFMINA_S : LSX_3R<0b01110001010000101>,
+               LSX_3RFN_DESC_BASE<"vfmina.s", LSX128WOpnd>;
+
+def VFMINA_D : LSX_3R<0b01110001010000110>,
+               LSX_3RFN_DESC_BASE<"vfmina.d", LSX128DOpnd>;
+
+
+def VSHUF_H : LSX_3R<0b01110001011110101>,
+              LSX_3R_VSHF_DESC_BASE<"vshuf.h", LSX128HOpnd>;
+
+def VSHUF_W : LSX_3R<0b01110001011110110>,
+              LSX_3R_VSHF_DESC_BASE<"vshuf.w", LSX128WOpnd>;
+
+def VSHUF_D : LSX_3R<0b01110001011110111>,
+              LSX_3R_VSHF_DESC_BASE<"vshuf.d", LSX128DOpnd>;
+
+
+def VSEQI_B : LSX_I5<0b01110010100000000>,
+              LSX_I5_DESC_BASE_Intrinsic<"vseqi.b", int_loongarch_lsx_vseqi_b, simm5_32, immSExt5, LSX128BOpnd>;
+
+def VSEQI_H : LSX_I5<0b01110010100000001>,
+              LSX_I5_DESC_BASE_Intrinsic<"vseqi.h", int_loongarch_lsx_vseqi_h, simm5_32, immSExt5, LSX128HOpnd>;
+
+def VSEQI_W : LSX_I5<0b01110010100000010>,
+              LSX_I5_DESC_BASE_Intrinsic<"vseqi.w", int_loongarch_lsx_vseqi_w, simm5_32, immSExt5, LSX128WOpnd>;
+
+def VSEQI_D : LSX_I5<0b01110010100000011>,
+              LSX_I5_DESC_BASE_Intrinsic<"vseqi.d", int_loongarch_lsx_vseqi_d, simm5_32, immSExt5, LSX128DOpnd>;
+
+
+def VSLEI_B : LSX_I5<0b01110010100000100>,
+              LSX_I5_DESC_BASE_Intrinsic<"vslei.b", int_loongarch_lsx_vslei_b, simm5_32, immSExt5, LSX128BOpnd>;
+
+def VSLEI_H : LSX_I5<0b01110010100000101>,
+              LSX_I5_DESC_BASE_Intrinsic<"vslei.h", int_loongarch_lsx_vslei_h, simm5_32, immSExt5, LSX128HOpnd>;
+
+def VSLEI_W : LSX_I5<0b01110010100000110>,
+              LSX_I5_DESC_BASE_Intrinsic<"vslei.w", int_loongarch_lsx_vslei_w, simm5_32, immSExt5, LSX128WOpnd>;
+
+def VSLEI_D : LSX_I5<0b01110010100000111>,
+              LSX_I5_DESC_BASE_Intrinsic<"vslei.d", int_loongarch_lsx_vslei_d, simm5_32, immSExt5, LSX128DOpnd>;
+
+
+def VSLEI_BU : LSX_I5_U<0b01110010100001000>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vslei.bu", int_loongarch_lsx_vslei_bu, uimm5, immZExt5, LSX128BOpnd>;
+
+def VSLEI_HU : LSX_I5_U<0b01110010100001001>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vslei.hu", int_loongarch_lsx_vslei_hu, uimm5, immZExt5, LSX128HOpnd>;
+
+def VSLEI_WU : LSX_I5_U<0b01110010100001010>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vslei.wu", int_loongarch_lsx_vslei_wu, uimm5, immZExt5, LSX128WOpnd>;
+
+def VSLEI_DU : LSX_I5_U<0b01110010100001011>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vslei.du", int_loongarch_lsx_vslei_du, uimm5, immZExt5, LSX128DOpnd>;
+
+
+def VSLTI_B : LSX_I5<0b01110010100001100>,
+              LSX_I5_DESC_BASE_Intrinsic<"vslti.b", int_loongarch_lsx_vslti_b, simm5_32, immSExt5, LSX128BOpnd>;
+
+def VSLTI_H : LSX_I5<0b01110010100001101>,
+              LSX_I5_DESC_BASE_Intrinsic<"vslti.h", int_loongarch_lsx_vslti_h, simm5_32, immSExt5, LSX128HOpnd>;
+
+def VSLTI_W : LSX_I5<0b01110010100001110>,
+              LSX_I5_DESC_BASE_Intrinsic<"vslti.w", int_loongarch_lsx_vslti_w, simm5_32, immSExt5, LSX128WOpnd>;
+
+def VSLTI_D : LSX_I5<0b01110010100001111>,
+              LSX_I5_DESC_BASE_Intrinsic<"vslti.d", int_loongarch_lsx_vslti_d, simm5_32, immSExt5, LSX128DOpnd>;
+
+
+def VSLTI_BU : LSX_I5_U<0b01110010100010000>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vslti.bu", int_loongarch_lsx_vslti_bu, uimm5, immZExt5, LSX128BOpnd>;
+
+def VSLTI_HU : LSX_I5_U<0b01110010100010001>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vslti.hu", int_loongarch_lsx_vslti_hu, uimm5, immZExt5, LSX128HOpnd>;
+
+def VSLTI_WU : LSX_I5_U<0b01110010100010010>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vslti.wu", int_loongarch_lsx_vslti_wu, uimm5, immZExt5, LSX128WOpnd>;
+
+def VSLTI_DU : LSX_I5_U<0b01110010100010011>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vslti.du", int_loongarch_lsx_vslti_du, uimm5, immZExt5, LSX128DOpnd>;
+
+
+def VADDI_BU : LSX_I5_U<0b01110010100010100>,
+               LSX_I5_U_DESC_BASE<"vaddi.bu", add, vsplati8_uimm5, LSX128BOpnd>;
+
+def VADDI_HU : LSX_I5_U<0b01110010100010101>,
+               LSX_I5_U_DESC_BASE<"vaddi.hu", add, vsplati16_uimm5, LSX128HOpnd>;
+
+def VADDI_WU : LSX_I5_U<0b01110010100010110>,
+               LSX_I5_U_DESC_BASE<"vaddi.wu", add, vsplati32_uimm5, LSX128WOpnd>;
+
+def VADDI_DU : LSX_I5_U<0b01110010100010111>,
+               LSX_I5_U_DESC_BASE<"vaddi.du", add, vsplati64_uimm5, LSX128DOpnd>;
+
+
+def VSUBI_BU : LSX_I5_U<0b01110010100011000>,
+               LSX_I5_U_DESC_BASE<"vsubi.bu", sub, vsplati8_uimm5, LSX128BOpnd>;
+
+def VSUBI_HU : LSX_I5_U<0b01110010100011001>,
+               LSX_I5_U_DESC_BASE<"vsubi.hu", sub, vsplati16_uimm5, LSX128HOpnd>;
+
+def VSUBI_WU : LSX_I5_U<0b01110010100011010>,
+               LSX_I5_U_DESC_BASE<"vsubi.wu", sub, vsplati32_uimm5, LSX128WOpnd>;
+
+def VSUBI_DU : LSX_I5_U<0b01110010100011011>,
+               LSX_I5_U_DESC_BASE<"vsubi.du", sub, vsplati64_uimm5, LSX128DOpnd>;
+
+
+def VMAXI_B : LSX_I5<0b01110010100100000>,
+              LSX_I5_DESC_BASE_Intrinsic<"vmaxi.b", int_loongarch_lsx_vmaxi_b, simm5_32, immSExt5, LSX128BOpnd>;
+
+def VMAXI_H : LSX_I5<0b01110010100100001>,
+              LSX_I5_DESC_BASE_Intrinsic<"vmaxi.h", int_loongarch_lsx_vmaxi_h, simm5_32, immSExt5, LSX128HOpnd>;
+
+def VMAXI_W : LSX_I5<0b01110010100100010>,
+              LSX_I5_DESC_BASE_Intrinsic<"vmaxi.w", int_loongarch_lsx_vmaxi_w, simm5_32, immSExt5, LSX128WOpnd>;
+
+def VMAXI_D : LSX_I5<0b01110010100100011>,
+              LSX_I5_DESC_BASE_Intrinsic<"vmaxi.d", int_loongarch_lsx_vmaxi_d, simm5_32, immSExt5, LSX128DOpnd>;
+
+
+def VMINI_B : LSX_I5<0b01110010100100100>,
+              LSX_I5_DESC_BASE_Intrinsic<"vmini.b", int_loongarch_lsx_vmini_b, simm5_32, immSExt5, LSX128BOpnd>;
+
+def VMINI_H : LSX_I5<0b01110010100100101>,
+              LSX_I5_DESC_BASE_Intrinsic<"vmini.h", int_loongarch_lsx_vmini_h, simm5_32, immSExt5, LSX128HOpnd>;
+
+def VMINI_W : LSX_I5<0b01110010100100110>,
+              LSX_I5_DESC_BASE_Intrinsic<"vmini.w", int_loongarch_lsx_vmini_w, simm5_32, immSExt5, LSX128WOpnd>;
+
+def VMINI_D : LSX_I5<0b01110010100100111>,
+              LSX_I5_DESC_BASE_Intrinsic<"vmini.d", int_loongarch_lsx_vmini_d, simm5_32, immSExt5, LSX128DOpnd>;
+
+
+def VMAXI_BU : LSX_I5_U<0b01110010100101000>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.bu", int_loongarch_lsx_vmaxi_bu, uimm5, immZExt5, LSX128BOpnd>;
+
+def VMAXI_HU : LSX_I5_U<0b01110010100101001>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.hu", int_loongarch_lsx_vmaxi_hu, uimm5, immZExt5, LSX128HOpnd>;
+
+def VMAXI_WU : LSX_I5_U<0b01110010100101010>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.wu", int_loongarch_lsx_vmaxi_wu, uimm5, immZExt5, LSX128WOpnd>;
+
+def VMAXI_DU : LSX_I5_U<0b01110010100101011>,
+               LSX_I5_U_DESC_BASE_Intrinsic<"vmaxi.du", int_loongarch_lsx_vmaxi_du, uimm5, immZExt5, LSX128DOpnd>;
+
+
+def VMINI_BU : LSX_I5_U<0b01110010100101100>,
+               LSX_I5_U_DESC_BASE<"vmini.bu", umin, vsplati8_uimm5, LSX128BOpnd>;
+
+def VMINI_HU : LSX_I5_U<0b01110010100101101>,
+               LSX_I5_U_DESC_BASE<"vmini.hu", umin, vsplati16_uimm5, LSX128HOpnd>;
+
+def VMINI_WU : LSX_I5_U<0b01110010100101110>,
+               LSX_I5_U_DESC_BASE<"vmini.wu", umin, vsplati32_uimm5, LSX128WOpnd>;
+
+def VMINI_DU : LSX_I5_U<0b01110010100101111>,
+               LSX_I5_U_DESC_BASE<"vmini.du", umin, vsplati64_uimm5, LSX128DOpnd>;
+
+
+def VCLO_B : LSX_2R<0b0111001010011100000000>,
+             LSX_2RN_DESC_BASE<"vclo.b", LSX128BOpnd>;
+
+def VCLO_H : LSX_2R<0b0111001010011100000001>,
+             LSX_2RN_DESC_BASE<"vclo.h", LSX128HOpnd>;
+
+def VCLO_W : LSX_2R<0b0111001010011100000010>,
+             LSX_2RN_DESC_BASE<"vclo.w", LSX128WOpnd>;
+
+def VCLO_D : LSX_2R<0b0111001010011100000011>,
+             LSX_2RN_DESC_BASE<"vclo.d", LSX128DOpnd>;
+
+
+def VCLZ_B : LSX_2R<0b0111001010011100000100>,
+             LSX_2R_DESC_BASE<"vclz.b", ctlz, LSX128BOpnd>;
+
+def VCLZ_H : LSX_2R<0b0111001010011100000101>,
+             LSX_2R_DESC_BASE<"vclz.h", ctlz, LSX128HOpnd>;
+
+def VCLZ_W : LSX_2R<0b0111001010011100000110>,
+             LSX_2R_DESC_BASE<"vclz.w", ctlz, LSX128WOpnd>;
+
+def VCLZ_D : LSX_2R<0b0111001010011100000111>,
+             LSX_2R_DESC_BASE<"vclz.d", ctlz, LSX128DOpnd>;
+
+
+def VPCNT_B : LSX_2R<0b0111001010011100001000>,
+              LSX_2R_DESC_BASE<"vpcnt.b", ctpop, LSX128BOpnd>;
+
+def VPCNT_H : LSX_2R<0b0111001010011100001001>,
+              LSX_2R_DESC_BASE<"vpcnt.h", ctpop, LSX128HOpnd>;
+
+def VPCNT_W : LSX_2R<0b0111001010011100001010>,
+              LSX_2R_DESC_BASE<"vpcnt.w", ctpop, LSX128WOpnd>;
+
+def VPCNT_D : LSX_2R<0b0111001010011100001011>,
+              LSX_2R_DESC_BASE<"vpcnt.d", ctpop, LSX128DOpnd>;
+
+
+def VFLOGB_S : LSX_2R<0b0111001010011100110001>,
+               LSX_2RFN_DESC_BASE<"vflogb.s", LSX128WOpnd>;
+
+def VFLOGB_D : LSX_2R<0b0111001010011100110010>,
+               LSX_2RFN_DESC_BASE<"vflogb.d", LSX128DOpnd>;
+
+
+def VFCLASS_S : LSX_2R<0b0111001010011100110101>,
+                LSX_2RFN_DESC_BASE<"vfclass.s", LSX128WOpnd>;
+
+def VFCLASS_D : LSX_2R<0b0111001010011100110110>,
+                LSX_2RFN_DESC_BASE<"vfclass.d", LSX128DOpnd>;
+
+
+def VFSQRT_S : LSX_2R<0b0111001010011100111001>,
+               LSX_2RF_DESC_BASE<"vfsqrt.s", fsqrt, LSX128WOpnd>;
+
+def VFSQRT_D : LSX_2R<0b0111001010011100111010>,
+               LSX_2RF_DESC_BASE<"vfsqrt.d", fsqrt, LSX128DOpnd>;
+
+
+def VFRECIP_S : LSX_2R<0b0111001010011100111101>,
+                LSX_2RFN_DESC_BASE<"vfrecip.s", LSX128WOpnd>;
+
+def VFRECIP_D : LSX_2R<0b0111001010011100111110>,
+                LSX_2RFN_DESC_BASE<"vfrecip.d", LSX128DOpnd>;
+
+
+def VFRSQRT_S : LSX_2R<0b0111001010011101000001>,
+                LSX_2RFN_DESC_BASE<"vfrsqrt.s", LSX128WOpnd>;
+
+def VFRSQRT_D : LSX_2R<0b0111001010011101000010>,
+                LSX_2RFN_DESC_BASE<"vfrsqrt.d", LSX128DOpnd>;
+
+
+def VFRINT_S : LSX_2R<0b0111001010011101001101>,
+               LSX_2RF_DESC_BASE<"vfrint.s", frint, LSX128WOpnd>;
+
+def VFRINT_D : LSX_2R<0b0111001010011101001110>,
+               LSX_2RF_DESC_BASE<"vfrint.d", frint, LSX128DOpnd>;
+
+
+def VFCVTL_S_H : LSX_2R<0b0111001010011101111010>,
+                 LSX_2RFN_DESC_BASE_CVT<"vfcvtl.s.h", LSX128WOpnd, LSX128HOpnd>;
+
+def VFCVTH_S_H : LSX_2R<0b0111001010011101111011>,
+                 LSX_2RFN_DESC_BASE_CVT<"vfcvth.s.h", LSX128WOpnd, LSX128HOpnd>;
+
+
+def VFCVTL_D_S : LSX_2R<0b0111001010011101111100>,
+                 LSX_2RFN_DESC_BASE_CVT<"vfcvtl.d.s", LSX128DOpnd, LSX128WOpnd>;
+
+def VFCVTH_D_S : LSX_2R<0b0111001010011101111101>,
+                 LSX_2RFN_DESC_BASE_CVT<"vfcvth.d.s", LSX128DOpnd, LSX128WOpnd>;
+
+
+def VFFINT_S_W : LSX_2R<0b0111001010011110000000>,
+                 LSX_2RF_DESC_BASE<"vffint.s.w", sint_to_fp, LSX128WOpnd>;
+
+def VFFINT_S_WU : LSX_2R<0b0111001010011110000001>,
+                  LSX_2RF_DESC_BASE<"vffint.s.wu", uint_to_fp, LSX128WOpnd>;
+
+
+def VFFINT_D_L : LSX_2R<0b0111001010011110000010>,
+                 LSX_2RF_DESC_BASE<"vffint.d.l", sint_to_fp, LSX128DOpnd>;
+
+def VFFINT_D_LU : LSX_2R<0b0111001010011110000011>,
+                  LSX_2RF_DESC_BASE<"vffint.d.lu", uint_to_fp, LSX128DOpnd>;
+
+
+def VFTINT_W_S : LSX_2R<0b0111001010011110001100>,
+                 LSX_2RFN_DESC_BASE<"vftint.w.s", LSX128WOpnd>;
+
+def VFTINT_L_D : LSX_2R<0b0111001010011110001101>,
+                 LSX_2RFN_DESC_BASE<"vftint.l.d", LSX128DOpnd>;
+
+
+def VFTINT_WU_S : LSX_2R<0b0111001010011110010110>,
+                  LSX_2RFN_DESC_BASE<"vftint.wu.s", LSX128WOpnd>;
+
+def VFTINT_LU_D : LSX_2R<0b0111001010011110010111>,
+                  LSX_2RFN_DESC_BASE<"vftint.lu.d", LSX128DOpnd>;
+
+
+def VFTINTRZ_WU_S : LSX_2R<0b0111001010011110011100>,
+                    LSX_2RF_DESC_BASE<"vftintrz.wu.s", fp_to_uint, LSX128WOpnd>;
+
+def VFTINTRZ_LU_D : LSX_2R<0b0111001010011110011101>,
+                    LSX_2RF_DESC_BASE<"vftintrz.lu.d", fp_to_uint, LSX128DOpnd>;
+
+
+def VREPLGR2VR_B : LSX_2R_1GP<0b0111001010011111000000>,
+                   LSX_2R_REPL_DESC_BASE<"vreplgr2vr.b", v16i8, vsplati8, LSX128BOpnd, GPR32Opnd>;
+
+def VREPLGR2VR_H : LSX_2R_1GP<0b0111001010011111000001>,
+                   LSX_2R_REPL_DESC_BASE<"vreplgr2vr.h", v8i16, vsplati16, LSX128HOpnd, GPR32Opnd>;
+
+def VREPLGR2VR_W : LSX_2R_1GP<0b0111001010011111000010>,
+                   LSX_2R_REPL_DESC_BASE<"vreplgr2vr.w", v4i32, vsplati32, LSX128WOpnd, GPR32Opnd>;
+
+def VREPLGR2VR_D : LSX_2R_1GP<0b0111001010011111000011>,
+                   LSX_2R_REPL_DESC_BASE<"vreplgr2vr.d", v2i64, vsplati64, LSX128DOpnd, GPR64Opnd>;
+
+
+class LSX_2R_FILL_PSEUDO_BASE<ValueType VT, SDPatternOperator OpNode,
+                              RegisterClass RCVD, RegisterClass RCVS = RCVD> :
+  LSXPseudo<(outs RCVD:$vd), (ins RCVS:$fs),
+            [(set RCVD:$vd, (OpNode RCVS:$fs))]> {
+  let usesCustomInserter = 1;
+}
+
+class FILL_FW_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE<v4f32, vsplatf32,
+                                                    LSX128W, FGR32>;
+class FILL_FD_PSEUDO_DESC : LSX_2R_FILL_PSEUDO_BASE<v2f64, vsplatf64,
+                                                    LSX128D, FGR64>;
+
+def FILL_FW_PSEUDO : FILL_FW_PSEUDO_DESC;
+def FILL_FD_PSEUDO : FILL_FD_PSEUDO_DESC;
+
+
+def VSRLRI_B : LSX_I3_U<0b0111001010100100001>,
+               LSX_BIT_3N_DESC_BASE<"vsrlri.b", uimm3, immZExt3, LSX128BOpnd>;
+
+def VSRLRI_H : LSX_I4_U<0b011100101010010001>,
+               LSX_BIT_4N_DESC_BASE<"vsrlri.h", uimm4, immZExt4, LSX128HOpnd>;
+
+def VSRLRI_W : LSX_I5_U<0b01110010101001001>,
+               LSX_BIT_5N_DESC_BASE<"vsrlri.w", uimm5, immZExt5, LSX128WOpnd>;
+
+def VSRLRI_D : LSX_I6_U<0b0111001010100101>,
+               LSX_BIT_6N_DESC_BASE<"vsrlri.d", uimm6, immZExt6, LSX128DOpnd>;
+
+
+def VSRARI_B : LSX_I3_U<0b0111001010101000001>,
+               LSX_BIT_3N_DESC_BASE<"vsrari.b", uimm3, immZExt3, LSX128BOpnd>;
+
+def VSRARI_H : LSX_I4_U<0b011100101010100001>,
+               LSX_BIT_4N_DESC_BASE<"vsrari.h", uimm4, immZExt4, LSX128HOpnd>;
+
+def VSRARI_W : LSX_I5_U<0b01110010101010001>,
+               LSX_BIT_5N_DESC_BASE<"vsrari.w", uimm5, immZExt5, LSX128WOpnd>;
+
+def VSRARI_D : LSX_I6_U<0b0111001010101001>,
+               LSX_BIT_6N_DESC_BASE<"vsrari.d", uimm6, immZExt6, LSX128DOpnd>;
+
+
+def VINSGR2VR_B : LSX_I4_R_U<0b011100101110101110>,
+                  LSX_INSERT_U4_DESC_BASE<"vinsgr2vr.b", vinsert_v16i8, uimm4, immZExt4Ptr, LSX128BOpnd, GPR32Opnd>;
+
+def VINSGR2VR_H : LSX_I3_R_U<0b0111001011101011110>,
+                  LSX_INSERT_U3_DESC_BASE<"vinsgr2vr.h", vinsert_v8i16, uimm3, immZExt3Ptr, LSX128HOpnd, GPR32Opnd>;
+
+def VINSGR2VR_W : LSX_I2_R_U<0b01110010111010111110>,
+                  LSX_INSERT_U2_DESC_BASE<"vinsgr2vr.w", vinsert_v4i32, uimm2, immZExt2Ptr, LSX128WOpnd, GPR32Opnd>;
+
+def VINSGR2VR_D : LSX_I1_R_U<0b011100101110101111110>,
+                  LSX_INSERT_U1_DESC_BASE<"vinsgr2vr.d", vinsert_v2i64, uimm1, immZExt1Ptr, LSX128DOpnd, GPR64Opnd>;
+
+
+def VPICKVE2GR_B : LSX_ELM_COPY_B<0b011100101110111110>,
+                   LSX_PICK_U4_DESC_BASE<"vpickve2gr.b", vextract_sext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>;
+
+def VPICKVE2GR_H : LSX_ELM_COPY_H<0b0111001011101111110>,
+                   LSX_PICK_U3_DESC_BASE<"vpickve2gr.h", vextract_sext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>;
+
+def VPICKVE2GR_W : LSX_ELM_COPY_W<0b01110010111011111110>,
+                   LSX_PICK_U2_DESC_BASE<"vpickve2gr.w", vextract_sext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>;
+
+def VPICKVE2GR_D : LSX_ELM_COPY_D<0b011100101110111111110>,
+                   LSX_PICK_U1_DESC_BASE<"vpickve2gr.d", vextract_sext_i64, v2i64, uimm1_ptr, immZExt1Ptr, GPR64Opnd, LSX128DOpnd>;
+
+
+def VPICKVE2GR_BU : LSX_ELM_COPY_B<0b011100101111001110>,
+                    LSX_PICK_U4_DESC_BASE<"vpickve2gr.bu", vextract_zext_i8, v16i8, uimm4_ptr, immZExt4Ptr, GPR32Opnd, LSX128BOpnd>;
+
+def VPICKVE2GR_HU : LSX_ELM_COPY_H<0b0111001011110011110>,
+                    LSX_PICK_U3_DESC_BASE<"vpickve2gr.hu", vextract_zext_i16, v8i16, uimm3_ptr, immZExt3Ptr, GPR32Opnd, LSX128HOpnd>;
+
+def VPICKVE2GR_WU : LSX_ELM_COPY_W<0b01110010111100111110>,
+                    LSX_PICK_U2_DESC_BASE<"vpickve2gr.wu", vextract_zext_i32, v4i32, uimm2_ptr, immZExt2Ptr, GPR32Opnd, LSX128WOpnd>;
+
+def VPICKVE2GR_DU : LSX_ELM_COPY_D<0b011100101111001111110>,
+                    LSX_PICK_U1_DESC_BASE<"vpickve2gr.du", int_loongarch_lsx_vpickve2gr_du, v2i64, uimm1, immZExt1, GPR64Opnd, LSX128DOpnd>;
+
+
+def : LSXPat<(vextract_zext_i64 (v2i64 LSX128D:$vj), immZExt1Ptr:$idx),
+             (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>;
+def : LSXPat<(vextract_zext_i64 (v2f64 LSX128D:$vj), immZExt1Ptr:$idx),
+             (VPICKVE2GR_D LSX128D:$vj, immZExt1:$idx)>;
+
+
+def VREPLVEI_B : LSX_I4_U<0b011100101111011110>,
+                 LSX_ELM_U4_VREPLVE_DESC_BASE<"vreplvei.b", vsplati8_uimm4, LSX128BOpnd>;
+
+def VREPLVEI_H : LSX_I3_U<0b0111001011110111110>,
+                 LSX_ELM_U3_VREPLVE_DESC_BASE<"vreplvei.h", vsplati16_uimm3, LSX128HOpnd>;
+
+def VREPLVEI_W : LSX_I2_U<0b01110010111101111110>,
+                 LSX_ELM_U2_VREPLVE_DESC_BASE<"vreplvei.w", vsplati32_uimm2, LSX128WOpnd>;
+
+def VREPLVEI_D : LSX_I1_U<0b011100101111011111110>,
+                 LSX_ELM_U1_VREPLVE_DESC_BASE<"vreplvei.d", vsplati64_uimm1, LSX128DOpnd>;
+
+
+def VSAT_B : LSX_I3_U<0b0111001100100100001>,
+             LSX_BIT_3N_DESC_BASE<"vsat.b", uimm3, immZExt3, LSX128BOpnd>;
+
+def VSAT_H : LSX_I4_U<0b011100110010010001>,
+             LSX_BIT_4N_DESC_BASE<"vsat.h", uimm4, immZExt4, LSX128HOpnd>;
+
+def VSAT_W : LSX_I5_U<0b01110011001001001>,
+             LSX_BIT_5N_DESC_BASE<"vsat.w", uimm5, immZExt5, LSX128WOpnd>;
+
+def VSAT_D : LSX_I6_U<0b0111001100100101>,
+             LSX_BIT_6N_DESC_BASE<"vsat.d", uimm6, immZExt6, LSX128DOpnd>;
+
+
+def VSAT_BU : LSX_I3_U<0b0111001100101000001>,
+              LSX_BIT_3N_DESC_BASE<"vsat.bu", uimm3, immZExt3, LSX128BOpnd>;
+
+def VSAT_HU : LSX_I4_U<0b011100110010100001>,
+              LSX_BIT_4N_DESC_BASE<"vsat.hu", uimm4, immZExt4, LSX128HOpnd>;
+
+def VSAT_WU : LSX_I5_U<0b01110011001010001>,
+              LSX_BIT_5N_DESC_BASE<"vsat.wu", uimm5, immZExt5, LSX128WOpnd>;
+
+def VSAT_DU : LSX_I6_U<0b0111001100101001>,
+              LSX_BIT_6N_DESC_BASE<"vsat.du", uimm6, immZExt6, LSX128DOpnd>;
+
+
+def VSLLI_B : LSX_I3_U<0b0111001100101100001>,
+              LSX_BIT_U3_VREPLVE_DESC_BASE<"vslli.b", shl, vsplati8_uimm3, LSX128BOpnd>;
+
+def VSLLI_H : LSX_I4_U<0b011100110010110001>,
+              LSX_BIT_U4_VREPLVE_DESC_BASE<"vslli.h", shl, vsplati16_uimm4, LSX128HOpnd>;
+
+def VSLLI_W : LSX_I5_U<0b01110011001011001>,
+              LSX_BIT_U5_VREPLVE_DESC_BASE<"vslli.w", shl, vsplati32_uimm5, LSX128WOpnd>;
+
+def VSLLI_D : LSX_I6_U<0b0111001100101101>,
+              LSX_BIT_U6_VREPLVE_DESC_BASE<"vslli.d", shl, vsplati64_uimm6, LSX128DOpnd>;
+
+
+def VSRLI_B : LSX_I3_U<0b0111001100110000001>,
+              LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrli.b", srl, vsplati8_uimm3, LSX128BOpnd>;
+
+def VSRLI_H : LSX_I4_U<0b011100110011000001>,
+              LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrli.h", srl, vsplati16_uimm4, LSX128HOpnd>;
+
+def VSRLI_W : LSX_I5_U<0b01110011001100001>,
+              LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrli.w", srl, vsplati32_uimm5, LSX128WOpnd>;
+
+def VSRLI_D : LSX_I6_U<0b0111001100110001>,
+              LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrli.d", srl, vsplati64_uimm6, LSX128DOpnd>;
+
+
+def VSRAI_B : LSX_I3_U<0b0111001100110100001>,
+              LSX_BIT_U3_VREPLVE_DESC_BASE_Intrinsic<"vsrai.b", int_loongarch_lsx_vsrai_b, LSX128BOpnd>;
+
+def VSRAI_H : LSX_I4_U<0b011100110011010001>,
+              LSX_BIT_U4_VREPLVE_DESC_BASE_Intrinsic<"vsrai.h", int_loongarch_lsx_vsrai_h, LSX128HOpnd>;
+
+def VSRAI_W : LSX_I5_U<0b01110011001101001>,
+              LSX_BIT_U5_VREPLVE_DESC_BASE_Intrinsic<"vsrai.w", int_loongarch_lsx_vsrai_w, LSX128WOpnd>;
+
+def VSRAI_D : LSX_I6_U<0b0111001100110101>,
+              LSX_BIT_U6_VREPLVE_DESC_BASE_Intrinsic<"vsrai.d", int_loongarch_lsx_vsrai_d, LSX128DOpnd>;
+
+
+def VSHUF4I_B : LSX_I8_U<0b01110011100100>,
+                LSX_I8_SHF_DESC_BASE<"vshuf4i.b", LSX128BOpnd>;
+
+def VSHUF4I_H : LSX_I8_U<0b01110011100101>,
+                LSX_I8_SHF_DESC_BASE<"vshuf4i.h", LSX128HOpnd>;
+
+def VSHUF4I_W : LSX_I8_U<0b01110011100110>,
+                LSX_I8_SHF_DESC_BASE<"vshuf4i.w", LSX128WOpnd>;
+
+def VSHUF4I_D : LSX_I8_U<0b01110011100111>,
+                LSX_I8_SHUF_DESC_BASE_D<"vshuf4i.d", int_loongarch_lsx_vshuf4i_d, LSX128DOpnd>;
+
+
+def VROTR_B : LSX_3R<0b01110000111011100>,
LSX_3R_DESC_BASE<"vrotr.b", LoongArchVROR, LSX128BOpnd>; + +def VROTR_H : LSX_3R<0b01110000111011101>, + LSX_3R_DESC_BASE<"vrotr.h", LoongArchVROR, LSX128HOpnd>; + +def VROTR_W : LSX_3R<0b01110000111011110>, + LSX_3R_DESC_BASE<"vrotr.w", LoongArchVROR, LSX128WOpnd>; + +def VROTR_D : LSX_3R<0b01110000111011111>, + LSX_3R_DESC_BASE<"vrotr.d", LoongArchVROR, LSX128DOpnd>; + + +def VMSKLTZ_B : LSX_2R<0b0111001010011100010000>, + LSX_2RN_DESC_BASE<"vmskltz.b", LSX128BOpnd>; + +def VMSKLTZ_H : LSX_2R<0b0111001010011100010001>, + LSX_2RN_DESC_BASE<"vmskltz.h", LSX128HOpnd>; + +def VMSKLTZ_W : LSX_2R<0b0111001010011100010010>, + LSX_2RN_DESC_BASE<"vmskltz.w", LSX128WOpnd>; + +def VMSKLTZ_D : LSX_2R<0b0111001010011100010011>, + LSX_2RN_DESC_BASE<"vmskltz.d", LSX128DOpnd>; + + +def VROTRI_B : LSX_I3_U<0b0111001010100000001>, + LSX2_RORI_U3_DESC_BASE<"vrotri.b", uimm3, immZExt3, LSX128BOpnd>; + +def VROTRI_H : LSX_I4_U<0b011100101010000001>, + LSX2_RORI_U4_DESC_BASE<"vrotri.h", uimm4, immZExt4, LSX128HOpnd>; + +def VROTRI_W : LSX_I5_U<0b01110010101000001>, + LSX2_RORI_U5_DESC_BASE<"vrotri.w", uimm5, immZExt5, LSX128WOpnd>; + +def VROTRI_D : LSX_I6_U<0b0111001010100001>, + LSX2_RORI_U6_DESC_BASE<"vrotri.d", uimm6, immZExt6, LSX128DOpnd>; + + +def VSRLNI_B_H : LSX_I4_U<0b011100110100000001>, + LSX_BIND_U4N_DESC_BASE<"vsrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSRLNI_H_W : LSX_I5_U<0b01110011010000001>, + LSX_BIND_U5N_DESC_BASE<"vsrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSRLNI_W_D : LSX_I6_U<0b0111001101000001>, + LSX_BIND_U6N_DESC_BASE<"vsrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSRLNI_D_Q : LSX_I7_U<0b011100110100001>, + LSX_BIND_U7N_DESC_BASE<"vsrlni.d.q", LSX128DOpnd>; + + +def VSRLRNI_B_H : LSX_I4_U<0b011100110100010001>, + LSX_BIND_U4_DESC_BASE<"vsrlrni.b.h", int_loongarch_lsx_vsrlrni_b_h, uimm4, immZExt4, LSX128BOpnd>; + +def VSRLRNI_H_W : LSX_I5_U<0b01110011010001001>, + LSX_BIND_U5_DESC_BASE<"vsrlrni.h.w", int_loongarch_lsx_vsrlrni_h_w, uimm5, immZExt5, LSX128HOpnd>; + +def VSRLRNI_W_D : LSX_I6_U<0b0111001101000101>, + LSX_BIND_U6_DESC_BASE<"vsrlrni.w.d", int_loongarch_lsx_vsrlrni_w_d, uimm6, immZExt6, LSX128WOpnd>; + +def VSRLRNI_D_Q : LSX_I7_U<0b011100110100011>, + LSX_BIND_U7_DESC_BASE<"vsrlrni.d.q", int_loongarch_lsx_vsrlrni_d_q, LSX128DOpnd>; + + +def VSSRLNI_B_H : LSX_I4_U<0b011100110100100001>, + LSX_BIND_U4N_DESC_BASE<"vssrlni.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRLNI_H_W : LSX_I5_U<0b01110011010010001>, + LSX_BIND_U5N_DESC_BASE<"vssrlni.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRLNI_W_D : LSX_I6_U<0b0111001101001001>, + LSX_BIND_U6N_DESC_BASE<"vssrlni.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRLNI_D_Q : LSX_I7_U<0b011100110100101>, + LSX_BIND_U7N_DESC_BASE<"vssrlni.d.q", LSX128DOpnd>; + + +def VSSRLNI_BU_H : LSX_I4_U<0b011100110100110001>, + LSX_BIND_U4N_DESC_BASE<"vssrlni.bu.h", uimm4, immZExt4, LSX128BOpnd> ; + +def VSSRLNI_HU_W : LSX_I5_U<0b01110011010011001>, + LSX_BIND_U5N_DESC_BASE<"vssrlni.hu.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRLNI_WU_D : LSX_I6_U<0b0111001101001101>, + LSX_BIND_U6N_DESC_BASE<"vssrlni.wu.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRLNI_DU_Q : LSX_I7_U<0b011100110100111>, + LSX_BIND_U7N_DESC_BASE<"vssrlni.du.q", LSX128DOpnd>; + + +def VSSRLRNI_BU_H : LSX_I4_U<0b011100110101010001>, + LSX_BIND_U4N_DESC_BASE<"vssrlrni.bu.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRLRNI_HU_W : LSX_I5_U<0b01110011010101001>, + LSX_BIND_U5N_DESC_BASE<"vssrlrni.hu.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRLRNI_WU_D : 
LSX_I6_U<0b0111001101010101>, + LSX_BIND_U6N_DESC_BASE<"vssrlrni.wu.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRLRNI_DU_Q : LSX_I7_U<0b011100110101011>, + LSX_BIND_U7N_DESC_BASE<"vssrlrni.du.q", LSX128DOpnd>; + + +def VSRARNI_B_H : LSX_I4_U<0b011100110101110001>, + LSX_BIND_U4N_DESC_BASE<"vsrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSRARNI_H_W : LSX_I5_U<0b01110011010111001>, + LSX_BIND_U5N_DESC_BASE<"vsrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSRARNI_W_D : LSX_I6_U<0b0111001101011101>, + LSX_BIND_U6N_DESC_BASE<"vsrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSRARNI_D_Q : LSX_I7_U<0b011100110101111>, + LSX_BIND_U7N_DESC_BASE<"vsrarni.d.q", LSX128DOpnd>; + + +def VSSRANI_B_H : LSX_I4_U<0b011100110110000001>, + LSX_BIND_U4N_DESC_BASE<"vssrani.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRANI_H_W : LSX_I5_U<0b01110011011000001>, + LSX_BIND_U5N_DESC_BASE<"vssrani.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRANI_W_D : LSX_I6_U<0b0111001101100001>, + LSX_BIND_U6N_DESC_BASE<"vssrani.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRANI_D_Q : LSX_I7_U<0b011100110110001>, + LSX_BIND_U7N_DESC_BASE<"vssrani.d.q", LSX128DOpnd>; + + +def VSSRANI_BU_H : LSX_I4_U<0b011100110110010001>, + LSX_BIND_U4N_DESC_BASE<"vssrani.bu.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRANI_HU_W : LSX_I5_U<0b01110011011001001>, + LSX_BIND_U5N_DESC_BASE<"vssrani.hu.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRANI_WU_D : LSX_I6_U<0b0111001101100101>, + LSX_BIND_U6N_DESC_BASE<"vssrani.wu.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRANI_DU_Q : LSX_I7_U<0b011100110110011>, + LSX_BIND_U7N_DESC_BASE<"vssrani.du.q", LSX128DOpnd>; + + +def VSSRARNI_B_H : LSX_I4_U<0b011100110110100001>, + LSX_BIND_U4N_DESC_BASE<"vssrarni.b.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRARNI_H_W : LSX_I5_U<0b01110011011010001>, + LSX_BIND_U5N_DESC_BASE<"vssrarni.h.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRARNI_W_D : LSX_I6_U<0b0111001101101001>, + LSX_BIND_U6N_DESC_BASE<"vssrarni.w.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRARNI_D_Q : LSX_I7_U<0b011100110110101>, + LSX_BIND_U7N_DESC_BASE<"vssrarni.d.q", LSX128DOpnd>; + + +def VSSRARNI_BU_H : LSX_I4_U<0b011100110110110001>, + LSX_BIND_U4N_DESC_BASE<"vssrarni.bu.h", uimm4, immZExt4, LSX128BOpnd>; + +def VSSRARNI_HU_W : LSX_I5_U<0b01110011011011001>, + LSX_BIND_U5N_DESC_BASE<"vssrarni.hu.w", uimm5, immZExt5, LSX128HOpnd>; + +def VSSRARNI_WU_D : LSX_I6_U<0b0111001101101101>, + LSX_BIND_U6N_DESC_BASE<"vssrarni.wu.d", uimm6, immZExt6, LSX128WOpnd>; + +def VSSRARNI_DU_Q : LSX_I7_U<0b011100110110111>, + LSX_BIND_U7N_DESC_BASE<"vssrarni.du.q", LSX128DOpnd>; + + + +def VLD : LSX_I12_S<0b0010110000>, + LD_DESC_BASE<"vld", load, v16i8, LSX128BOpnd, mem>; + +def VST : LSX_I12_S<0b0010110001>, + ST_DESC_BASE<"vst", store, v16i8, LSX128BOpnd, mem_simm12>; + + +def VSETEQZ_V : LSX_SET<0b0111001010011100100110>, + LSX_SET_DESC_BASE<"vseteqz.v", LSX128BOpnd>; + +def VSETNEZ_V : LSX_SET<0b0111001010011100100111>, + LSX_SET_DESC_BASE<"vsetnez.v", LSX128BOpnd>; + + +def VSETANYEQZ_B : LSX_SET<0b0111001010011100101000>, + LSX_SET_DESC_BASE<"vsetanyeqz.b", LSX128BOpnd>; + +def VSETANYEQZ_H : LSX_SET<0b0111001010011100101001>, + LSX_SET_DESC_BASE<"vsetanyeqz.h", LSX128HOpnd>; + +def VSETANYEQZ_W : LSX_SET<0b0111001010011100101010>, + LSX_SET_DESC_BASE<"vsetanyeqz.w", LSX128WOpnd>; + +def VSETANYEQZ_D : LSX_SET<0b0111001010011100101011>, + LSX_SET_DESC_BASE<"vsetanyeqz.d", LSX128DOpnd>; + + +def VSETALLNEZ_B : LSX_SET<0b0111001010011100101100>, + LSX_SET_DESC_BASE<"vsetallnez.b", 
+def VSETALLNEZ_B : LSX_SET<0b0111001010011100101100>,
+                   LSX_SET_DESC_BASE<"vsetallnez.b", LSX128BOpnd>;
+
+def VSETALLNEZ_H : LSX_SET<0b0111001010011100101101>,
+                   LSX_SET_DESC_BASE<"vsetallnez.h", LSX128HOpnd>;
+
+def VSETALLNEZ_W : LSX_SET<0b0111001010011100101110>,
+                   LSX_SET_DESC_BASE<"vsetallnez.w", LSX128WOpnd>;
+
+def VSETALLNEZ_D : LSX_SET<0b0111001010011100101111>,
+                   LSX_SET_DESC_BASE<"vsetallnez.d", LSX128DOpnd>;
+
+class LSX_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
+                                   RegisterClass RCVS> :
+  LoongArchPseudo<(outs GPR32Opnd:$rd),
+                  (ins RCVS:$vj),
+                  [(set GPR32Opnd:$rd, (OpNode (TyNode RCVS:$vj)))]> {
+  bit usesCustomInserter = 1;
+}
+
+def SNZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllNonZero, v16i8, LSX128B>;
+def SNZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllNonZero, v8i16, LSX128H>;
+def SNZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllNonZero, v4i32, LSX128W>;
+def SNZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllNonZero, v2i64, LSX128D>;
+def SNZ_V_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAnyNonZero, v16i8, LSX128B>;
+
+def SZ_B_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAnyZero, v16i8, LSX128B>;
+def SZ_H_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAnyZero, v8i16, LSX128H>;
+def SZ_W_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAnyZero, v4i32, LSX128W>;
+def SZ_D_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAnyZero, v2i64, LSX128D>;
+def SZ_V_PSEUDO : LSX_CBRANCH_PSEUDO_DESC_BASE<LoongArchVAllZero, v16i8, LSX128B>;
+
+
+def VFMADD_S : LSX_VR4MUL<0b000010010001>,
+               LSX_4RF<"vfmadd.s", int_loongarch_lsx_vfmadd_s, LSX128WOpnd>;
+
+def VFMADD_D : LSX_VR4MUL<0b000010010010>,
+               LSX_4RF<"vfmadd.d", int_loongarch_lsx_vfmadd_d, LSX128DOpnd>;
+
+def VFMSUB_S : LSX_VR4MUL<0b000010010101>,
+               LSX_4RF<"vfmsub.s", int_loongarch_lsx_vfmsub_s, LSX128WOpnd>;
+
+def VFMSUB_D : LSX_VR4MUL<0b000010010110>,
+               LSX_4RF<"vfmsub.d", int_loongarch_lsx_vfmsub_d, LSX128DOpnd>;
+
+def VFNMADD_S : LSX_VR4MUL<0b000010011001>,
+                LSX_4RF<"vfnmadd.s", int_loongarch_lsx_vfnmadd_s, LSX128WOpnd>;
+
+def VFNMADD_D : LSX_VR4MUL<0b000010011010>,
+                LSX_4RF<"vfnmadd.d", int_loongarch_lsx_vfnmadd_d, LSX128DOpnd>;
+
+def VFNMSUB_S : LSX_VR4MUL<0b000010011101>,
+                LSX_4RF<"vfnmsub.s", int_loongarch_lsx_vfnmsub_s, LSX128WOpnd>;
+
+def VFNMSUB_D : LSX_VR4MUL<0b000010011110>,
+                LSX_4RF<"vfnmsub.d", int_loongarch_lsx_vfnmsub_d, LSX128DOpnd>;
+
+
+// vfmadd: vj * vk + va
+def : LSXPat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va),
+             (VFMADD_D $vj, $vk, $va)>;
+
+def : LSXPat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va),
+             (VFMADD_S $vj, $vk, $va)>;
+
+
+// vfmsub: vj * vk - va
+def : LSXPat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)),
+             (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+def : LSXPat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)),
+             (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+
+
+// vfnmadd: -(vj * vk + va)
+def : LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)),
+             (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)),
+             (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+
+// vfnmsub: -(vj * vk - va)
+def : LSXPat<(fma (fneg v2f64:$vj), v2f64:$vk, v2f64:$va),
+             (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>;
+
+def : LSXPat<(fma (fneg v4f32:$vj), v4f32:$vk, v4f32:$va),
+             (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>;
+
+
+def VFCMP_CAF_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.caf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_caf_s>{
+                  bits<5> cond=0x0;
+                  }
+
+def VFCMP_CAF_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.caf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_caf_d>{
+                  bits<5> cond=0x0;
+                  }
+
+
+def VFCMP_COR_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.cor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetord_v4f32>{
+                  bits<5> cond=0x14;
+                  }
+
+def VFCMP_COR_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.cor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetord_v2f64>{
+                  bits<5> cond=0x14;
+                  }
+
+
+def VFCMP_CUN_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.cun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetun_v4f32>{
+                  bits<5> cond=0x8;
+                  }
+
+def VFCMP_CUN_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.cun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetun_v2f64>{
+                  bits<5> cond=0x8;
+                  }
+
+
+def VFCMP_CUNE_S : LSX_VFCMP<0b000011000101>,
+                   LSX_VFCMP_Reg3<"vfcmp.cune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetune_v4f32>{
+                   bits<5> cond=0x18;
+                   }
+
+def VFCMP_CUNE_D : LSX_VFCMP<0b000011000110>,
+                   LSX_VFCMP_Reg3<"vfcmp.cune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetune_v2f64>{
+                   bits<5> cond=0x18;
+                   }
+
+
+def VFCMP_CUEQ_S : LSX_VFCMP<0b000011000101>,
+                   LSX_VFCMP_Reg3<"vfcmp.cueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetueq_v4f32>{
+                   bits<5> cond=0xc;
+                   }
+
+def VFCMP_CUEQ_D : LSX_VFCMP<0b000011000110>,
+                   LSX_VFCMP_Reg3<"vfcmp.cueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetueq_v2f64>{
+                   bits<5> cond=0xc;
+                   }
+
+def VFCMP_CEQ_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.ceq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetoeq_v4f32>{
+                  bits<5> cond=0x4;
+                  }
+
+def VFCMP_CEQ_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.ceq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetoeq_v2f64>{
+                  bits<5> cond=0x4;
+                  }
+
+
+def VFCMP_CNE_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.cne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetone_v4f32>{
+                  bits<5> cond=0x10;
+                  }
+
+def VFCMP_CNE_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.cne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetone_v2f64>{
+                  bits<5> cond=0x10;
+                  }
+
+
+def VFCMP_CLT_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.clt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetolt_v4f32>{
+                  bits<5> cond=0x2;
+                  }
+
+def VFCMP_CLT_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.clt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetolt_v2f64>{
+                  bits<5> cond=0x2;
+                  }
+
+
+def VFCMP_CULT_S : LSX_VFCMP<0b000011000101>,
+                   LSX_VFCMP_Reg3<"vfcmp.cult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetult_v4f32>{
+                   bits<5> cond=0xa;
+                   }
+
+def VFCMP_CULT_D : LSX_VFCMP<0b000011000110>,
+                   LSX_VFCMP_Reg3<"vfcmp.cult.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetult_v2f64>{
+                   bits<5> cond=0xa;
+                   }
+
+
+def VFCMP_CLE_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.cle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetole_v4f32>{
+                  bits<5> cond=0x6;
+                  }
+
+def VFCMP_CLE_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.cle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetole_v2f64>{
+                  bits<5> cond=0x6;
+                  }
+
+
+def VFCMP_CULE_S : LSX_VFCMP<0b000011000101>,
+                   LSX_VFCMP_Reg3<"vfcmp.cule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, vfsetule_v4f32>{
+                   bits<5> cond=0xe;
+                   }
+
+def VFCMP_CULE_D : LSX_VFCMP<0b000011000110>,
+                   LSX_VFCMP_Reg3<"vfcmp.cule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, vfsetule_v2f64>{
+                   bits<5> cond=0xe;
+                   }
+
+
+def VFCMP_SAF_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.saf.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_saf_s>{
+                  bits<5> cond=0x1;
+                  }
+
+def VFCMP_SAF_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.saf.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_saf_d>{
+                  bits<5> cond=0x1;
+                  }
+
+def VFCMP_SOR_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.sor.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sor_s>{
+                  bits<5> cond=0x15;
+                  }
+
+def VFCMP_SOR_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.sor.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sor_d>{
+                  bits<5> cond=0x15;
+                  }
+
+def VFCMP_SUN_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.sun.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sun_s>{
+                  bits<5> cond=0x9;
+                  }
+
+def VFCMP_SUN_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.sun.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sun_d>{
+                  bits<5> cond=0x9;
+                  }
+
+def VFCMP_SUNE_S : LSX_VFCMP<0b000011000101>,
+                   LSX_VFCMP_Reg3<"vfcmp.sune.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sune_s>{
+                   bits<5> cond=0x19;
+                   }
+
+def VFCMP_SUNE_D : LSX_VFCMP<0b000011000110>,
+                   LSX_VFCMP_Reg3<"vfcmp.sune.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sune_d>{
+                   bits<5> cond=0x19;
+                   }
+
+def VFCMP_SUEQ_S : LSX_VFCMP<0b000011000101>,
+                   LSX_VFCMP_Reg3<"vfcmp.sueq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sueq_s>{
+                   bits<5> cond=0xd;
+                   }
+
+def VFCMP_SUEQ_D : LSX_VFCMP<0b000011000110>,
+                   LSX_VFCMP_Reg3<"vfcmp.sueq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sueq_d>{
+                   bits<5> cond=0xd;
+                   }
+
+def VFCMP_SEQ_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.seq.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_seq_s>{
+                  bits<5> cond=0x5;
+                  }
+
+def VFCMP_SEQ_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.seq.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_seq_d>{
+                  bits<5> cond=0x5;
+                  }
+
+def VFCMP_SNE_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.sne.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sne_s>{
+                  bits<5> cond=0x11;
+                  }
+
+def VFCMP_SNE_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.sne.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sne_d>{
+                  bits<5> cond=0x11;
+                  }
+
+def VFCMP_SLT_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.slt.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_slt_s>{
+                  bits<5> cond=0x3;
+                  }
+
+def VFCMP_SLT_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.slt.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_slt_d>{
+                  bits<5> cond=0x3;
+                  }
+
+def VFCMP_SULT_S : LSX_VFCMP<0b000011000101>,
+                   LSX_VFCMP_Reg3<"vfcmp.sult.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sult_s>{
+                   bits<5> cond=0xb;
+                   }
+
+def VFCMP_SULT_D : LSX_VFCMP<0b000011000110>,
+                   LSX_VFCMP_Reg3<"vfcmp.sult.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sult_d>{
+                   bits<5> cond=0xb;
+                   }
+
+def VFCMP_SLE_S : LSX_VFCMP<0b000011000101>,
+                  LSX_VFCMP_Reg3<"vfcmp.sle.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sle_s>{
+                  bits<5> cond=0x7;
+                  }
+
+def VFCMP_SLE_D : LSX_VFCMP<0b000011000110>,
+                  LSX_VFCMP_Reg3<"vfcmp.sle.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sle_d>{
+                  bits<5> cond=0x7;
+                  }
+
+def VFCMP_SULE_S : LSX_VFCMP<0b000011000101>,
+                   LSX_VFCMP_Reg3<"vfcmp.sule.s", LSX128WOpnd, LSX128WOpnd, LSX128WOpnd, int_loongarch_lsx_vfcmp_sule_s>{
+                   bits<5> cond=0xf;
+                   }
+
+def VFCMP_SULE_D : LSX_VFCMP<0b000011000110>,
+                   LSX_VFCMP_Reg3<"vfcmp.sule.d", LSX128DOpnd, LSX128DOpnd, LSX128DOpnd, int_loongarch_lsx_vfcmp_sule_d>{
+                   bits<5> cond=0xf;
+                   }
+
+def VBITSEL_V : LSX_VR4MUL<0b000011010001>,
+                LSX_VMul_Reg4<"vbitsel.v", LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd, int_loongarch_lsx_vbitsel_v>;
+
+def VSHUF_B : LSX_VR4MUL<0b000011010101>,
+              LSX_3R_4R_VSHF_DESC_BASE<"vshuf.b", LSX128BOpnd>;
+
+
+class LSX_BSEL_PSEUDO_BASE<RegisterOperand RO, ValueType Ty> :
+  LSXPseudo<(outs RO:$vd), (ins RO:$vd_in, RO:$vs, RO:$vt),
+            [(set RO:$vd, (Ty (vselect RO:$vd_in, RO:$vt, RO:$vs)))]>,
+  PseudoInstExpansion<(VBITSEL_V LSX128BOpnd:$vd, LSX128BOpnd:$vs,
+                       LSX128BOpnd:$vt, LSX128BOpnd:$vd_in)> {
+  let Constraints = "$vd_in = $vd";
+}
+
+def BSEL_B_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128BOpnd, v16i8>;
+def BSEL_H_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128HOpnd, v8i16>;
+def BSEL_W_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128WOpnd, v4i32>;
+def BSEL_D_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128DOpnd, v2i64>;
+def BSEL_FW_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128WOpnd, v4f32>;
+def BSEL_FD_PSEUDO : LSX_BSEL_PSEUDO_BASE<LSX128DOpnd, v2f64>;
+
+
+class LSX_LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                       ValueType TyNode, RegisterOperand ROVD,
+                       Operand MemOpnd = mem_simm12,
+                       ComplexPattern Addr = addrimm12> {
+  dag OutOperandList = (outs ROVD:$vd);
+  dag InOperandList = (ins MemOpnd:$addr);
+  string AsmString = !strconcat(instr_asm, "\t$vd, $addr");
+  list<dag> Pattern = [(set ROVD:$vd, (OpNode (TyNode (load Addr:$addr))))];
+  string DecoderMethod = "DecodeLSX128memlsl";
+}
+
+def VLDREPL_B : LSX_SI12_S<0b0011000010>,
+                LSX_LD_DESC_BASE<"vldrepl.b", vldrepl_v16i8, v16i8, LSX128BOpnd>;
+
+def VLDREPL_H : LSX_SI11_S<0b00110000010>,
+                LSX_LD_DESC_BASE<"vldrepl.h", vldrepl_v8i16, v8i16, LSX128HOpnd, mem_simm11_lsl1, addrimm11lsl1>;
+
+def VLDREPL_W : LSX_SI10_S<0b001100000010>,
+                LSX_LD_DESC_BASE<"vldrepl.w", vldrepl_v4i32, v4i32, LSX128WOpnd, mem_simm10_lsl2, addrimm10lsl2>;
+
+def VLDREPL_D : LSX_SI9_S<0b0011000000010>,
+                LSX_LD_DESC_BASE<"vldrepl.d", vldrepl_v2i64, v2i64, LSX128DOpnd, mem_simm9_lsl3, addrimm9lsl3>;
+
+
+def VSTELM_B : LSX_SI8_idx4<0b0011000110>,
+               LSX_I8_U4_DESC_BASE<"vstelm.b", int_loongarch_lsx_vstelm_b, simm8_32, immSExt8, LSX128BOpnd>;
+
+def VSTELM_H : LSX_SI8_idx3<0b00110001010>,
+               LSX_I8_U3_DESC_BASE<"vstelm.h", int_loongarch_lsx_vstelm_h, immSExt8_1_O, immSExt8, LSX128HOpnd>;
+
+def VSTELM_W : LSX_SI8_idx2<0b001100010010>,
+               LSX_I8_U2_DESC_BASE<"vstelm.w", int_loongarch_lsx_vstelm_w, immSExt8_2_O, immSExt8, LSX128WOpnd>;
+
+def VSTELM_D : LSX_SI8_idx1<0b0011000100010>,
+               LSX_I8_U1_DESC_BASE<"vstelm.d", int_loongarch_lsx_vstelm_d, immSExt8_3_O, immSExt8, LSX128DOpnd>;
+
+
+let mayLoad = 1, canFoldAsLoad = 1 in {
+  def VLDX : LSX_3R_2GP<0b00111000010000000>,
+             LSX_LDX_LA<"vldx", int_loongarch_lsx_vldx, GPR64Opnd, LSX128BOpnd>;
+}
+
+let mayStore = 1 in {
+  def VSTX : LSX_3R_2GP<0b00111000010001000>,
+             LSX_SDX_LA<"vstx", int_loongarch_lsx_vstx, GPR64Opnd, LSX128BOpnd>;
+}
+
+
+def VADDWEV_H_B : LSX_3R<0b01110000000111100>,
+                  LSX_3R_DESC_BASE<"vaddwev.h.b", int_loongarch_lsx_vaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VADDWEV_W_H : LSX_3R<0b01110000000111101>,
+                  LSX_3R_DESC_BASE<"vaddwev.w.h", int_loongarch_lsx_vaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VADDWEV_D_W : LSX_3R<0b01110000000111110>,
+                  LSX_3R_DESC_BASE<"vaddwev.d.w", int_loongarch_lsx_vaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VADDWEV_Q_D : LSX_3R<0b01110000000111111>,
+                  LSX_3R_DESC_BASE<"vaddwev.q.d", int_loongarch_lsx_vaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSUBWEV_H_B : LSX_3R<0b01110000001000000>,
+                  LSX_3R_DESC_BASE<"vsubwev.h.b", int_loongarch_lsx_vsubwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VSUBWEV_W_H : LSX_3R<0b01110000001000001>,
+                  LSX_3R_DESC_BASE<"vsubwev.w.h", int_loongarch_lsx_vsubwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSUBWEV_D_W : LSX_3R<0b01110000001000010>,
+                  LSX_3R_DESC_BASE<"vsubwev.d.w", int_loongarch_lsx_vsubwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSUBWEV_Q_D : LSX_3R<0b01110000001000011>,
+                  LSX_3R_DESC_BASE<"vsubwev.q.d", int_loongarch_lsx_vsubwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VADDWOD_H_B : LSX_3R<0b01110000001000100>,
LSX_3R_DESC_BASE<"vaddwod.h.b", int_loongarch_lsx_vaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VADDWOD_W_H : LSX_3R<0b01110000001000101>, + LSX_3R_DESC_BASE<"vaddwod.w.h", int_loongarch_lsx_vaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VADDWOD_D_W : LSX_3R<0b01110000001000110>, + LSX_3R_DESC_BASE<"vaddwod.d.w", int_loongarch_lsx_vaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VADDWOD_Q_D : LSX_3R<0b01110000001000111>, + LSX_3R_DESC_BASE<"vaddwod.q.d", int_loongarch_lsx_vaddwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSUBWOD_H_B : LSX_3R<0b01110000001001000>, + LSX_3R_DESC_BASE<"vsubwod.h.b", int_loongarch_lsx_vsubwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VSUBWOD_W_H : LSX_3R<0b01110000001001001>, + LSX_3R_DESC_BASE<"vsubwod.w.h", int_loongarch_lsx_vsubwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSUBWOD_D_W : LSX_3R<0b01110000001001010>, + LSX_3R_DESC_BASE<"vsubwod.d.w", int_loongarch_lsx_vsubwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VSUBWOD_Q_D : LSX_3R<0b01110000001001011>, + LSX_3R_DESC_BASE<"vsubwod.q.d", int_loongarch_lsx_vsubwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VADDWEV_H_BU : LSX_3R<0b01110000001011100>, + LSX_3R_DESC_BASE<"vaddwev.h.bu", int_loongarch_lsx_vaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VADDWEV_W_HU : LSX_3R<0b01110000001011101>, + LSX_3R_DESC_BASE<"vaddwev.w.hu", int_loongarch_lsx_vaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VADDWEV_D_WU : LSX_3R<0b01110000001011110>, + LSX_3R_DESC_BASE<"vaddwev.d.wu", int_loongarch_lsx_vaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VADDWEV_Q_DU : LSX_3R<0b01110000001011111>, + LSX_3R_DESC_BASE<"vaddwev.q.du", int_loongarch_lsx_vaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSUBWEV_H_BU : LSX_3R<0b01110000001100000>, + LSX_3R_DESC_BASE<"vsubwev.h.bu", int_loongarch_lsx_vsubwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VSUBWEV_W_HU : LSX_3R<0b01110000001100001>, + LSX_3R_DESC_BASE<"vsubwev.w.hu", int_loongarch_lsx_vsubwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSUBWEV_D_WU : LSX_3R<0b01110000001100010>, + LSX_3R_DESC_BASE<"vsubwev.d.wu", int_loongarch_lsx_vsubwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VSUBWEV_Q_DU : LSX_3R<0b01110000001100011>, + LSX_3R_DESC_BASE<"vsubwev.q.du", int_loongarch_lsx_vsubwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VADDWOD_H_BU : LSX_3R<0b01110000001100100>, + LSX_3R_DESC_BASE<"vaddwod.h.bu", int_loongarch_lsx_vaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VADDWOD_W_HU : LSX_3R<0b01110000001100101>, + LSX_3R_DESC_BASE<"vaddwod.w.hu", int_loongarch_lsx_vaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VADDWOD_D_WU : LSX_3R<0b01110000001100110>, + LSX_3R_DESC_BASE<"vaddwod.d.wu", int_loongarch_lsx_vaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VADDWOD_Q_DU : LSX_3R<0b01110000001100111>, + LSX_3R_DESC_BASE<"vaddwod.q.du", int_loongarch_lsx_vaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VSUBWOD_H_BU : LSX_3R<0b01110000001101000>, + LSX_3R_DESC_BASE<"vsubwod.h.bu", int_loongarch_lsx_vsubwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VSUBWOD_W_HU : LSX_3R<0b01110000001101001>, + LSX_3R_DESC_BASE<"vsubwod.w.hu", int_loongarch_lsx_vsubwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VSUBWOD_D_WU : LSX_3R<0b01110000001101010>, + LSX_3R_DESC_BASE<"vsubwod.d.wu", int_loongarch_lsx_vsubwod_d_wu, LSX128DOpnd, 
LSX128WOpnd, LSX128WOpnd> ; + +def VSUBWOD_Q_DU : LSX_3R<0b01110000001101011>, + LSX_3R_DESC_BASE<"vsubwod.q.du", int_loongarch_lsx_vsubwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VADDWEV_H_BU_B : LSX_3R<0b01110000001111100>, + LSX_3R_DESC_BASE<"vaddwev.h.bu.b", int_loongarch_lsx_vaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VADDWEV_W_HU_H : LSX_3R<0b01110000001111101>, + LSX_3R_DESC_BASE<"vaddwev.w.hu.h", int_loongarch_lsx_vaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VADDWEV_D_WU_W : LSX_3R<0b01110000001111110>, + LSX_3R_DESC_BASE<"vaddwev.d.wu.w", int_loongarch_lsx_vaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VADDWEV_Q_DU_D : LSX_3R<0b01110000001111111>, + LSX_3R_DESC_BASE<"vaddwev.q.du.d", int_loongarch_lsx_vaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VADDWOD_H_BU_B : LSX_3R<0b01110000010000000>, + LSX_3R_DESC_BASE<"vaddwod.h.bu.b", int_loongarch_lsx_vaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VADDWOD_W_HU_H : LSX_3R<0b01110000010000001>, + LSX_3R_DESC_BASE<"vaddwod.w.hu.h", int_loongarch_lsx_vaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VADDWOD_D_WU_W : LSX_3R<0b01110000010000010>, + LSX_3R_DESC_BASE<"vaddwod.d.wu.w", int_loongarch_lsx_vaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VADDWOD_Q_DU_D : LSX_3R<0b01110000010000011>, + LSX_3R_DESC_BASE<"vaddwod.q.du.d", int_loongarch_lsx_vaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VHADDW_Q_D : LSX_3R<0b01110000010101011>, + LSX_3R_DESC_BASE<"vhaddw.q.d", int_loongarch_lsx_vhaddw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + +def VHSUBW_Q_D : LSX_3R<0b01110000010101111>, + LSX_3R_DESC_BASE<"vhsubw.q.d", int_loongarch_lsx_vhsubw_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VHADDW_QU_DU : LSX_3R<0b01110000010110011>, + LSX_3R_DESC_BASE<"vhaddw.qu.du", int_loongarch_lsx_vhaddw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + +def VHSUBW_QU_DU : LSX_3R<0b01110000010110111>, + LSX_3R_DESC_BASE<"vhsubw.qu.du", int_loongarch_lsx_vhsubw_qu_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMUH_B : LSX_3R<0b01110000100001100>, + LSX_3R_DESC_BASE<"vmuh.b", int_loongarch_lsx_vmuh_b, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMUH_H : LSX_3R<0b01110000100001101>, + LSX_3R_DESC_BASE<"vmuh.h", int_loongarch_lsx_vmuh_h, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMUH_W : LSX_3R<0b01110000100001110>, + LSX_3R_DESC_BASE<"vmuh.w", int_loongarch_lsx_vmuh_w, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMUH_D : LSX_3R<0b01110000100001111>, + LSX_3R_DESC_BASE<"vmuh.d", int_loongarch_lsx_vmuh_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMUH_BU : LSX_3R<0b01110000100010000>, + LSX_3R_DESC_BASE<"vmuh.bu", int_loongarch_lsx_vmuh_bu, LSX128BOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMUH_HU : LSX_3R<0b01110000100010001>, + LSX_3R_DESC_BASE<"vmuh.hu", int_loongarch_lsx_vmuh_hu, LSX128HOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMUH_WU : LSX_3R<0b01110000100010010>, + LSX_3R_DESC_BASE<"vmuh.wu", int_loongarch_lsx_vmuh_wu, LSX128WOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMUH_DU : LSX_3R<0b01110000100010011>, + LSX_3R_DESC_BASE<"vmuh.du", int_loongarch_lsx_vmuh_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWEV_H_B : LSX_3R<0b01110000100100000>, + LSX_3R_DESC_BASE<"vmulwev.h.b", int_loongarch_lsx_vmulwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWEV_W_H : LSX_3R<0b01110000100100001>, + LSX_3R_DESC_BASE<"vmulwev.w.h", int_loongarch_lsx_vmulwev_w_h, LSX128WOpnd, 
LSX128HOpnd, LSX128HOpnd>; + +def VMULWEV_D_W : LSX_3R<0b01110000100100010>, + LSX_3R_DESC_BASE<"vmulwev.d.w", int_loongarch_lsx_vmulwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWEV_Q_D : LSX_3R<0b01110000100100011>, + LSX_3R_DESC_BASE<"vmulwev.q.d", int_loongarch_lsx_vmulwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWOD_H_B : LSX_3R<0b01110000100100100>, + LSX_3R_DESC_BASE<"vmulwod.h.b", int_loongarch_lsx_vmulwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWOD_W_H : LSX_3R<0b01110000100100101>, + LSX_3R_DESC_BASE<"vmulwod.w.h", int_loongarch_lsx_vmulwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWOD_D_W : LSX_3R<0b01110000100100110>, + LSX_3R_DESC_BASE<"vmulwod.d.w", int_loongarch_lsx_vmulwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWOD_Q_D : LSX_3R<0b01110000100100111>, + LSX_3R_DESC_BASE<"vmulwod.q.d", int_loongarch_lsx_vmulwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWEV_H_BU : LSX_3R<0b01110000100110000>, + LSX_3R_DESC_BASE<"vmulwev.h.bu", int_loongarch_lsx_vmulwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWEV_W_HU : LSX_3R<0b01110000100110001>, + LSX_3R_DESC_BASE<"vmulwev.w.hu", int_loongarch_lsx_vmulwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWEV_D_WU : LSX_3R<0b01110000100110010>, + LSX_3R_DESC_BASE<"vmulwev.d.wu", int_loongarch_lsx_vmulwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWEV_Q_DU : LSX_3R<0b01110000100110011>, + LSX_3R_DESC_BASE<"vmulwev.q.du", int_loongarch_lsx_vmulwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWOD_H_BU : LSX_3R<0b01110000100110100>, + LSX_3R_DESC_BASE<"vmulwod.h.bu", int_loongarch_lsx_vmulwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWOD_W_HU : LSX_3R<0b01110000100110101>, + LSX_3R_DESC_BASE<"vmulwod.w.hu", int_loongarch_lsx_vmulwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWOD_D_WU : LSX_3R<0b01110000100110110>, + LSX_3R_DESC_BASE<"vmulwod.d.wu", int_loongarch_lsx_vmulwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWOD_Q_DU : LSX_3R<0b01110000100110111>, + LSX_3R_DESC_BASE<"vmulwod.q.du", int_loongarch_lsx_vmulwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWEV_H_BU_B : LSX_3R<0b01110000101000000>, + LSX_3R_DESC_BASE<"vmulwev.h.bu.b", int_loongarch_lsx_vmulwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWEV_W_HU_H : LSX_3R<0b01110000101000001>, + LSX_3R_DESC_BASE<"vmulwev.w.hu.h", int_loongarch_lsx_vmulwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWEV_D_WU_W : LSX_3R<0b01110000101000010>, + LSX_3R_DESC_BASE<"vmulwev.d.wu.w", int_loongarch_lsx_vmulwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWEV_Q_DU_D : LSX_3R<0b01110000101000011>, + LSX_3R_DESC_BASE<"vmulwev.q.du.d", int_loongarch_lsx_vmulwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + +def VMULWOD_H_BU_B : LSX_3R<0b01110000101000100>, + LSX_3R_DESC_BASE<"vmulwod.h.bu.b", int_loongarch_lsx_vmulwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>; + +def VMULWOD_W_HU_H : LSX_3R<0b01110000101000101>, + LSX_3R_DESC_BASE<"vmulwod.w.hu.h", int_loongarch_lsx_vmulwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>; + +def VMULWOD_D_WU_W : LSX_3R<0b01110000101000110>, + LSX_3R_DESC_BASE<"vmulwod.d.wu.w", int_loongarch_lsx_vmulwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd> ; + +def VMULWOD_Q_DU_D : LSX_3R<0b01110000101000111>, + LSX_3R_DESC_BASE<"vmulwod.q.du.d", int_loongarch_lsx_vmulwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>; + + 
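+// Note: a scalar sketch of the widening-multiply semantics above, assuming
+// the standard LSX element behavior (illustration only, not taken from this
+// patch's text):
+//   vmulwev.h.b: for i in 0..7: vd.h[i] = sext(vj.b[2*i])   * sext(vk.b[2*i])
+//   vmulwod.h.b: for i in 0..7: vd.h[i] = sext(vj.b[2*i+1]) * sext(vk.b[2*i+1])
+// The .bu/.hu/.wu/.du forms zero-extend both sources; the mixed .bu.b style
+// forms zero-extend $vj and sign-extend $vk. The vmaddw* variants below
+// accumulate the same even/odd widened products into $vd.
+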
+def VMADDWEV_H_B : LSX_3R<0b01110000101011000>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.h.b", int_loongarch_lsx_vmaddwev_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VMADDWEV_W_H : LSX_3R<0b01110000101011001>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.w.h", int_loongarch_lsx_vmaddwev_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VMADDWEV_D_W : LSX_3R<0b01110000101011010>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.d.w", int_loongarch_lsx_vmaddwev_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VMADDWEV_Q_D : LSX_3R<0b01110000101011011>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.q.d", int_loongarch_lsx_vmaddwev_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VMADDWOD_H_B : LSX_3R<0b01110000101011100>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.h.b", int_loongarch_lsx_vmaddwod_h_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VMADDWOD_W_H : LSX_3R<0b01110000101011101>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.w.h", int_loongarch_lsx_vmaddwod_w_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VMADDWOD_D_W : LSX_3R<0b01110000101011110>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.d.w", int_loongarch_lsx_vmaddwod_d_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VMADDWOD_Q_D : LSX_3R<0b01110000101011111>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.q.d", int_loongarch_lsx_vmaddwod_q_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VMADDWEV_H_BU : LSX_3R<0b01110000101101000>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu", int_loongarch_lsx_vmaddwev_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VMADDWEV_W_HU : LSX_3R<0b01110000101101001>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu", int_loongarch_lsx_vmaddwev_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VMADDWEV_D_WU : LSX_3R<0b01110000101101010>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu", int_loongarch_lsx_vmaddwev_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VMADDWEV_Q_DU : LSX_3R<0b01110000101101011>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.q.du", int_loongarch_lsx_vmaddwev_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VMADDWOD_H_BU : LSX_3R<0b01110000101101100>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu", int_loongarch_lsx_vmaddwod_h_bu, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VMADDWOD_W_HU : LSX_3R<0b01110000101101101>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu", int_loongarch_lsx_vmaddwod_w_hu, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VMADDWOD_D_WU : LSX_3R<0b01110000101101110>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu", int_loongarch_lsx_vmaddwod_d_wu, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VMADDWOD_Q_DU : LSX_3R<0b01110000101101111>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.q.du", int_loongarch_lsx_vmaddwod_q_du, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VMADDWEV_H_BU_B : LSX_3R<0b01110000101111000>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.h.bu.b", int_loongarch_lsx_vmaddwev_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VMADDWEV_W_HU_H : LSX_3R<0b01110000101111001>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.w.hu.h", int_loongarch_lsx_vmaddwev_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VMADDWEV_D_WU_W : LSX_3R<0b01110000101111010>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.d.wu.w", int_loongarch_lsx_vmaddwev_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VMADDWEV_Q_DU_D : LSX_3R<0b01110000101111011>,
+            LSX_3R_4R_DESC_BASE<"vmaddwev.q.du.d", int_loongarch_lsx_vmaddwev_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VMADDWOD_H_BU_B : LSX_3R<0b01110000101111100>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.h.bu.b", int_loongarch_lsx_vmaddwod_h_bu_b, LSX128HOpnd, LSX128BOpnd, LSX128BOpnd>;
+
+def VMADDWOD_W_HU_H : LSX_3R<0b01110000101111101>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.w.hu.h",
+                                int_loongarch_lsx_vmaddwod_w_hu_h, LSX128WOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VMADDWOD_D_WU_W : LSX_3R<0b01110000101111110>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.d.wu.w", int_loongarch_lsx_vmaddwod_d_wu_w, LSX128DOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VMADDWOD_Q_DU_D : LSX_3R<0b01110000101111111>,
+            LSX_3R_4R_DESC_BASE<"vmaddwod.q.du.d", int_loongarch_lsx_vmaddwod_q_du_d, LSX128DOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSRLN_B_H : LSX_3R<0b01110000111101001>,
+            LSX_3R_DESC_BASE<"vsrln.b.h", int_loongarch_lsx_vsrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSRLN_H_W : LSX_3R<0b01110000111101010>,
+            LSX_3R_DESC_BASE<"vsrln.h.w", int_loongarch_lsx_vsrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSRLN_W_D : LSX_3R<0b01110000111101011>,
+            LSX_3R_DESC_BASE<"vsrln.w.d", int_loongarch_lsx_vsrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSRAN_B_H : LSX_3R<0b01110000111101101>,
+            LSX_3R_DESC_BASE<"vsran.b.h", int_loongarch_lsx_vsran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSRAN_H_W : LSX_3R<0b01110000111101110>,
+            LSX_3R_DESC_BASE<"vsran.h.w", int_loongarch_lsx_vsran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSRAN_W_D : LSX_3R<0b01110000111101111>,
+            LSX_3R_DESC_BASE<"vsran.w.d", int_loongarch_lsx_vsran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSRLRN_B_H : LSX_3R<0b01110000111110001>,
+            LSX_3R_DESC_BASE<"vsrlrn.b.h", int_loongarch_lsx_vsrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSRLRN_H_W : LSX_3R<0b01110000111110010>,
+            LSX_3R_DESC_BASE<"vsrlrn.h.w", int_loongarch_lsx_vsrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSRLRN_W_D : LSX_3R<0b01110000111110011>,
+            LSX_3R_DESC_BASE<"vsrlrn.w.d", int_loongarch_lsx_vsrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSRARN_B_H : LSX_3R<0b01110000111110101>,
+            LSX_3R_DESC_BASE<"vsrarn.b.h", int_loongarch_lsx_vsrarn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSRARN_H_W : LSX_3R<0b01110000111110110>,
+            LSX_3R_DESC_BASE<"vsrarn.h.w", int_loongarch_lsx_vsrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSRARN_W_D : LSX_3R<0b01110000111110111>,
+            LSX_3R_DESC_BASE<"vsrarn.w.d", int_loongarch_lsx_vsrarn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRLN_B_H : LSX_3R<0b01110000111111001>,
+            LSX_3R_DESC_BASE<"vssrln.b.h", int_loongarch_lsx_vssrln_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRLN_H_W : LSX_3R<0b01110000111111010>,
+            LSX_3R_DESC_BASE<"vssrln.h.w", int_loongarch_lsx_vssrln_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRLN_W_D : LSX_3R<0b01110000111111011>,
+            LSX_3R_DESC_BASE<"vssrln.w.d", int_loongarch_lsx_vssrln_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRAN_B_H : LSX_3R<0b01110000111111101>,
+            LSX_3R_DESC_BASE<"vssran.b.h", int_loongarch_lsx_vssran_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRAN_H_W : LSX_3R<0b01110000111111110>,
+            LSX_3R_DESC_BASE<"vssran.h.w", int_loongarch_lsx_vssran_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRAN_W_D : LSX_3R<0b01110000111111111>,
+            LSX_3R_DESC_BASE<"vssran.w.d", int_loongarch_lsx_vssran_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRLRN_B_H : LSX_3R<0b01110001000000001>,
+            LSX_3R_DESC_BASE<"vssrlrn.b.h", int_loongarch_lsx_vssrlrn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRLRN_H_W : LSX_3R<0b01110001000000010>,
+            LSX_3R_DESC_BASE<"vssrlrn.h.w", int_loongarch_lsx_vssrlrn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRLRN_W_D : LSX_3R<0b01110001000000011>,
+            LSX_3R_DESC_BASE<"vssrlrn.w.d",
+                             int_loongarch_lsx_vssrlrn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRARN_B_H : LSX_3R<0b01110001000000101>,
+            LSX_3R_DESC_BASE<"vssrarn.b.h", int_loongarch_lsx_vssrarn_b_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRARN_H_W : LSX_3R<0b01110001000000110>,
+            LSX_3R_DESC_BASE<"vssrarn.h.w", int_loongarch_lsx_vssrarn_h_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRARN_W_D : LSX_3R<0b01110001000000111>,
+            LSX_3R_DESC_BASE<"vssrarn.w.d", int_loongarch_lsx_vssrarn_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRLN_BU_H : LSX_3R<0b01110001000001001>,
+            LSX_3R_DESC_BASE<"vssrln.bu.h", int_loongarch_lsx_vssrln_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRLN_HU_W : LSX_3R<0b01110001000001010>,
+            LSX_3R_DESC_BASE<"vssrln.hu.w", int_loongarch_lsx_vssrln_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRLN_WU_D : LSX_3R<0b01110001000001011>,
+            LSX_3R_DESC_BASE<"vssrln.wu.d", int_loongarch_lsx_vssrln_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRAN_BU_H : LSX_3R<0b01110001000001101>,
+            LSX_3R_DESC_BASE<"vssran.bu.h", int_loongarch_lsx_vssran_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRAN_HU_W : LSX_3R<0b01110001000001110>,
+            LSX_3R_DESC_BASE<"vssran.hu.w", int_loongarch_lsx_vssran_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRAN_WU_D : LSX_3R<0b01110001000001111>,
+            LSX_3R_DESC_BASE<"vssran.wu.d", int_loongarch_lsx_vssran_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRLRN_BU_H : LSX_3R<0b01110001000010001>,
+            LSX_3R_DESC_BASE<"vssrlrn.bu.h", int_loongarch_lsx_vssrlrn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRLRN_HU_W : LSX_3R<0b01110001000010010>,
+            LSX_3R_DESC_BASE<"vssrlrn.hu.w", int_loongarch_lsx_vssrlrn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRLRN_WU_D : LSX_3R<0b01110001000010011>,
+            LSX_3R_DESC_BASE<"vssrlrn.wu.d", int_loongarch_lsx_vssrlrn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRARN_BU_H : LSX_3R<0b01110001000010101>,
+            LSX_3R_DESC_BASE<"vssrarn.bu.h", int_loongarch_lsx_vssrarn_bu_h, LSX128BOpnd, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRARN_HU_W : LSX_3R<0b01110001000010110>,
+            LSX_3R_DESC_BASE<"vssrarn.hu.w", int_loongarch_lsx_vssrarn_hu_w, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRARN_WU_D : LSX_3R<0b01110001000010111>,
+            LSX_3R_DESC_BASE<"vssrarn.wu.d", int_loongarch_lsx_vssrarn_wu_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VANDN_V : LSX_3R<0b01110001001010000>,
+            LSX_3R_DESC_BASE<"vandn.v", int_loongarch_lsx_vandn_v, LSX128BOpnd>;
+
+
+class LSX_VANDN_PSEUDO_BASE<RegisterOperand RO> :
+      LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk),
+                []>,
+      PseudoInstExpansion<(VANDN_V LSX128BOpnd:$vd,
+                                   LSX128BOpnd:$vj,
+                                   LSX128BOpnd:$vk)>;
+
+def VANDN_H_PSEUDO : LSX_VANDN_PSEUDO_BASE<LSX128HOpnd>;
+def VANDN_W_PSEUDO : LSX_VANDN_PSEUDO_BASE<LSX128WOpnd>;
+def VANDN_D_PSEUDO : LSX_VANDN_PSEUDO_BASE<LSX128DOpnd>;
+
+
+
+def VORN_V : LSX_3R<0b01110001001010001>,
+            LSX_3R_DESC_BASE<"vorn.v", int_loongarch_lsx_vorn_v, LSX128BOpnd>;
+
+
+class LSX_VORN_PSEUDO_BASE<RegisterOperand RO> :
+      LSXPseudo<(outs RO:$vd), (ins RO:$vj, RO:$vk),
+                []>,
+      PseudoInstExpansion<(VORN_V LSX128BOpnd:$vd,
+                                  LSX128BOpnd:$vj,
+                                  LSX128BOpnd:$vk)>;
+
+def VORN_H_PSEUDO : LSX_VORN_PSEUDO_BASE<LSX128HOpnd>;
+def VORN_W_PSEUDO : LSX_VORN_PSEUDO_BASE<LSX128WOpnd>;
+def VORN_D_PSEUDO : LSX_VORN_PSEUDO_BASE<LSX128DOpnd>;
+
+
+def VFRSTP_B : LSX_3R<0b01110001001010110>,
+            LSX_3R_4R_DESC_BASE<"vfrstp.b", int_loongarch_lsx_vfrstp_b, LSX128BOpnd>;
+
+def VFRSTP_H : LSX_3R<0b01110001001010111>,
+            LSX_3R_4R_DESC_BASE<"vfrstp.h", int_loongarch_lsx_vfrstp_h, LSX128HOpnd>;
+
+
+def VADD_Q : LSX_3R<0b01110001001011010>, IsCommutable,
+            LSX_3R_DESC_BASE<"vadd.q", int_loongarch_lsx_vadd_q, LSX128DOpnd>;
+
+def VSUB_Q : LSX_3R<0b01110001001011011>,
+            LSX_3R_DESC_BASE<"vsub.q", int_loongarch_lsx_vsub_q, LSX128DOpnd>;
+
+
+def VSIGNCOV_B : LSX_3R<0b01110001001011100>,
+            LSX_3R_DESC_BASE<"vsigncov.b", int_loongarch_lsx_vsigncov_b, LSX128BOpnd>;
+
+def VSIGNCOV_H : LSX_3R<0b01110001001011101>,
+            LSX_3R_DESC_BASE<"vsigncov.h", int_loongarch_lsx_vsigncov_h, LSX128HOpnd>;
+
+def VSIGNCOV_W : LSX_3R<0b01110001001011110>,
+            LSX_3R_DESC_BASE<"vsigncov.w", int_loongarch_lsx_vsigncov_w, LSX128WOpnd>;
+
+def VSIGNCOV_D : LSX_3R<0b01110001001011111>,
+            LSX_3R_DESC_BASE<"vsigncov.d", int_loongarch_lsx_vsigncov_d, LSX128DOpnd>;
+
+
+def VFCVT_H_S : LSX_3R<0b01110001010001100>,
+            LSX_3RF_DESC_BASE<"vfcvt.h.s", int_loongarch_lsx_vfcvt_h_s, LSX128HOpnd, LSX128WOpnd, LSX128WOpnd>;
+
+def VFCVT_S_D : LSX_3R<0b01110001010001101>,
+            LSX_3RF_DESC_BASE1<"vfcvt.s.d", int_loongarch_lsx_vfcvt_s_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VFFINT_S_L : LSX_3R<0b01110001010010000>,
+            LSX_3RF_DESC_BASE<"vffint.s.l", int_loongarch_lsx_vffint_s_l, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+def VFTINT_W_D : LSX_3R<0b01110001010010011>,
+            LSX_3RF_DESC_BASE<"vftint.w.d", int_loongarch_lsx_vftint_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VFTINTRZ_W_D : LSX_3R<0b01110001010010110>,
+            LSX_3RF_DESC_BASE<"vftintrz.w.d", int_loongarch_lsx_vftintrz_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+def VFTINTRP_W_D : LSX_3R<0b01110001010010101>,
+            LSX_3RF_DESC_BASE<"vftintrp.w.d", int_loongarch_lsx_vftintrp_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+def VFTINTRM_W_D : LSX_3R<0b01110001010010100>,
+            LSX_3RF_DESC_BASE<"vftintrm.w.d", int_loongarch_lsx_vftintrm_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+def VFTINTRNE_W_D : LSX_3R<0b01110001010010111>,
+            LSX_3RF_DESC_BASE<"vftintrne.w.d", int_loongarch_lsx_vftintrne_w_d, LSX128WOpnd, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VBSRL_V : LSX_I5_U<0b01110010100011101>,
+            LSX_U5_DESC_BASE<"vbsrl.v", int_loongarch_lsx_vbsrl_v, LSX128BOpnd>;
+
+def VBSLL_V : LSX_I5_U<0b01110010100011100>,
+            LSX_U5_DESC_BASE<"vbsll.v", int_loongarch_lsx_vbsll_v, LSX128BOpnd>;
+
+
+def VFRSTPI_B : LSX_I5_U<0b01110010100110100>,
+            LSX_U5_4R_DESC_BASE<"vfrstpi.b", int_loongarch_lsx_vfrstpi_b, LSX128BOpnd>;
+
+def VFRSTPI_H : LSX_I5_U<0b01110010100110101>,
+            LSX_U5_4R_DESC_BASE<"vfrstpi.h", int_loongarch_lsx_vfrstpi_h, LSX128HOpnd>;
+
+
+def VNEG_B : LSX_2R<0b0111001010011100001100>,
+            LSX_2R_DESC_BASE<"vneg.b", int_loongarch_lsx_vneg_b, LSX128BOpnd>;
+
+def VNEG_H : LSX_2R<0b0111001010011100001101>,
+            LSX_2R_DESC_BASE<"vneg.h", int_loongarch_lsx_vneg_h, LSX128HOpnd>;
+
+def VNEG_W : LSX_2R<0b0111001010011100001110>,
+            LSX_2R_DESC_BASE<"vneg.w", int_loongarch_lsx_vneg_w, LSX128WOpnd>;
+
+def VNEG_D : LSX_2R<0b0111001010011100001111>,
+            LSX_2R_DESC_BASE<"vneg.d", int_loongarch_lsx_vneg_d, LSX128DOpnd>;
+
+
+def VMSKGEZ_B : LSX_2R<0b0111001010011100010100>,
+            LSX_2R_DESC_BASE<"vmskgez.b", int_loongarch_lsx_vmskgez_b, LSX128BOpnd>;
+
+def VMSKNZ_B : LSX_2R<0b0111001010011100011000>,
+            LSX_2R_DESC_BASE<"vmsknz.b", int_loongarch_lsx_vmsknz_b, LSX128BOpnd>;
+
+
+def VFRINTRM_S : LSX_2R<0b0111001010011101010001>,
+            LSX_2RF_DESC_BASE<"vfrintrm.s", int_loongarch_lsx_vfrintrm_s, LSX128WOpnd>;
+
+def VFRINTRM_D : LSX_2R<0b0111001010011101010010>,
+            LSX_2RF_DESC_BASE<"vfrintrm.d", int_loongarch_lsx_vfrintrm_d, LSX128DOpnd>;
+
+
+def VFRINTRP_S : LSX_2R<0b0111001010011101010101>,
+            LSX_2RF_DESC_BASE<"vfrintrp.s", int_loongarch_lsx_vfrintrp_s, LSX128WOpnd>;
LSX_2RF_DESC_BASE<"vfrintrp.s", int_loongarch_lsx_vfrintrp_s, LSX128WOpnd>; + +def VFRINTRP_D : LSX_2R<0b0111001010011101010110>, + LSX_2RF_DESC_BASE<"vfrintrp.d", int_loongarch_lsx_vfrintrp_d, LSX128DOpnd>; + + +def VFRINTRZ_S : LSX_2R<0b0111001010011101011001>, + LSX_2RF_DESC_BASE<"vfrintrz.s", int_loongarch_lsx_vfrintrz_s, LSX128WOpnd>; + +def VFRINTRZ_D : LSX_2R<0b0111001010011101011010>, + LSX_2RF_DESC_BASE<"vfrintrz.d", int_loongarch_lsx_vfrintrz_d, LSX128DOpnd>; + + +def VFRINTRNE_S : LSX_2R<0b0111001010011101011101>, + LSX_2RF_DESC_BASE<"vfrintrne.s", int_loongarch_lsx_vfrintrne_s, LSX128WOpnd>; + +def VFRINTRNE_D : LSX_2R<0b0111001010011101011110>, + LSX_2RF_DESC_BASE<"vfrintrne.d", int_loongarch_lsx_vfrintrne_d, LSX128DOpnd>; + + +def VFFINTL_D_W : LSX_2R<0b0111001010011110000100>, + LSX_2RF_DESC_BASE<"vffintl.d.w", int_loongarch_lsx_vffintl_d_w, LSX128DOpnd, LSX128WOpnd>; + +def VFFINTH_D_W : LSX_2R<0b0111001010011110000101>, + LSX_2RF_DESC_BASE<"vffinth.d.w", int_loongarch_lsx_vffinth_d_w, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRM_W_S : LSX_2R<0b0111001010011110001110>, + LSX_2RF_DESC_BASE<"vftintrm.w.s", int_loongarch_lsx_vftintrm_w_s, LSX128WOpnd>; + +def VFTINTRM_L_D : LSX_2R<0b0111001010011110001111>, + LSX_2RF_DESC_BASE<"vftintrm.l.d", int_loongarch_lsx_vftintrm_l_d, LSX128DOpnd>; + + +def VFTINTRP_W_S : LSX_2R<0b0111001010011110010000>, + LSX_2RF_DESC_BASE<"vftintrp.w.s", int_loongarch_lsx_vftintrp_w_s, LSX128WOpnd>; + +def VFTINTRP_L_D : LSX_2R<0b0111001010011110010001>, + LSX_2RF_DESC_BASE<"vftintrp.l.d", int_loongarch_lsx_vftintrp_l_d, LSX128DOpnd>; + + +def VFTINTRZ_W_S : LSX_2R<0b0111001010011110010010>, + LSX_2RF_DESC_BASE<"vftintrz.w.s", fp_to_sint, LSX128WOpnd>; + +def VFTINTRZ_L_D : LSX_2R<0b0111001010011110010011>, + LSX_2RF_DESC_BASE<"vftintrz.l.d", fp_to_sint, LSX128DOpnd>; + + +def VFTINTRNE_W_S : LSX_2R<0b0111001010011110010100>, + LSX_2RF_DESC_BASE<"vftintrne.w.s", int_loongarch_lsx_vftintrne_w_s, LSX128WOpnd>; + +def VFTINTRNE_L_D : LSX_2R<0b0111001010011110010101>, + LSX_2RF_DESC_BASE<"vftintrne.l.d", int_loongarch_lsx_vftintrne_l_d, LSX128DOpnd>; + + +def VFTINTL_L_S : LSX_2R<0b0111001010011110100000>, + LSX_2RF_DESC_BASE<"vftintl.l.s", int_loongarch_lsx_vftintl_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTH_L_S : LSX_2R<0b0111001010011110100001>, + LSX_2RF_DESC_BASE<"vftinth.l.s", int_loongarch_lsx_vftinth_l_s, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRML_L_S : LSX_2R<0b0111001010011110100010>, + LSX_2RF_DESC_BASE<"vftintrml.l.s", int_loongarch_lsx_vftintrml_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTRMH_L_S : LSX_2R<0b0111001010011110100011>, + LSX_2RF_DESC_BASE<"vftintrmh.l.s", int_loongarch_lsx_vftintrmh_l_s, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRPL_L_S : LSX_2R<0b0111001010011110100100>, + LSX_2RF_DESC_BASE<"vftintrpl.l.s", int_loongarch_lsx_vftintrpl_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTRPH_L_S : LSX_2R<0b0111001010011110100101>, + LSX_2RF_DESC_BASE<"vftintrph.l.s", int_loongarch_lsx_vftintrph_l_s, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRZL_L_S : LSX_2R<0b0111001010011110100110>, + LSX_2RF_DESC_BASE<"vftintrzl.l.s", int_loongarch_lsx_vftintrzl_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTRZH_L_S : LSX_2R<0b0111001010011110100111>, + LSX_2RF_DESC_BASE<"vftintrzh.l.s", int_loongarch_lsx_vftintrzh_l_s, LSX128DOpnd, LSX128WOpnd>; + + +def VFTINTRNEL_L_S : LSX_2R<0b0111001010011110101000>, + LSX_2RF_DESC_BASE<"vftintrnel.l.s", int_loongarch_lsx_vftintrnel_l_s, LSX128DOpnd, LSX128WOpnd>; + +def VFTINTRNEH_L_S : 
+
+def VFTINTRNEH_L_S : LSX_2R<0b0111001010011110101001>,
+            LSX_2RF_DESC_BASE<"vftintrneh.l.s", int_loongarch_lsx_vftintrneh_l_s, LSX128DOpnd, LSX128WOpnd>;
+
+
+def VEXTH_H_B : LSX_2R<0b0111001010011110111000>,
+            LSX_2R_DESC_BASE<"vexth.h.b", int_loongarch_lsx_vexth_h_b, LSX128HOpnd, LSX128BOpnd>;
+
+def VEXTH_W_H : LSX_2R<0b0111001010011110111001>,
+            LSX_2R_DESC_BASE<"vexth.w.h", int_loongarch_lsx_vexth_w_h, LSX128WOpnd, LSX128HOpnd>;
+
+def VEXTH_D_W : LSX_2R<0b0111001010011110111010>,
+            LSX_2R_DESC_BASE<"vexth.d.w", int_loongarch_lsx_vexth_d_w, LSX128DOpnd, LSX128WOpnd>;
+
+def VEXTH_Q_D : LSX_2R<0b0111001010011110111011>,
+            LSX_2R_DESC_BASE<"vexth.q.d", int_loongarch_lsx_vexth_q_d, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VEXTH_HU_BU : LSX_2R<0b0111001010011110111100>,
+            LSX_2R_DESC_BASE<"vexth.hu.bu", int_loongarch_lsx_vexth_hu_bu, LSX128HOpnd, LSX128BOpnd>;
+
+def VEXTH_WU_HU : LSX_2R<0b0111001010011110111101>,
+            LSX_2R_DESC_BASE<"vexth.wu.hu", int_loongarch_lsx_vexth_wu_hu, LSX128WOpnd, LSX128HOpnd>;
+
+def VEXTH_DU_WU : LSX_2R<0b0111001010011110111110>,
+            LSX_2R_DESC_BASE<"vexth.du.wu", int_loongarch_lsx_vexth_du_wu, LSX128DOpnd, LSX128WOpnd>;
+
+def VEXTH_QU_DU : LSX_2R<0b0111001010011110111111>,
+            LSX_2R_DESC_BASE<"vexth.qu.du", int_loongarch_lsx_vexth_qu_du, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSLLWIL_H_B : LSX_I3_U<0b0111001100001000001>,
+            LSX_2R_U3_DESC_BASE<"vsllwil.h.b", int_loongarch_lsx_vsllwil_h_b, LSX128HOpnd, LSX128BOpnd>;
+
+def VSLLWIL_W_H : LSX_I4_U<0b011100110000100001>,
+            LSX_2R_U4_DESC_BASE<"vsllwil.w.h", int_loongarch_lsx_vsllwil_w_h, LSX128WOpnd, LSX128HOpnd>;
+
+def VSLLWIL_D_W : LSX_I5_U<0b01110011000010001>,
+            LSX_2R_U5_DESC_BASE<"vsllwil.d.w", int_loongarch_lsx_vsllwil_d_w, LSX128DOpnd, LSX128WOpnd>;
+
+
+def VEXTL_Q_D : LSX_2R<0b0111001100001001000000>,
+            LSX_2R_DESC_BASE<"vextl.q.d", int_loongarch_lsx_vextl_q_d, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSLLWIL_HU_BU : LSX_I3_U<0b0111001100001100001>,
+            LSX_2R_U3_DESC_BASE<"vsllwil.hu.bu", int_loongarch_lsx_vsllwil_hu_bu, LSX128HOpnd, LSX128BOpnd>;
+
+def VSLLWIL_WU_HU : LSX_I4_U<0b011100110000110001>,
+            LSX_2R_U4_DESC_BASE<"vsllwil.wu.hu", int_loongarch_lsx_vsllwil_wu_hu, LSX128WOpnd, LSX128HOpnd>;
+
+def VSLLWIL_DU_WU : LSX_I5_U<0b01110011000011001>,
+            LSX_2R_U5_DESC_BASE<"vsllwil.du.wu", int_loongarch_lsx_vsllwil_du_wu, LSX128DOpnd, LSX128WOpnd>;
+
+
+def VEXTL_QU_DU : LSX_2R<0b0111001100001101000000>,
+            LSX_2R_DESC_BASE<"vextl.qu.du", int_loongarch_lsx_vextl_qu_du, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VBITCLRI_B : LSX_I3_U<0b0111001100010000001>,
+            LSX_2R_U3_DESC_BASE<"vbitclri.b", int_loongarch_lsx_vbitclri_b, LSX128BOpnd, LSX128BOpnd>;
+
+def VBITCLRI_H : LSX_I4_U<0b011100110001000001>,
+            LSX_2R_U4_DESC_BASE<"vbitclri.h", int_loongarch_lsx_vbitclri_h, LSX128HOpnd, LSX128HOpnd>;
+
+def VBITCLRI_W : LSX_I5_U<0b01110011000100001>,
+            LSX_2R_U5_DESC_BASE<"vbitclri.w", int_loongarch_lsx_vbitclri_w, LSX128WOpnd, LSX128WOpnd>;
+
+def VBITCLRI_D : LSX_I6_U<0b0111001100010001>,
+            LSX_2R_U6_DESC_BASE<"vbitclri.d", int_loongarch_lsx_vbitclri_d, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VBITSETI_B : LSX_I3_U<0b0111001100010100001>,
+            LSX_2R_U3_DESC_BASE<"vbitseti.b", int_loongarch_lsx_vbitseti_b, LSX128BOpnd, LSX128BOpnd>;
+
+def VBITSETI_H : LSX_I4_U<0b011100110001010001>,
+            LSX_2R_U4_DESC_BASE<"vbitseti.h", int_loongarch_lsx_vbitseti_h, LSX128HOpnd, LSX128HOpnd>;
+
+def VBITSETI_W : LSX_I5_U<0b01110011000101001>,
+            LSX_2R_U5_DESC_BASE<"vbitseti.w", int_loongarch_lsx_vbitseti_w, LSX128WOpnd, LSX128WOpnd>;
+
+def VBITSETI_D : LSX_I6_U<0b0111001100010101>,
+            LSX_2R_U6_DESC_BASE<"vbitseti.d", int_loongarch_lsx_vbitseti_d, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VBITREVI_B : LSX_I3_U<0b0111001100011000001>,
+            LSX_2R_U3_DESC_BASE<"vbitrevi.b", int_loongarch_lsx_vbitrevi_b, LSX128BOpnd, LSX128BOpnd>;
+
+def VBITREVI_H : LSX_I4_U<0b011100110001100001>,
+            LSX_2R_U4_DESC_BASE<"vbitrevi.h", int_loongarch_lsx_vbitrevi_h, LSX128HOpnd, LSX128HOpnd>;
+
+def VBITREVI_W : LSX_I5_U<0b01110011000110001>,
+            LSX_2R_U5_DESC_BASE<"vbitrevi.w", int_loongarch_lsx_vbitrevi_w, LSX128WOpnd, LSX128WOpnd>;
+
+def VBITREVI_D : LSX_I6_U<0b0111001100011001>,
+            LSX_2R_U6_DESC_BASE<"vbitrevi.d", int_loongarch_lsx_vbitrevi_d, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSSRLRNI_B_H : LSX_I4_U<0b011100110101000001>,
+            LSX_2R_3R_U4_DESC_BASE<"vssrlrni.b.h", int_loongarch_lsx_vssrlrni_b_h, LSX128BOpnd, LSX128BOpnd>;
+
+def VSSRLRNI_H_W : LSX_I5_U<0b01110011010100001>,
+            LSX_2R_3R_U5_DESC_BASE<"vssrlrni.h.w", int_loongarch_lsx_vssrlrni_h_w, LSX128HOpnd, LSX128HOpnd>;
+
+def VSSRLRNI_W_D : LSX_I6_U<0b0111001101010001>,
+            LSX_2R_3R_U6_DESC_BASE<"vssrlrni.w.d", int_loongarch_lsx_vssrlrni_w_d, LSX128WOpnd, LSX128WOpnd>;
+
+def VSSRLRNI_D_Q : LSX_I7_U<0b011100110101001>,
+            LSX_2R_3R_U7_DESC_BASE<"vssrlrni.d.q", int_loongarch_lsx_vssrlrni_d_q, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VSRANI_B_H : LSX_I4_U<0b011100110101100001>,
+            LSX_2R_3R_U4_DESC_BASE<"vsrani.b.h", int_loongarch_lsx_vsrani_b_h, LSX128BOpnd, LSX128BOpnd>;
+
+def VSRANI_H_W : LSX_I5_U<0b01110011010110001>,
+            LSX_2R_3R_U5_DESC_BASE<"vsrani.h.w", int_loongarch_lsx_vsrani_h_w, LSX128HOpnd, LSX128HOpnd>;
+
+def VSRANI_W_D : LSX_I6_U<0b0111001101011001>,
+            LSX_2R_3R_U6_DESC_BASE<"vsrani.w.d", int_loongarch_lsx_vsrani_w_d, LSX128WOpnd, LSX128WOpnd>;
+
+def VSRANI_D_Q : LSX_I7_U<0b011100110101101>,
+            LSX_2R_3R_U7_DESC_BASE<"vsrani.d.q", int_loongarch_lsx_vsrani_d_q, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VEXTRINS_B : LSX_I8_U<0b01110011100011>,
+            LSX_2R_3R_U8_DESC_BASE<"vextrins.b", int_loongarch_lsx_vextrins_b, LSX128BOpnd, LSX128BOpnd>;
+
+def VEXTRINS_H : LSX_I8_U<0b01110011100010>,
+            LSX_2R_3R_U8_DESC_BASE<"vextrins.h", int_loongarch_lsx_vextrins_h, LSX128HOpnd, LSX128HOpnd>;
+
+def VEXTRINS_W : LSX_I8_U<0b01110011100001>,
+            LSX_2R_3R_U8_DESC_BASE<"vextrins.w", int_loongarch_lsx_vextrins_w, LSX128WOpnd, LSX128WOpnd>;
+
+def VEXTRINS_D : LSX_I8_U<0b01110011100000>,
+            LSX_2R_3R_U8_DESC_BASE<"vextrins.d", int_loongarch_lsx_vextrins_d, LSX128DOpnd, LSX128DOpnd>;
+
+
+def VBITSELI_B : LSX_I8_U<0b01110011110001>,
+            LSX_2R_3R_U8_DESC_BASE<"vbitseli.b", int_loongarch_lsx_vbitseli_b, LSX128BOpnd, LSX128BOpnd>;
+
+
+def VANDI_B : LSX_I8_U<0b01110011110100>,
+            LSX_2R_U8_DESC_BASE<"vandi.b", int_loongarch_lsx_vandi_b, LSX128BOpnd, LSX128BOpnd>;
+
+
+def VORI_B : LSX_I8_U<0b01110011110101>,
+            LSX_2R_U8_DESC_BASE<"vori.b", int_loongarch_lsx_vori_b, LSX128BOpnd, LSX128BOpnd>;
+
+
+def VXORI_B : LSX_I8_U<0b01110011110110>,
+            LSX_2R_U8_DESC_BASE<"vxori.b", int_loongarch_lsx_vxori_b, LSX128BOpnd, LSX128BOpnd>;
+
+
+def VNORI_B : LSX_I8_U<0b01110011110111>,
+            LSX_2R_U8_DESC_BASE<"vnori.b", int_loongarch_lsx_vnori_b, LSX128BOpnd, LSX128BOpnd>;
+
+
+def VLDI : LSX_1R_I13<0b01110011111000>,
+            LSX_I13_DESC_BASE<"vldi", int_loongarch_lsx_vldi, i32, simm13Op, LSX128DOpnd>;
+
+def VLDI_B : LSX_1R_I13_I10<0b01110011111000000>,
+            LSX_I13_DESC_BASE_10<"vldi", int_loongarch_lsx_vrepli_b, simm10, immZExt10, LSX128BOpnd>;
+
+def VLDI_H : LSX_1R_I13_I10<0b01110011111000001>,
+            LSX_I13_DESC_BASE_10<"vldi", int_loongarch_lsx_vrepli_h, simm10, immZExt10, LSX128HOpnd>;
+
+def VLDI_W : LSX_1R_I13_I10<0b01110011111000010>,
+            LSX_I13_DESC_BASE_10<"vldi", int_loongarch_lsx_vrepli_w, simm10, immZExt10, LSX128WOpnd>;
+
+def VLDI_D : LSX_1R_I13_I10<0b01110011111000011>,
+            LSX_I13_DESC_BASE_10<"vldi", int_loongarch_lsx_vrepli_d, simm10, immZExt10, LSX128DOpnd>;
+
+def VPERMI_W : LSX_I8_U<0b01110011111001>,
+            LSX_2R_3R_U8_DESC_BASE<"vpermi.w", int_loongarch_lsx_vpermi_w, LSX128WOpnd, LSX128WOpnd>;
+
+
+def VSEQ_B : LSX_3R<0b01110000000000000>, IsCommutable,
+            LSX_3R_DESC_BASE<"vseq.b", vseteq_v16i8, LSX128BOpnd>;
+
+def VSEQ_H : LSX_3R<0b01110000000000001>, IsCommutable,
+            LSX_3R_DESC_BASE<"vseq.h", vseteq_v8i16, LSX128HOpnd>;
+
+def VSEQ_W : LSX_3R<0b01110000000000010>, IsCommutable,
+            LSX_3R_DESC_BASE<"vseq.w", vseteq_v4i32, LSX128WOpnd>;
+
+def VSEQ_D : LSX_3R<0b01110000000000011>, IsCommutable,
+            LSX_3R_DESC_BASE<"vseq.d", vseteq_v2i64, LSX128DOpnd>;
+
+
+def VSLE_B : LSX_3R<0b01110000000000100>,
+            LSX_3R_DESC_BASE<"vsle.b", vsetle_v16i8, LSX128BOpnd>;
+
+def VSLE_H : LSX_3R<0b01110000000000101>,
+            LSX_3R_DESC_BASE<"vsle.h", vsetle_v8i16, LSX128HOpnd>;
+
+def VSLE_W : LSX_3R<0b01110000000000110>,
+            LSX_3R_DESC_BASE<"vsle.w", vsetle_v4i32, LSX128WOpnd>;
+
+def VSLE_D : LSX_3R<0b01110000000000111>,
+            LSX_3R_DESC_BASE<"vsle.d", vsetle_v2i64, LSX128DOpnd>;
+
+
+def VSLE_BU : LSX_3R<0b01110000000001000>,
+            LSX_3R_DESC_BASE<"vsle.bu", vsetule_v16i8, LSX128BOpnd>;
+
+def VSLE_HU : LSX_3R<0b01110000000001001>,
+            LSX_3R_DESC_BASE<"vsle.hu", vsetule_v8i16, LSX128HOpnd>;
+
+def VSLE_WU : LSX_3R<0b01110000000001010>,
+            LSX_3R_DESC_BASE<"vsle.wu", vsetule_v4i32, LSX128WOpnd>;
+
+def VSLE_DU : LSX_3R<0b01110000000001011>,
+            LSX_3R_DESC_BASE<"vsle.du", vsetule_v2i64, LSX128DOpnd>;
+
+
+def VSLT_B : LSX_3R<0b01110000000001100>,
+            LSX_3R_DESC_BASE<"vslt.b", vsetlt_v16i8, LSX128BOpnd>;
+
+def VSLT_H : LSX_3R<0b01110000000001101>,
+            LSX_3R_DESC_BASE<"vslt.h", vsetlt_v8i16, LSX128HOpnd>;
+
+def VSLT_W : LSX_3R<0b01110000000001110>,
+            LSX_3R_DESC_BASE<"vslt.w", vsetlt_v4i32, LSX128WOpnd>;
+
+def VSLT_D : LSX_3R<0b01110000000001111>,
+            LSX_3R_DESC_BASE<"vslt.d", vsetlt_v2i64, LSX128DOpnd>;
+
+
+def VSLT_BU : LSX_3R<0b01110000000010000>,
+            LSX_3R_DESC_BASE<"vslt.bu", vsetult_v16i8, LSX128BOpnd>;
+
+def VSLT_HU : LSX_3R<0b01110000000010001>,
+            LSX_3R_DESC_BASE<"vslt.hu", vsetult_v8i16, LSX128HOpnd>;
+
+def VSLT_WU : LSX_3R<0b01110000000010010>,
+            LSX_3R_DESC_BASE<"vslt.wu", vsetult_v4i32, LSX128WOpnd>;
+
+def VSLT_DU : LSX_3R<0b01110000000010011>,
+            LSX_3R_DESC_BASE<"vslt.du", vsetult_v2i64, LSX128DOpnd>;
+
+
+def VADD_B : LSX_3R<0b01110000000010100>, IsCommutable,
+            LSX_3R_DESC_BASE<"vadd.b", add, LSX128BOpnd>;
+
+def VADD_H : LSX_3R<0b01110000000010101>, IsCommutable,
+            LSX_3R_DESC_BASE<"vadd.h", add, LSX128HOpnd>;
+
+def VADD_W : LSX_3R<0b01110000000010110>, IsCommutable,
+            LSX_3R_DESC_BASE<"vadd.w", add, LSX128WOpnd>;
+
+def VADD_D : LSX_3R<0b01110000000010111>, IsCommutable,
+            LSX_3R_DESC_BASE<"vadd.d", add, LSX128DOpnd>;
+
+
+def VSUB_B : LSX_3R<0b01110000000011000>,
+            LSX_3R_DESC_BASE<"vsub.b", sub, LSX128BOpnd>;
+
+def VSUB_H : LSX_3R<0b01110000000011001>,
+            LSX_3R_DESC_BASE<"vsub.h", sub, LSX128HOpnd>;
+
+def VSUB_W : LSX_3R<0b01110000000011010>,
+            LSX_3R_DESC_BASE<"vsub.w", sub, LSX128WOpnd>;
+
+def VSUB_D : LSX_3R<0b01110000000011011>,
+            LSX_3R_DESC_BASE<"vsub.d", sub, LSX128DOpnd>;
+
+
+
+//Pat
+class LSXBitconvertPat<ValueType DstVT, ValueType SrcVT,
+                       RegisterClass DstRC,
+                       list<Predicate> preds = [HasLSX]> :
+      LSXPat<(DstVT (bitconvert SrcVT:$src)),
+             (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>;
+
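+// A bitcast between the 128-bit LSX vector types never changes any bits, so
+// each LSXBitconvertPat instance below lowers to a bare COPY_TO_REGCLASS of
+// the source operand into the destination register class; no instruction is
+// emitted for these patterns.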
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+def : LSXBitconvertPat;
+
+
+
+
+def : LSXPat<(i32 (vextract_sext_i8 v16i8:$vj, i32:$idx)),
+             (SRAI_W (COPY_TO_REGCLASS
+                          (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
+                                                          i32:$idx),
+                                               sub_lo)),
+                          GPR32), (i32 24))>;
+def : LSXPat<(i32 (vextract_sext_i16 v8i16:$vj, i32:$idx)),
+             (SRAI_W (COPY_TO_REGCLASS
+                          (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
+                                                          i32:$idx),
+                                               sub_lo)),
+                          GPR32), (i32 16))>;
+def : LSXPat<(i32 (vextract_sext_i32 v4i32:$vj, i32:$idx)),
+             (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj,
+                                                               i32:$idx),
+                                                    sub_lo)),
+                               GPR32)>;
+def : LSXPat<(i64 (vextract_sext_i64 v2i64:$vj, i32:$idx)),
+             (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj,
+                                                               i32:$idx),
+                                                    sub_64)),
+                               GPR64)>;
+
+def : LSXPat<(i32 (vextract_zext_i8 v16i8:$vj, i32:$idx)),
+             (SRLI_W (COPY_TO_REGCLASS
+                          (i32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj,
+                                                          i32:$idx),
+                                               sub_lo)),
+                          GPR32), (i32 24))>;
+def : LSXPat<(i32 (vextract_zext_i16 v8i16:$vj, i32:$idx)),
+             (SRLI_W (COPY_TO_REGCLASS
+                          (i32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj,
+                                                          i32:$idx),
+                                               sub_lo)),
+                          GPR32), (i32 16))>;
+def : LSXPat<(i32 (vextract_zext_i32 v4i32:$vj, i32:$idx)),
+             (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj,
+                                                               i32:$idx),
+                                                    sub_lo)),
+                               GPR32)>;
+
+def : LSXPat<(i64 (vextract_zext_i64 v2i64:$vj, i32:$idx)),
+             (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj,
+                                                               i32:$idx),
+                                                    sub_64)),
+                               GPR64)>;
+
+def : LSXPat<(f32 (vector_extract v4f32:$vj, i32:$idx)),
+             (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj,
+                                             i32:$idx),
+                                  sub_lo))>;
+def : LSXPat<(f64 (vector_extract v2f64:$vj, i32:$idx)),
+             (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj,
+                                             i32:$idx),
+                                  sub_64))>;
+
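+// The same element extracts, but with a 64-bit index as produced on LP64,
+// where GPRs are 64-bit: the index is first narrowed with EXTRACT_SUBREG
+// sub_32 and copied into GPR32 before it feeds VREPLVE.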
+def : LSXPat<
+    (i32 (vextract_sext_i8 v16i8:$vj, i64:$idx)),
+    (SRAI_W (COPY_TO_REGCLASS
+                 (i32 (EXTRACT_SUBREG
+                           (VREPLVE_B v16i8:$vj,
+                                      (COPY_TO_REGCLASS
+                                          (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+                           sub_lo)),
+                 GPR32),
+            (i32 24))>;
+def : LSXPat<
+    (i32 (vextract_sext_i16 v8i16:$vj, i64:$idx)),
+    (SRAI_W (COPY_TO_REGCLASS
+                 (i32 (EXTRACT_SUBREG
+                           (VREPLVE_H v8i16:$vj,
+                                      (COPY_TO_REGCLASS
+                                          (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+                           sub_lo)),
+                 GPR32),
+            (i32 16))>;
+
+def : LSXPat<
+    (i32 (vextract_sext_i32 v4i32:$vj, i64:$idx)),
+    (COPY_TO_REGCLASS
+        (i32 (EXTRACT_SUBREG
+                  (VREPLVE_W v4i32:$vj,
+                             (COPY_TO_REGCLASS
+                                 (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+                  sub_lo)),
+        GPR32)>;
+
+def : LSXPat<
+    (i64 (vextract_sext_i64 v2i64:$vj, i64:$idx)),
+    (COPY_TO_REGCLASS
+        (i64 (EXTRACT_SUBREG
+                  (VREPLVE_D v2i64:$vj,
+                             (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+                  sub_64)),
+        GPR64)>;
+
+def : LSXPat<
+    (i32 (vextract_zext_i8 v16i8:$vj, i64:$idx)),
+    (SRLI_W (COPY_TO_REGCLASS
+                 (i32 (EXTRACT_SUBREG
+                           (VREPLVE_B v16i8:$vj,
+                                      (COPY_TO_REGCLASS
+                                          (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+                           sub_lo)),
+                 GPR32),
+            (i32 24))>;
+def : LSXPat<
+    (i32 (vextract_zext_i16 v8i16:$vj, i64:$idx)),
+    (SRLI_W (COPY_TO_REGCLASS
+                 (i32 (EXTRACT_SUBREG
+                           (VREPLVE_H v8i16:$vj,
+                                      (COPY_TO_REGCLASS
+                                          (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+                           sub_lo)),
+                 GPR32),
+            (i32 16))>;
+def : LSXPat<
+    (i32 (vextract_zext_i32 v4i32:$vj, i64:$idx)),
+    (COPY_TO_REGCLASS
+        (i32 (EXTRACT_SUBREG
+                  (VREPLVE_W v4i32:$vj,
+                             (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+                  sub_lo)),
+        GPR32)>;
+def : LSXPat<
+    (i64 (vextract_zext_i64 v2i64:$vj, i64:$idx)),
+    (COPY_TO_REGCLASS
+        (i64 (EXTRACT_SUBREG
+                  (VREPLVE_D v2i64:$vj,
+                             (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+                  sub_64)),
+        GPR64)>;
+
+def : LSXPat<
+    (f32 (vector_extract v4f32:$vj, i64:$idx)),
+    (f32 (EXTRACT_SUBREG
+              (VREPLVE_W v4f32:$vj,
+                         (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+              sub_lo))>;
+def : LSXPat<
+    (f64 (vector_extract v2f64:$vj, i64:$idx)),
+    (f64 (EXTRACT_SUBREG
+              (VREPLVE_D v2f64:$vj,
+                         (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG i64:$idx, sub_32)), GPR32)),
+              sub_64))>;
+
+
+def : LSXPat<(vfseteq_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b),
+             (VFCMP_CEQ_S LSX128WOpnd:$a, LSX128WOpnd:$b)>;
+
+def : LSXPat<(vfseteq_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b),
+             (VFCMP_CEQ_D LSX128DOpnd:$a, LSX128DOpnd:$b)>;
+
+def : LSXPat<(vfsetle_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b),
+             (VFCMP_CLE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>;
+
+def : LSXPat<(vfsetle_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b),
+             (VFCMP_CLE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>;
+
+def : LSXPat<(vfsetlt_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b),
+             (VFCMP_CLT_S LSX128WOpnd:$a, LSX128WOpnd:$b)>;
+
+def : LSXPat<(vfsetlt_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b),
+             (VFCMP_CLT_D LSX128DOpnd:$a, LSX128DOpnd:$b)>;
+
+def : LSXPat<(vfsetne_v4f32 LSX128WOpnd:$a, LSX128WOpnd:$b),
+             (VFCMP_CNE_S LSX128WOpnd:$a, LSX128WOpnd:$b)>;
+
+def : LSXPat<(vfsetne_v2f64 LSX128DOpnd:$a, LSX128DOpnd:$b),
+             (VFCMP_CNE_D LSX128DOpnd:$a, LSX128DOpnd:$b)>;
+
+
+class LSX_INSERT_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+                             Operand ImmOp, PatFrag Imm,
+                             RegisterOperand ROVD, RegisterOperand ROFS> :
+      LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ImmOp:$n, ROFS:$fs),
+                [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs, Imm:$n))]> {
+  bit usesCustomInserter = 1;
+  string Constraints = "$vd = $vd_in";
+}
+
+
+class INSERT_FW_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE;
+class INSERT_FD_PSEUDO_DESC : LSX_INSERT_PSEUDO_BASE;
+
+def INSERT_FW_PSEUDO : INSERT_FW_PSEUDO_DESC;
+def INSERT_FD_PSEUDO : INSERT_FD_PSEUDO_DESC;
+
+
+class LSX_INSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+                                  RegisterOperand ROVD, RegisterOperand ROFS,
+                                  RegisterOperand ROIdx> :
+      LSXPseudo<(outs ROVD:$vd), (ins ROVD:$vd_in, ROIdx:$n, ROFS:$fs),
+                [(set ROVD:$vd, (OpNode (Ty ROVD:$vd_in), ROFS:$fs,
+                                        ROIdx:$n))]> {
+  bit usesCustomInserter = 1;
+  string Constraints = "$vd = $vd_in";
+}
+
+class INSERT_H_VIDX64_PSEUDO_DESC :
+      LSX_INSERT_VIDX_PSEUDO_BASE;
+def INSERT_H_VIDX64_PSEUDO : INSERT_H_VIDX64_PSEUDO_DESC;
+
+class INSERTPostRA<RegisterClass RC, RegisterClass RD, RegisterClass RE> :
+      LoongArchPseudo<(outs RC:$xd), (ins RC:$xd_in, RD:$n, RE:$fs), []> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+def INSERT_H_VIDX64_PSEUDO_POSTRA : INSERTPostRA;
+
+class LSX_COPY_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
+                           Operand ImmOp, PatFrag Imm,
+                           RegisterClass RCD, RegisterClass RCVS> :
+      LSXPseudo<(outs RCD:$vd), (ins RCVS:$vj, ImmOp:$n),
+                [(set RCD:$vd, (OpNode (VecTy RCVS:$vj), Imm:$n))]> {
+  bit usesCustomInserter = 1;
+}
+
+
+class COPY_FW_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE;
+class COPY_FD_PSEUDO_DESC : LSX_COPY_PSEUDO_BASE;
+def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC;
+def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC;
+
+
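+// The isCodeGenOnly definitions below presumably reuse encodings that are
+// already taken (e.g. VST_H/VST_W/VST_D all carry the vst major opcode
+// 0b0010110001): they only give instruction selection typed flavours of the
+// same instruction, and the assembler never sees them.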
+let isCodeGenOnly = 1 in {
+
+def VST_H : LSX_I12_S<0b0010110001>,
+            ST_DESC_BASE<"vst", store, v8i16, LSX128HOpnd, mem_simm12>;
+def VST_W : LSX_I12_S<0b0010110001>,
+            ST_DESC_BASE<"vst", store, v4i32, LSX128WOpnd, mem_simm12>;
+def VST_D : LSX_I12_S<0b0010110001>,
+            ST_DESC_BASE<"vst", store, v2i64, LSX128DOpnd, mem_simm12>;
+
+
+def VLD_H : LSX_I12_S<0b0010110000>,
+            LD_DESC_BASE<"vld", load, v8i16, LSX128HOpnd, mem_simm12>;
+def VLD_W : LSX_I12_S<0b0010110000>,
+            LD_DESC_BASE<"vld", load, v4i32, LSX128WOpnd, mem_simm12>;
+def VLD_D : LSX_I12_S<0b0010110000>,
+            LD_DESC_BASE<"vld", load, v2i64, LSX128DOpnd, mem_simm12>;
+
+
+
+def VANDI_B_N : LSX_I8_U<0b01110011110100>,
+            LSX_BIT_U8_VREPLVE_DESC_BASE<"vandi.b", and, vsplati8_uimm8, LSX128BOpnd>;
+
+
+def VXORI_B_N : LSX_I8_U<0b01110011110110>,
+            LSX_BIT_U8_VREPLVE_DESC_BASE<"vxori.b", xor, vsplati8_uimm8, LSX128BOpnd>;
+
+
+def VSRAI_B_N : LSX_I3_U<0b0111001100110100001>,
+            LSX_BIT_U3_VREPLVE_DESC_BASE<"vsrai.b", sra, vsplati8_uimm3, LSX128BOpnd>;
+
+def VSRAI_H_N : LSX_I4_U<0b011100110011010001>,
+            LSX_BIT_U4_VREPLVE_DESC_BASE<"vsrai.h", sra, vsplati16_uimm4, LSX128HOpnd>;
+
+def VSRAI_W_N : LSX_I5_U<0b01110011001101001>,
+            LSX_BIT_U5_VREPLVE_DESC_BASE<"vsrai.w", sra, vsplati32_uimm5, LSX128WOpnd>;
+
+def VSRAI_D_N : LSX_I6_U<0b0111001100110101>,
+            LSX_BIT_U6_VREPLVE_DESC_BASE<"vsrai.d", sra, vsplati64_uimm6, LSX128DOpnd>;
+
+
+def VMAXI_BU_N : LSX_I5_U<0b01110010100101000>,
+            LSX_I5_U_DESC_BASE<"vmaxi.bu", umax, vsplati8_uimm5, LSX128BOpnd>;
+
+def VMAXI_HU_N : LSX_I5_U<0b01110010100101001>,
+            LSX_I5_U_DESC_BASE<"vmaxi.hu", umax, vsplati16_uimm5, LSX128HOpnd>;
+
+def VMAXI_WU_N : LSX_I5_U<0b01110010100101010>,
+            LSX_I5_U_DESC_BASE<"vmaxi.wu", umax, vsplati32_uimm5, LSX128WOpnd>;
+
+def VMAXI_DU_N : LSX_I5_U<0b01110010100101011>,
+            LSX_I5_U_DESC_BASE<"vmaxi.du", umax, vsplati64_uimm5, LSX128DOpnd>;
+
+
+def VMINI_B_N : LSX_I5<0b01110010100100100>,
+            LSX_I5_DESC_BASE<"vmini.b", smin, vsplati8_simm5, LSX128BOpnd>;
+
+def VMINI_H_N : LSX_I5<0b01110010100100101>,
+            LSX_I5_DESC_BASE<"vmini.h", smin, vsplati16_simm5, LSX128HOpnd>;
+
+def VMINI_W_N : LSX_I5<0b01110010100100110>,
+            LSX_I5_DESC_BASE<"vmini.w", smin, vsplati32_simm5, LSX128WOpnd>;
+
+def VMINI_D_N : LSX_I5<0b01110010100100111>,
+            LSX_I5_DESC_BASE<"vmini.d", smin, vsplati64_simm5, LSX128DOpnd>;
+
+
+def VMAXI_B_N : LSX_I5<0b01110010100100000>,
+            LSX_I5_DESC_BASE<"vmaxi.b", smax, vsplati8_simm5, LSX128BOpnd>;
+
+def VMAXI_H_N : LSX_I5<0b01110010100100001>,
+            LSX_I5_DESC_BASE<"vmaxi.h", smax, vsplati16_simm5, LSX128HOpnd>;
+
+def VMAXI_W_N : LSX_I5<0b01110010100100010>,
+            LSX_I5_DESC_BASE<"vmaxi.w", smax, vsplati32_simm5, LSX128WOpnd>;
+
+def VMAXI_D_N : LSX_I5<0b01110010100100011>,
+            LSX_I5_DESC_BASE<"vmaxi.d", smax, vsplati64_simm5, LSX128DOpnd>;
+
+
+def VSEQI_B_N : LSX_I5<0b01110010100000000>,
+            LSX_I5_DESC_BASE<"vseqi.b", vseteq_v16i8, vsplati8_simm5, LSX128BOpnd>;
+
+def VSEQI_H_N : LSX_I5<0b01110010100000001>,
+            LSX_I5_DESC_BASE<"vseqi.h", vseteq_v8i16, vsplati16_simm5, LSX128HOpnd>;
+
+def VSEQI_W_N : LSX_I5<0b01110010100000010>,
+            LSX_I5_DESC_BASE<"vseqi.w", vseteq_v4i32, vsplati32_simm5, LSX128WOpnd>;
+
+def VSEQI_D_N : LSX_I5<0b01110010100000011>,
+            LSX_I5_DESC_BASE<"vseqi.d", vseteq_v2i64, vsplati64_simm5, LSX128DOpnd>;
+
+
+def VSLEI_B_N : LSX_I5<0b01110010100000100>,
+            LSX_I5_DESC_BASE<"vslei.b", vsetle_v16i8, vsplati8_simm5, LSX128BOpnd>;
+
+def VSLEI_H_N : LSX_I5<0b01110010100000101>,
+            LSX_I5_DESC_BASE<"vslei.h", vsetle_v8i16, vsplati16_simm5, LSX128HOpnd>;
+
+def VSLEI_W_N : LSX_I5<0b01110010100000110>,
+            LSX_I5_DESC_BASE<"vslei.w", vsetle_v4i32, vsplati32_simm5, LSX128WOpnd>;
+
+def VSLEI_D_N : LSX_I5<0b01110010100000111>,
+            LSX_I5_DESC_BASE<"vslei.d", vsetle_v2i64, vsplati64_simm5, LSX128DOpnd>;
+
+def VSLEI_BU_N : LSX_I5_U<0b01110010100001000>,
+            LSX_I5_U_DESC_BASE<"vslei.bu", vsetule_v16i8, vsplati8_uimm5, LSX128BOpnd>;
+
+def VSLEI_HU_N : LSX_I5_U<0b01110010100001001>,
+            LSX_I5_U_DESC_BASE<"vslei.hu", vsetule_v8i16, vsplati16_uimm5, LSX128HOpnd>;
+
+def VSLEI_WU_N : LSX_I5_U<0b01110010100001010>,
+            LSX_I5_U_DESC_BASE<"vslei.wu", vsetule_v4i32, vsplati32_uimm5, LSX128WOpnd>;
+
+def VSLEI_DU_N : LSX_I5_U<0b01110010100001011>,
+            LSX_I5_U_DESC_BASE<"vslei.du", vsetule_v2i64, vsplati64_uimm5, LSX128DOpnd>;
+
+
+def VSLTI_B_N : LSX_I5<0b01110010100001100>,
+            LSX_I5_DESC_BASE<"vslti.b", vsetlt_v16i8, vsplati8_simm5, LSX128BOpnd>;
+
+def VSLTI_H_N : LSX_I5<0b01110010100001101>,
+            LSX_I5_DESC_BASE<"vslti.h", vsetlt_v8i16, vsplati16_simm5, LSX128HOpnd>;
+
+def VSLTI_W_N : LSX_I5<0b01110010100001110>,
+            LSX_I5_DESC_BASE<"vslti.w", vsetlt_v4i32, vsplati32_simm5, LSX128WOpnd>;
+
+def VSLTI_D_N : LSX_I5<0b01110010100001111>,
+            LSX_I5_DESC_BASE<"vslti.d", vsetlt_v2i64, vsplati64_simm5, LSX128DOpnd>;
+
+
+def VSLTI_BU_N : LSX_I5_U<0b01110010100010000>,
+            LSX_I5_U_DESC_BASE<"vslti.bu", vsetult_v16i8, vsplati8_uimm5, LSX128BOpnd>;
+
+def VSLTI_HU_N : LSX_I5_U<0b01110010100010001>,
+            LSX_I5_U_DESC_BASE<"vslti.hu", vsetult_v8i16, vsplati16_uimm5, LSX128HOpnd>;
+
+def VSLTI_WU_N : LSX_I5_U<0b01110010100010010>,
+            LSX_I5_U_DESC_BASE<"vslti.wu", vsetult_v4i32, vsplati32_uimm5, LSX128WOpnd>;
+
+def VSLTI_DU_N : LSX_I5_U<0b01110010100010011>,
+            LSX_I5_U_DESC_BASE<"vslti.du", vsetult_v2i64, vsplati64_uimm5, LSX128DOpnd>;
+
+
+def VBITSELI_B_N : LSX_I8_U<0b01110011110001>,
+            LSX_2R_3R_SELECT<"vbitseli.b", vselect, LSX128BOpnd, LSX128BOpnd>;
+
+}
+
+
+def : LSXPat<(v4f32 (load addrimm12:$addr)), (VLD_W addrimm12:$addr)>;
+def : LSXPat<(v2f64 (load addrimm12:$addr)), (VLD_D addrimm12:$addr)>;
+
+def VST_FW : LSXPat<(store (v4f32 LSX128W:$vj), addrimm12:$addr),
+                    (VST_W LSX128W:$vj, addrimm12:$addr)>;
+def VST_FD : LSXPat<(store (v2f64 LSX128D:$vj), addrimm12:$addr),
+                    (VST_D LSX128D:$vj, addrimm12:$addr)>;
+
+def VNEG_FW : LSXPat<(fneg (v4f32 LSX128W:$vj)),
+                     (VBITREVI_W LSX128W:$vj, 31)>;
+def VNEG_FD : LSXPat<(fneg (v2f64 LSX128D:$vj)),
+                     (VBITREVI_D LSX128D:$vj, 63)>;
+
+
+def : LSXPat<(v2i64 (LoongArchVABSD v2i64:$vj, v2i64:$vk, (i32 0))),
+             (v2i64 (VABSD_D $vj, $vk))>;
+
+def : LSXPat<(v4i32 (LoongArchVABSD v4i32:$vj, v4i32:$vk, (i32 0))),
+             (v4i32 (VABSD_W $vj, $vk))>;
+
+def : LSXPat<(v8i16 (LoongArchVABSD v8i16:$vj, v8i16:$vk, (i32 0))),
+             (v8i16 (VABSD_H $vj, $vk))>;
+
+def : LSXPat<(v16i8 (LoongArchVABSD v16i8:$vj, v16i8:$vk, (i32 0))),
+             (v16i8 (VABSD_B $vj, $vk))>;
+
+def : LSXPat<(v2i64 (LoongArchUVABSD v2i64:$vj, v2i64:$vk, (i32 0))),
+             (v2i64 (VABSD_DU $vj, $vk))>;
+
+def : LSXPat<(v4i32 (LoongArchUVABSD v4i32:$vj, v4i32:$vk, (i32 0))),
+             (v4i32 (VABSD_WU $vj, $vk))>;
+
+def : LSXPat<(v8i16 (LoongArchUVABSD v8i16:$vj, v8i16:$vk, (i32 0))),
+             (v8i16 (VABSD_HU $vj, $vk))>;
+
+def : LSXPat<(v16i8 (LoongArchUVABSD v16i8:$vj, v16i8:$vk, (i32 0))),
+             (v16i8 (VABSD_BU $vj, $vk))>;
+
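+// Per-element bit manipulation: an OR with (1 << vk) selects vbitset, an XOR
+// with it selects vbitrev, and an AND with its complement selects vbitclr.
+// The v2i64 variants need vsplati64_imm_eq_1 because a 64-bit splat of one is
+// built as a bitcast of a v4i32 build_vector.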
+
+def : LSXPat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
+             (VBITSET_B v16i8:$vj, v16i8:$vk)>;
+def : LSXPat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
+             (VBITSET_H v8i16:$vj, v8i16:$vk)>;
+def : LSXPat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
+             (VBITSET_W v4i32:$vj, v4i32:$vk)>;
+def : LSXPat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)),
+             (VBITSET_D v2i64:$vj, v2i64:$vk)>;
+
+def : LSXPat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
+             (VBITREV_B v16i8:$vj, v16i8:$vk)>;
+def : LSXPat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)),
+             (VBITREV_H v8i16:$vj, v8i16:$vk)>;
+def : LSXPat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)),
+             (VBITREV_W v4i32:$vj, v4i32:$vk)>;
+def : LSXPat<(xor v2i64:$vj, (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk)),
+             (VBITREV_D v2i64:$vj, v2i64:$vk)>;
+
+def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk), immAllOnesV)),
+             (VBITCLR_B v16i8:$vj, v16i8:$vk)>;
+def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk), immAllOnesV)),
+             (VBITCLR_H v8i16:$vj, v8i16:$vk)>;
+def : LSXPat<(and v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk), immAllOnesV)),
+             (VBITCLR_W v4i32:$vj, v4i32:$vk)>;
+def : LSXPat<(and v2i64:$vj, (xor (shl (v2i64 vsplati64_imm_eq_1), v2i64:$vk),
+                                  (bitconvert (v4i32 immAllOnesV)))),
+             (VBITCLR_D v2i64:$vj, v2i64:$vk)>;
+
+def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
+  APInt Imm;
+  SDNode *BV = N->getOperand(0).getNode();
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
+}]>;
+
+def immi32Cst7 : ImmLeaf<i32, [{return isInt<32>(Imm) && Imm == 7;}]>;
+def immi32Cst15 : ImmLeaf<i32, [{return isInt<32>(Imm) && Imm == 15;}]>;
+def immi32Cst31 : ImmLeaf<i32, [{return isInt<32>(Imm) && Imm == 31;}]>;
+
+def vsplati8imm7 : PatFrag<(ops node:$vt),
+                           (and node:$vt, (vsplati8 immi32Cst7))>;
+def vsplati16imm15 : PatFrag<(ops node:$vt),
+                             (and node:$vt, (vsplati16 immi32Cst15))>;
+def vsplati32imm31 : PatFrag<(ops node:$vt),
+                             (and node:$vt, (vsplati32 immi32Cst31))>;
+def vsplati64imm63 : PatFrag<(ops node:$vt),
+                             (and node:$vt, vsplati64_imm_eq_63)>;
+
+class LSXShiftPat<SDNode Node, ValueType VT, LSXInst Insn, dag Vec> :
+      LSXPat<(VT (Node VT:$vs, (VT (and VT:$vt, Vec)))),
+             (VT (Insn VT:$vs, VT:$vt))>;
+
+class LSXBitPat<SDNode Node, ValueType VT, LSXInst Insn, PatFrag Frag> :
+      LSXPat<(VT (Node VT:$vs, (shl vsplat_imm_eq_1, (Frag VT:$vt)))),
+             (VT (Insn VT:$vs, VT:$vt))>;
+
+multiclass LSXShiftPats<SDNode Node, string Insn> {
+  def : LSXShiftPat<Node, v16i8, !cast<LSXInst>(Insn#_B),
+                    (vsplati8 immi32Cst7)>;
+  def : LSXShiftPat<Node, v8i16, !cast<LSXInst>(Insn#_H),
+                    (vsplati16 immi32Cst15)>;
+  def : LSXShiftPat<Node, v4i32, !cast<LSXInst>(Insn#_W),
+                    (vsplati32 immi32Cst31)>;
+  def : LSXPat<(v2i64 (Node v2i64:$vs, (v2i64 (and v2i64:$vt,
+                                                   vsplati64_imm_eq_63)))),
+               (v2i64 (!cast<LSXInst>(Insn#_D) v2i64:$vs, v2i64:$vt))>;
+}
+
+multiclass LSXBitPats<SDNode Node, string Insn> {
+  def : LSXBitPat<Node, v16i8, !cast<LSXInst>(Insn#_B), vsplati8imm7>;
+  def : LSXBitPat<Node, v8i16, !cast<LSXInst>(Insn#_H), vsplati16imm15>;
+  def : LSXBitPat<Node, v4i32, !cast<LSXInst>(Insn#_W), vsplati32imm31>;
+  def : LSXPat<(Node v2i64:$vs, (shl (v2i64 vsplati64_imm_eq_1),
+                                     (vsplati64imm63 v2i64:$vt))),
+               (v2i64 (!cast<LSXInst>(Insn#_D) v2i64:$vs, v2i64:$vt))>;
+}
+
+defm : LSXShiftPats<shl, "VSLL">;
+defm : LSXShiftPats<srl, "VSRL">;
+defm : LSXShiftPats<sra, "VSRA">;
+defm : LSXBitPats<or, "VBITSET">;
+defm : LSXBitPats<xor, "VBITREV">;
+
+def : LSXPat<(and v16i8:$vs, (xor (shl vsplat_imm_eq_1,
+                                       (vsplati8imm7 v16i8:$vt)),
+                                  immAllOnesV)),
+             (v16i8 (VBITCLR_B v16i8:$vs, v16i8:$vt))>;
+def : LSXPat<(and v8i16:$vs, (xor (shl vsplat_imm_eq_1,
+                                       (vsplati16imm15 v8i16:$vt)),
+                                  immAllOnesV)),
+             (v8i16 (VBITCLR_H v8i16:$vs, v8i16:$vt))>;
+def : LSXPat<(and v4i32:$vs, (xor (shl vsplat_imm_eq_1,
+                                       (vsplati32imm31 v4i32:$vt)),
+                                  immAllOnesV)),
+             (v4i32 (VBITCLR_W v4i32:$vs, v4i32:$vt))>;
+def : LSXPat<(and v2i64:$vs, (xor (shl (v2i64 vsplati64_imm_eq_1),
+                                       (vsplati64imm63 v2i64:$vt)),
+                                  (bitconvert (v4i32 immAllOnesV))),
+             (v2i64 (VBITCLR_D v2i64:$vs, v2i64:$vt))>;
+
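+// fdiv folds: dividing a splat of +1.0 by a vector selects vfrecip, and
+// dividing it by (fsqrt x) selects vfrsqrt; both the explicit build_vector
+// form and the vector fpimm1 form are covered below.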
+
+def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), v4f32:$v),
+             (VFRECIP_S v4f32:$v)>;
+
+def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), v2f64:$v),
+             (VFRECIP_D v2f64:$v)>;
+
+def : LSXPat<(fdiv (v4f32 fpimm1), v4f32:$v),
+             (VFRECIP_S v4f32:$v)>;
+
+def : LSXPat<(fdiv (v2f64 fpimm1), v2f64:$v),
+             (VFRECIP_D v2f64:$v)>;
+
+
+def : LSXPat<(fdiv (v4f32 (build_vector (f32 fpimm1), (f32 fpimm1), (f32 fpimm1), (f32 fpimm1))), (fsqrt v4f32:$v)),
+             (VFRSQRT_S v4f32:$v)>;
+
+def : LSXPat<(fdiv (v2f64 (build_vector (f64 fpimm1), (f64 fpimm1))), (fsqrt v2f64:$v)),
+             (VFRSQRT_D v2f64:$v)>;
+
+def : LSXPat<(fdiv (v4f32 fpimm1), (fsqrt v4f32:$v)),
+             (VFRSQRT_S v4f32:$v)>;
+
+def : LSXPat<(fdiv (v2f64 fpimm1), (fsqrt v2f64:$v)),
+             (VFRSQRT_D v2f64:$v)>;
+
+
+def : LSXPat<(abs v2i64:$v),
+             (VMAX_D v2i64:$v, (VNEG_D v2i64:$v))>;
+
+def : LSXPat<(abs v4i32:$v),
+             (VMAX_W v4i32:$v, (VNEG_W v4i32:$v))>;
+
+def : LSXPat<(abs v8i16:$v),
+             (VMAX_H v8i16:$v, (VNEG_H v8i16:$v))>;
+
+def : LSXPat<(abs v16i8:$v),
+             (VMAX_B v16i8:$v, (VNEG_B v16i8:$v))>;
+
+
+def : LSXPat<(sub (v16i8 immAllZerosV), v16i8:$v),
+             (VNEG_B v16i8:$v)>;
+
+def : LSXPat<(sub (v8i16 immAllZerosV), v8i16:$v),
+             (VNEG_H v8i16:$v)>;
+
+def : LSXPat<(sub (v4i32 immAllZerosV), v4i32:$v),
+             (VNEG_W v4i32:$v)>;
+
+def : LSXPat<(sub (v2i64 immAllZerosV), v2i64:$v),
+             (VNEG_D v2i64:$v)>;
+
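+// Average idioms: the signed form matches the sum plus its shifted-down sign
+// bit, arithmetically shifted right by one (the DAG that C's (a + b) / 2
+// expands to), while the unsigned form is a plain logical shift of the sum.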
+ ) + ), + (VAVG_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; + + + + + + +def : LSXPat<(mulhs LSX128D:$a, LSX128D:$b), + (VMUH_D LSX128D:$a, LSX128D:$b)>; + +def : LSXPat<(mulhs LSX128W:$a, LSX128W:$b), + (VMUH_W LSX128W:$a, LSX128W:$b)>; + +def : LSXPat<(mulhs LSX128H:$a, LSX128H:$b), + (VMUH_H LSX128H:$a, LSX128H:$b)>; + +def : LSXPat<(mulhs LSX128B:$a, LSX128B:$b), + (VMUH_B LSX128B:$a, LSX128B:$b)>; + + +def : LSXPat<(mulhu LSX128D:$a, LSX128D:$b), + (VMUH_DU LSX128D:$a, LSX128D:$b)>; + +def : LSXPat<(mulhu LSX128W:$a, LSX128W:$b), + (VMUH_WU LSX128W:$a, LSX128W:$b)>; + +def : LSXPat<(mulhu LSX128H:$a, LSX128H:$b), + (VMUH_HU LSX128H:$a, LSX128H:$b)>; + +def : LSXPat<(mulhu LSX128B:$a, LSX128B:$b), + (VMUH_BU LSX128B:$a, LSX128B:$b)>; + + + +//===----------------------------------------------------------------------===// +// Intrinsics +//===----------------------------------------------------------------------===// + +def : LSXPat<(int_loongarch_lsx_vseq_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSEQ_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vseq_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSEQ_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vseq_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSEQ_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vseq_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSEQ_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsle_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSLE_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSLE_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSLE_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSLE_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsle_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSLE_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSLE_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSLE_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsle_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSLE_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vslt_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSLT_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSLT_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSLT_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSLT_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vslt_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSLT_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSLT_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSLT_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vslt_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSLT_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VADD_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vadd_h (v8i16 
+def : LSXPat<(int_loongarch_lsx_vseq_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSEQ_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vseq_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSEQ_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vseq_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSEQ_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vseq_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSEQ_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vsle_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSLE_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsle_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSLE_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsle_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSLE_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsle_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSLE_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vsle_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSLE_BU LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsle_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSLE_HU LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsle_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSLE_WU LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsle_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSLE_DU LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vslt_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSLT_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vslt_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSLT_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vslt_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSLT_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vslt_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSLT_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vslt_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSLT_BU LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vslt_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSLT_HU LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vslt_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSLT_WU LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vslt_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSLT_DU LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VADD_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VADD_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VADD_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VADD_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vsub_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSUB_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsub_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSUB_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSUB_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSUB_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vsadd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSADD_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsadd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSADD_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsadd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSADD_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsadd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSADD_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vssub_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSSUB_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vssub_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSSUB_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vssub_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSSUB_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vssub_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSSUB_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vsadd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSADD_BU LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsadd_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSADD_HU LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsadd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSADD_WU LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vsadd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSADD_DU LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vssub_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VSSUB_BU LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vssub_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VSSUB_HU LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vssub_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VSSUB_WU LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vssub_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VSSUB_DU LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vhaddw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VHADDW_H_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vhaddw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VHADDW_W_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vhaddw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VHADDW_D_W LSX128W:$vj, LSX128W:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vhsubw_h_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VHSUBW_H_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vhsubw_w_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VHSUBW_W_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vhsubw_d_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VHSUBW_D_W LSX128W:$vj, LSX128W:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vhaddw_hu_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VHADDW_HU_BU LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vhaddw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VHADDW_WU_HU LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vhaddw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VHADDW_DU_WU LSX128W:$vj, LSX128W:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vhsubw_hu_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VHSUBW_HU_BU LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vhsubw_wu_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VHSUBW_WU_HU LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vhsubw_du_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VHSUBW_DU_WU LSX128W:$vj, LSX128W:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vadda_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VADDA_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vadda_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VADDA_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vadda_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VADDA_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vadda_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VADDA_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vabsd_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VABSD_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vabsd_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VABSD_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vabsd_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VABSD_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vabsd_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VABSD_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vabsd_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VABSD_BU LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vabsd_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VABSD_HU LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vabsd_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VABSD_WU LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vabsd_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VABSD_DU LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vavg_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VAVG_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vavg_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VAVG_H LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vavg_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VAVG_W LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vavg_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VAVG_D LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vavg_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VAVG_BU LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vavg_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VAVG_HU LSX128H:$vj, LSX128H:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vavg_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)),
+             (VAVG_WU LSX128W:$vj, LSX128W:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vavg_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)),
+             (VAVG_DU LSX128D:$vj, LSX128D:$vk)>;
+
+def : LSXPat<(int_loongarch_lsx_vavgr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)),
+             (VAVGR_B LSX128B:$vj, LSX128B:$vk)>;
+def : LSXPat<(int_loongarch_lsx_vavgr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)),
+             (VAVGR_H LSX128H:$vj, LSX128H:$vk)>;
LSXPat<(int_loongarch_lsx_vavgr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VAVGR_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VAVGR_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vavgr_bu (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VAVGR_BU LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_hu (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VAVGR_HU LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_wu (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VAVGR_WU LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vavgr_du (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VAVGR_DU LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsrlr_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSRLR_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrlr_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSRLR_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrlr_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSRLR_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrlr_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSRLR_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vsrar_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VSRAR_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrar_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VSRAR_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrar_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VSRAR_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vsrar_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VSRAR_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vbitset_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VBITSET_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitset_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VBITSET_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitset_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VBITSET_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitset_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VBITSET_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vbitrev_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk)), + (VBITREV_B LSX128B:$vj, LSX128B:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitrev_h (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk)), + (VBITREV_H LSX128H:$vj, LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitrev_w (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk)), + (VBITREV_W LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vbitrev_d (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk)), + (VBITREV_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfadd_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFADD_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfadd_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFADD_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfsub_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFSUB_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfsub_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFSUB_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfmax_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFMAX_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfmax_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFMAX_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfmin_s (v4f32 
LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFMIN_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfmin_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFMIN_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfmaxa_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFMAXA_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfmaxa_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFMAXA_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfmina_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFMINA_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfmina_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFMINA_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vclo_b (v16i8 LSX128B:$vj)), + (VCLO_B LSX128B:$vj)>; +def : LSXPat<(int_loongarch_lsx_vclo_h (v8i16 LSX128H:$vj)), + (VCLO_H LSX128H:$vj)>; +def : LSXPat<(int_loongarch_lsx_vclo_w (v4i32 LSX128W:$vj)), + (VCLO_W LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vclo_d (v2i64 LSX128D:$vj)), + (VCLO_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vflogb_s (v4f32 LSX128W:$vj)), + (VFLOGB_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vflogb_d (v2f64 LSX128D:$vj)), + (VFLOGB_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vfclass_s (v4f32 LSX128W:$vj)), + (VFCLASS_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vfclass_d (v2f64 LSX128D:$vj)), + (VFCLASS_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vfrecip_s (v4f32 LSX128W:$vj)), + (VFRECIP_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vfrecip_d (v2f64 LSX128D:$vj)), + (VFRECIP_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vfrsqrt_s (v4f32 LSX128W:$vj)), + (VFRSQRT_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vfrsqrt_d (v2f64 LSX128D:$vj)), + (VFRSQRT_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vfcvtl_s_h (v8i16 LSX128H:$vk)), + (VFCVTL_S_H LSX128H:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcvth_s_h (v8i16 LSX128H:$vk)), + (VFCVTH_S_H LSX128H:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcvtl_d_s (v4f32 LSX128W:$vj)), + (VFCVTL_D_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vfcvth_d_s (v4f32 LSX128W:$vj)), + (VFCVTH_D_S LSX128W:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vftint_w_s (v4f32 LSX128W:$vj)), + (VFTINT_W_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vftint_l_d (v2f64 LSX128D:$vj)), + (VFTINT_L_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vftint_wu_s (v4f32 LSX128W:$vj)), + (VFTINT_WU_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vftint_lu_d (v2f64 LSX128D:$vj)), + (VFTINT_LU_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vreplgr2vr_b GPR32Opnd:$rj), + (VREPLGR2VR_B GPR32Opnd:$rj)>; +def : LSXPat<(int_loongarch_lsx_vreplgr2vr_h GPR32Opnd:$rj), + (VREPLGR2VR_H GPR32Opnd:$rj)>; +def : LSXPat<(int_loongarch_lsx_vreplgr2vr_w GPR32Opnd:$rj), + (VREPLGR2VR_W GPR32Opnd:$rj)>; +def : LSXPat<(int_loongarch_lsx_vreplgr2vr_d GPR64Opnd:$rj), + (VREPLGR2VR_D GPR64Opnd:$rj)>; + +def : LSXPat<(int_loongarch_lsx_vsrlri_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), + (VSRLRI_B LSX128B:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vsrlri_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), + (VSRLRI_H LSX128H:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsrlri_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), + (VSRLRI_W LSX128W:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsrlri_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), + (VSRLRI_D LSX128D:$vj, uimm6:$ui6)>; + +def : LSXPat<(int_loongarch_lsx_vsrari_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), + (VSRARI_B LSX128B:$vj, 
uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vsrari_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), + (VSRARI_H LSX128H:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsrari_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), + (VSRARI_W LSX128W:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsrari_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), + (VSRARI_D LSX128D:$vj, uimm6:$ui6)>; + +def : LSXPat<(int_loongarch_lsx_vinsgr2vr_b (v16i8 LSX128B:$vj), GPR32Opnd:$rj, (immZExt4:$ui4)), + (VINSGR2VR_B LSX128B:$vj, GPR32Opnd:$rj, (uimm4i:$ui4))>; +def : LSXPat<(int_loongarch_lsx_vinsgr2vr_h (v8i16 LSX128H:$vj), GPR32Opnd:$rj, (immZExt3:$ui3)), + (VINSGR2VR_H LSX128H:$vj, GPR32Opnd:$rj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vinsgr2vr_w (v4i32 LSX128W:$vj), GPR32Opnd:$rj, (immZExt2:$ui2)), + (VINSGR2VR_W LSX128W:$vj, GPR32Opnd:$rj, uimm2:$ui2)>; +def : LSXPat<(int_loongarch_lsx_vinsgr2vr_d (v2i64 LSX128D:$vj), GPR64Opnd:$rj, (immZExt1:$ui1)), + (VINSGR2VR_D LSX128D:$vj, GPR64Opnd:$rj, uimm1i:$ui1)>; + +def : LSXPat<(int_loongarch_lsx_vpickve2gr_b (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VPICKVE2GR_B LSX128B:$vj, (uimm4i:$ui4))>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_h (v8i16 LSX128H:$vj), (immZExt3:$ui3)), + (VPICKVE2GR_H LSX128H:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_w (v4i32 LSX128W:$vj), (immZExt2:$ui2)), + (VPICKVE2GR_W LSX128W:$vj, uimm2:$ui2)>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_d (v2i64 LSX128D:$vj), (immZExt1:$ui1)), + (VPICKVE2GR_D LSX128D:$vj, uimm1i:$ui1)>; + +def : LSXPat<(int_loongarch_lsx_vpickve2gr_bu (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VPICKVE2GR_BU LSX128B:$vj, (uimm4i:$ui4))>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_hu (v8i16 LSX128H:$vj), (immZExt3:$ui3)), + (VPICKVE2GR_HU LSX128H:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vpickve2gr_wu (v4i32 LSX128W:$vj), (immZExt2:$ui2)), + (VPICKVE2GR_WU LSX128W:$vj, uimm2:$ui2)>; + +def : LSXPat<(int_loongarch_lsx_vsat_b (v16i8 LSX128B:$vj), (immZExt3:$ui3)), + (VSAT_B LSX128B:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vsat_h (v8i16 LSX128H:$vj), (immZExt4:$ui4)), + (VSAT_H LSX128H:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsat_w (v4i32 LSX128W:$vj), (immZExt5:$ui5)), + (VSAT_W LSX128W:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsat_d (v2i64 LSX128D:$vj), (immZExt6:$ui6)), + (VSAT_D LSX128D:$vj, uimm6:$ui6)>; + +def : LSXPat<(int_loongarch_lsx_vsat_bu (v16i8 LSX128B:$vj), (immZExt3:$ui3)), + (VSAT_BU LSX128B:$vj, uimm3:$ui3)>; +def : LSXPat<(int_loongarch_lsx_vsat_hu (v8i16 LSX128H:$vj), (immZExt4:$ui4)), + (VSAT_HU LSX128H:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsat_wu (v4i32 LSX128W:$vj), (immZExt5:$ui5)), + (VSAT_WU LSX128W:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsat_du (v2i64 LSX128D:$vj), (immZExt6:$ui6)), + (VSAT_DU LSX128D:$vj, uimm6:$ui6)>; + +def : LSXPat<(int_loongarch_lsx_vmskltz_b (v16i8 LSX128B:$vj)), + (VMSKLTZ_B LSX128B:$vj)>; +def : LSXPat<(int_loongarch_lsx_vmskltz_h (v8i16 LSX128H:$vj)), + (VMSKLTZ_H LSX128H:$vj)>; +def : LSXPat<(int_loongarch_lsx_vmskltz_w (v4i32 LSX128W:$vj)), + (VMSKLTZ_W LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vmskltz_d (v2i64 LSX128D:$vj)), + (VMSKLTZ_D LSX128D:$vj)>; + +def : LSXPat<(int_loongarch_lsx_vsrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, 
uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vsrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrlni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRLNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRLNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRLNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRLNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrlni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRLNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRLNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRLNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrlni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRLNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrlrni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRLRNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrlrni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRLRNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrlrni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRLRNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrlrni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRLRNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vsrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vsrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vsrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vsrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrani_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRANI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrani_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRANI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrani_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRANI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : 
LSXPat<(int_loongarch_lsx_vssrani_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRANI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrani_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRANI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrani_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRANI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrani_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRANI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrani_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRANI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrarni_b_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRARNI_B_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_h_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRARNI_H_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_w_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRARNI_W_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_d_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRARNI_D_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(int_loongarch_lsx_vssrarni_bu_h (v16i8 LSX128B:$vd_in), (v16i8 LSX128B:$vj), (immZExt4:$ui4)), + (VSSRARNI_BU_H LSX128B:$vd_in, LSX128B:$vj, uimm4i:$ui4)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_hu_w (v8i16 LSX128H:$vd_in), (v8i16 LSX128H:$vj), (immZExt5:$ui5)), + (VSSRARNI_HU_W LSX128H:$vd_in, LSX128H:$vj, uimm5:$ui5)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_wu_d (v4i32 LSX128W:$vd_in), (v4i32 LSX128W:$vj), (immZExt6:$ui6)), + (VSSRARNI_WU_D LSX128W:$vd_in, LSX128W:$vj, uimm6:$ui6)>; +def : LSXPat<(int_loongarch_lsx_vssrarni_du_q (v2i64 LSX128D:$vd_in), (v2i64 LSX128D:$vj), (immZExt7:$ui7)), + (VSSRARNI_DU_Q LSX128D:$vd_in, LSX128D:$vj, uimm7i:$ui7)>; + +def : LSXPat<(load (add iPTR:$vj, GPR64Opnd:$vk)), + (VLDX PtrRC:$vj, GPR64Opnd:$vk)>; + +def : LSXPat<(store (v16i8 LSX128B:$vd), (add iPTR:$vj, GPR64Opnd:$vk)), + (VSTX LSX128B:$vd, PtrRC:$vj, GPR64Opnd:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vshuf_b (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk), (v16i8 LSX128B:$va)), + (VSHUF_B LSX128B:$vj, LSX128B:$vk, LSX128B:$va)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CEQ_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_ceq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CEQ_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cor_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_COR_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cor_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_COR_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cun_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CUN_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cun_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CUN_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cune_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CUNE_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cune_d (v2f64 
LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CUNE_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CUEQ_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cueq_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CUEQ_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cne_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CNE_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cne_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CNE_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_clt_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CLT_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_clt_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CLT_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cult_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CULT_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cult_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CULT_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cle_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CLE_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cle_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CLE_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vfcmp_cule_s (v4f32 LSX128W:$vj), (v4f32 LSX128W:$vk)), + (VFCMP_CULE_S LSX128W:$vj, LSX128W:$vk)>; +def : LSXPat<(int_loongarch_lsx_vfcmp_cule_d (v2f64 LSX128D:$vj), (v2f64 LSX128D:$vk)), + (VFCMP_CULE_D LSX128D:$vj, LSX128D:$vk)>; + +def : LSXPat<(int_loongarch_lsx_vftintrz_w_s (v4f32 LSX128W:$vj)), + (VFTINTRZ_W_S LSX128W:$vj)>; +def : LSXPat<(int_loongarch_lsx_vftintrz_l_d (v2f64 LSX128D:$vj)), + (VFTINTRZ_L_D LSX128D:$vj)>; + + +def imm_mask : ImmLeaf<i32, [{return isInt<32>(Imm) && Imm == -1;}]>; +def imm_mask_64 : ImmLeaf<i64, [{return isInt<64>(Imm) && Imm == -1;}]>; + + +def : LSXPat<(xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask)), + (NOR_V_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vj))>; + +def : LSXPat<(xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask)), + (NOR_V_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vj))>; + +def : LSXPat<(xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64)), + (NOR_V_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vj))>; + + +def : LSXPat<(and + (v16i8 (xor (v16i8 LSX128B:$vj),(vsplati8 imm_mask))), + (v16i8 LSX128B:$vk) + ), + (VANDN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>; + +def : LSXPat<(and + (v8i16 (xor (v8i16 LSX128H:$vj), (vsplati16 imm_mask))), + (v8i16 LSX128H:$vk) + ), + (VANDN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>; + +def : LSXPat<(and + (v4i32 (xor (v4i32 LSX128W:$vj), (vsplati32 imm_mask))), + (v4i32 LSX128W:$vk) + ), + (VANDN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>; + +def : LSXPat<(and + (v2i64 (xor (v2i64 LSX128D:$vj), (vsplati64 imm_mask_64))), + (v2i64 LSX128D:$vk) + ), + (VANDN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>; + + +def : LSXPat<(or + (v16i8 LSX128B:$vj), + (v16i8 (xor (v16i8 LSX128B:$vk), (vsplati8 imm_mask))) + ), + (VORN_V (v16i8 LSX128B:$vj), (v16i8 LSX128B:$vk))>; + +def : LSXPat<(or + (v8i16 LSX128H:$vj), + (v8i16 (xor (v8i16 LSX128H:$vk), (vsplati16 imm_mask))) + ), + (VORN_H_PSEUDO (v8i16 LSX128H:$vj), (v8i16 LSX128H:$vk))>; + +def : LSXPat<(or + (v4i32 LSX128W:$vj), + (v4i32 (xor (v4i32 LSX128W:$vk), (vsplati32 imm_mask))) + ), + (VORN_W_PSEUDO (v4i32 LSX128W:$vj), (v4i32 LSX128W:$vk))>; + +def : LSXPat<(or + 
(v2i64 LSX128D:$vj), + (v2i64 (xor (v2i64 LSX128D:$vk), (vsplati64 imm_mask_64))) + ), + (VORN_D_PSEUDO (v2i64 LSX128D:$vj), (v2i64 LSX128D:$vk))>; + + +def : LSXPat<(add (v2i64 (abs LSX128D:$a)), (v2i64 (abs LSX128D:$b))), + (VADDA_D (v2i64 LSX128D:$a),(v2i64 LSX128D:$b))>; + +def : LSXPat<(add (v4i32 (abs LSX128W:$a)), (v4i32 (abs LSX128W:$b))), + (VADDA_W (v4i32 LSX128W:$a),(v4i32 LSX128W:$b))>; + +def : LSXPat<(add (v8i16 (abs LSX128H:$a)), (v8i16 (abs LSX128H:$b))), + (VADDA_H (v8i16 LSX128H:$a),(v8i16 LSX128H:$b))>; + +def : LSXPat<(add (v16i8 (abs LSX128B:$a)), (v16i8 (abs LSX128B:$b))), + (VADDA_B (v16i8 LSX128B:$a),(v16i8 LSX128B:$b))>; + + +def : LSXPat<(and v16i8:$vj, (xor (shl vsplat_imm_eq_1, v16i8:$vk), + (vsplati8 imm_mask))), + (VBITCLR_B v16i8:$vj, v16i8:$vk)>; + +def : LSXPat<(and v8i16:$vj, (xor (shl vsplat_imm_eq_1, v8i16:$vk), + (vsplati16 imm_mask))), + (VBITCLR_H v8i16:$vj, v8i16:$vk)>; + +def : LSXPat<(and v4i32:$vj, (xor (shl vsplat_imm_eq_1, v4i32:$vk), + (vsplati32 imm_mask))), + (VBITCLR_W v4i32:$vj, v4i32:$vk)>; + +def : LSXPat<(and v2i64:$vj, (xor (shl vsplat_imm_eq_1, v2i64:$vk), + (vsplati64 imm_mask_64))), + (VBITCLR_D v2i64:$vj, v2i64:$vk)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp index 488c66f47863..bf70b09d42c7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp @@ -1,4 +1,4 @@ -//=- LoongArchMCInstLower.cpp - Convert LoongArch MachineInstr to an MCInst -=// +//===- LoongArchMCInstLower.cpp - Convert LoongArch MachineInstr to MCInst -===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,86 +6,337 @@ // //===----------------------------------------------------------------------===// // -// This file contains code to lower LoongArch MachineInstrs to their -// corresponding MCInst records. +// This file contains code to lower LoongArch MachineInstrs to their corresponding +// MCInst records. // //===----------------------------------------------------------------------===// -#include "LoongArch.h" -#include "LoongArchSubtarget.h" -#include "llvm/CodeGen/AsmPrinter.h" +#include "LoongArchMCInstLower.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" +#include "MCTargetDesc/LoongArchMCExpr.h" +#include "LoongArchAsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> using namespace llvm; -static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, - const AsmPrinter &AP) { - MCContext &Ctx = AP.OutContext; +LoongArchMCInstLower::LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter) + : AsmPrinter(asmprinter) {} - // TODO: Processing target flags. 
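+// Overview of the lowering flow in this file (descriptive note): Lower()
+// first gives lowerLongBranch() a chance to rewrite the expanded
+// LONG_BRANCH_* pseudos; every other instruction is lowered operand by
+// operand through LowerOperand(), which hands symbolic operands to
+// LowerSymbolOperand() so that machine-operand target flags become
+// LoongArchMCExpr relocation modifiers on the emitted MCInst.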
+void LoongArchMCInstLower::Initialize(MCContext *C) { + Ctx = C; +} - const MCExpr *ME = - MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); +MCOperand LoongArchMCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, + unsigned Offset) const { + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + LoongArchMCExpr::LoongArchExprKind TargetKind = LoongArchMCExpr::MEK_None; + const MCSymbol *Symbol; - if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) - ME = MCBinaryExpr::createAdd( - ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + switch(MO.getTargetFlags()) { + default: + llvm_unreachable("Invalid target flag!"); + case LoongArchII::MO_NO_FLAG: + break; + case LoongArchII::MO_GOT_HI: + TargetKind = LoongArchMCExpr::MEK_GOT_HI; + break; + case LoongArchII::MO_GOT_LO: + TargetKind = LoongArchMCExpr::MEK_GOT_LO; + break; + case LoongArchII::MO_GOT_RRHI: + TargetKind = LoongArchMCExpr::MEK_GOT_RRHI; + break; + case LoongArchII::MO_GOT_RRHIGHER: + TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHER; + break; + case LoongArchII::MO_GOT_RRHIGHEST: + TargetKind = LoongArchMCExpr::MEK_GOT_RRHIGHEST; + break; + case LoongArchII::MO_GOT_RRLO: + TargetKind = LoongArchMCExpr::MEK_GOT_RRLO; + break; + case LoongArchII::MO_PCREL_HI: + TargetKind = LoongArchMCExpr::MEK_PCREL_HI; + break; + case LoongArchII::MO_PCREL_LO: + TargetKind = LoongArchMCExpr::MEK_PCREL_LO; + break; + case LoongArchII::MO_PCREL_RRHI: + TargetKind = LoongArchMCExpr::MEK_PCREL_RRHI; + break; + case LoongArchII::MO_PCREL_RRHIGHER: + TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHER; + break; + case LoongArchII::MO_PCREL_RRHIGHEST: + TargetKind = LoongArchMCExpr::MEK_PCREL_RRHIGHEST; + break; + case LoongArchII::MO_PCREL_RRLO: + TargetKind = LoongArchMCExpr::MEK_PCREL_RRLO; + break; + case LoongArchII::MO_TLSIE_HI: + TargetKind = LoongArchMCExpr::MEK_TLSIE_HI; + break; + case LoongArchII::MO_TLSIE_LO: + TargetKind = LoongArchMCExpr::MEK_TLSIE_LO; + break; + case LoongArchII::MO_TLSIE_RRHI: + TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHI; + break; + case LoongArchII::MO_TLSIE_RRHIGHER: + TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHER; + break; + case LoongArchII::MO_TLSIE_RRHIGHEST: + TargetKind = LoongArchMCExpr::MEK_TLSIE_RRHIGHEST; + break; + case LoongArchII::MO_TLSIE_RRLO: + TargetKind = LoongArchMCExpr::MEK_TLSIE_RRLO; + break; + case LoongArchII::MO_TLSLE_HI: + TargetKind = LoongArchMCExpr::MEK_TLSLE_HI; + break; + case LoongArchII::MO_TLSLE_HIGHER: + TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHER; + break; + case LoongArchII::MO_TLSLE_HIGHEST: + TargetKind = LoongArchMCExpr::MEK_TLSLE_HIGHEST; + break; + case LoongArchII::MO_TLSLE_LO: + TargetKind = LoongArchMCExpr::MEK_TLSLE_LO; + break; + case LoongArchII::MO_TLSGD_HI: + TargetKind = LoongArchMCExpr::MEK_TLSGD_HI; + break; + case LoongArchII::MO_TLSGD_LO: + TargetKind = LoongArchMCExpr::MEK_TLSGD_LO; + break; + case LoongArchII::MO_TLSGD_RRHI: + TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHI; + break; + case LoongArchII::MO_TLSGD_RRHIGHER: + TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHER; + break; + case LoongArchII::MO_TLSGD_RRHIGHEST: + TargetKind = LoongArchMCExpr::MEK_TLSGD_RRHIGHEST; + break; + case LoongArchII::MO_TLSGD_RRLO: + TargetKind = LoongArchMCExpr::MEK_TLSGD_RRLO; + break; + case LoongArchII::MO_ABS_HI: + TargetKind = LoongArchMCExpr::MEK_ABS_HI; + break; + case LoongArchII::MO_ABS_HIGHER: + TargetKind = LoongArchMCExpr::MEK_ABS_HIGHER; + break; + case LoongArchII::MO_ABS_HIGHEST: + TargetKind = 
LoongArchMCExpr::MEK_ABS_HIGHEST; + break; + case LoongArchII::MO_ABS_LO: + TargetKind = LoongArchMCExpr::MEK_ABS_LO; + break; + case LoongArchII::MO_CALL_HI: + TargetKind = LoongArchMCExpr::MEK_CALL_HI; + break; + case LoongArchII::MO_CALL_LO: + TargetKind = LoongArchMCExpr::MEK_CALL_LO; + break; + } - return MCOperand::createExpr(ME); -} + switch (MOTy) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + + case MachineOperand::MO_GlobalAddress: + Symbol = AsmPrinter.getSymbol(MO.getGlobal()); + Offset += MO.getOffset(); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); + Offset += MO.getOffset(); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); + Offset += MO.getOffset(); + break; + + case MachineOperand::MO_MCSymbol: + Symbol = MO.getMCSymbol(); + Offset += MO.getOffset(); + break; + + case MachineOperand::MO_JumpTableIndex: + Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); + Offset += MO.getOffset(); + break; -bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, - MCOperand &MCOp, - const AsmPrinter &AP) { - switch (MO.getType()) { default: - report_fatal_error( - "lowerLoongArchMachineOperandToMCOperand: unknown operand type"); + llvm_unreachable("<unknown operand type>"); + } + + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, *Ctx); + + if (Offset) { + // Assume offset is never negative. + assert(Offset > 0); + + Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, *Ctx), + *Ctx); + } + + if (TargetKind != LoongArchMCExpr::MEK_None) + Expr = LoongArchMCExpr::create(TargetKind, Expr, *Ctx); + + return MCOperand::createExpr(Expr); +} + +MCOperand LoongArchMCInstLower::LowerOperand(const MachineOperand &MO, + unsigned offset) const { + MachineOperandType MOTy = MO.getType(); + + switch (MOTy) { + default: llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all implicit register operands. - if (MO.isImplicit()) - return false; - MCOp = MCOperand::createReg(MO.getReg()); - break; - case MachineOperand::MO_RegisterMask: - // Regmasks are like implicit defs. 
- return false; + if (MO.isImplicit()) break; + return MCOperand::createReg(MO.getReg()); case MachineOperand::MO_Immediate: - MCOp = MCOperand::createImm(MO.getImm()); - break; + return MCOperand::createImm(MO.getImm() + offset); + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_MCSymbol: + case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ConstantPoolIndex: - MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP); + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand(MO, MOTy, offset); + case MachineOperand::MO_RegisterMask: break; - case MachineOperand::MO_GlobalAddress: - MCOp = lowerSymbolOperand(MO, AP.getSymbolPreferLocal(*MO.getGlobal()), AP); + } + + return MCOperand(); +} + +MCOperand LoongArchMCInstLower::createSub(MachineBasicBlock *BB1, + MachineBasicBlock *BB2, + LoongArchMCExpr::LoongArchExprKind Kind) const { + const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::create(BB1->getSymbol(), *Ctx); + const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::create(BB2->getSymbol(), *Ctx); + const MCBinaryExpr *Sub = MCBinaryExpr::createSub(Sym1, Sym2, *Ctx); + + return MCOperand::createExpr(LoongArchMCExpr::create(Kind, Sub, *Ctx)); +} + +void LoongArchMCInstLower::lowerLongBranchADDI(const MachineInstr *MI, + MCInst &OutMI, int Opcode) const { + OutMI.setOpcode(Opcode); + + LoongArchMCExpr::LoongArchExprKind Kind; + unsigned TargetFlags = MI->getOperand(2).getTargetFlags(); + switch (TargetFlags) { + case LoongArchII::MO_ABS_HIGHEST: + Kind = LoongArchMCExpr::MEK_ABS_HIGHEST; break; - case MachineOperand::MO_MachineBasicBlock: - MCOp = lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), AP); + case LoongArchII::MO_ABS_HIGHER: + Kind = LoongArchMCExpr::MEK_ABS_HIGHER; break; - case MachineOperand::MO_ExternalSymbol: - MCOp = lowerSymbolOperand( - MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); + case LoongArchII::MO_ABS_HI: + Kind = LoongArchMCExpr::MEK_ABS_HI; break; - // TODO: lower special operands - case MachineOperand::MO_BlockAddress: - case MachineOperand::MO_JumpTableIndex: + case LoongArchII::MO_ABS_LO: + Kind = LoongArchMCExpr::MEK_ABS_LO; break; + default: + report_fatal_error("Unexpected flags for lowerLongBranchADDI"); + } + + // Lower two register operands. + for (unsigned I = 0, E = 2; I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + OutMI.addOperand(LowerOperand(MO)); + } + + if (MI->getNumOperands() == 3) { + // Lower the basic block address operand. + const MCExpr *Expr = + MCSymbolRefExpr::create(MI->getOperand(2).getMBB()->getSymbol(), *Ctx); + const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx); + OutMI.addOperand(MCOperand::createExpr(LoongArchExpr)); + } else if (MI->getNumOperands() == 4) { + // Create %lo($tgt-$baltgt) or %hi($tgt-$baltgt). 
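+    // The immediate here is the label difference, not a plain symbol: the
+    // long-branch expansion computes the target address relative to the
+    // return point of a preceding bal, e.g. (sketch; register choice is
+    // illustrative):
+    //   addi.d $t8, $t8, %lo($tgt-$baltgt)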
+ OutMI.addOperand(createSub(MI->getOperand(2).getMBB(), + MI->getOperand(3).getMBB(), Kind)); } - return true; } -bool llvm::lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, - MCInst &OutMI, AsmPrinter &AP) { +void LoongArchMCInstLower::lowerLongBranchPCADDU12I(const MachineInstr *MI, + MCInst &OutMI, int Opcode) const { + OutMI.setOpcode(Opcode); + + LoongArchMCExpr::LoongArchExprKind Kind; + unsigned TargetFlags = MI->getOperand(1).getTargetFlags(); + switch (TargetFlags) { + case LoongArchII::MO_PCREL_HI: + Kind = LoongArchMCExpr::MEK_PCREL_HI; + break; + case LoongArchII::MO_PCREL_LO: + Kind = LoongArchMCExpr::MEK_PCREL_LO; + break; + default: + report_fatal_error("Unexpected flags for lowerLongBranchPCADDU12I"); + } + + // Lower the single register operand. + const MachineOperand &MO = MI->getOperand(0); + OutMI.addOperand(LowerOperand(MO)); + + const MCExpr *Expr = + MCSymbolRefExpr::create(MI->getOperand(1).getMBB()->getSymbol(), *Ctx); + const LoongArchMCExpr *LoongArchExpr = LoongArchMCExpr::create(Kind, Expr, *Ctx); + OutMI.addOperand(MCOperand::createExpr(LoongArchExpr)); +} + +bool LoongArchMCInstLower::lowerLongBranch(const MachineInstr *MI, + MCInst &OutMI) const { + switch (MI->getOpcode()) { + default: + return false; + case LoongArch::LONG_BRANCH_ADDIW: + case LoongArch::LONG_BRANCH_ADDIW2Op: + lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_W); + return true; + case LoongArch::LONG_BRANCH_ADDID: + case LoongArch::LONG_BRANCH_ADDID2Op: + lowerLongBranchADDI(MI, OutMI, LoongArch::ADDI_D); + return true; + case LoongArch::LONG_BRANCH_PCADDU12I: + lowerLongBranchPCADDU12I(MI, OutMI, LoongArch::PCADDU12I); + return true; + } +} + +void LoongArchMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + if (lowerLongBranch(MI, OutMI)) + return; + OutMI.setOpcode(MI->getOpcode()); - for (const MachineOperand &MO : MI->operands()) { - MCOperand MCOp; - if (lowerLoongArchMachineOperandToMCOperand(MO, MCOp, AP)) + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp = LowerOperand(MO); + + if (MCOp.isValid()) OutMI.addOperand(MCOp); } - return false; } diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.h b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.h new file mode 100644 index 000000000000..6463a7b64fb0 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.h @@ -0,0 +1,55 @@ +//===- LoongArchMCInstLower.h - Lower MachineInstr to MCInst -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H + +#include "MCTargetDesc/LoongArchMCExpr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineInstr; +class MCContext; +class MCInst; +class MCOperand; +class LoongArchAsmPrinter; + +/// LoongArchMCInstLower - This class is used to lower a MachineInstr into an +/// MCInst. 
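+///
+/// Typical use from the asm printer is along these lines (a sketch; the
+/// member name is illustrative):
+///   MCInstLowering.Initialize(&MF->getContext());
+///   ...
+///   MCInst TmpInst;
+///   MCInstLowering.Lower(MI, TmpInst);
+///   EmitToStreamer(*OutStreamer, TmpInst);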
+class LLVM_LIBRARY_VISIBILITY LoongArchMCInstLower { + using MachineOperandType = MachineOperand::MachineOperandType; + + MCContext *Ctx; + LoongArchAsmPrinter &AsmPrinter; + +public: + LoongArchMCInstLower(LoongArchAsmPrinter &asmprinter); + + void Initialize(MCContext *C); + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; + +private: + MCOperand LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, unsigned Offset) const; + MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2, + LoongArchMCExpr::LoongArchExprKind Kind) const; + void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const; + void lowerLongBranchADDI(const MachineInstr *MI, MCInst &OutMI, + int Opcode) const; + void lowerLongBranchPCADDU12I(const MachineInstr *MI, MCInst &OutMI, + int Opcode) const; + bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMCINSTLOWER_H diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunction.cpp b/llvm/lib/Target/LoongArch/LoongArchMachineFunction.cpp new file mode 100644 index 000000000000..a9c52cbb1294 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunction.cpp @@ -0,0 +1,58 @@ +//===-- LoongArchMachineFunction.cpp - Private data used for LoongArch ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoongArchMachineFunction.h" +#include "MCTargetDesc/LoongArchABIInfo.h" +#include "LoongArchSubtarget.h" +#include "LoongArchTargetMachine.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +LoongArchFunctionInfo::~LoongArchFunctionInfo() = default; + +void LoongArchFunctionInfo::createEhDataRegsFI() { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + for (int I = 0; I < 4; ++I) { + const TargetRegisterClass &RC = + static_cast<const LoongArchTargetMachine &>(MF.getTarget()).getABI().IsLP64() + ? 
LoongArch::GPR64RegClass + : LoongArch::GPR32RegClass; + + EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlign(RC), false); + } +} + +bool LoongArchFunctionInfo::isEhDataRegFI(int FI) const { + return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1] + || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]); +} + +MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const char *ES) { + return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)); +} + +MachinePointerInfo LoongArchFunctionInfo::callPtrInfo(const GlobalValue *GV) { + return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV)); +} + +int LoongArchFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + if (MoveF64ViaSpillFI == -1) { + MoveF64ViaSpillFI = MF.getFrameInfo().CreateStackObject( + TRI.getSpillSize(*RC), TRI.getSpillAlign(*RC), false); + } + return MoveF64ViaSpillFI; +} + +void LoongArchFunctionInfo::anchor() {} diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunction.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunction.h new file mode 100644 index 000000000000..1765013eabd8 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunction.h @@ -0,0 +1,103 @@ +//===- LoongArchMachineFunction.h - Private data used for LoongArch -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the LoongArch specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include <map> + +namespace llvm { + +/// LoongArchFunctionInfo - This class is derived from MachineFunctionInfo and +/// contains private LoongArch target-specific information for each +/// MachineFunction. +class LoongArchFunctionInfo : public MachineFunctionInfo { +public: + LoongArchFunctionInfo(MachineFunction &MF) : MF(MF) {} + + ~LoongArchFunctionInfo() override; + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; } + void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; } + + bool hasByvalArg() const { return HasByvalArg; } + void setFormalArgInfo(unsigned Size, bool HasByval) { + IncomingArgSize = Size; + HasByvalArg = HasByval; + } + + unsigned getIncomingArgSize() const { return IncomingArgSize; } + + bool callsEhReturn() const { return CallsEhReturn; } + void setCallsEhReturn() { CallsEhReturn = true; } + + void createEhDataRegsFI(); + int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; } + bool isEhDataRegFI(int FI) const; + + /// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue + /// object representing a GOT entry for an external function. 
+ MachinePointerInfo callPtrInfo(const char *ES); + + /// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object + /// representing a GOT entry for a global function. + MachinePointerInfo callPtrInfo(const GlobalValue *GV); + + void setSaveS2() { SaveS2 = true; } + bool hasSaveS2() const { return SaveS2; } + + int getMoveF64ViaSpillFI(const TargetRegisterClass *RC); + +private: + virtual void anchor(); + + MachineFunction& MF; + + /// SRetReturnReg - Some subtargets require that sret lowering includes + /// returning the value of the returned struct in a register. This field + /// holds the virtual register into which the sret argument is passed. + unsigned SRetReturnReg = 0; + + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex = 0; + int VarArgsSaveSize = 0; + + /// True if function has a byval argument. + bool HasByvalArg; + + /// Size of incoming argument area. + unsigned IncomingArgSize; + + /// CallsEhReturn - Whether the function calls llvm.eh.return. + bool CallsEhReturn = false; + + /// Frame objects for spilling eh data registers. + int EhDataRegFI[4]; + + // saveS2 + bool SaveS2 = false; + + /// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the + /// LP32 FPXX ABI is enabled. -1 is used to denote invalid index. + int MoveF64ViaSpillFI = -1; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTION_H diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h deleted file mode 100644 index d4a6c884bc9d..000000000000 --- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +++ /dev/null @@ -1,57 +0,0 @@ -//=- LoongArchMachineFunctionInfo.h - LoongArch machine function info -----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares LoongArch-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H -#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H - -#include "LoongArchSubtarget.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" - -namespace llvm { - -/// LoongArchMachineFunctionInfo - This class is derived from -/// MachineFunctionInfo and contains private LoongArch-specific information for -/// each MachineFunction. 
-class LoongArchMachineFunctionInfo : public MachineFunctionInfo { -private: - /// FrameIndex for start of varargs area - int VarArgsFrameIndex = 0; - /// Size of the save area used for varargs - int VarArgsSaveSize = 0; - - /// Size of stack frame to save callee saved registers - unsigned CalleeSavedStackSize = 0; - -public: - LoongArchMachineFunctionInfo(const MachineFunction &MF) {} - - MachineFunctionInfo * - clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, - const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB) - const override { - return DestMF.cloneInfo<LoongArchMachineFunctionInfo>(*this); - } - - int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } - void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } - - unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; } - void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; } - - unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } - void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } -}; - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHMACHINEFUNCTIONINFO_H diff --git a/llvm/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp new file mode 100644 index 000000000000..8dbf30f2143e --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchModuleISelDAGToDAG.cpp @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// Instruction Selector Subtarget Control +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// This file defines a pass used to change the subtarget for the +// LoongArch Instruction selector. +// +//===----------------------------------------------------------------------===// + +#include "LoongArch.h" +#include "LoongArchTargetMachine.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarch-isel" + +namespace { + class LoongArchModuleDAGToDAGISel : public MachineFunctionPass { + public: + static char ID; + + LoongArchModuleDAGToDAGISel() : MachineFunctionPass(ID) {} + + // Pass Name + StringRef getPassName() const override { + return "LoongArch DAG->DAG Pattern Instruction Selection"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetPassConfig>(); + AU.addPreserved<StackProtector>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + }; + + char LoongArchModuleDAGToDAGISel::ID = 0; +} + +bool LoongArchModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + LLVM_DEBUG(errs() << "In LoongArchModuleDAGToDAGISel::runOnMachineFunction\n"); + return false; +} + +llvm::FunctionPass *llvm::createLoongArchModuleISelDagPass() { + return new LoongArchModuleDAGToDAGISel(); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp index 05902ebb7ba6..4d1a3cf22405 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- LoongArchRegisterInfo.cpp - LoongArch Register Information -*- C++ -*-=// +//===- LoongArchRegisterInfo.cpp - LoongArch Register Information --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. @@ -6,132 +6,352 @@ // //===----------------------------------------------------------------------===// // -// This file contains the LoongArch implementation of the TargetRegisterInfo -// class. +// This file contains the LoongArch implementation of the TargetRegisterInfo class. // //===----------------------------------------------------------------------===// #include "LoongArchRegisterInfo.h" +#include "MCTargetDesc/LoongArchABIInfo.h" #include "LoongArch.h" +#include "LoongArchMachineFunction.h" #include "LoongArchSubtarget.h" +#include "LoongArchTargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdint> using namespace llvm; +#define DEBUG_TYPE "loongarch-reg-info" + #define GET_REGINFO_TARGET_DESC #include "LoongArchGenRegisterInfo.inc" -LoongArchRegisterInfo::LoongArchRegisterInfo(unsigned HwMode) - : LoongArchGenRegisterInfo(LoongArch::R1, /*DwarfFlavour*/ 0, - /*EHFlavor*/ 0, - /*PC*/ 0, HwMode) {} +LoongArchRegisterInfo::LoongArchRegisterInfo() + : LoongArchGenRegisterInfo(LoongArch::RA) {} -const MCPhysReg * -LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - auto &Subtarget = MF->getSubtarget<LoongArchSubtarget>(); +unsigned LoongArchRegisterInfo::getPICCallReg() { return LoongArch::T8; } + +const TargetRegisterClass * +LoongArchRegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + LoongArchABIInfo ABI = MF.getSubtarget<LoongArchSubtarget>().getABI(); + LoongArchPtrClass PtrClassKind = static_cast<LoongArchPtrClass>(Kind); + + switch (PtrClassKind) { + case LoongArchPtrClass::Default: + return ABI.ArePtrs64bit() ? &LoongArch::GPR64RegClass : &LoongArch::GPR32RegClass; + case LoongArchPtrClass::StackPointer: + return ABI.ArePtrs64bit() ? 
&LoongArch::SP64RegClass : &LoongArch::SP32RegClass; + } + + llvm_unreachable("Unknown pointer kind"); +} - switch (Subtarget.getTargetABI()) { +unsigned +LoongArchRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + switch (RC->getID()) { default: - llvm_unreachable("Unrecognized ABI"); - case LoongArchABI::ABI_ILP32S: - case LoongArchABI::ABI_LP64S: - return CSR_ILP32S_LP64S_SaveList; - case LoongArchABI::ABI_ILP32F: - case LoongArchABI::ABI_LP64F: - return CSR_ILP32F_LP64F_SaveList; - case LoongArchABI::ABI_ILP32D: - case LoongArchABI::ABI_LP64D: - return CSR_ILP32D_LP64D_SaveList; + return 0; + case LoongArch::GPR32RegClassID: + case LoongArch::GPR64RegClassID: + { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + return 28 - TFI->hasFP(MF); } + case LoongArch::FGR32RegClassID: + return 32; + case LoongArch::FGR64RegClassID: + return 32; + } +} + +//===----------------------------------------------------------------------===// +// Callee Saved Registers methods +//===----------------------------------------------------------------------===// + +/// LoongArch Callee Saved Registers +const MCPhysReg * +LoongArchRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const LoongArchSubtarget &Subtarget = MF->getSubtarget<LoongArchSubtarget>(); + + if (Subtarget.isSingleFloat()) + return CSR_SingleFloatOnly_SaveList; + + if (Subtarget.isABI_LP64()) + return CSR_LP64_SaveList; + + if (Subtarget.isABI_LPX32()) + return CSR_LPX32_SaveList; + + return CSR_LP32_SaveList; } const uint32_t * LoongArchRegisterInfo::getCallPreservedMask(const MachineFunction &MF, - CallingConv::ID CC) const { - auto &Subtarget = MF.getSubtarget<LoongArchSubtarget>(); + CallingConv::ID) const { + const LoongArchSubtarget &Subtarget = MF.getSubtarget<LoongArchSubtarget>(); - switch (Subtarget.getTargetABI()) { - default: - llvm_unreachable("Unrecognized ABI"); - case LoongArchABI::ABI_ILP32S: - case LoongArchABI::ABI_LP64S: - return CSR_ILP32S_LP64S_RegMask; - case LoongArchABI::ABI_ILP32F: - case LoongArchABI::ABI_LP64F: - return CSR_ILP32F_LP64F_RegMask; - case LoongArchABI::ABI_ILP32D: - case LoongArchABI::ABI_LP64D: - return CSR_ILP32D_LP64D_RegMask; - } -} + if (Subtarget.isSingleFloat()) + return CSR_SingleFloatOnly_RegMask; + + if (Subtarget.isABI_LP64()) + return CSR_LP64_RegMask; -const uint32_t *LoongArchRegisterInfo::getNoPreservedMask() const { - return CSR_NoRegs_RegMask; + return CSR_LP32_RegMask; } -BitVector -LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const { - const LoongArchFrameLowering *TFI = getFrameLowering(MF); +BitVector LoongArchRegisterInfo:: +getReservedRegs(const MachineFunction &MF) const { + static const MCPhysReg ReservedGPR32[] = { + LoongArch::ZERO, LoongArch::SP, LoongArch::TP, LoongArch::T9 + }; + + static const MCPhysReg ReservedGPR64[] = { + LoongArch::ZERO_64, LoongArch::SP_64, LoongArch::TP_64, LoongArch::T9_64 + }; + BitVector Reserved(getNumRegs()); + const LoongArchSubtarget &Subtarget = MF.getSubtarget<LoongArchSubtarget>(); + + for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I) + Reserved.set(ReservedGPR32[I]); + + for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I) + Reserved.set(ReservedGPR64[I]); + + // Reserve FP if this function should have a dedicated frame pointer register. + if (Subtarget.getFrameLowering()->hasFP(MF)) { + Reserved.set(LoongArch::FP); + Reserved.set(LoongArch::FP_64); + + // Reserve the base register if we need to both realign the stack and + // allocate variable-sized objects at runtime. 
This should test the + // same conditions as LoongArchFrameLowering::hasBP(). + if (hasStackRealignment(MF) && MF.getFrameInfo().hasVarSizedObjects()) { + Reserved.set(LoongArch::S7); + Reserved.set(LoongArch::S7_64); + } + } - // Use markSuperRegs to ensure any register aliases are also reserved - markSuperRegs(Reserved, LoongArch::R0); // zero - markSuperRegs(Reserved, LoongArch::R2); // tp - markSuperRegs(Reserved, LoongArch::R3); // sp - markSuperRegs(Reserved, LoongArch::R21); // non-allocatable - if (TFI->hasFP(MF)) - markSuperRegs(Reserved, LoongArch::R22); // fp - // Reserve the base register if we need to realign the stack and allocate - // variable-sized objects at runtime. - if (TFI->hasBP(MF)) - markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp - - assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } -bool LoongArchRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { - return PhysReg == LoongArch::R0; +bool +LoongArchRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + return true; } -Register -LoongArchRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = getFrameLowering(MF); - return TFI->hasFP(MF) ? LoongArch::R22 : LoongArch::R3; +bool LoongArchRegisterInfo:: +requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; } -void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, - unsigned FIOperandNum, - RegScavenger *RS) const { - // TODO: this implementation is a temporary placeholder which does just - // enough to allow other aspects of code generation to be tested. +bool +LoongArchRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; +} + +/// Get the size of the offset supported by the given load/store/inline asm. +/// The result includes the effects of any scale factors applied to the +/// instruction immediate. +static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode, + MachineOperand MO) { + switch (Opcode) { + case LoongArch::LDPTR_W: + case LoongArch::LDPTR_W32: + case LoongArch::LDPTR_D: + case LoongArch::STPTR_W: + case LoongArch::STPTR_W32: + case LoongArch::STPTR_D: + case LoongArch::LL_W: + case LoongArch::LL_D: + case LoongArch::SC_W: + case LoongArch::SC_D: + return 14 + 2 /* scale factor */; + case LoongArch::INLINEASM: { + unsigned ConstraintID = InlineAsm::getMemoryConstraintID(MO.getImm()); + switch (ConstraintID) { + case InlineAsm::Constraint_ZC: { + return 14 + 2 /* scale factor */; + } + default: + return 12; + } + } + default: + return 12; + } +} - assert(SPAdj == 0 && "Unexpected non-zero SPAdj value"); +/// Get the scale factor applied to the immediate in the given load/store. +static inline unsigned getLoadStoreOffsetAlign(const unsigned Opcode) { + switch (Opcode) { + case LoongArch::LDPTR_W: + case LoongArch::LDPTR_W32: + case LoongArch::LDPTR_D: + case LoongArch::STPTR_W: + case LoongArch::STPTR_W32: + case LoongArch::STPTR_D: + case LoongArch::LL_W: + case LoongArch::LL_D: + case LoongArch::SC_W: + case LoongArch::SC_D: + return 4; + default: + return 1; + } +} +// A FrameIndex represents an object inside an abstract stack. We must +// replace it with a direct stack- or frame-pointer reference. 
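+// For example (sketch), a spill like
+//   st.d $a0, %stack.0, 0
+// with %stack.0 at offset -16 from the frame pointer becomes
+//   st.d $a0, $fp, -16
+// when the offset fits the instruction's immediate field; larger offsets are
+// first materialized into a scratch register (see below).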
+void LoongArchRegisterInfo:: +eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - DebugLoc DL = MI.getDebugLoc(); + const LoongArchFrameLowering *TFI = getFrameLowering(MF); + + LLVM_DEBUG(errs() << "\nFunction : " << MF.getName() << "\n"; + errs() << "<--------->\n" + << MI); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + uint64_t stackSize = MF.getFrameInfo().getStackSize(); + int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex); + + LLVM_DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" + << "spOffset : " << spOffset << "\n" + << "stackSize : " << stackSize << "\n" + << "SPAdj : " << SPAdj << "\n" + << "alignment : " + << DebugStr(MF.getFrameInfo().getObjectAlign(FrameIndex)) + << "\n"); + + LoongArchABIInfo ABI = + static_cast<const LoongArchTargetMachine &>(MF.getTarget()).getABI(); + + // Everything else is referenced relative to whatever register + // getFrameIndexReference() returns. Register FrameReg; StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) + StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); - // Offsets must be encodable with a 12-bit immediate field. - if (!isInt<12>(Offset.getFixed())) { - report_fatal_error("Frame offsets outside of the signed 12-bit range is " - "not supported currently"); + LLVM_DEBUG(errs() << "Location : " + << "FrameReg<" << FrameReg << "> + " << Offset.getFixed() + << "\n<--------->\n"); + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = II->getDebugLoc(); + bool IsKill = false; + + if (!MI.isDebugValue()) { + // Make sure Offset fits within the field available. + // For ldptr/stptr/ll/sc instructions, this is a 14-bit signed immediate + // (scaled by 2), otherwise it is a 12-bit signed immediate. + unsigned OffsetBitSize = getLoadStoreOffsetSizeInBits( + MI.getOpcode(), MI.getOperand(FIOperandNum - 1)); + const Align OffsetAlign(getLoadStoreOffsetAlign(MI.getOpcode())); + + if (OffsetBitSize == 16 && isInt<12>(Offset.getFixed()) && + !isAligned(OffsetAlign, Offset.getFixed())) { + // If we have an offset that needs to fit into a signed n-bit immediate + // (where n == 16) but is not aligned for the scaled form, and it does + // fit into 12 bits, then use an ADDI first. + const TargetRegisterClass *PtrRC = ABI.ArePtrs64bit() + ? &LoongArch::GPR64RegClass + : &LoongArch::GPR32RegClass; + MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); + unsigned Reg = RegInfo.createVirtualRegister(PtrRC); + const LoongArchInstrInfo &TII = *static_cast<const LoongArchInstrInfo *>( + MBB.getParent()->getSubtarget().getInstrInfo()); + BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddiOp()), Reg) + .addReg(FrameReg) + .addImm(Offset.getFixed()); + + FrameReg = Reg; + Offset = StackOffset::getFixed(0); + IsKill = true; + } else if (!isInt<12>(Offset.getFixed())) { + // Otherwise split the offset into several pieces and add it in multiple + // instructions.
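+ // (Sketch with a hypothetical scratch register: for an offset of 0x12345 + // the loadImmediate() call below would emit something like + // lu12i.w $t0, 0x12 ; ori $t0, $t0, 0x345 + // and the ADD below then folds $t0 into the frame register, leaving the + // memory operand with a zero offset.)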
+ const LoongArchInstrInfo &TII = *static_cast( + MBB.getParent()->getSubtarget().getInstrInfo()); + unsigned Reg = TII.loadImmediate(Offset.getFixed(), MBB, II, DL); + BuildMI(MBB, II, DL, TII.get(ABI.GetPtrAddOp()), Reg) + .addReg(FrameReg) + .addReg(Reg, RegState::Kill); + + FrameReg = Reg; + Offset = StackOffset::getFixed(0); + IsKill = true; + } } - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, IsKill); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); } + +Register LoongArchRegisterInfo:: +getFrameRegister(const MachineFunction &MF) const { + const LoongArchSubtarget &Subtarget = MF.getSubtarget(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + bool IsLP64 = + static_cast(MF.getTarget()).getABI().IsLP64(); + + return TFI->hasFP(MF) ? (IsLP64 ? LoongArch::FP_64 : LoongArch::FP) : + (IsLP64 ? LoongArch::SP_64 : LoongArch::SP); +} + +const TargetRegisterClass * +LoongArchRegisterInfo::intRegClass(unsigned Size) const { + if (Size == 4) + return &LoongArch::GPR32RegClass; + + assert(Size == 8); + return &LoongArch::GPR64RegClass; +} + +bool LoongArchRegisterInfo::canRealignStack(const MachineFunction &MF) const { + // Avoid realigning functions that explicitly do not want to be realigned. + // Normally, we should report an error when a function should be dynamically + // realigned but also has the attribute no-realign-stack. Unfortunately, + // with this attribute, MachineFrameInfo clamps each new object's alignment + // to that of the stack's alignment as specified by the ABI. As a result, + // the information of whether we have objects with larger alignment + // requirement than the stack's alignment is already lost at this point. + if (!TargetRegisterInfo::canRealignStack(MF)) + return false; + + const LoongArchSubtarget &Subtarget = MF.getSubtarget(); + unsigned FP = Subtarget.is64Bit() ? LoongArch::FP_64 : LoongArch::FP; + unsigned BP = Subtarget.is64Bit() ? LoongArch::S7_64 : LoongArch::S7; + + // We can't perform dynamic stack realignment if we can't reserve the + // frame pointer register. + if (!MF.getRegInfo().canReserveReg(FP)) + return false; + + // We can realign the stack if we know the maximum call frame size and we + // don't have variable sized objects. + if (Subtarget.getFrameLowering()->hasReservedCallFrame(MF)) + return true; + + // We have to reserve the base pointer register in the presence of variable + // sized objects. + return MF.getRegInfo().canReserveReg(BP); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h index cca130c3bc3a..dd3be916a73c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h @@ -1,4 +1,4 @@ -//= LoongArchRegisterInfo.h - LoongArch Register Information Impl -*- C++ -*-=// +//===- LoongArchRegisterInfo.h - LoongArch Register Information Impl ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,45 +6,75 @@ // //===----------------------------------------------------------------------===// // -// This file contains the LoongArch implementation of the TargetRegisterInfo -// class. +// This file contains the LoongArch implementation of the TargetRegisterInfo class. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H -#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "LoongArch.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include #define GET_REGINFO_HEADER #include "LoongArchGenRegisterInfo.inc" namespace llvm { -struct LoongArchRegisterInfo : public LoongArchGenRegisterInfo { +class TargetRegisterClass; - LoongArchRegisterInfo(unsigned HwMode); +class LoongArchRegisterInfo : public LoongArchGenRegisterInfo { +public: + enum class LoongArchPtrClass { + /// The default register class for integer values. + Default = 0, + /// The stack pointer only. + StackPointer = 1, + }; + LoongArchRegisterInfo(); + + /// Get PIC indirect call register + static unsigned getPICCallReg(); + + /// Code Generation virtual methods... + const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const override; + + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const override; const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; - const uint32_t *getNoPreservedMask() const override; - BitVector getReservedRegs(const MachineFunction &MF) const override; - bool isConstantPhysReg(MCRegister PhysReg) const override; - const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override { - return &LoongArch::GPRRegClass; - } + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; - void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, - unsigned FIOperandNum, + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; + + /// Stack Frame Processing Methods + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; + // Stack realignment queries. + bool canRealignStack(const MachineFunction &MF) const override; + + /// Debug information queries. Register getFrameRegister(const MachineFunction &MF) const override; + + /// Return GPR register class. + const TargetRegisterClass *intRegClass(unsigned Size) const; + +private: + void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int SPAdj, int64_t SPOffset) const; }; + } // end namespace llvm #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHREGISTERINFO_H diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td index 2d5ad99f6156..96569e0756c3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td @@ -1,4 +1,4 @@ -//===-- LoongArchRegisterInfo.td - LoongArch Register defs -*- tablegen -*-===// +//===-- LoongArchRegisterInfo.td - LoongArch Register defs -----------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -7,155 +7,367 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Declarations that describe the LoongArch register files +// Declarations that describe the LoongArch register file //===----------------------------------------------------------------------===// - let Namespace = "LoongArch" in { -class LoongArchReg Enc, string n, list alt = []> - : Register { +def sub_32 : SubRegIndex<32>; +def sub_64 : SubRegIndex<64>; +def sub_128 : SubRegIndex<128>; +def sub_fcsr1 : SubRegIndex<5>; +def sub_fcsr2 : SubRegIndex<13, 16>; +def sub_fcsr3 : SubRegIndex<2, 8>; +def sub_lo : SubRegIndex<32>; +def sub_hi : SubRegIndex<32, 32>; +def PC : Register<"pc">; +} + +class Unallocatable { + bit isAllocatable = 0; +} + +/// We have banks of registers each. +class LoongArchReg Enc, string n> : Register { let HWEncoding = Enc; - let AltNames = alt; + let Namespace = "LoongArch"; } -class LoongArchReg32 Enc, string n, list alt = []> - : Register { +class LoongArchRegWithSubRegs Enc, string n, list subregs> + : RegisterWithSubRegs { let HWEncoding = Enc; - let AltNames = alt; + let Namespace = "LoongArch"; } -def sub_32 : SubRegIndex<32>; -class LoongArchReg64 - : Register<""> { - let HWEncoding = subreg.HWEncoding; - let SubRegs = [subreg]; +/// LoongArch 32-bit CPU Registers. +class LoongArch32GPR Enc, string n> : LoongArchReg; + +/// LoongArch 64-bit CPU Registers. +class LoongArch64GPR Enc, string n, list subregs> + : LoongArchRegWithSubRegs { let SubRegIndices = [sub_32]; - let AsmName = subreg.AsmName; - let AltNames = subreg.AltNames; -} - -let FallbackRegAltNameIndex = NoRegAltName in -def RegAliasName : RegAltNameIndex; -} // Namespace = "LoongArch" - -// Integer registers - -let RegAltNameIndices = [RegAliasName] in { - def R0 : LoongArchReg<0, "r0", ["zero"]>, DwarfRegNum<[0]>; - def R1 : LoongArchReg<1, "r1", ["ra"]>, DwarfRegNum<[1]>; - def R2 : LoongArchReg<2, "r2", ["tp"]>, DwarfRegNum<[2]>; - def R3 : LoongArchReg<3, "r3", ["sp"]>, DwarfRegNum<[3]>; - def R4 : LoongArchReg<4, "r4", ["a0"]>, DwarfRegNum<[4]>; - def R5 : LoongArchReg<5, "r5", ["a1"]>, DwarfRegNum<[5]>; - def R6 : LoongArchReg<6, "r6", ["a2"]>, DwarfRegNum<[6]>; - def R7 : LoongArchReg<7, "r7", ["a3"]>, DwarfRegNum<[7]>; - def R8 : LoongArchReg<8, "r8", ["a4"]>, DwarfRegNum<[8]>; - def R9 : LoongArchReg<9, "r9", ["a5"]>, DwarfRegNum<[9]>; - def R10 : LoongArchReg<10, "r10", ["a6"]>, DwarfRegNum<[10]>; - def R11 : LoongArchReg<11, "r11", ["a7"]>, DwarfRegNum<[11]>; - def R12 : LoongArchReg<12, "r12", ["t0"]>, DwarfRegNum<[12]>; - def R13 : LoongArchReg<13, "r13", ["t1"]>, DwarfRegNum<[13]>; - def R14 : LoongArchReg<14, "r14", ["t2"]>, DwarfRegNum<[14]>; - def R15 : LoongArchReg<15, "r15", ["t3"]>, DwarfRegNum<[15]>; - def R16 : LoongArchReg<16, "r16", ["t4"]>, DwarfRegNum<[16]>; - def R17 : LoongArchReg<17, "r17", ["t5"]>, DwarfRegNum<[17]>; - def R18 : LoongArchReg<18, "r18", ["t6"]>, DwarfRegNum<[18]>; - def R19 : LoongArchReg<19, "r19", ["t7"]>, DwarfRegNum<[19]>; - def R20 : LoongArchReg<20, "r20", ["t8"]>, DwarfRegNum<[20]>; - def R21 : LoongArchReg<21, "r21", [""]>, DwarfRegNum<[21]>; - def R22 : LoongArchReg<22, "r22", ["fp", "s9"]>, DwarfRegNum<[22]>; - def R23 : LoongArchReg<23, "r23", ["s0"]>, DwarfRegNum<[23]>; - def R24 : LoongArchReg<24, "r24", ["s1"]>, DwarfRegNum<[24]>; - def R25 : LoongArchReg<25, "r25", ["s2"]>, DwarfRegNum<[25]>; - def R26 : LoongArchReg<26, "r26", ["s3"]>, DwarfRegNum<[26]>; - 
def R27 : LoongArchReg<27, "r27", ["s4"]>, DwarfRegNum<[27]>; - def R28 : LoongArchReg<28, "r28", ["s5"]>, DwarfRegNum<[28]>; - def R29 : LoongArchReg<29, "r29", ["s6"]>, DwarfRegNum<[29]>; - def R30 : LoongArchReg<30, "r30", ["s7"]>, DwarfRegNum<[30]>; - def R31 : LoongArchReg<31, "r31", ["s8"]>, DwarfRegNum<[31]>; -} // RegAltNameIndices = [RegAliasName] - -def GRLenVT : ValueTypeByHwMode<[LA32, LA64], - [i32, i64]>; -def GRLenRI : RegInfoByHwMode< - [LA32, LA64], - [RegInfo<32,32,32>, RegInfo<64,64,64>]>; - -// The order of registers represents the preferred allocation sequence. -// Registers are listed in the order caller-save, callee-save, specials. -def GPR : RegisterClass<"LoongArch", [GRLenVT], 32, (add - // Argument registers (a0...a7) - (sequence "R%u", 4, 11), - // Temporary registers (t0...t8) - (sequence "R%u", 12, 20), - // Static register (s9/fp, s0...s8) - (sequence "R%u", 22, 31), - // Specials (r0, ra, tp, sp) - (sequence "R%u", 0, 3), - // Reserved (Non-allocatable) - R21 - )> { - let RegInfos = GRLenRI; -} - -// Floating point registers - -let RegAltNameIndices = [RegAliasName] in { - def F0 : LoongArchReg32<0, "f0", ["fa0"]>, DwarfRegNum<[32]>; - def F1 : LoongArchReg32<1, "f1", ["fa1"]>, DwarfRegNum<[33]>; - def F2 : LoongArchReg32<2, "f2", ["fa2"]>, DwarfRegNum<[34]>; - def F3 : LoongArchReg32<3, "f3", ["fa3"]>, DwarfRegNum<[35]>; - def F4 : LoongArchReg32<4, "f4", ["fa4"]>, DwarfRegNum<[36]>; - def F5 : LoongArchReg32<5, "f5", ["fa5"]>, DwarfRegNum<[37]>; - def F6 : LoongArchReg32<6, "f6", ["fa6"]>, DwarfRegNum<[38]>; - def F7 : LoongArchReg32<7, "f7", ["fa7"]>, DwarfRegNum<[39]>; - def F8 : LoongArchReg32<8, "f8", ["ft0"]>, DwarfRegNum<[40]>; - def F9 : LoongArchReg32<9, "f9", ["ft1"]>, DwarfRegNum<[41]>; - def F10 : LoongArchReg32<10,"f10", ["ft2"]>, DwarfRegNum<[42]>; - def F11 : LoongArchReg32<11,"f11", ["ft3"]>, DwarfRegNum<[43]>; - def F12 : LoongArchReg32<12,"f12", ["ft4"]>, DwarfRegNum<[44]>; - def F13 : LoongArchReg32<13,"f13", ["ft5"]>, DwarfRegNum<[45]>; - def F14 : LoongArchReg32<14,"f14", ["ft6"]>, DwarfRegNum<[46]>; - def F15 : LoongArchReg32<15,"f15", ["ft7"]>, DwarfRegNum<[47]>; - def F16 : LoongArchReg32<16,"f16", ["ft8"]>, DwarfRegNum<[48]>; - def F17 : LoongArchReg32<17,"f17", ["ft9"]>, DwarfRegNum<[49]>; - def F18 : LoongArchReg32<18,"f18", ["ft10"]>, DwarfRegNum<[50]>; - def F19 : LoongArchReg32<19,"f19", ["ft11"]>, DwarfRegNum<[51]>; - def F20 : LoongArchReg32<20,"f20", ["ft12"]>, DwarfRegNum<[52]>; - def F21 : LoongArchReg32<21,"f21", ["ft13"]>, DwarfRegNum<[53]>; - def F22 : LoongArchReg32<22,"f22", ["ft14"]>, DwarfRegNum<[54]>; - def F23 : LoongArchReg32<23,"f23", ["ft15"]>, DwarfRegNum<[55]>; - def F24 : LoongArchReg32<24,"f24", ["fs0"]>, DwarfRegNum<[56]>; - def F25 : LoongArchReg32<25,"f25", ["fs1"]>, DwarfRegNum<[57]>; - def F26 : LoongArchReg32<26,"f26", ["fs2"]>, DwarfRegNum<[58]>; - def F27 : LoongArchReg32<27,"f27", ["fs3"]>, DwarfRegNum<[59]>; - def F28 : LoongArchReg32<28,"f28", ["fs4"]>, DwarfRegNum<[60]>; - def F29 : LoongArchReg32<29,"f29", ["fs5"]>, DwarfRegNum<[61]>; - def F30 : LoongArchReg32<30,"f30", ["fs6"]>, DwarfRegNum<[62]>; - def F31 : LoongArchReg32<31,"f31", ["fs7"]>, DwarfRegNum<[63]>; - - foreach I = 0-31 in { - def F#I#_64 : LoongArchReg64("F"#I)>, - DwarfRegNum<[!add(I, 32)]>; - } -} - -// The order of registers represents the preferred allocation sequence. 
-def FPR32 : RegisterClass<"LoongArch", [f32], 32, (sequence "F%u", 0, 31)>; -def FPR64 : RegisterClass<"LoongArch", [f64], 64, (sequence "F%u_64", 0, 31)>; - -// Condition flag registers +} + +/// LoongArch 64-bit Floating-point Registers +class FGR32 Enc, string n> : LoongArchReg; +class FGR64 Enc, string n, list subregs> + : LoongArchRegWithSubRegs { + let SubRegIndices = [sub_lo]; +} + +// LoongArch 128-bit (aliased) LSX Registers +class LSX128 Enc, string n, list subregs> + : LoongArchRegWithSubRegs { + let SubRegIndices = [sub_64]; +} + +// LoongArch 256-bit (aliased) LASX Registers +class LASX256 Enc, string n, list subregs> + : LoongArchRegWithSubRegs { + let SubRegIndices = [sub_128]; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +/// General Purpose 32-bit Registers +def ZERO : LoongArch32GPR<0, "zero">, + DwarfRegNum<[0]>; +def RA : LoongArch32GPR<1, "ra">, DwarfRegNum<[1]>; +def TP : LoongArch32GPR<2, "tp">, DwarfRegNum<[2]>; +def SP : LoongArch32GPR<3, "sp">, DwarfRegNum<[3]>; +def A0 : LoongArch32GPR<4, "r4">, DwarfRegNum<[4]>; +def A1 : LoongArch32GPR<5, "r5">, DwarfRegNum<[5]>; +def A2 : LoongArch32GPR<6, "r6">, DwarfRegNum<[6]>; +def A3 : LoongArch32GPR<7, "r7">, DwarfRegNum<[7]>; +def A4 : LoongArch32GPR<8, "r8">, DwarfRegNum<[8]>; +def A5 : LoongArch32GPR<9, "r9">, DwarfRegNum<[9]>; +def A6 : LoongArch32GPR<10, "r10">, DwarfRegNum<[10]>; +def A7 : LoongArch32GPR<11, "r11">, DwarfRegNum<[11]>; +def T0 : LoongArch32GPR<12, "r12">, DwarfRegNum<[12]>; +def T1 : LoongArch32GPR<13, "r13">, DwarfRegNum<[13]>; +def T2 : LoongArch32GPR<14, "r14">, DwarfRegNum<[14]>; +def T3 : LoongArch32GPR<15, "r15">, DwarfRegNum<[15]>; +def T4 : LoongArch32GPR<16, "r16">, DwarfRegNum<[16]>; +def T5 : LoongArch32GPR<17, "r17">, DwarfRegNum<[17]>; +def T6 : LoongArch32GPR<18, "r18">, DwarfRegNum<[18]>; +def T7 : LoongArch32GPR<19, "r19">, DwarfRegNum<[19]>; +def T8 : LoongArch32GPR<20, "r20">, DwarfRegNum<[20]>; +def T9 : LoongArch32GPR<21, "r21">, DwarfRegNum<[21]>; +def FP : LoongArch32GPR<22, "r22">, DwarfRegNum<[22]>; +def S0 : LoongArch32GPR<23, "r23">, DwarfRegNum<[23]>; +def S1 : LoongArch32GPR<24, "r24">, DwarfRegNum<[24]>; +def S2 : LoongArch32GPR<25, "r25">, DwarfRegNum<[25]>; +def S3 : LoongArch32GPR<26, "r26">, DwarfRegNum<[26]>; +def S4 : LoongArch32GPR<27, "r27">, DwarfRegNum<[27]>; +def S5 : LoongArch32GPR<28, "r28">, DwarfRegNum<[28]>; +def S6 : LoongArch32GPR<29, "r29">, DwarfRegNum<[29]>; +def S7 : LoongArch32GPR<30, "r30">, DwarfRegNum<[30]>; +def S8 : LoongArch32GPR<31, "r31">, DwarfRegNum<[31]>; + +let SubRegIndices = [sub_32] in { +def V0 : LoongArchRegWithSubRegs<4, "r4", [A0]>, DwarfRegNum<[4]>; +def V1 : LoongArchRegWithSubRegs<5, "r5", [A1]>, DwarfRegNum<[5]>; +} + +/// General Purpose 64-bit Registers +def ZERO_64 : LoongArch64GPR<0, "zero", [ZERO]>, DwarfRegNum<[0]>; +def RA_64 : LoongArch64GPR<1, "ra", [RA]>, DwarfRegNum<[1]>; +def TP_64 : LoongArch64GPR<2, "tp", [TP]>, DwarfRegNum<[2]>; +def SP_64 : LoongArch64GPR<3, "sp", [SP]>, DwarfRegNum<[3]>; +def A0_64 : LoongArch64GPR<4, "r4", [A0]>, DwarfRegNum<[4]>; +def A1_64 : LoongArch64GPR<5, "r5", [A1]>, DwarfRegNum<[5]>; +def A2_64 : LoongArch64GPR<6, "r6", [A2]>, DwarfRegNum<[6]>; +def A3_64 : LoongArch64GPR<7, "r7", [A3]>, DwarfRegNum<[7]>; +def A4_64 : LoongArch64GPR<8, "r8", [A4]>, DwarfRegNum<[8]>; +def A5_64 : LoongArch64GPR<9, "r9", [A5]>, DwarfRegNum<[9]>; +def A6_64 : 
LoongArch64GPR<10, "r10", [A6]>, DwarfRegNum<[10]>; +def A7_64 : LoongArch64GPR<11, "r11", [A7]>, DwarfRegNum<[11]>; +def T0_64 : LoongArch64GPR<12, "r12", [T0]>, DwarfRegNum<[12]>; +def T1_64 : LoongArch64GPR<13, "r13", [T1]>, DwarfRegNum<[13]>; +def T2_64 : LoongArch64GPR<14, "r14", [T2]>, DwarfRegNum<[14]>; +def T3_64 : LoongArch64GPR<15, "r15", [T3]>, DwarfRegNum<[15]>; +def T4_64 : LoongArch64GPR<16, "r16", [T4]>, DwarfRegNum<[16]>; +def T5_64 : LoongArch64GPR<17, "r17", [T5]>, DwarfRegNum<[17]>; +def T6_64 : LoongArch64GPR<18, "r18", [T6]>, DwarfRegNum<[18]>; +def T7_64 : LoongArch64GPR<19, "r19", [T7]>, DwarfRegNum<[19]>; +def T8_64 : LoongArch64GPR<20, "r20", [T8]>, DwarfRegNum<[20]>; +def T9_64 : LoongArch64GPR<21, "r21", [T9]>, DwarfRegNum<[21]>; +def FP_64 : LoongArch64GPR<22, "r22", [FP]>, DwarfRegNum<[22]>; +def S0_64 : LoongArch64GPR<23, "r23", [S0]>, DwarfRegNum<[23]>; +def S1_64 : LoongArch64GPR<24, "r24", [S1]>, DwarfRegNum<[24]>; +def S2_64 : LoongArch64GPR<25, "r25", [S2]>, DwarfRegNum<[25]>; +def S3_64 : LoongArch64GPR<26, "r26", [S3]>, DwarfRegNum<[26]>; +def S4_64 : LoongArch64GPR<27, "r27", [S4]>, DwarfRegNum<[27]>; +def S5_64 : LoongArch64GPR<28, "r28", [S5]>, DwarfRegNum<[28]>; +def S6_64 : LoongArch64GPR<29, "r29", [S6]>, DwarfRegNum<[29]>; +def S7_64 : LoongArch64GPR<30, "r30", [S7]>, DwarfRegNum<[30]>; +def S8_64 : LoongArch64GPR<31, "r31", [S8]>, DwarfRegNum<[31]>; + +let SubRegIndices = [sub_64] in { +def V0_64 : LoongArch64GPR<4, "r4", [A0_64]>, DwarfRegNum<[4]>; +def V1_64 : LoongArch64GPR<5, "r5", [A1_64]>, DwarfRegNum<[5]>; +} + +/// FP registers +foreach I = 0-31 in +def F#I : FGR32, DwarfRegNum<[!add(I, 32)]>; + +foreach I = 0-31 in +def F#I#_64 : FGR64("F"#I)]>, DwarfRegNum<[!add(I, 32)]>; +/// FP Condition Flag 0~7 foreach I = 0-7 in def FCC#I : LoongArchReg; -def CFR : RegisterClass<"LoongArch", [GRLenVT], 32, (sequence "FCC%u", 0, 7)> { - let RegInfos = GRLenRI; +/// FP Control and Status Registers, FCSR 1~3 +foreach I = 1-3 in +def FCSR#I : LoongArchReg; + +class FCSRReg Enc, string n, list subregs> : + RegisterWithSubRegs { +// field bits<2> chan_encoding = 0; + let Namespace = "LoongArch"; + let SubRegIndices = [sub_fcsr1, sub_fcsr2, sub_fcsr3]; +// let HWEncoding{8-0} = encoding{8-0}; +// let HWEncoding{10-9} = chan_encoding; } -// Control and status registers +def FCSR0 : FCSRReg<0, "fcsr0", [FCSR1, FCSR2, FCSR3]>; -foreach I = 0-3 in -def FCSR#I : LoongArchReg; +/// PC register +//let NameSpace = "LoongArch" in +//def PC : Register<"pc">; + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +def GPR32 : RegisterClass<"LoongArch", [i32], 32, (add + // Reserved + ZERO, + // Return Values and Arguments + A0, A1, A2, A3, A4, A5, A6, A7, + // Not preserved across procedure calls + T0, T1, T2, T3, T4, T5, T6, T7, T8, + // Callee save + S0, S1, S2, S3, S4, S5, S6, S7, S8, + // Reserved + RA, TP, SP, + // Reserved + T9, FP)>; + +def GPR64 : RegisterClass<"LoongArch", [i64], 64, (add + // Reserved + ZERO_64, + // Return Values and Arguments + A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64, + // Not preserved across procedure calls + T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, + // Callee save + S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64, S8_64, + // Reserved + RA_64, TP_64, SP_64, + // Reserved + T9_64, FP_64)>; + +def GPRTC64 : RegisterClass<"LoongArch", [i64], 64, (add + // 
Return Values and Arguments + A0_64, A1_64, A2_64, A3_64, A4_64, A5_64, A6_64, A7_64, + // Not preserved across procedure calls + T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64)>; + +/// FP Registers. +def FGR64 : RegisterClass<"LoongArch", [f64], 64, (sequence "F%u_64", 0, 31)>; +def FGR32 : RegisterClass<"LoongArch", [f32], 64, (sequence "F%u", 0, 31)>; + +/// FP condition Flag registers. +def FCFR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCC%u", 0, 7)>, + Unallocatable; + +def SP32 : RegisterClass<"LoongArch", [i32], 32, (add SP)>, Unallocatable; +def SP64 : RegisterClass<"LoongArch", [i64], 64, (add SP_64)>, Unallocatable; +def TP32 : RegisterClass<"LoongArch", [i32], 32, (add TP)>, Unallocatable; +def TP64 : RegisterClass<"LoongArch", [i64], 64, (add TP_64)>, Unallocatable; + +/// FP control and Status registers. +def FCSR : RegisterClass<"LoongArch", [i32], 4, (sequence "FCSR%u", 0, 3)>, + Unallocatable; + +//LSX +foreach I = 0-31 in +def VR#I : LSX128("F"#I#"_64")]>, + DwarfRegNum<[!add(I, 32)]>; + +//LASX +foreach I = 0-31 in +def XR#I : LASX256("VR"#I)]>, + DwarfRegNum<[!add(I, 32)]>; + +def LSX128B: RegisterClass<"LoongArch", [v16i8], 128, + (sequence "VR%u", 0, 31)>; + +def LSX128H: RegisterClass<"LoongArch", [v8i16], 128, + (sequence "VR%u", 0, 31)>; + +def LSX128W: RegisterClass<"LoongArch", [v4i32, v4f32], 128, + (sequence "VR%u", 0, 31)>; + +def LSX128D: RegisterClass<"LoongArch", [v2i64, v2f64], 128, + (sequence "VR%u", 0, 31)>; + +def LASX256B: RegisterClass<"LoongArch", [v32i8], 256, + (sequence "XR%u", 0, 31)>; +def LASX256H: RegisterClass<"LoongArch", [v16i16], 256, + (sequence "XR%u", 0, 31)>; +def LASX256W: RegisterClass<"LoongArch", [v8i32, v8f32], 256, + (sequence "XR%u", 0, 31)>; +def LASX256D: RegisterClass<"LoongArch", [v4i64, v4f64], 256, + (sequence "XR%u", 0, 31)>; + +//===----------------------------------------------------------------------===// +// Register Operands. 
+//===----------------------------------------------------------------------===// -let isAllocatable = false in -def FCSR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCSR%u", 0, 3)>; +class LoongArchAsmRegOperand : AsmOperandClass { + let ParserMethod = "parseAnyRegister"; +} + +def GPR32AsmOperand : LoongArchAsmRegOperand { + let Name = "GPR32AsmReg"; + let PredicateMethod = "isGPRAsmReg"; +} + +def GPR64AsmOperand : LoongArchAsmRegOperand { + let Name = "GPR64AsmReg"; + let PredicateMethod = "isGPRAsmReg"; +} + +def FGR32AsmOperand : LoongArchAsmRegOperand { + let Name = "FGR32AsmReg"; + let PredicateMethod = "isFGRAsmReg"; +} + +def FGR64AsmOperand : LoongArchAsmRegOperand { + let Name = "FGR64AsmReg"; + let PredicateMethod = "isFGRAsmReg"; +} + +def FCSRAsmOperand : LoongArchAsmRegOperand { + let Name = "FCSRAsmReg"; +} + +def FCFRAsmOperand : LoongArchAsmRegOperand { + let Name = "FCFRAsmReg"; +} + +//LSX +def LSX128AsmOperand : LoongArchAsmRegOperand { + let Name = "LSX128AsmReg"; +} + +//LASX +def LASX256AsmOperand : LoongArchAsmRegOperand { + let Name = "LASX256AsmReg"; +} + +def GPR32Opnd : RegisterOperand { + let ParserMatchClass = GPR32AsmOperand; +} + +def GPR64Opnd : RegisterOperand { + let ParserMatchClass = GPR64AsmOperand; +} + +def GPRTC64Opnd : RegisterOperand { + let ParserMatchClass = GPR64AsmOperand; +} + +def FGR32Opnd : RegisterOperand { + let ParserMatchClass = FGR32AsmOperand; +} + +def FGR64Opnd : RegisterOperand { + let ParserMatchClass = FGR64AsmOperand; +} + +def FCSROpnd : RegisterOperand { + let ParserMatchClass = FCSRAsmOperand; +} + +def FCFROpnd : RegisterOperand { + let ParserMatchClass = FCFRAsmOperand; +} + +//LSX +def LSX128BOpnd : RegisterOperand { + let ParserMatchClass = LSX128AsmOperand; +} + +def LSX128HOpnd : RegisterOperand { + let ParserMatchClass = LSX128AsmOperand; +} + +def LSX128WOpnd : RegisterOperand { + let ParserMatchClass = LSX128AsmOperand; +} + +def LSX128DOpnd : RegisterOperand { + let ParserMatchClass = LSX128AsmOperand; +} + +//LASX +def LASX256BOpnd : RegisterOperand { + let ParserMatchClass = LASX256AsmOperand; +} + +def LASX256HOpnd : RegisterOperand { + let ParserMatchClass = LASX256AsmOperand; +} + +def LASX256WOpnd : RegisterOperand { + let ParserMatchClass = LASX256AsmOperand; +} + +def LASX256DOpnd : RegisterOperand { + let ParserMatchClass = LASX256AsmOperand; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp index ff84e7c8cc1f..ef990ae09e8d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp @@ -1,4 +1,4 @@ -//===-- LoongArchSubtarget.cpp - LoongArch Subtarget Information -*- C++ -*--=// +//===-- LoongArchSubtarget.cpp - LoongArch Subtarget Information --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -11,7 +11,16 @@ //===----------------------------------------------------------------------===// #include "LoongArchSubtarget.h" -#include "LoongArchFrameLowering.h" +#include "LoongArch.h" +#include "LoongArchMachineFunction.h" +#include "LoongArchRegisterInfo.h" +#include "LoongArchTargetMachine.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -23,32 +32,74 @@ using namespace llvm; void LoongArchSubtarget::anchor() {} -LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies( - const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, - StringRef ABIName) { - bool Is64Bit = TT.isArch64Bit(); - if (CPU.empty()) - CPU = Is64Bit ? "generic-la64" : "generic-la32"; +LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, + StringRef FS, + const LoongArchTargetMachine &TM, + MaybeAlign StackAlignOverride) + : LoongArchGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), HasLA64(false), + IsSoftFloat(false), IsSingleFloat(false), IsFP64bit(false), HasLSX(false), + HasLASX(false), UnalignedAccess(false), + StackAlignOverride(StackAlignOverride), TM(TM), TargetTriple(TT), + TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS, TM)), + FrameLowering(*this), TLInfo(TM, *this) { + + // Check if Architecture and ABI are compatible. + assert(((!is64Bit() && isABI_LP32()) || + (is64Bit() && (isABI_LPX32() || isABI_LP64()))) && + "Invalid Arch & ABI pair."); + + if (hasLSX() && !isFP64bit()) + report_fatal_error("LSX requires 64-bit floating point registers. " + "See -mattr=+fp64.", + false); + + assert(isFP64bit()); +} + +bool LoongArchSubtarget::isPositionIndependent() const { + return TM.isPositionIndependent(); +} + +/// This overrides the PostRAScheduler bit in the SchedModel for any CPU. +bool LoongArchSubtarget::enablePostRAScheduler() const { return true; } - if (TuneCPU.empty()) - TuneCPU = CPU; +void LoongArchSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(is64Bit() ? &LoongArch::GPR64RegClass + : &LoongArch::GPR32RegClass); +} + +CodeGenOpt::Level LoongArchSubtarget::getOptLevelToEnablePostRAScheduler() const { + return CodeGenOpt::Aggressive; +} + +LoongArchSubtarget & +LoongArchSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM) { + StringRef CPUName = LoongArch_MC::selectLoongArchCPU(TM.getTargetTriple(), CPU); - ParseSubtargetFeatures(CPU, TuneCPU, FS); - if (Is64Bit) { - GRLenVT = MVT::i64; - GRLen = 64; + // Parse features string. + ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS); + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(CPUName); + + if (StackAlignOverride) + stackAlignment = *StackAlignOverride; + else if (isABI_LPX32() || isABI_LP64()) + stackAlignment = Align(16); + else { + assert(isABI_LP32() && "Unknown ABI for stack alignment!"); + stackAlignment = Align(8); } - // TODO: ILP32{S,F} LP64{S,F} - TargetABI = Is64Bit ?
LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D; return *this; } -LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU, - StringRef TuneCPU, StringRef FS, - StringRef ABIName, - const TargetMachine &TM) - : LoongArchGenSubtargetInfo(TT, CPU, TuneCPU, FS), - FrameLowering( - initializeSubtargetDependencies(TT, CPU, TuneCPU, FS, ABIName)), - InstrInfo(*this), RegInfo(getHwMode()), TLInfo(TM, *this) {} +Reloc::Model LoongArchSubtarget::getRelocationModel() const { + return TM.getRelocationModel(); +} + +bool LoongArchSubtarget::isABI_LP64() const { return getABI().IsLP64(); } +bool LoongArchSubtarget::isABI_LPX32() const { return getABI().IsLPX32(); } +bool LoongArchSubtarget::isABI_LP32() const { return getABI().IsLP32(); } +const LoongArchABIInfo &LoongArchSubtarget::getABI() const { return TM.getABI(); } diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h index fbe7a176b371..588d9f46bd67 100644 --- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h +++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h @@ -1,4 +1,4 @@ -//===- LoongArchSubtarget.h - Define Subtarget for the LoongArch -*- C++ -*-==// +//===-- LoongArchSubtarget.h - Define Subtarget for the LoongArch ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -13,15 +13,16 @@ #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H +#include "MCTargetDesc/LoongArchABIInfo.h" #include "LoongArchFrameLowering.h" #include "LoongArchISelLowering.h" #include "LoongArchInstrInfo.h" -#include "LoongArchRegisterInfo.h" -#include "MCTargetDesc/LoongArchBaseInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/ErrorHandling.h" +#include #define GET_SUBTARGETINFO_HEADER #include "LoongArchGenSubtargetInfo.inc" @@ -29,61 +30,114 @@ namespace llvm { class StringRef; +class LoongArchTargetMachine; + class LoongArchSubtarget : public LoongArchGenSubtargetInfo { virtual void anchor(); - bool HasLA64 = false; - bool HasBasicF = false; - bool HasBasicD = false; - bool HasExtLSX = false; - bool HasExtLASX = false; - bool HasExtLVZ = false; - bool HasExtLBT = false; - unsigned GRLen = 32; - MVT GRLenVT = MVT::i32; - LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown; - LoongArchFrameLowering FrameLowering; - LoongArchInstrInfo InstrInfo; - LoongArchRegisterInfo RegInfo; - LoongArchTargetLowering TLInfo; - - /// Initializes using the passed in CPU and feature strings so that we can - /// use initializer lists for subtarget initialization. - LoongArchSubtarget &initializeSubtargetDependencies(const Triple &TT, - StringRef CPU, - StringRef TuneCPU, - StringRef FS, - StringRef ABIName); + + // HasLA64 - The target processor has LA64 ISA support. + bool HasLA64; + + // IsSoftFloat - The target does not support any floating point instructions. + bool IsSoftFloat; + + // IsSingleFloat - The target only supports single precision floating + // point operations. This enables the target to use all 32 32-bit + // floating point registers instead of only the even-numbered ones. + bool IsSingleFloat; + + // IsFP64bit - The target processor has 64-bit floating point registers.
+ bool IsFP64bit; + + /// Features related to the presence of specific instructions. + + // HasLSX - Supports LSX. + bool HasLSX; + + // HasLASX - Supports LASX. + bool HasLASX; + + /// The minimum alignment known to hold of the stack frame on + /// entry to the function and which must be maintained by every function. + Align stackAlignment; + + // Allow unaligned memory accesses. + bool UnalignedAccess; + + /// The overridden stack alignment. + MaybeAlign StackAlignOverride; + + InstrItineraryData InstrItins; + + const LoongArchTargetMachine &TM; + + Triple TargetTriple; + + const SelectionDAGTargetInfo TSInfo; + const LoongArchInstrInfo InstrInfo; + const LoongArchFrameLowering FrameLowering; + const LoongArchTargetLowering TLInfo; public: - // Initializes the data members to match that of the specified triple. - LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, - StringRef FS, StringRef ABIName, const TargetMachine &TM); + bool isPositionIndependent() const; + /// This overrides the PostRAScheduler bit in the SchedModel for each CPU. + bool enablePostRAScheduler() const override; + void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override; + CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override; + + bool isABI_LP64() const; + bool isABI_LPX32() const; + bool isABI_LP32() const; + const LoongArchABIInfo &getABI() const; - // Parses features string setting specified subtarget options. The - // definition of this function is auto-generated by tblgen. + /// This constructor initializes the data members to match that + /// of the specified triple. + LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + const LoongArchTargetMachine &TM, MaybeAlign StackAlignOverride); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); - const LoongArchFrameLowering *getFrameLowering() const override { + bool is64Bit() const { return HasLA64; } + bool isFP64bit() const { return IsFP64bit; } + unsigned getGPRSizeInBytes() const { return is64Bit() ? 
8 : 4; } + bool isSingleFloat() const { return IsSingleFloat; } + bool hasLSX() const { return HasLSX; } + bool hasLASX() const { return HasLASX; } + bool useSoftFloat() const { return IsSoftFloat; } + + bool allowUnalignedAccess() const { return UnalignedAccess; } + + bool isXRaySupported() const override { return true; } + + Align getStackAlignment() const { return stackAlignment; } + + // Grab relocation model + Reloc::Model getRelocationModel() const; + + LoongArchSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM); + + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + const LoongArchInstrInfo *getInstrInfo() const override { + return &InstrInfo; + } + const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } - const LoongArchInstrInfo *getInstrInfo() const override { return &InstrInfo; } const LoongArchRegisterInfo *getRegisterInfo() const override { - return &RegInfo; + return &InstrInfo.getRegisterInfo(); } const LoongArchTargetLowering *getTargetLowering() const override { return &TLInfo; } - bool is64Bit() const { return HasLA64; } - bool hasBasicF() const { return HasBasicF; } - bool hasBasicD() const { return HasBasicD; } - bool hasExtLSX() const { return HasExtLSX; } - bool hasExtLASX() const { return HasExtLASX; } - bool hasExtLVZ() const { return HasExtLVZ; } - bool hasExtLBT() const { return HasExtLBT; } - MVT getGRLenVT() const { return GRLenVT; } - unsigned getGRLen() const { return GRLen; } - LoongArchABI::ABI getTargetABI() const { return TargetABI; } + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } }; -} // end namespace llvm +} // End llvm namespace -#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHSUBTARGET_H +#endif diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 7ba5848e0997..f8a1dc5faad3 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -1,4 +1,4 @@ -//===-- LoongArchTargetMachine.cpp - Define TargetMachine for LoongArch ---===// +//===-- LoongArchTargetMachine.cpp - Define TargetMachine for LoongArch -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -12,12 +12,29 @@ #include "LoongArchTargetMachine.h" #include "LoongArch.h" -#include "MCTargetDesc/LoongArchBaseInfo.h" -#include "TargetInfo/LoongArchTargetInfo.h" +#include "LoongArchISelDAGToDAG.h" +#include "LoongArchSubtarget.h" +#include "LoongArchTargetObjectFile.h" +#include "LoongArchTargetTransformInfo.h" +#include "MCTargetDesc/LoongArchABIInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +#include <memory> +#include <string> using namespace llvm; @@ -26,29 +43,63 @@ using namespace llvm; extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() { // Register the target. RegisterTargetMachine<LoongArchTargetMachine> X(getTheLoongArch32Target()); - RegisterTargetMachine<LoongArchTargetMachine> Y(getTheLoongArch64Target()); + RegisterTargetMachine<LoongArchTargetMachine> A(getTheLoongArch64Target()); } -static std::string computeDataLayout(const Triple &TT) { - if (TT.isArch64Bit()) - return "e-m:e-p:64:64-i64:64-i128:128-n64-S128"; - assert(TT.isArch32Bit() && "only LA32 and LA64 are currently supported"); - return "e-m:e-p:32:32-i64:64-n32-S128"; +static std::string computeDataLayout(const Triple &TT, StringRef CPU, + const TargetOptions &Options) { + std::string Ret; + LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions); + + Ret += "e"; + + if (ABI.IsLP32()) + Ret += "-m:m"; + else + Ret += "-m:e"; + + // Pointers are 32 bit on some ABIs. + if (!ABI.IsLP64()) + Ret += "-p:32:32"; + + // 8 and 16 bit integers only need to have natural alignment, but try to + // align them to 32 bits. 64 bit integers have natural alignment. + Ret += "-i8:8:32-i16:16:32-i64:64"; + + // 32 bit registers are always available and the stack is at least 64 bit + // aligned. On LP64 64 bit registers are also available and the stack is + // 128 bit aligned. + if (ABI.IsLP64() || ABI.IsLPX32()) + Ret += "-n32:64-S128"; + else + Ret += "-n32-S64"; + + return Ret; } -static Reloc::Model getEffectiveRelocModel(const Triple &TT, +static Reloc::Model getEffectiveRelocModel(bool JIT, Optional<Reloc::Model> RM) { - return RM.value_or(Reloc::Static); + if (!RM.hasValue() || JIT) + return Reloc::Static; + return *RM; } -LoongArchTargetMachine::LoongArchTargetMachine( - const Target &T, const Triple &TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Optional<Reloc::Model> RM, - Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) - : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, - getEffectiveRelocModel(TT, RM), +// On function prologue, the stack is created by decrementing its pointer. +// Once decremented, all references are done with a positive offset from the +// stack/frame pointer, so using StackGrowsUp makes the offsets easier to +// handle. +// Using CodeModel::Large enables different CALL behavior.
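+// As a worked example of the layout computed above: with the LP64 ABI, +// computeDataLayout() returns +// "e-m:e-i8:8:32-i16:16:32-i64:64-n32:64-S128", +// while LP32 instead gets the "-m:m", "-p:32:32" and "-n32-S64" pieces.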
+LoongArchTargetMachine::LoongArchTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Optional RM, + Optional CM, + CodeGenOpt::Level OL, bool JIT) + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, + CPU, FS, Options, getEffectiveRelocModel(JIT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), - TLOF(std::make_unique()) { + TLOF(std::make_unique()), + ABI(LoongArchABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)) { initAsmInfo(); } @@ -57,44 +108,45 @@ LoongArchTargetMachine::~LoongArchTargetMachine() = default; const LoongArchSubtarget * LoongArchTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); - Attribute TuneAttr = F.getFnAttribute("tune-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); - std::string CPU = - CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; - std::string TuneCPU = - TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; - std::string FS = - FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + // FIXME: This is related to the code below to reset the target options, + // we need to know whether or not the soft float flag is set on the + // function, so we can enable it as a subtarget feature. + bool softFloat = + F.hasFnAttribute("use-soft-float") && + F.getFnAttribute("use-soft-float").getValueAsString() == "true"; + + if (softFloat) + FS += FS.empty() ? "+soft-float" : ",+soft-float"; - std::string Key = CPU + TuneCPU + FS; - auto &I = SubtargetMap[Key]; + auto &I = SubtargetMap[CPU + FS]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); - auto ABIName = Options.MCOptions.getABIName(); - if (const MDString *ModuleTargetABI = dyn_cast_or_null( - F.getParent()->getModuleFlag("target-abi"))) { - auto TargetABI = LoongArchABI::getTargetABI(ABIName); - if (TargetABI != LoongArchABI::ABI_Unknown && - ModuleTargetABI->getString() != ABIName) { - report_fatal_error("-target-abi option != target-abi module flag"); - } - ABIName = ModuleTargetABI->getString(); - } - I = std::make_unique(TargetTriple, CPU, TuneCPU, FS, - ABIName, *this); + I = std::make_unique(TargetTriple, CPU, FS, *this, + MaybeAlign(F.getParent()->getOverrideStackAlignment())); } return I.get(); } namespace { + +/// LoongArch Code Generator Pass Configuration Options. 
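+/// It schedules AtomicExpand ahead of instruction selection and, right +/// before emission, expands pseudo instructions and relaxes out-of-range +/// branches (see addIRPasses and addPreEmitPass below).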
class LoongArchPassConfig : public TargetPassConfig { public: LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + } LoongArchTargetMachine &getLoongArchTargetMachine() const { return getTM(); @@ -102,22 +154,42 @@ public: void addIRPasses() override; bool addInstSelector() override; + void addPreEmitPass() override; }; -} // end namespace -TargetPassConfig * -LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) { +} // end anonymous namespace + +TargetPassConfig *LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) { return new LoongArchPassConfig(*this, PM); } void LoongArchPassConfig::addIRPasses() { - addPass(createAtomicExpandPass()); - TargetPassConfig::addIRPasses(); + addPass(createAtomicExpandPass()); } - +// Install an instruction selector pass using +// the ISelDag to gen LoongArch code. bool LoongArchPassConfig::addInstSelector() { - addPass(createLoongArchISelDag(getLoongArchTargetMachine())); - + addPass(createLoongArchModuleISelDagPass()); + addPass(createLoongArchISelDag(getLoongArchTargetMachine(), getOptLevel())); return false; } + +TargetTransformInfo +LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const { + LLVM_DEBUG(errs() << "Target Transform Info Pass Added\n"); + return TargetTransformInfo(BasicTTIImpl(this, F)); +} + +// Implemented by targets that want to run passes immediately before +// machine code is emitted. return true if -print-machineinstrs should +// print out the code after the passes. +void LoongArchPassConfig::addPreEmitPass() { + // Expand pseudo instructions that are sensitive to register allocation. + addPass(createLoongArchExpandPseudoPass()); + + // Relax conditional branch instructions if they're otherwise out of + // range of their destination. + // This pass must be run after any pseudo instruction expansion + addPass(&BranchRelaxationPassID); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h index cbd872031a32..8e395d18389a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h @@ -1,4 +1,4 @@ -//=- LoongArchTargetMachine.h - Define TargetMachine for LoongArch -*- C++ -*-// +//===- LoongArchTargetMachine.h - Define TargetMachine for LoongArch ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -13,25 +13,33 @@ #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETMACHINE_H +#include "MCTargetDesc/LoongArchABIInfo.h" #include "LoongArchSubtarget.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" +#include <memory> namespace llvm { class LoongArchTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; + // Selected ABI + LoongArchABIInfo ABI; + mutable StringMap<std::unique_ptr<LoongArchSubtarget>> SubtargetMap; public: LoongArchTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, - Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, - bool JIT); + StringRef FS, const TargetOptions &Options, + Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM, + CodeGenOpt::Level OL, bool JIT); ~LoongArchTargetMachine() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override; - const LoongArchSubtarget *getSubtargetImpl() const = delete; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; @@ -39,6 +47,20 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // LoongArch doesn't have any special address spaces so we just reserve + // the first 256 for software use (e.g. OpenCL) and treat casts + // between them as noops. + return SrcAS < 256 && DestAS < 256; + } + + const LoongArchABIInfo &getABI() const { return ABI; } + + bool isMachineVerifierClean() const override { + return false; + } }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp new file mode 100644 index 000000000000..9c6250d28930 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.cpp @@ -0,0 +1,26 @@ +//===-- LoongArchTargetObjectFile.cpp - LoongArch Object Files -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchTargetObjectFile.h"
+#include "LoongArchSubtarget.h"
+#include "LoongArchTargetMachine.h"
+#include "MCTargetDesc/LoongArchMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+void LoongArchTargetObjectFile::Initialize(MCContext &Ctx,
+                                           const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+  InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.h b/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.h
new file mode 100644
index 000000000000..a50c57171f80
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetObjectFile.h
@@ -0,0 +1,24 @@
+//===-- llvm/Target/LoongArchTargetObjectFile.h - LoongArch Object Info ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+class LoongArchTargetMachine;
+
+class LoongArchTargetObjectFile : public TargetLoweringObjectFileELF {
+public:
+  void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+};
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetStreamer.h b/llvm/lib/Target/LoongArch/LoongArchTargetStreamer.h
new file mode 100644
index 000000000000..a9adc32d0eb5
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetStreamer.h
@@ -0,0 +1,130 @@
+//===-- LoongArchTargetStreamer.h - LoongArch Target Streamer ------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETSTREAMER_H
+
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class formatted_raw_ostream;
+
+struct LoongArchFPABIInfo;
+
+class LoongArchTargetStreamer : public MCTargetStreamer {
+public:
+  LoongArchTargetStreamer(MCStreamer &S);
+
+  virtual void setPic(bool Value) {}
+
+  virtual void emitDirectiveOptionPic0();
+  virtual void emitDirectiveOptionPic2();
+
+  virtual void emitDirectiveSetArch(StringRef Arch);
+  virtual void emitDirectiveSetLoongArch32();
+  virtual void emitDirectiveSetloongarch64();
+
+  void emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc,
+             const MCSubtargetInfo *STI);
+  void emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc,
+              const MCSubtargetInfo *STI);
+  void emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc,
+              const MCSubtargetInfo *STI);
+  void emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc,
+              const MCSubtargetInfo *STI);
+  void emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc,
+              const MCSubtargetInfo *STI);
+  void emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1, MCOperand Op2,
+               SMLoc IDLoc, const MCSubtargetInfo *STI);
+  void emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2,
+               SMLoc IDLoc, const MCSubtargetInfo *STI);
+  void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2,
+               SMLoc IDLoc, const MCSubtargetInfo *STI);
+  void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int32_t Imm,
+               SMLoc IDLoc, const MCSubtargetInfo *STI);
+  void emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2,
+                MCOperand Op3, SMLoc IDLoc, const MCSubtargetInfo *STI);
+  void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0,
+                 int16_t Imm1, int16_t Imm2, SMLoc IDLoc,
+                 const MCSubtargetInfo *STI);
+  void emitAdd(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit,
+               const MCSubtargetInfo *STI);
+  void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount,
+                SMLoc IDLoc, const MCSubtargetInfo *STI);
+  void emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI);
+
+  void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
+  void reallowModuleDirective() { ModuleDirectiveAllowed = true; }
+  bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; }
+
+  template <class PredicateLibrary>
+  void updateABIInfo(const PredicateLibrary &P) {
+    ABI = P.getABI();
+  }
+
+  const LoongArchABIInfo &getABI() const {
+    assert(ABI.hasValue() && "ABI hasn't been set!");
+    return *ABI;
+  }
+
+protected:
+  llvm::Optional<LoongArchABIInfo> ABI;
+
+  bool GPRInfoSet;
+
+  bool FPRInfoSet;
+
+  bool FrameInfoSet;
+  int FrameOffset;
+  unsigned FrameReg;
+  unsigned ReturnReg;
+
+private:
+  bool ModuleDirectiveAllowed;
+};
+
+// This part is for ascii assembly output
+class LoongArchTargetAsmStreamer : public LoongArchTargetStreamer {
+  formatted_raw_ostream &OS;
+
+public:
+  LoongArchTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+
+  void emitDirectiveOptionPic0() override;
+  void emitDirectiveOptionPic2() override;
+
+  void emitDirectiveSetArch(StringRef Arch) override;
+  void emitDirectiveSetLoongArch32() override;
+  void emitDirectiveSetloongarch64() override;
+};
+
+// This part is for ELF object output
+class LoongArchTargetELFStreamer : public LoongArchTargetStreamer {
+  const MCSubtargetInfo &STI;
+  bool Pic;
+
+public:
+  MCELFStreamer &getStreamer();
+  LoongArchTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
+
+  void setPic(bool Value) override { Pic = Value; }
+
+  void emitLabel(MCSymbol *Symbol) override;
+  void finish() override;
+
+  void emitDirectiveOptionPic0() override;
+  void emitDirectiveOptionPic2() override;
+};
+}
+#endif
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
new file mode 100644
index 000000000000..9510dc027ecd
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -0,0 +1,325 @@
+//===-- LoongArchTargetTransformInfo.cpp - LoongArch specific TTI pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// LoongArch target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchTargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/CostTable.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "LoongArchtti"
+
+//===----------------------------------------------------------------------===//
+//
+// LoongArch cost model.
+//
+//===----------------------------------------------------------------------===//
+
+bool LoongArchTTIImpl::areInlineCompatible(const Function *Caller,
+                                           const Function *Callee) const {
+  const TargetMachine &TM = getTLI()->getTargetMachine();
+
+  const FeatureBitset &CallerBits =
+      TM.getSubtargetImpl(*Caller)->getFeatureBits();
+  const FeatureBitset &CalleeBits =
+      TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+  // Inline a callee if its target-features are a subset of the caller's
+  // target-features.
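+  // For example, with illustrative masks (not the real FeatureBitset layout):
+  // CallerBits = {LSX, LASX} = 0b11 and CalleeBits = {LSX} = 0b01 gives
+  // (0b11 & 0b01) == 0b01, so inlining is allowed; a callee built with LASX
+  // into a caller without it fails the subset test and is not inlined.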
+  return (CallerBits & CalleeBits) == CalleeBits;
+}
+
+TargetTransformInfo::PopcntSupportKind
+LoongArchTTIImpl::getPopcntSupport(unsigned TyWidth) {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  if (TyWidth == 32 || TyWidth == 64)
+    return TTI::PSK_FastHardware;
+  return TTI::PSK_Software;
+}
+
+unsigned LoongArchTTIImpl::getNumberOfRegisters(bool Vector) {
+  if (Vector && !ST->hasLSX())
+    return 0;
+
+  return 32;
+}
+
+unsigned LoongArchTTIImpl::getRegisterBitWidth(bool Vector) const {
+  if (Vector) {
+    if (ST->hasLASX())
+      return 256;
+
+    if (ST->hasLSX())
+      return 128;
+
+    return 0;
+  }
+  return 64;
+}
+
+unsigned LoongArchTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+  if (VF == 1)
+    return 1;
+  return 2;
+}
+
+InstructionCost LoongArchTTIImpl::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+    TTI::OperandValueProperties Opd1PropInfo,
+    TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+    const Instruction *CxtI) {
+
+  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  static const CostTblEntry LASXCostTable[] = {
+
+      {ISD::SHL, MVT::v32i8, 1},
+      {ISD::SHL, MVT::v16i16, 1},
+      {ISD::SHL, MVT::v8i32, 1},
+      {ISD::SHL, MVT::v4i64, 1},
+
+      {ISD::SRL, MVT::v32i8, 1},
+      {ISD::SRL, MVT::v16i16, 1},
+      {ISD::SRL, MVT::v8i32, 1},
+      {ISD::SRL, MVT::v4i64, 1},
+
+      {ISD::SRA, MVT::v32i8, 1},
+      {ISD::SRA, MVT::v16i16, 1},
+      {ISD::SRA, MVT::v8i32, 1},
+      {ISD::SRA, MVT::v4i64, 1},
+
+      {ISD::SUB, MVT::v32i8, 1},
+      {ISD::SUB, MVT::v16i16, 1},
+      {ISD::SUB, MVT::v8i32, 1},
+      {ISD::SUB, MVT::v4i64, 1},
+
+      {ISD::ADD, MVT::v32i8, 1},
+      {ISD::ADD, MVT::v16i16, 1},
+      {ISD::ADD, MVT::v8i32, 1},
+      {ISD::ADD, MVT::v4i64, 1},
+
+      {ISD::MUL, MVT::v32i8, 1},
+      {ISD::MUL, MVT::v16i16, 1},
+      {ISD::MUL, MVT::v8i32, 1},
+      {ISD::MUL, MVT::v4i64, 1},
+
+      {ISD::SDIV, MVT::v32i8, 29},
+      {ISD::SDIV, MVT::v16i16, 19},
+      {ISD::SDIV, MVT::v8i32, 14},
+      {ISD::SDIV, MVT::v4i64, 13},
+
+      {ISD::UDIV, MVT::v32i8, 29},
+      {ISD::UDIV, MVT::v16i16, 19},
+      {ISD::UDIV, MVT::v8i32, 14},
+      {ISD::UDIV, MVT::v4i64, 13},
+
+      {ISD::SREM, MVT::v32i8, 33},
+      {ISD::SREM, MVT::v16i16, 21},
+      {ISD::SREM, MVT::v8i32, 15},
+      {ISD::SREM, MVT::v4i64, 13},
+
+      {ISD::UREM, MVT::v32i8, 29},
+      {ISD::UREM, MVT::v16i16, 19},
+      {ISD::UREM, MVT::v8i32, 14},
+      {ISD::UREM, MVT::v4i64, 13},
+
+      {ISD::FADD, MVT::f64, 1},
+      {ISD::FADD, MVT::f32, 1},
+      {ISD::FADD, MVT::v4f64, 1},
+      {ISD::FADD, MVT::v8f32, 1},
+
+      {ISD::FSUB, MVT::f64, 1},
+      {ISD::FSUB, MVT::f32, 1},
+      {ISD::FSUB, MVT::v4f64, 1},
+      {ISD::FSUB, MVT::v8f32, 1},
+
+      {ISD::FMUL, MVT::f64, 1},
+      {ISD::FMUL, MVT::f32, 1},
+      {ISD::FMUL, MVT::v4f64, 1},
+      {ISD::FMUL, MVT::v8f32, 1},
+
+      {ISD::FDIV, MVT::f32, 12},
+      {ISD::FDIV, MVT::f64, 10},
+      {ISD::FDIV, MVT::v8f32, 12},
+      {ISD::FDIV, MVT::v4f64, 10}
+
+  };
+
+  if (ST->hasLASX())
+    if (const auto *Entry = CostTableLookup(LASXCostTable, ISD, LT.second))
+      return LT.first * Entry->Cost;
+
+  static const CostTblEntry LSXCostTable[] = {
+
+      {ISD::SHL, MVT::v16i8, 1},
+      {ISD::SHL, MVT::v8i16, 1},
+      {ISD::SHL, MVT::v4i32, 1},
+      {ISD::SHL, MVT::v2i64, 1},
+
+      {ISD::SRL, MVT::v16i8, 1},
+      {ISD::SRL, MVT::v8i16, 1},
+      {ISD::SRL, MVT::v4i32, 1},
+      {ISD::SRL, MVT::v2i64, 1},
+
+      {ISD::SRA, MVT::v16i8, 1},
+      {ISD::SRA, MVT::v8i16, 1},
+      {ISD::SRA, MVT::v4i32, 1},
+      {ISD::SRA, MVT::v2i64, 1},
+
+      {ISD::SUB, MVT::v16i8, 1},
+      {ISD::SUB, MVT::v8i16, 1},
+      {ISD::SUB, MVT::v4i32, 1},
+      {ISD::SUB, MVT::v2i64, 1},
+
+      {ISD::ADD, MVT::v16i8, 1},
+      {ISD::ADD, MVT::v8i16, 1},
+      {ISD::ADD, MVT::v4i32, 1},
+      {ISD::ADD, MVT::v2i64, 1},
+
+      {ISD::MUL, MVT::v16i8, 1},
+      {ISD::MUL, MVT::v8i16, 1},
+      {ISD::MUL, MVT::v4i32, 1},
+      {ISD::MUL, MVT::v2i64, 1},
+
+      {ISD::SDIV, MVT::v16i8, 29},
+      {ISD::SDIV, MVT::v8i16, 19},
+      {ISD::SDIV, MVT::v4i32, 14},
+      {ISD::SDIV, MVT::v2i64, 13},
+
+      {ISD::UDIV, MVT::v16i8, 29},
+      {ISD::UDIV, MVT::v8i16, 19},
+      {ISD::UDIV, MVT::v4i32, 14},
+      {ISD::UDIV, MVT::v2i64, 13},
+
+      {ISD::SREM, MVT::v16i8, 33},
+      {ISD::SREM, MVT::v8i16, 21},
+      {ISD::SREM, MVT::v4i32, 15},
+      {ISD::SREM, MVT::v2i64, 13},
+
+      {ISD::UREM, MVT::v16i8, 29},
+      {ISD::UREM, MVT::v8i16, 19},
+      {ISD::UREM, MVT::v4i32, 14},
+      {ISD::UREM, MVT::v2i64, 13},
+
+      {ISD::FADD, MVT::f64, 1},
+      {ISD::FADD, MVT::f32, 1},
+      {ISD::FADD, MVT::v2f64, 1},
+      {ISD::FADD, MVT::v4f32, 1},
+
+      {ISD::FSUB, MVT::f64, 1},
+      {ISD::FSUB, MVT::f32, 1},
+      {ISD::FSUB, MVT::v2f64, 1},
+      {ISD::FSUB, MVT::v4f32, 1},
+
+      {ISD::FMUL, MVT::f64, 1},
+      {ISD::FMUL, MVT::f32, 1},
+      {ISD::FMUL, MVT::v2f64, 1},
+      {ISD::FMUL, MVT::v4f32, 1},
+
+      {ISD::FDIV, MVT::f32, 12},
+      {ISD::FDIV, MVT::f64, 10},
+      {ISD::FDIV, MVT::v4f32, 12},
+      {ISD::FDIV, MVT::v2f64, 10}
+
+  };
+
+  if (ST->hasLSX())
+    if (const auto *Entry = CostTableLookup(LSXCostTable, ISD, LT.second))
+      return LT.first * Entry->Cost;
+
+  // Fallback to the default implementation.
+  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info);
+}
+
+InstructionCost LoongArchTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+                                                     unsigned Index) {
+  assert(Val->isVectorTy() && "This must be a vector type");
+
+  Type *ScalarType = Val->getScalarType();
+
+  if (Index != -1U) {
+    // Legalize the type.
+    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
+
+    // This type is legalized to a scalar type.
+    if (!LT.second.isVector())
+      return 0;
+
+    // The type may be split. Normalize the index to the new type.
+    unsigned Width = LT.second.getVectorNumElements();
+    Index = Index % Width;
+
+    // The element at index zero is already inside the vector.
+    if (Index == 0) // if (ScalarType->isFloatingPointTy() && Index == 0)
+      return 0;
+  }
+
+  // Add to the base cost if we know that the extracted element of a vector is
+  // destined to be moved to and used in the integer register file.
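+  // For instance, "extractelement <2 x i8*> %v, i32 1" whose result feeds an
+  // address computation has to cross from a vector register into a GPR; the
+  // extra unit charged below models that move (an illustrative reading of the
+  // heuristic, not a measured latency).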
+  int RegisterFileMoveCost = 0;
+  if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
+    RegisterFileMoveCost = 1;
+
+  return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
+}
+
+unsigned LoongArchTTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
+  return getRegisterBitWidth(true);
+}
+
+InstructionCost LoongArchTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+                                                   Type *Src,
+                                                   TTI::CastContextHint CCH,
+                                                   TTI::TargetCostKind CostKind,
+                                                   const Instruction *I) {
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  static const TypeConversionCostTblEntry LASXConversionTbl[] = {
+
+      // TODO: The cost requires more granular testing.
+      {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 3},
+      {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3},
+      {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 3},
+      {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3},
+      {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 3},
+      {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3},
+
+  };
+
+  EVT SrcTy = TLI->getValueType(DL, Src);
+  EVT DstTy = TLI->getValueType(DL, Dst);
+
+  if (!SrcTy.isSimple() || !DstTy.isSimple())
+    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+
+  if (ST->hasLASX()) {
+    if (const auto *Entry = ConvertCostTableLookup(
+            LASXConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
+      return Entry->Cost;
+  }
+
+  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
new file mode 100644
index 000000000000..3a93fc8eccbb
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -0,0 +1,91 @@
+//===-- LoongArchTargetTransformInfo.h - LoongArch specific TTI -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// \file
+// This file defines a TargetTransformInfo::Concept conforming object specific
+// to the LoongArch target machine. It uses the target's detailed information
+// to provide more precise answers to certain TTI queries, while letting the
+// target independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H
+
+#include "LoongArch.h"
+#include "LoongArchSubtarget.h"
+#include "LoongArchTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+
+class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
+  typedef BasicTTIImplBase<LoongArchTTIImpl> BaseT;
+  typedef TargetTransformInfo TTI;
+  friend BaseT;
+
+  const LoongArchSubtarget *ST;
+  const LoongArchTargetLowering *TLI;
+
+  const LoongArchSubtarget *getST() const { return ST; }
+  const LoongArchTargetLowering *getTLI() const { return TLI; }
+
+public:
+  explicit LoongArchTTIImpl(const LoongArchTargetMachine *TM, const Function &F)
+      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+        TLI(ST->getTargetLowering()) {}
+
+  bool areInlineCompatible(const Function *Caller,
+                           const Function *Callee) const;
+
+  /// \name Scalar TTI Implementations
+  /// @{
+
+  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+  /// @}
+
+  /// \name Vector TTI Implementations
+  /// @{
+
+  bool enableInterleavedAccessVectorization() { return true; }
+
+  unsigned getNumberOfRegisters(bool Vector);
+
+  unsigned getRegisterBitWidth(bool Vector) const;
+
+  unsigned getMaxInterleaveFactor(unsigned VF);
+
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
+                                     unsigned Index);
+
+  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                   TTI::CastContextHint CCH,
+                                   TTI::TargetCostKind CostKind,
+                                   const Instruction *I = nullptr);
+
+  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
+
+  InstructionCost getArithmeticInstrCost(
+      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+      const Instruction *CxtI = nullptr);
+
+  /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt
index 2e1ca69a3e56..927fa7d5b930 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/CMakeLists.txt
@@ -1,12 +1,15 @@
-add_llvm_component_library(LLVMLoongArchDesc
+add_llvm_component_library(LLVMLoongArchDesc
+  LoongArchABIInfo.cpp
+  LoongArchAnalyzeImmediate.cpp
   LoongArchAsmBackend.cpp
-  LoongArchBaseInfo.cpp
   LoongArchELFObjectWriter.cpp
+  LoongArchELFStreamer.cpp
   LoongArchInstPrinter.cpp
   LoongArchMCAsmInfo.cpp
-  LoongArchMCTargetDesc.cpp
   LoongArchMCCodeEmitter.cpp
-  LoongArchMatInt.cpp
+  LoongArchMCExpr.cpp
+  LoongArchMCTargetDesc.cpp
+  LoongArchTargetStreamer.cpp
 
 LINK_COMPONENTS
   MC
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp
new file mode 100644
index 000000000000..18b67961a4d6
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.cpp
@@ -0,0 +1,106 @@
+//===---- LoongArchABIInfo.cpp - Information about LoongArch ABIs -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchABIInfo.h"
+#include "LoongArchRegisterInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCTargetOptions.h"
+
+using namespace llvm;
+
+namespace {
+static const MCPhysReg LP32IntRegs[4] = {LoongArch::A0, LoongArch::A1,
+                                         LoongArch::A2, LoongArch::A3};
+
+static const MCPhysReg LoongArch64IntRegs[8] = {
+    LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64,
+    LoongArch::A4_64, LoongArch::A5_64, LoongArch::A6_64, LoongArch::A7_64};
+}
+
+ArrayRef<MCPhysReg> LoongArchABIInfo::GetByValArgRegs() const {
+  if (IsLP32())
+    return makeArrayRef(LP32IntRegs);
+  if (IsLPX32() || IsLP64())
+    return makeArrayRef(LoongArch64IntRegs);
+  llvm_unreachable("Unhandled ABI");
+}
+
+ArrayRef<MCPhysReg> LoongArchABIInfo::GetVarArgRegs() const {
+  if (IsLP32())
+    return makeArrayRef(LP32IntRegs);
+  if (IsLPX32() || IsLP64())
+    return makeArrayRef(LoongArch64IntRegs);
+  llvm_unreachable("Unhandled ABI");
+}
+
+LoongArchABIInfo LoongArchABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
+                                                    const MCTargetOptions &Options) {
+  if (Options.getABIName().startswith("lp32"))
+    return LoongArchABIInfo::LP32();
+  if (Options.getABIName().startswith("lpx32"))
+    return LoongArchABIInfo::LPX32();
+  if (Options.getABIName().startswith("lp64"))
+    return LoongArchABIInfo::LP64();
+  assert(Options.getABIName().empty() && "Unknown ABI option for LoongArch");
+
+  if (TT.isLoongArch64())
+    return LoongArchABIInfo::LP64();
+  return LoongArchABIInfo::LP32();
+}
+
+unsigned LoongArchABIInfo::GetStackPtr() const {
+  return ArePtrs64bit() ? LoongArch::SP_64 : LoongArch::SP;
+}
+
+unsigned LoongArchABIInfo::GetFramePtr() const {
+  return ArePtrs64bit() ? LoongArch::FP_64 : LoongArch::FP;
+}
+
+unsigned LoongArchABIInfo::GetBasePtr() const {
+  return ArePtrs64bit() ? LoongArch::S7_64 : LoongArch::S7;
+}
+
+unsigned LoongArchABIInfo::GetNullPtr() const {
+  return ArePtrs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO;
+}
+
+unsigned LoongArchABIInfo::GetZeroReg() const {
+  return AreGprs64bit() ? LoongArch::ZERO_64 : LoongArch::ZERO;
+}
+
+unsigned LoongArchABIInfo::GetPtrAddOp() const {
+  return ArePtrs64bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
+}
+
+unsigned LoongArchABIInfo::GetPtrAddiOp() const {
+  return ArePtrs64bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+}
+
+unsigned LoongArchABIInfo::GetPtrSubOp() const {
+  return ArePtrs64bit() ? LoongArch::SUB_D : LoongArch::SUB_W;
+}
+
+unsigned LoongArchABIInfo::GetPtrAndOp() const {
+  return ArePtrs64bit() ? LoongArch::AND : LoongArch::AND32;
+}
+
+unsigned LoongArchABIInfo::GetGPRMoveOp() const {
+  return ArePtrs64bit() ? LoongArch::OR : LoongArch::OR32;
+}
+
+unsigned LoongArchABIInfo::GetEhDataReg(unsigned I) const {
+  static const unsigned EhDataReg[] = {
+    LoongArch::A0, LoongArch::A1, LoongArch::A2, LoongArch::A3
+  };
+  static const unsigned EhDataReg64[] = {
+    LoongArch::A0_64, LoongArch::A1_64, LoongArch::A2_64, LoongArch::A3_64
+  };
+
+  return IsLP64() ? EhDataReg64[I] : EhDataReg[I];
+}
+
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h
new file mode 100644
index 000000000000..334ee80ea98c
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchABIInfo.h
@@ -0,0 +1,76 @@
+//===---- LoongArchABIInfo.h - Information about LoongArch ABIs ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHABIINFO_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
+namespace llvm {
+
+template <typename T> class ArrayRef;
+class MCTargetOptions;
+class StringRef;
+class TargetRegisterClass;
+
+class LoongArchABIInfo {
+public:
+  enum class ABI { Unknown, LP32, LPX32, LP64 };
+
+protected:
+  ABI ThisABI;
+
+public:
+  LoongArchABIInfo(ABI ThisABI) : ThisABI(ThisABI) {}
+
+  static LoongArchABIInfo Unknown() { return LoongArchABIInfo(ABI::Unknown); }
+  static LoongArchABIInfo LP32() { return LoongArchABIInfo(ABI::LP32); }
+  static LoongArchABIInfo LPX32() { return LoongArchABIInfo(ABI::LPX32); }
+  static LoongArchABIInfo LP64() { return LoongArchABIInfo(ABI::LP64); }
+  static LoongArchABIInfo computeTargetABI(const Triple &TT, StringRef CPU,
+                                           const MCTargetOptions &Options);
+
+  bool IsKnown() const { return ThisABI != ABI::Unknown; }
+  bool IsLP32() const { return ThisABI == ABI::LP32; }
+  bool IsLPX32() const { return ThisABI == ABI::LPX32; }
+  bool IsLP64() const { return ThisABI == ABI::LP64; }
+  ABI GetEnumValue() const { return ThisABI; }
+
+  /// The registers to use for byval arguments.
+  ArrayRef<MCPhysReg> GetByValArgRegs() const;
+
+  /// The registers to use for the variable argument list.
+  ArrayRef<MCPhysReg> GetVarArgRegs() const;
+
+  /// Ordering of ABIs.
+  /// LoongArchGenSubtargetInfo.inc will use this to resolve conflicts when given
+  /// multiple ABI options.
+  bool operator<(const LoongArchABIInfo Other) const {
+    return ThisABI < Other.GetEnumValue();
+  }
+
+  unsigned GetStackPtr() const;
+  unsigned GetFramePtr() const;
+  unsigned GetBasePtr() const;
+  unsigned GetNullPtr() const;
+  unsigned GetZeroReg() const;
+  unsigned GetPtrAddOp() const;
+  unsigned GetPtrAddiOp() const;
+  unsigned GetPtrSubOp() const;
+  unsigned GetPtrAndOp() const;
+  unsigned GetGPRMoveOp() const;
+  inline bool ArePtrs64bit() const { return IsLP64(); }
+  inline bool AreGprs64bit() const { return IsLPX32() || IsLP64(); }
+
+  unsigned GetEhDataReg(unsigned I) const;
+};
+}
+
+#endif
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp
new file mode 100644
index 000000000000..96e43b2d339b
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.cpp
@@ -0,0 +1,64 @@
+//===- LoongArchAnalyzeImmediate.cpp - Analyze Immediates -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "LoongArchAnalyzeImmediate.h" +#include "LoongArch.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/Support/MathExtras.h" + +using namespace llvm; + +LoongArchAnalyzeImmediate::InstSeq +LoongArchAnalyzeImmediate::generateInstSeq(int64_t Val, bool Is64Bit) { + // Val: + // | hi32 | lo32 | + // +------------+------------------+------------------+-----------+ + // | Bits_52_63 | Bits_32_51 | Bits_12_31 | Bits_0_11 | + // +------------+------------------+------------------+-----------+ + // 63 52 51 32 31 12 11 0 + unsigned ORIOp = Is64Bit ? LoongArch::ORI : LoongArch::ORI32; + unsigned LU12IOp = Is64Bit ? LoongArch::LU12I_W : LoongArch::LU12I_W32; + unsigned ADDIOp = Is64Bit ? LoongArch::ADDI_W64 : LoongArch::ADDI_W; + unsigned LU32IOp = LoongArch::LU32I_D_R2; + unsigned LU52IOp = LoongArch::LU52I_D; + + int64_t Bits_52_63 = Val >> 52 & 0xFFF; + int64_t Bits_32_51 = Val >> 32 & 0xFFFFF; + int64_t Bits_12_31 = Val >> 12 & 0xFFFFF; + int64_t Bits_0_11 = Val & 0xFFF; + + InstSeq Insts; + + if (isInt<12>(Val) && Is64Bit) { + Insts.push_back(Inst(LoongArch::ADDI_D, SignExtend64<12>(Bits_0_11))); + return Insts; + } + + if (Bits_52_63 != 0 && SignExtend64<52>(Val) == 0) { + Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63))); + return Insts; + } + + if (Bits_12_31 == 0) + Insts.push_back(Inst(ORIOp, Bits_0_11)); + else if (SignExtend32<1>(Bits_0_11 >> 11) == SignExtend32<20>(Bits_12_31)) + Insts.push_back(Inst(ADDIOp, SignExtend64<12>(Bits_0_11))); + else { + Insts.push_back(Inst(LU12IOp, SignExtend64<20>(Bits_12_31))); + if (Bits_0_11 != 0) + Insts.push_back(Inst(ORIOp, Bits_0_11)); + } + + if (SignExtend32<1>(Bits_12_31 >> 19) != SignExtend32<20>(Bits_32_51)) + Insts.push_back(Inst(LU32IOp, SignExtend64<20>(Bits_32_51))); + + if (SignExtend32<1>(Bits_32_51 >> 19) != SignExtend32<12>(Bits_52_63)) + Insts.push_back(Inst(LU52IOp, SignExtend64<12>(Bits_52_63))); + + return Insts; +} diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h similarity index 62% rename from llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h rename to llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h index be1b425894de..3ff00f254c12 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAnalyzeImmediate.h @@ -1,4 +1,4 @@ -//===- LoongArchMatInt.h - Immediate materialisation - --------*- C++ -*--===// +//===- LoongArchAnalyzeImmediate.h - Analyze Immediates --------*- C++ -*--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,14 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_MATINT_H
-#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_MATINT_H
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H
 
 #include "llvm/ADT/SmallVector.h"
-#include <cstdint>
 
 namespace llvm {
-namespace LoongArchMatInt {
+namespace LoongArchAnalyzeImmediate {
 struct Inst {
   unsigned Opc;
   int64_t Imm;
@@ -23,8 +22,8 @@ using InstSeq = SmallVector<Inst, 4>;
 
 // Helper to generate an instruction sequence that will materialise the given
 // immediate value into a register.
-InstSeq generateInstSeq(int64_t Val);
-} // end namespace LoongArchMatInt
+InstSeq generateInstSeq(int64_t Val, bool Is64Bit);
+} // end namespace LoongArchAnalyzeImmediate
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHANALYZEIMMEDIATE_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index 94a068897f8c..75b7838bf185 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -1,4 +1,4 @@
-//===-- LoongArchAsmBackend.cpp - LoongArch Assembler Backend -*- C++ -*---===//
+//===-- LoongArchAsmBackend.cpp - LoongArch Asm Backend ----------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,41 +9,171 @@
 // This file implements the LoongArchAsmBackend class.
 //
 //===----------------------------------------------------------------------===//
+//
 
-#include "LoongArchAsmBackend.h"
-#include "llvm/MC/MCAsmLayout.h"
+#include "MCTargetDesc/LoongArchAsmBackend.h"
+#include "MCTargetDesc/LoongArchABIInfo.h"
+#include "MCTargetDesc/LoongArchFixupKinds.h"
+#include "MCTargetDesc/LoongArchMCExpr.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/Support/EndianStream.h"
-
-#define DEBUG_TYPE "loongarch-asmbackend"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
-void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm,
-                                     const MCFixup &Fixup,
-                                     const MCValue &Target,
-                                     MutableArrayRef<char> Data, uint64_t Value,
-                                     bool IsResolved,
-                                     const MCSubtargetInfo *STI) const {
-  // TODO: Apply the Value for given Fixup into the provided data fragment.
-  return;
+std::unique_ptr<MCObjectTargetWriter>
+LoongArchAsmBackend::createObjectTargetWriter() const {
+  return createLoongArchELFObjectWriter(TheTriple, IsLPX32);
 }
 
-bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
-                                                const MCFixup &Fixup,
-                                                const MCValue &Target) {
-  // TODO: Determine which relocation require special processing at linking
-  // time.
-  return false;
+/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided
+/// data fragment, at the offset specified by the fixup and following the
+/// fixup kind as appropriate.
+void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm,
+                                     const MCFixup &Fixup, const MCValue &Target,
+                                     MutableArrayRef<char> Data, uint64_t Value,
+                                     bool IsResolved,
+                                     const MCSubtargetInfo *STI) const {
+  MCFixupKind Kind = Fixup.getKind();
+  if (Kind > FirstTargetFixupKind)
+    return;
+
+  if (!Value)
+    return; // Doesn't change encoding.
+
+  // Where do we start in the object?
+  unsigned Offset = Fixup.getOffset();
+  // Number of bytes we need to fix up.
+  unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+
+  // Grab the current value, if any, from the bits.
+  uint64_t CurVal = 0;
+
+  for (unsigned i = 0; i != NumBytes; ++i)
+    CurVal |= (uint64_t)((uint8_t)Data[Offset + i]) << (i * 8);
+
+  uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
+  CurVal |= Value & Mask;
+
+  // Write out the fixed up bytes back to the code/data bits.
+  for (unsigned i = 0; i != NumBytes; ++i)
+    Data[Offset + i] = (uint8_t)((CurVal >> (i * 8)) & 0xff);
 }
 
+Optional<MCFixupKind> LoongArchAsmBackend::getFixupKind(StringRef Name) const {
+  if (STI.getTargetTriple().isOSBinFormatELF()) {
+    unsigned Type = llvm::StringSwitch<unsigned>(Name)
+#define ELF_RELOC(X, Y) .Case(#X, Y)
+#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def"
+#undef ELF_RELOC
+                        .Case("BFD_RELOC_NONE", ELF::R_LARCH_NONE)
+                        .Case("BFD_RELOC_32", ELF::R_LARCH_32)
+                        .Case("BFD_RELOC_64", ELF::R_LARCH_64)
+                        .Default(-1u);
+    if (Type != -1u)
+      return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
+  }
+  return None;
+}
+
+const MCFixupKindInfo &
+LoongArchAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+  const static MCFixupKindInfo Infos[] = {
+      // This table *must* be in the same order as the fixup_* kinds in
+      // LoongArchFixupKinds.h.
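+      // Note that every entry is (offset 0, size 0, flags 0): the assembler
+      // never patches instruction bits for these kinds itself; they are
+      // carried through as ELF relocations and left for the linker to resolve
+      // (applyFixup above returns early for the target-specific kinds).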
+ // + // name offset bits flags + { "fixup_LARCH_NONE", 0, 0, 0 }, + { "fixup_LARCH_SOP_PUSH_ABSOLUTE", 0, 0, 0}, + { "fixup_LARCH_SOP_PUSH_PCREL", 0, 0, 0}, + { "fixup_LARCH_SOP_PUSH_GPREL", 0, 0, 0}, + { "fixup_LARCH_SOP_PUSH_TLS_TPREL", 0, 0, 0}, + { "fixup_LARCH_SOP_PUSH_TLS_GOT", 0, 0, 0}, + { "fixup_LARCH_SOP_PUSH_TLS_GD", 0, 0, 0}, + { "fixup_LARCH_SOP_PUSH_PLT_PCREL", 0, 0, 0}, + { "fixup_LARCH_32", 0, 0, 0}, + { "fixup_LARCH_64", 0, 0, 0}, + { "fixup_LARCH_RELATIVE", 0, 0, 0}, + { "fixup_LARCH_COPY", 0, 0, 0}, + { "fixup_LARCH_JUMP_SLOT", 0, 0, 0}, + { "fixup_LARCH_TLS_DTPMOD32", 0, 0, 0}, + { "fixup_LARCH_TLS_DTPMOD64", 0, 0, 0}, + { "fixup_LARCH_TLS_DTPREL32", 0, 0, 0}, + { "fixup_LARCH_TLS_DTPREL64", 0, 0, 0}, + { "fixup_LARCH_TLS_TPREL32", 0, 0, 0}, + { "fixup_LARCH_TLS_TPREL64", 0, 0, 0}, + { "fixup_LARCH_IRELATIVE", 0, 0, 0}, + { "fixup_LARCH_MARK_LA", 0, 0, 0}, + { "fixup_LARCH_MARK_PCREL", 0, 0, 0}, + { "fixup_LARCH_SOP_PUSH_DUP", 0, 0, 0}, + { "fixup_LARCH_SOP_ASSERT", 0, 0, 0}, + { "fixup_LARCH_SOP_NOT", 0, 0, 0}, + { "fixup_LARCH_SOP_SUB", 0, 0, 0}, + { "fixup_LARCH_SOP_SL", 0, 0, 0}, + { "fixup_LARCH_SOP_SR", 0, 0, 0}, + { "fixup_LARCH_SOP_ADD", 0, 0, 0}, + { "fixup_LARCH_SOP_AND", 0, 0, 0}, + { "fixup_LARCH_SOP_IF_ELSE", 0, 0, 0}, + { "fixup_LARCH_SOP_POP_32_S_10_5", 0, 0, 0}, + { "fixup_LARCH_SOP_POP_32_U_10_12", 0, 0, 0}, + { "fixup_LARCH_SOP_POP_32_S_10_12", 0, 0, 0}, + { "fixup_LARCH_SOP_POP_32_S_10_16", 0, 0, 0}, + { "fixup_LARCH_SOP_POP_32_S_10_16_S2", 0, 0, 0}, + { "fixup_LARCH_SOP_POP_32_S_5_20", 0, 0, 0}, + { "fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2", 0, 0, 0}, + { "fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2", 0, 0, 0}, + { "fixup_LARCH_SOP_POP_32_U", 0, 0, 0}, + { "fixup_LARCH_ADD8", 0, 0, 0}, + { "fixup_LARCH_ADD16", 0, 0, 0}, + { "fixup_LARCH_ADD24", 0, 0, 0}, + { "fixup_LARCH_ADD32", 0, 0, 0}, + { "fixup_LARCH_ADD64", 0, 0, 0}, + { "fixup_LARCH_SUB8", 0, 0, 0}, + { "fixup_LARCH_SUB16", 0, 0, 0}, + { "fixup_LARCH_SUB24", 0, 0, 0}, + { "fixup_LARCH_SUB32", 0, 0, 0}, + { "fixup_LARCH_SUB64", 0, 0, 0}, + }; + + // Fixup kinds from .reloc directive are like R_LARCH_NONE. They do not + // require any extra processing. + if (Kind >= FirstLiteralRelocationKind) + return MCAsmBackend::getFixupKindInfo(FK_NONE); + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + + return Infos[Kind - FirstTargetFixupKind]; +} + +/// WriteNopData - Write an (optimal) nop sequence of Count bytes +/// to the given output. If the target cannot generate such a sequence, +/// it should return an error. +/// +/// \return - True on success. bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const { - // Check for byte count not multiple of instruction word size - if (Count % 4 != 0) + // Check for a less than instruction size number of bytes + if ((Count % 4) != 0) return false; // The nop on LoongArch is andi r0, r0, 0. @@ -53,16 +183,49 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, return true; } -std::unique_ptr -LoongArchAsmBackend::createObjectTargetWriter() const { - return createLoongArchELFObjectWriter(OSABI, Is64Bit); +bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { + // .reloc directive should force relocation. 
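+  // For example, an explicit ".reloc ., R_LARCH_ADD32, sym" written in
+  // assembly arrives here as FirstLiteralRelocationKind + R_LARCH_ADD32 and
+  // is always emitted (the directive shown is illustrative, not taken from
+  // this patch's tests).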
+ if (Fixup.getKind() >= FirstLiteralRelocationKind) + return true; + + const unsigned FixupKind = Fixup.getKind(); + switch (FixupKind) { + default: + return false; + // All these relocations require special processing + // at linking time. Delegate this work to a linker. + case LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL: + case LoongArch::fixup_LARCH_SOP_PUSH_PCREL: + case LoongArch::fixup_LARCH_SOP_PUSH_GPREL: + case LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD: + case LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT: + case LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL: + case LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE: + case LoongArch::fixup_LARCH_SOP_IF_ELSE: + case LoongArch::fixup_LARCH_SOP_ADD: + case LoongArch::fixup_LARCH_SOP_SUB: + case LoongArch::fixup_LARCH_SOP_AND: + case LoongArch::fixup_LARCH_SOP_SL: + case LoongArch::fixup_LARCH_SOP_SR: + case LoongArch::fixup_LARCH_SOP_POP_32_S_10_5: + case LoongArch::fixup_LARCH_SOP_POP_32_S_5_20: + case LoongArch::fixup_LARCH_SOP_POP_32_S_10_12: + case LoongArch::fixup_LARCH_SOP_POP_32_U_10_12: + case LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2: + case LoongArch::fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2: + case LoongArch::fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2: + return true; + } } MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const MCTargetOptions &Options) { - const Triple &TT = STI.getTargetTriple(); - uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); - return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit()); + LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI( + STI.getTargetTriple(), STI.getCPU(), Options); + return new LoongArchAsmBackend(STI, T, MRI, STI.getTargetTriple(), + STI.getCPU(), ABI.IsLPX32()); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h index a5f0b816c972..d96791f7d2cb 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h @@ -1,4 +1,4 @@ -//===-- LoongArchAsmBackend.h - LoongArch Assembler Backend ---*- C++ -*---===// +//===-- LoongArchAsmBackend.h - LoongArch Asm Backend ------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,55 +9,84 @@ // This file defines the LoongArchAsmBackend class. 
 //
 //===----------------------------------------------------------------------===//
+//
 
 #ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
 #define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
 
-#include "MCTargetDesc/LoongArchBaseInfo.h"
-#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "MCTargetDesc/LoongArchFixupKinds.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
 
 namespace llvm {
 
+class MCAssembler;
+struct MCFixupKindInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSymbolELF;
+class Target;
+
 class LoongArchAsmBackend : public MCAsmBackend {
-  uint8_t OSABI;
-  bool Is64Bit;
+  const MCSubtargetInfo &STI;
+  Triple TheTriple;
+  bool IsLPX32;
 
 public:
-  LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
-      : MCAsmBackend(support::little), OSABI(OSABI), Is64Bit(Is64Bit) {}
-  ~LoongArchAsmBackend() override {}
+  LoongArchAsmBackend(const MCSubtargetInfo &STI, const Target &T,
+                      const MCRegisterInfo &MRI, const Triple &TT,
+                      StringRef CPU, bool LPX32)
+      : MCAsmBackend(support::little), STI(STI), TheTriple(TT), IsLPX32(LPX32) {
+    assert(TT.isLittleEndian());
+  }
+
+  std::unique_ptr<MCObjectTargetWriter>
+  createObjectTargetWriter() const override;
 
   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                   const MCValue &Target, MutableArrayRef<char> Data,
                   uint64_t Value, bool IsResolved,
                   const MCSubtargetInfo *STI) const override;
 
-  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
-                             const MCValue &Target) override;
+  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
+  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+
+  unsigned getNumFixupKinds() const override {
+    return LoongArch::NumTargetFixupKinds;
+  }
+
+  /// @name Target Relaxation Interfaces
+  /// @{
 
-  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
-                            const MCRelaxableFragment *DF,
-                            const MCAsmLayout &Layout) const override {
+  /// MayNeedRelaxation - Check whether the given instruction may need
+  /// relaxation.
+  ///
+  /// \param Inst - The instruction to test.
+  bool mayNeedRelaxation(const MCInst &Inst,
+                         const MCSubtargetInfo &STI) const override {
     return false;
   }
 
-  unsigned getNumFixupKinds() const override {
-    // FIXME: Implement this when we define fixup kind
-    return 0;
+  /// fixupNeedsRelaxation - Target specific predicate for whether a given
+  /// fixup requires the associated instruction to be relaxed.
+  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+                            const MCRelaxableFragment *DF,
+                            const MCAsmLayout &Layout) const override {
+    // FIXME.
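+    // Note: LoongArch MC implements no instruction relaxation yet; because
+    // mayNeedRelaxation() above always returns false, this predicate should
+    // never be reached, hence the unreachable that follows.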
+    llvm_unreachable("RelaxInstruction() unimplemented");
+    return false;
   }
 
-  void relaxInstruction(MCInst &Inst,
-                        const MCSubtargetInfo &STI) const override {}
+  /// @}
 
   bool writeNopData(raw_ostream &OS, uint64_t Count,
                     const MCSubtargetInfo *STI) const override;
 
-  std::unique_ptr<MCObjectTargetWriter>
-  createObjectTargetWriter() const override;
-};
-} // end namespace llvm
+  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+                             const MCValue &Target) override;
+
+}; // class LoongArchAsmBackend
+
+} // namespace
 
-#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHASMBACKEND_H
+#endif
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
deleted file mode 100644
index de2ba2833414..000000000000
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-//= LoongArchBaseInfo.cpp - Top level definitions for LoongArch MC -*- C++ -*-//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements helper functions for the LoongArch target useful for the
-// compiler back-end and the MC libraries.
-//
-//===----------------------------------------------------------------------===//
-
-#include "LoongArchBaseInfo.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-
-namespace llvm {
-
-namespace LoongArchABI {
-
-ABI getTargetABI(StringRef ABIName) {
-  auto TargetABI = StringSwitch<ABI>(ABIName)
-                       .Case("ilp32s", ABI_ILP32S)
-                       .Case("ilp32f", ABI_ILP32F)
-                       .Case("ilp32d", ABI_ILP32D)
-                       .Case("lp64s", ABI_LP64S)
-                       .Case("lp64f", ABI_LP64F)
-                       .Case("lp64d", ABI_LP64D)
-                       .Default(ABI_Unknown);
-  return TargetABI;
-}
-
-// FIXME: other register?
-MCRegister getBPReg() { return LoongArch::R31; }
-
-} // end namespace LoongArchABI
-
-} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
index fee247a0c02c..707333c18fbd 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -1,4 +1,4 @@
-//=- LoongArchBaseInfo.h - Top level definitions for LoongArch MC -*- C++ -*-=//
+//===-- LoongArchBaseInfo.h - Top level definitions for LoongArch MC ------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,39 +6,123 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains small standalone enum definitions and helper function
-// definitions for the LoongArch target useful for the compiler back-end and the
-// MC libraries.
+// This file contains small standalone helper functions and enum definitions for
+// the LoongArch target useful for the compiler back-end and the MC libraries.
// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H #define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H -#include "MCTargetDesc/LoongArchMCTargetDesc.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/SubtargetFeature.h" +#include "LoongArchFixupKinds.h" +#include "LoongArchMCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" namespace llvm { -namespace LoongArchABI { -enum ABI { - ABI_ILP32S, - ABI_ILP32F, - ABI_ILP32D, - ABI_LP64S, - ABI_LP64F, - ABI_LP64D, - ABI_Unknown -}; +/// LoongArchII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace LoongArchII { + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // LoongArch Specific MachineOperand flags. -ABI getTargetABI(StringRef ABIName); + MO_NO_FLAG, -// Returns the register used to hold the stack pointer after realignment. -MCRegister getBPReg(); -} // end namespace LoongArchABI + /// MO_ABS_XXX - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HI, + MO_ABS_LO, + MO_ABS_HIGHER, + MO_ABS_HIGHEST, -} // end namespace llvm + /// MO_PCREL_XXX - Represents the hi or low part of an pc relative symbol + /// address. + MO_PCREL_HI, + MO_PCREL_LO, + // with tmp reg + MO_PCREL_RRHI, + MO_PCREL_RRLO, + MO_PCREL_RRHIGHER, + MO_PCREL_RRHIGHEST, -#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHBASEINFO_H + // LArch Tls gd and ld + MO_TLSGD_HI, + MO_TLSGD_LO, + // with tmp reg + MO_TLSGD_RRHI, + MO_TLSGD_RRLO, + MO_TLSGD_RRHIGHER, + MO_TLSGD_RRHIGHEST, + + // LArch thread tprel (ie/le) + // LArch Tls ie + MO_TLSIE_HI, + MO_TLSIE_LO, + // with tmp reg + MO_TLSIE_RRHI, + MO_TLSIE_RRLO, + MO_TLSIE_RRHIGHER, + MO_TLSIE_RRHIGHEST, + // LArch Tls le + MO_TLSLE_HI, + MO_TLSLE_LO, + MO_TLSLE_HIGHER, + MO_TLSLE_HIGHEST, + + // Loongarch got + MO_GOT_HI, + MO_GOT_LO, + // with tmp reg + MO_GOT_RRHI, + MO_GOT_RRLO, + MO_GOT_RRHIGHER, + MO_GOT_RRHIGHEST, + + MO_CALL_HI, + MO_CALL_LO, + }; + + enum { + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for + // LoongArch instructions. + // + + // Pseudo - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// FrmR - This form is for instructions of the format R. + FrmR = 1, + /// FrmI - This form is for instructions of the format I. + FrmI = 2, + /// FrmJ - This form is for instructions of the format J. + FrmJ = 3, + /// FrmFR - This form is for instructions of the format FR. + FrmFR = 4, + /// FrmFI - This form is for instructions of the format FI. + FrmFI = 5, + /// FrmOther - This form is for instructions that have no specific format. + FrmOther = 6, + + FormMask = 15, + /// IsCTI - Instruction is a Control Transfer Instruction. + IsCTI = 1 << 4, + /// HasForbiddenSlot - Instruction has a forbidden slot. 
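+    /// (A "forbidden slot" is the instruction slot immediately following
+    /// certain compact branches, in which no control-transfer instruction may
+    /// be placed; the concept is carried over from the MIPS lineage of this
+    /// port.)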
+    HasForbiddenSlot = 1 << 5,
+    /// IsPCRelativeLoad - A Load instruction with implicit source register
+    /// ($pc) with explicit offset and destination register
+    IsPCRelativeLoad = 1 << 6,
+    /// HasFCCRegOperand - Instruction uses an $fcc register.
+    HasFCCRegOperand = 1 << 7
+
+  };
+}
+}
+
+#endif
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index 1850b0d8a756..c08f3ba0c5d3 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -1,4 +1,4 @@
-//===-- LoongArchELFObjectWriter.cpp - LoongArch ELF Writer ---*- C++ -*---===//
+//===-- LoongArchELFObjectWriter.cpp - LoongArch ELF Writer -------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,59 +6,184 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/LoongArchFixupKinds.h"
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <list>
+#include <utility>
+
+#define DEBUG_TYPE "loongarch-elf-object-writer"
 
 using namespace llvm;
 
 namespace {
+
 class LoongArchELFObjectWriter : public MCELFObjectTargetWriter {
 public:
-  LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit);
+  LoongArchELFObjectWriter(uint8_t OSABI, bool HasRelocationAddend, bool Is64);
 
-  ~LoongArchELFObjectWriter() override;
+  ~LoongArchELFObjectWriter() override = default;
 
-  // Return true if the given relocation must be with a symbol rather than
-  // section plus offset.
+  unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+                        const MCFixup &Fixup, bool IsPCRel) const override;
   bool needsRelocateWithSymbol(const MCSymbol &Sym,
                                unsigned Type) const override {
     return true;
   }
-
-protected:
-  unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
-                        const MCFixup &Fixup, bool IsPCRel) const override;
 };
-} // end namespace
+
+} // end anonymous namespace
 
-LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit)
-    : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_LOONGARCH,
-                              /*HasRelocationAddend*/ true) {}
+LoongArchELFObjectWriter::LoongArchELFObjectWriter(uint8_t OSABI,
+                                                   bool HasRelocationAddend, bool Is64)
+    : MCELFObjectTargetWriter(Is64, OSABI, ELF::EM_LOONGARCH, HasRelocationAddend) {}
 
-LoongArchELFObjectWriter::~LoongArchELFObjectWriter() {}
 
 unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx,
                                                 const MCValue &Target,
                                                 const MCFixup &Fixup,
                                                 bool IsPCRel) const {
-  // Determine the type of the relocation
-  unsigned Kind = Fixup.getTargetKind();
+  // Determine the type of the relocation.
+  unsigned Kind = (unsigned)Fixup.getKind();
   if (Kind >= FirstLiteralRelocationKind)
     return Kind - FirstLiteralRelocationKind;
   switch (Kind) {
-  // TODO: Implement this when we defined fixup kind.
- default: - return ELF::R_LARCH_NONE; + default: + return ELF::R_LARCH_NONE; + //llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + case LoongArch::fixup_LARCH_32: + return ELF::R_LARCH_32; + case FK_GPRel_4: + case FK_Data_8: + case LoongArch::fixup_LARCH_64: + return ELF::R_LARCH_64; + case LoongArch::fixup_LARCH_NONE: + return ELF::R_LARCH_NONE; + case LoongArch::fixup_LARCH_RELATIVE: + return ELF::R_LARCH_RELATIVE; + case LoongArch::fixup_LARCH_COPY: + return ELF::R_LARCH_COPY; + case LoongArch::fixup_LARCH_JUMP_SLOT: + return ELF::R_LARCH_JUMP_SLOT; + case LoongArch::fixup_LARCH_TLS_DTPMOD32: + return ELF::R_LARCH_TLS_DTPMOD32; + case LoongArch::fixup_LARCH_TLS_DTPMOD64: + return ELF::R_LARCH_TLS_DTPMOD64; + case LoongArch::fixup_LARCH_TLS_DTPREL32: + return ELF::R_LARCH_TLS_DTPREL32; + case LoongArch::fixup_LARCH_TLS_DTPREL64: + return ELF::R_LARCH_TLS_DTPREL64; + case LoongArch::fixup_LARCH_TLS_TPREL32: + return ELF::R_LARCH_TLS_TPREL32; + case LoongArch::fixup_LARCH_TLS_TPREL64: + return ELF::R_LARCH_TLS_TPREL64; + case LoongArch::fixup_LARCH_IRELATIVE: + return ELF::R_LARCH_IRELATIVE; + case LoongArch::fixup_LARCH_MARK_LA: + return ELF::R_LARCH_MARK_LA; + case LoongArch::fixup_LARCH_MARK_PCREL: + return ELF::R_LARCH_MARK_PCREL; + case LoongArch::fixup_LARCH_SOP_PUSH_PCREL: + return ELF::R_LARCH_SOP_PUSH_PCREL; + case LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE: + return ELF::R_LARCH_SOP_PUSH_ABSOLUTE; + case LoongArch::fixup_LARCH_SOP_PUSH_DUP: + return ELF::R_LARCH_SOP_PUSH_DUP; + case LoongArch::fixup_LARCH_SOP_PUSH_GPREL: + return ELF::R_LARCH_SOP_PUSH_GPREL; + case LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL: + return ELF::R_LARCH_SOP_PUSH_TLS_TPREL; + case LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT: + return ELF::R_LARCH_SOP_PUSH_TLS_GOT; + case LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD: + return ELF::R_LARCH_SOP_PUSH_TLS_GD; + case LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL: + return ELF::R_LARCH_SOP_PUSH_PLT_PCREL; + case LoongArch::fixup_LARCH_SOP_ASSERT: + return ELF::R_LARCH_SOP_ASSERT; + case LoongArch::fixup_LARCH_SOP_NOT: + return ELF::R_LARCH_SOP_NOT; + case LoongArch::fixup_LARCH_SOP_SUB: + return ELF::R_LARCH_SOP_SUB; + case LoongArch::fixup_LARCH_SOP_SL: + return ELF::R_LARCH_SOP_SL; + case LoongArch::fixup_LARCH_SOP_SR: + return ELF::R_LARCH_SOP_SR; + case LoongArch::fixup_LARCH_SOP_ADD: + return ELF::R_LARCH_SOP_ADD; + case LoongArch::fixup_LARCH_SOP_AND: + return ELF::R_LARCH_SOP_AND; + case LoongArch::fixup_LARCH_SOP_IF_ELSE: + return ELF::R_LARCH_SOP_IF_ELSE; + case LoongArch::fixup_LARCH_SOP_POP_32_S_10_5: + return ELF::R_LARCH_SOP_POP_32_S_10_5; + case LoongArch::fixup_LARCH_SOP_POP_32_U_10_12: + return ELF::R_LARCH_SOP_POP_32_U_10_12; + case LoongArch::fixup_LARCH_SOP_POP_32_S_10_12: + return ELF::R_LARCH_SOP_POP_32_S_10_12; + case LoongArch::fixup_LARCH_SOP_POP_32_S_10_16: + return ELF::R_LARCH_SOP_POP_32_S_10_16; + case LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2: + return ELF::R_LARCH_SOP_POP_32_S_10_16_S2; + case LoongArch::fixup_LARCH_SOP_POP_32_S_5_20: + return ELF::R_LARCH_SOP_POP_32_S_5_20; + case LoongArch::fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2: + return ELF::R_LARCH_SOP_POP_32_S_0_5_10_16_S2; + case LoongArch::fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2: + return ELF::R_LARCH_SOP_POP_32_S_0_10_10_16_S2; + case LoongArch::fixup_LARCH_SOP_POP_32_U: + return ELF::R_LARCH_SOP_POP_32_U; + case LoongArch::fixup_LARCH_ADD8: + return ELF::R_LARCH_ADD8; + case LoongArch::fixup_LARCH_ADD16: + return ELF::R_LARCH_ADD16; + case LoongArch::fixup_LARCH_ADD24: + 
    return ELF::R_LARCH_ADD24;
+  case LoongArch::fixup_LARCH_ADD32:
+    return ELF::R_LARCH_ADD32;
+  case LoongArch::fixup_LARCH_ADD64:
+    return ELF::R_LARCH_ADD64;
+  case LoongArch::fixup_LARCH_SUB8:
+    return ELF::R_LARCH_SUB8;
+  case LoongArch::fixup_LARCH_SUB16:
+    return ELF::R_LARCH_SUB16;
+  case LoongArch::fixup_LARCH_SUB24:
+    return ELF::R_LARCH_SUB24;
+  case LoongArch::fixup_LARCH_SUB32:
+    return ELF::R_LARCH_SUB32;
+  case LoongArch::fixup_LARCH_SUB64:
+    return ELF::R_LARCH_SUB64;
+  case LoongArch::fixup_LARCH_GNU_VTINHERIT:
+    return ELF::R_LARCH_GNU_VTINHERIT;
+  case LoongArch::fixup_LARCH_GNU_VTENTRY:
+    return ELF::R_LARCH_GNU_VTENTRY;
   }
 }
 
 std::unique_ptr<MCObjectTargetWriter>
-llvm::createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit) {
-  return std::make_unique<LoongArchELFObjectWriter>(OSABI, Is64Bit);
+llvm::createLoongArchELFObjectWriter(const Triple &TT, bool IsLPX32) {
+  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+  bool IsLP64 = TT.isArch64Bit() && !IsLPX32;
+  bool HasRelocationAddend = TT.isArch64Bit();
+  return std::make_unique<LoongArchELFObjectWriter>(OSABI, HasRelocationAddend,
+                                                    IsLP64);
 }
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp
new file mode 100644
index 000000000000..a74fee3f8108
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp
@@ -0,0 +1,138 @@
+//===-------- LoongArchELFStreamer.cpp - ELF Object Output ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchELFStreamer.h"
+#include "LoongArchFixupKinds.h"
+#include "LoongArchTargetStreamer.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+
+using namespace llvm;
+
+static std::pair<unsigned, unsigned> getRelocPairForSize(unsigned Size) {
+  switch (Size) {
+  default:
+    llvm_unreachable("unsupported fixup size");
+  case 1:
+    return std::make_pair(LoongArch::fixup_LARCH_ADD8,
+                          LoongArch::fixup_LARCH_SUB8);
+  case 2:
+    return std::make_pair(LoongArch::fixup_LARCH_ADD16,
+                          LoongArch::fixup_LARCH_SUB16);
+  case 4:
+    return std::make_pair(LoongArch::fixup_LARCH_ADD32,
+                          LoongArch::fixup_LARCH_SUB32);
+  case 8:
+    return std::make_pair(LoongArch::fixup_LARCH_ADD64,
+                          LoongArch::fixup_LARCH_SUB64);
+  }
+}
+
+static bool requiresFixups(MCContext &C, const MCExpr *Value,
+                           const MCExpr *&LHS, const MCExpr *&RHS,
+                           LoongArchELFStreamer *MCS) {
+  const auto *MBE = dyn_cast<MCBinaryExpr>(Value);
+  if (MBE == nullptr)
+    return false;
+
+  MCValue E;
+  if (!Value->evaluateAsRelocatable(E, nullptr, nullptr))
+    return false;
+  if (E.getSymA() == nullptr || E.getSymB() == nullptr)
+    return false;
+
+  const auto &A = E.getSymA()->getSymbol();
+  const auto &B = E.getSymB()->getSymbol();
+
+  if (A.getName().empty() && B.getName().empty())
+    return false;
+
+  if (!A.isInSection() && !B.isInSection() &&
+      !A.getName().empty() && !B.getName().empty())
+    return false;
+
+  LHS =
+      MCBinaryExpr::create(MCBinaryExpr::Add, MCSymbolRefExpr::create(&A, C),
+                           MCConstantExpr::create(E.getConstant(), C), C);
+  RHS = E.getSymB();
+
+  bool isCheckInstr =
+      StringSwitch<bool>(MCS->getCurrentSectionOnly()->getName())
+          .Case(".debug_aranges", true)
+          .Default(false);
+
+  return (A.isInSection()
+              ? (isCheckInstr ? A.getSection().hasInstructions() : true)
+              : !A.getName().empty()) ||
+         (B.isInSection() ? B.getSection().hasInstructions()
+                          : !B.getName().empty());
+}
+
+LoongArchELFStreamer::LoongArchELFStreamer(MCContext &Context,
+                                           std::unique_ptr<MCAsmBackend> MAB,
+                                           std::unique_ptr<MCObjectWriter> OW,
+                                           std::unique_ptr<MCCodeEmitter> Emitter)
+    : MCELFStreamer(Context, std::move(MAB), std::move(OW),
+                    std::move(Emitter)) {
+}
+
+void LoongArchELFStreamer::emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+  Frame.Begin = getContext().createTempSymbol();
+  MCELFStreamer::emitLabel(Frame.Begin);
+}
+
+MCSymbol *LoongArchELFStreamer::emitCFILabel() {
+  MCSymbol *Label = getContext().createTempSymbol("cfi", true);
+  MCELFStreamer::emitLabel(Label);
+  return Label;
+}
+
+void LoongArchELFStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+  Frame.End = getContext().createTempSymbol();
+  MCELFStreamer::emitLabel(Frame.End);
+}
+
+void LoongArchELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
+                                         SMLoc Loc) {
+  const MCExpr *A, *B;
+  if (!requiresFixups(getContext(), Value, A, B, this))
+    return MCELFStreamer::emitValueImpl(Value, Size, Loc);
+
+  MCStreamer::emitValueImpl(Value, Size, Loc);
+
+  MCDataFragment *DF = getOrCreateDataFragment();
+  flushPendingLabels(DF, DF->getContents().size());
+  MCDwarfLineEntry::make(this, getCurrentSectionOnly());
+
+  unsigned Add, Sub;
+  std::tie(Add, Sub) = getRelocPairForSize(Size);
+
+  DF->getFixups().push_back(MCFixup::create(
+      DF->getContents().size(), A, static_cast<MCFixupKind>(Add), Loc));
+  DF->getFixups().push_back(MCFixup::create(
+      DF->getContents().size(), B, static_cast<MCFixupKind>(Sub), Loc));
+
+  DF->getContents().resize(DF->getContents().size() + Size, 0);
+}
+
+MCELFStreamer *llvm::createLoongArchELFStreamer(
+    MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+    std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
+    bool RelaxAll) {
+  return new LoongArchELFStreamer(Context, std::move(MAB), std::move(OW),
+                                  std::move(Emitter));
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h
new file mode 100644
index 000000000000..875cebcb7400
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h
@@ -0,0 +1,53 @@
+//===- LoongArchELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a custom MCELFStreamer which allows us to insert some hooks before
+// emitting data into an actual object file.
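+// In particular, emitValueImpl (see LoongArchELFStreamer.cpp above) rewrites
+// a symbol-difference value A - B into the paired ADD/SUB fixups chosen by
+// getRelocPairForSize, so that the static linker, rather than the assembler,
+// computes the difference.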
+  DF->getFixups().push_back(MCFixup::create(
+      DF->getContents().size(), A, static_cast<MCFixupKind>(Add), Loc));
+  DF->getFixups().push_back(MCFixup::create(
+      DF->getContents().size(), B, static_cast<MCFixupKind>(Sub), Loc));
+
+  DF->getContents().resize(DF->getContents().size() + Size, 0);
+}
+
+MCELFStreamer *llvm::createLoongArchELFStreamer(
+    MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+    std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
+    bool RelaxAll) {
+  return new LoongArchELFStreamer(Context, std::move(MAB), std::move(OW),
+                                  std::move(Emitter));
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h
new file mode 100644
index 000000000000..875cebcb7400
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.h
@@ -0,0 +1,53 @@
+//===- LoongArchELFStreamer.h - ELF Object Output --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a custom MCELFStreamer which allows us to insert some hooks before
+// emitting data into an actual object file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include <memory>
+
+namespace llvm {
+
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCSubtargetInfo;
+struct MCDwarfFrameInfo;
+
+class LoongArchELFStreamer : public MCELFStreamer {
+
+public:
+  LoongArchELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
+                       std::unique_ptr<MCObjectWriter> OW,
+                       std::unique_ptr<MCCodeEmitter> Emitter);
+
+  /// Overridden so that values requiring paired ADD/SUB fixups are not
+  /// emitted as plain constants.
+  void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
+
+  // Overriding these functions allows us to avoid recording these labels in
+  // emitLabel.
+  void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
+  void emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
+  MCSymbol *emitCFILabel() override;
+};
+
+MCELFStreamer *createLoongArchELFStreamer(MCContext &Context,
+                                          std::unique_ptr<MCAsmBackend> MAB,
+                                          std::unique_ptr<MCObjectWriter> OW,
+                                          std::unique_ptr<MCCodeEmitter> Emitter,
+                                          bool RelaxAll);
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHELFSTREAMER_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
new file mode 100644
index 000000000000..e0e1200d8bad
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h
@@ -0,0 +1,90 @@
+//===- LoongArchFixupKinds.h - LoongArch Specific Fixup Entries -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace LoongArch {
+  // Although most of the current fixup types reflect a unique relocation,
+  // one can have multiple fixup types for a given relocation, so the fixups
+  // need to be uniquely named.
+  //
+  // This table *must* be in the same order as
+  // MCFixupKindInfo Infos[LoongArch::NumTargetFixupKinds]
+  // in LoongArchAsmBackend.cpp.
+  //
+  enum Fixups {
+    // R_LARCH_NONE.
+ fixup_LARCH_NONE = FirstTargetFixupKind, + + // reloc_hint + fixup_LARCH_SOP_PUSH_ABSOLUTE, + fixup_LARCH_SOP_PUSH_PCREL, + fixup_LARCH_SOP_PUSH_GPREL, + fixup_LARCH_SOP_PUSH_TLS_TPREL, + fixup_LARCH_SOP_PUSH_TLS_GOT, + fixup_LARCH_SOP_PUSH_TLS_GD, + fixup_LARCH_SOP_PUSH_PLT_PCREL, + // fixup methods + fixup_LARCH_32, + fixup_LARCH_64, + fixup_LARCH_RELATIVE, + fixup_LARCH_COPY, + fixup_LARCH_JUMP_SLOT, + fixup_LARCH_TLS_DTPMOD32, + fixup_LARCH_TLS_DTPMOD64, + fixup_LARCH_TLS_DTPREL32, + fixup_LARCH_TLS_DTPREL64, + fixup_LARCH_TLS_TPREL32, + fixup_LARCH_TLS_TPREL64, + fixup_LARCH_IRELATIVE, + fixup_LARCH_MARK_LA, + fixup_LARCH_MARK_PCREL, + fixup_LARCH_SOP_PUSH_DUP, + fixup_LARCH_SOP_ASSERT, + fixup_LARCH_SOP_NOT, + fixup_LARCH_SOP_SUB, + fixup_LARCH_SOP_SL, + fixup_LARCH_SOP_SR, + fixup_LARCH_SOP_ADD, + fixup_LARCH_SOP_AND, + fixup_LARCH_SOP_IF_ELSE, + fixup_LARCH_SOP_POP_32_S_10_5, + fixup_LARCH_SOP_POP_32_U_10_12, + fixup_LARCH_SOP_POP_32_S_10_12, + fixup_LARCH_SOP_POP_32_S_10_16, + fixup_LARCH_SOP_POP_32_S_10_16_S2, + fixup_LARCH_SOP_POP_32_S_5_20, + fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2, + fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2, + fixup_LARCH_SOP_POP_32_U, + fixup_LARCH_ADD8, + fixup_LARCH_ADD16, + fixup_LARCH_ADD24, + fixup_LARCH_ADD32, + fixup_LARCH_ADD64, + fixup_LARCH_SUB8, + fixup_LARCH_SUB16, + fixup_LARCH_SUB24, + fixup_LARCH_SUB32, + fixup_LARCH_SUB64, + fixup_LARCH_GNU_VTINHERIT, + fixup_LARCH_GNU_VTENTRY, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; +} // namespace LoongArch +} // namespace llvm + + +#endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp index 66183868f468..065020ad4be5 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp @@ -1,4 +1,4 @@ -//===- LoongArchInstPrinter.cpp - Convert LoongArch MCInst to asm syntax --===// +//===-- LoongArchInstPrinter.cpp - Convert LoongArch MCInst to assembly syntax ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,53 +11,242 @@ //===----------------------------------------------------------------------===// #include "LoongArchInstPrinter.h" -#include "LoongArchBaseInfo.h" -#include "llvm/MC/MCAsmInfo.h" +#include "MCTargetDesc/LoongArchMCExpr.h" +#include "LoongArchInstrInfo.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; -#define DEBUG_TYPE "loongarch-asm-printer" +#define DEBUG_TYPE "asm-printer" -// Include the auto-generated portion of the assembly writer. 
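+// PRINT_ALIAS_INSTR makes the tblgen'erated alias printer (printAliasInstr)
+// available from LoongArchGenAsmWriter.inc below.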
 #define PRINT_ALIAS_INSTR
 #include "LoongArchGenAsmWriter.inc"
 
+template <unsigned R>
+static bool isReg(const MCInst &MI, unsigned OpNo) {
+  assert(MI.getOperand(OpNo).isReg() && "Register operand expected.");
+  return MI.getOperand(OpNo).getReg() == R;
+}
+
+const char *LoongArch::LoongArchFCCToString(LoongArch::CondCode CC) {
+  switch (CC) {
+  case FCOND_T:
+  case FCOND_F:    return "caf";
+  case FCOND_OR:
+  case FCOND_UN:   return "cun";
+  case FCOND_UNE:
+  case FCOND_OEQ:  return "ceq";
+  case FCOND_ONE:
+  case FCOND_UEQ:  return "cueq";
+  case FCOND_UGE:
+  case FCOND_OLT:  return "clt";
+  case FCOND_OGE:
+  case FCOND_ULT:  return "cult";
+  case FCOND_UGT:
+  case FCOND_OLE:  return "cle";
+  case FCOND_OGT:
+  case FCOND_ULE:  return "cule";
+  case FCOND_ST:
+  case FCOND_SF:   return "saf";
+  case FCOND_GLE:
+  case FCOND_NGLE: return "sun";
+  case FCOND_SEQ:  return "seq";
+  case FCOND_SNE:  return "sne";
+  case FCOND_GL:
+  case FCOND_NGL:  return "sueq";
+  case FCOND_NLT:
+  case FCOND_LT:   return "slt";
+  case FCOND_GE:
+  case FCOND_NGE:  return "sult";
+  case FCOND_NLE:
+  case FCOND_LE:   return "sle";
+  case FCOND_GT:
+  case FCOND_NGT:  return "sule";
+  case FCOND_CNE:  return "cne";
+  case FCOND_COR:  return "cor";
+  case FCOND_SOR:  return "sor";
+  case FCOND_CUNE: return "cune";
+  case FCOND_SUNE: return "sune";
+  }
+  llvm_unreachable("Impossible condition code!");
+}
+
+void LoongArchInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  OS << '$' << StringRef(getRegisterName(RegNo)).lower();
+}
+
 void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address,
                                      StringRef Annot, const MCSubtargetInfo &STI,
                                      raw_ostream &O) {
-  if (!printAliasInstr(MI, Address, STI, O))
-    printInstruction(MI, Address, STI, O);
-  printAnnotation(O, Annot);
-}
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case LoongArch::PCADDU12I_ri:
+  case LoongArch::PCADDU12I_rii:
+  case LoongArch::LU12I_W_ri:
+    printLoadAddr(MI, O);
+    return;
+  case LoongArch::ADD_D_rrr:
+  case LoongArch::LDX_D_rrr:
+  case LoongArch::ADDI_D_rri:
+  case LoongArch::ADDI_D_rrii:
+  case LoongArch::LD_D_rri:
+  case LoongArch::LD_D_rrii:
+  case LoongArch::ORI_rri:
+  case LoongArch::ORI_rrii:
+  case LoongArch::LU32I_D_ri:
+  case LoongArch::LU32I_D_rii:
+  case LoongArch::LU52I_D_rri:
+  case LoongArch::LU52I_D_rrii:
+    O << "\t# la expanded slot";
+    return;
+  }
 
-void LoongArchInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
-  O << '$' << getRegisterName(RegNo);
+  // Try to print any aliases first.
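+  // printAliasInstr covers the tblgen'erated aliases; printAlias below
+  // handles the hand-written ones (e.g. "or $rd, $rj, $zero" -> "move").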
+  if (!printAliasInstr(MI, Address, O) && !printAlias(*MI, O))
+    printInstruction(MI, Address, O);
+  printAnnotation(O, Annot);
 }
 
 void LoongArchInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                        const MCSubtargetInfo &STI,
-                                        raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNo);
+                                        raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    printRegName(O, Op.getReg());
+    return;
+  }
 
-  if (MO.isReg()) {
-    printRegName(O, MO.getReg());
+  if (Op.isImm()) {
+    O << formatImm(Op.getImm());
     return;
   }
 
+  assert(Op.isExpr() && "unknown operand kind in printOperand");
+  Op.getExpr()->print(O, &MAI, true);
+}
+
+template <unsigned Bits, int Offset>
+void LoongArchInstPrinter::printUImm(const MCInst *MI, int opNum,
+                                     raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
   if (MO.isImm()) {
-    O << MO.getImm();
+    uint64_t Imm = MO.getImm();
+    Imm -= Offset;
+    Imm &= (1 << Bits) - 1;
+    Imm += Offset;
+    O << formatImm(Imm);
     return;
   }
 
-  assert(MO.isExpr() && "Unknown operand kind in printOperand");
-  MO.getExpr()->print(O, &MAI);
+  printOperand(MI, opNum, O);
+}
+
+void LoongArchInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+  // Load/Store memory operands -- $reg, imm
+  printOperand(MI, opNum, O);
+  O << ", ";
+  printOperand(MI, opNum + 1, O);
+}
+
+void LoongArchInstPrinter::printAMemOperand(const MCInst *MI, int opNum,
+                                            raw_ostream &O) {
+  // AM* instruction memory operand: "rj, 0"
+  printRegName(O, MI->getOperand(opNum).getReg());
+  O << ", 0";
+}
+
+void LoongArchInstPrinter::
+printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+  // When stack locations are used by instructions other than loads and
+  // stores, print them the same way as normal three-operand instructions.
+  printOperand(MI, opNum, O);
+  O << ", ";
+  printOperand(MI, opNum + 1, O);
+}
+
+void LoongArchInstPrinter::
+printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  O << LoongArchFCCToString((LoongArch::CondCode)MO.getImm());
+}
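+
+// Helpers for the hand-written aliases above.  JIRL keeps an explicit @plt
+// suffix on the printed target, presumably so that re-assembling the printed
+// form reproduces the PLT-relative fixups emitted for MEK_PLT operands.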
+bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI,
+                                      unsigned OpNo, raw_ostream &OS) {
+  OS << "\t" << Str << "\t";
+  if (MI.getOpcode() == LoongArch::JIRL) {
+    printOperand(&MI, OpNo, OS);
+    OS << "@plt";
+  } else
+    printOperand(&MI, OpNo, OS);
+  return true;
+}
+
+bool LoongArchInstPrinter::printAlias(const char *Str, const MCInst &MI,
+                                      unsigned OpNo0, unsigned OpNo1,
+                                      raw_ostream &OS) {
+  printAlias(Str, MI, OpNo0, OS);
+  OS << ", ";
+  printOperand(&MI, OpNo1, OS);
+  return true;
+}
+
+bool LoongArchInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
+  switch (MI.getOpcode()) {
+  case LoongArch::OR:
+    // or $r0, $r1, $zero => move $r0, $r1
+    return isReg<LoongArch::ZERO_64>(MI, 2) && printAlias("move", MI, 0, 1, OS);
+  default:
+    return false;
+  }
+}
+
+void LoongArchInstPrinter::
+printRegisterList(const MCInst *MI, int opNum, raw_ostream &O) {
+  // Stop two operands short of the end: the register list is the first
+  // operand of the instruction and is always followed by a memory operand
+  // (base + offset).
+  for (int i = opNum, e = MI->getNumOperands() - 2; i != e; ++i) {
+    if (i != opNum)
+      O << ", ";
+    printRegName(O, MI->getOperand(i).getReg());
+  }
 }
 
-const char *LoongArchInstPrinter::getRegisterName(unsigned RegNo) {
-  // Default print reg alias name
-  return getRegisterName(RegNo, LoongArch::RegAliasName);
+void LoongArchInstPrinter::
+printLoadAddr(const MCInst *MI, raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(1);
+  const MCExpr *Expr = Op.getExpr();
+  const LoongArchMCExpr *LoongArchExpr = cast<LoongArchMCExpr>(Expr);
+  switch (LoongArchExpr->getKind()) {
+  default:
+    llvm_unreachable("unexpected LoongArchMCExpr kind in printLoadAddr!");
+  case LoongArchMCExpr::MEK_ABS_HI:
+    O << "\tla.abs\t";
+    break;
+  case LoongArchMCExpr::MEK_GOT_HI:
+    O << "\tla.got\t";
+    break;
+  case LoongArchMCExpr::MEK_PCREL_HI:
+    O << "\tla.pcrel\t";
+    break;
+  case LoongArchMCExpr::MEK_TLSGD_HI:
+    O << "\tla.tls.gd\t";
+    break;
+  case LoongArchMCExpr::MEK_TLSIE_HI:
+    O << "\tla.tls.ie\t";
+    break;
+  case LoongArchMCExpr::MEK_TLSLE_HI:
+    O << "\tla.tls.le\t";
+    break;
+  }
+  printRegName(O, MI->getOperand(0).getReg());
+  O << ", ";
+  Expr->print(O, nullptr);
 }
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
index 0cbb3d73cd03..050dcc137a06 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- LoongArchInstPrinter.h - Convert LoongArch MCInst to asm syntax ---===//
+//===- LoongArchInstPrinter.h - Convert LoongArch MCInst to asm -*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,40 +10,110 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H
-#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H
-
-#include "MCTargetDesc/LoongArchMCTargetDesc.h"
+#ifndef LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H
+#define LLVM_LIB_TARGET_LOONGARCH_INSTPRINTER_LOONGARCHINSTPRINTER_H
 #include "llvm/MC/MCInstPrinter.h"
 
 namespace llvm {
+namespace LoongArch {
+// LoongArch Branch Codes
+enum FPBranchCode {
+  BRANCH_F,
+  BRANCH_T,
+  BRANCH_INVALID
+};
+
+// LoongArch Condition Codes
+enum CondCode {
+  FCOND_F = 0x0,
+  FCOND_SF,
+  FCOND_OLT,
+  FCOND_LT,
+  FCOND_OEQ,
+  FCOND_SEQ,
+  FCOND_OLE,
+  FCOND_LE,
+  FCOND_UN,
+  FCOND_NGLE,
+  FCOND_ULT,
+  FCOND_NGE,
+  FCOND_UEQ,
+  FCOND_NGL,
+  FCOND_ULE,
+  FCOND_NGT,
+  FCOND_CNE,
+  FCOND_SNE,
+  FCOND_COR = 0x14,
+  FCOND_SOR = 0x15,
+  FCOND_CUNE = 0x18,
+  FCOND_SUNE = 0x19,
+
+  // To be used with a float branch-false.  These conditions have the same
+  // mnemonics as the ones above, but are used with a branch-false instead.
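+  // Each entry below shares its mnemonic with one of the conditions above;
+  // see LoongArchFCCToString in LoongArchInstPrinter.cpp for the pairing.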
+ std::pair getMnemonic(const MCInst *MI) override; + void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void printRegName(raw_ostream &OS, unsigned RegNo) const override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, unsigned RegNo) const override; - // Autogenerated by tblgen. - std::pair getMnemonic(const MCInst *MI) override; - void printInstruction(const MCInst *MI, uint64_t Address, - const MCSubtargetInfo &STI, raw_ostream &O); - bool printAliasInstr(const MCInst *MI, uint64_t Address, - const MCSubtargetInfo &STI, raw_ostream &O); + bool printAliasInstr(const MCInst *MI, uint64_t Address, raw_ostream &OS); void printCustomAliasOperand(const MCInst *MI, uint64_t Address, unsigned OpIdx, unsigned PrintMethodIdx, - const MCSubtargetInfo &STI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - static const char *getRegisterName(unsigned RegNo, unsigned AltIdx); + raw_ostream &O); private: - void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, - raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, uint64_t /*Address*/, unsigned OpNum, + raw_ostream &O) { + printOperand(MI, OpNum, O); + } + template + void printUImm(const MCInst *MI, int opNum, raw_ostream &O); + void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); + void printAMemOperand(const MCInst *MI, int opNum, raw_ostream &O); + void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O); + void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O); + + bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo, + raw_ostream &OS); + bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo0, + unsigned OpNo1, raw_ostream &OS); + bool printAlias(const MCInst &MI, raw_ostream &OS); + void printSaveRestore(const MCInst *MI, raw_ostream &O); + void printRegisterList(const MCInst *MI, int opNum, raw_ostream &O); + void printLoadAddr(const MCInst *MI, raw_ostream &O); }; } // end namespace llvm -#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHINSTPRINTER_H +#endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp index bc946db2f449..b3091a107b51 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- LoongArchMCAsmInfo.cpp - LoongArch Asm properties ------*- C++ -*--===// +//===-- LoongArchMCAsmInfo.cpp - LoongArch Asm Properties ---------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -12,23 +12,28 @@ #include "LoongArchMCAsmInfo.h" #include "llvm/ADT/Triple.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/MC/MCStreamer.h" using namespace llvm; -void LoongArchMCAsmInfo::anchor() {} +void LoongArchMCAsmInfo::anchor() { } -LoongArchMCAsmInfo::LoongArchMCAsmInfo(const Triple &TT) { - CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 
8 : 4; - AlignmentIsInBytes = false; - Data8bitsDirective = "\t.byte\t"; - Data16bitsDirective = "\t.half\t"; - Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = "\t.dword\t"; - ZeroDirective = "\t.space\t"; - CommentString = "#"; +LoongArchMCAsmInfo::LoongArchMCAsmInfo(const Triple &TheTriple, + const MCTargetOptions &Options) { + + if (TheTriple.isLoongArch64() + && TheTriple.getEnvironment() != Triple::GNUABILPX32) + CodePointerSize = CalleeSaveStackSlotSize = 8; + + AlignmentIsInBytes = false; + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = "\t.dword\t"; + CommentString = "#"; + ZeroDirective = "\t.space\t"; SupportsDebugInformation = true; - DwarfRegNumForCFI = true; ExceptionsType = ExceptionHandling::DwarfCFI; + DwarfRegNumForCFI = true; + //HasLoongArchExpressions = true; + UseIntegratedAssembler = true; + UsesELFSectionDirectiveForBSS = true; } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h index ed1abbf46153..244db58dbab9 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h @@ -1,4 +1,4 @@ -//===-- LoongArchMCAsmInfo.h - LoongArch Asm Info --------------*- C++ -*--===// +//===-- LoongArchMCAsmInfo.h - LoongArch Asm Info ------------------------*- C++ -*--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -22,9 +22,10 @@ class LoongArchMCAsmInfo : public MCAsmInfoELF { void anchor() override; public: - explicit LoongArchMCAsmInfo(const Triple &TargetTriple); + explicit LoongArchMCAsmInfo(const Triple &TheTriple, + const MCTargetOptions &Options); }; -} // end namespace llvm +} // namespace llvm -#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCASMINFO_H +#endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index 01a370a90403..df4e72e90bd2 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -1,4 +1,4 @@ -//=- LoongArchMCCodeEmitter.cpp - Convert LoongArch code to machine code --===// +//===-- LoongArchMCCodeEmitter.cpp - Convert LoongArch Code to Machine Code ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -10,118 +10,1422 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/LoongArchBaseInfo.h"
+#include "LoongArchMCCodeEmitter.h"
+#include "MCTargetDesc/LoongArchFixupKinds.h"
+#include "MCTargetDesc/LoongArchMCExpr.h"
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
-#include "llvm/MC/MCCodeEmitter.h"
+#include "MCTargetDesc/LoongArchInstPrinter.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/EndianStream.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "mccodeemitter"
 
-namespace {
-class LoongArchMCCodeEmitter : public MCCodeEmitter {
-  LoongArchMCCodeEmitter(const LoongArchMCCodeEmitter &) = delete;
-  void operator=(const LoongArchMCCodeEmitter &) = delete;
-  MCContext &Ctx;
-  MCInstrInfo const &MCII;
-
-public:
-  LoongArchMCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
-      : Ctx(ctx), MCII(MCII) {}
-
-  ~LoongArchMCCodeEmitter() override {}
-
-  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
-                         SmallVectorImpl<MCFixup> &Fixups,
-                         const MCSubtargetInfo &STI) const override;
-
-  /// TableGen'erated function for getting the binary encoding for an
-  /// instruction.
-  uint64_t getBinaryCodeForInstr(const MCInst &MI,
-                                 SmallVectorImpl<MCFixup> &Fixups,
-                                 const MCSubtargetInfo &STI) const;
-
-  /// Return binary encoding of operand. If the machine operand requires
-  /// relocation, record the relocation and return zero.
-  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-                             SmallVectorImpl<MCFixup> &Fixups,
-                             const MCSubtargetInfo &STI) const;
-
-  /// Return binary encoding of an immediate operand specified by OpNo.
-  /// The value returned is the value of the immediate minus 1.
-  /// Note that this function is dedicated to specific immediate types,
-  /// e.g. uimm2_plus1.
-  unsigned getImmOpValueSub1(const MCInst &MI, unsigned OpNo,
-                             SmallVectorImpl<MCFixup> &Fixups,
-                             const MCSubtargetInfo &STI) const;
-
-  /// Return binary encoding of an immediate operand specified by OpNo.
-  /// The value returned is the value of the immediate shifted right
-  // arithmetically by 2.
-  /// Note that this function is dedicated to specific immediate types,
-  /// e.g. simm14_lsl2, simm16_lsl2, simm21_lsl2 and simm26_lsl2.
-  unsigned getImmOpValueAsr2(const MCInst &MI, unsigned OpNo,
-                             SmallVectorImpl<MCFixup> &Fixups,
-                             const MCSubtargetInfo &STI) const;
-};
-} // end namespace
+#define GET_INSTRMAP_INFO
+#include "LoongArchGenInstrInfo.inc"
+#undef GET_INSTRMAP_INFO
 
-unsigned
-LoongArchMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-                                          SmallVectorImpl<MCFixup> &Fixups,
-                                          const MCSubtargetInfo &STI) const {
+namespace llvm {
 
-  if (MO.isReg())
-    return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII,
+                                            MCContext &Ctx) {
+  return new LoongArchMCCodeEmitter(MCII, Ctx);
+}
 
-  if (MO.isImm())
-    return static_cast<unsigned>(MO.getImm());
+} // end namespace llvm
 
-  llvm_unreachable("Unhandled expression!");
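+
+// Emit the encoded instruction one byte at a time, least significant byte
+// first: LoongArch instruction words are stored little-endian.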
+void LoongArchMCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const {
+  OS << (char)C;
 }
 
-unsigned
-LoongArchMCCodeEmitter::getImmOpValueSub1(const MCInst &MI, unsigned OpNo,
-                                          SmallVectorImpl<MCFixup> &Fixups,
-                                          const MCSubtargetInfo &STI) const {
-  return MI.getOperand(OpNo).getImm() - 1;
+void LoongArchMCCodeEmitter::EmitInstruction(uint64_t Val, unsigned Size,
+                                             const MCSubtargetInfo &STI,
+                                             raw_ostream &OS) const {
+  for (unsigned i = 0; i < Size; ++i) {
+    unsigned Shift = i * 8;
+    EmitByte((Val >> Shift) & 0xff, OS);
+  }
 }
 
-unsigned
-LoongArchMCCodeEmitter::getImmOpValueAsr2(const MCInst &MI, unsigned OpNo,
-                                          SmallVectorImpl<MCFixup> &Fixups,
-                                          const MCSubtargetInfo &STI) const {
-  unsigned Res = MI.getOperand(OpNo).getImm();
-  assert((Res & 3) == 0 && "lowest 2 bits are non-zero");
-  return Res >> 2;
-}
-
-void LoongArchMCCodeEmitter::encodeInstruction(
-    const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
-    const MCSubtargetInfo &STI) const {
-  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
-  // Get byte count of instruction.
+/// encodeInstruction - Emit the instruction.
+/// Size the instruction with Desc.getSize().
+void LoongArchMCCodeEmitter::
+encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                  SmallVectorImpl<MCFixup> &Fixups,
+                  const MCSubtargetInfo &STI) const {
+  MCInst TmpInst = MI;
+
+  uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+
+  const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
+
+  // Get the byte count of the instruction.
   unsigned Size = Desc.getSize();
+  if (!Size)
+    llvm_unreachable("Desc.getSize() returns 0");
+
+  EmitInstruction(Binary, Size, STI, OS);
+}
+
+/// getBranchTargetOpValue - Return binary encoding of the branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned LoongArchMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+                       SmallVectorImpl<MCFixup> &Fixups,
+                       const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
 
-  switch (Size) {
+  // If the destination is an immediate, divide by 4.
+  if (MO.isImm()) return MO.getImm() >> 2;
+
+  assert(MO.isExpr() &&
+         "getBranchTargetOpValue expects only expressions or immediates");
+
+  // XXX: Encoder method for branch-target relocations.
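+  // LoongArch's stack-based relocations work like a small stack machine:
+  // SOP_PUSH_* fixups push values, SOP_ADD/SUB/SL/SR/AND combine them, and
+  // a final SOP_POP_32_* writes the result into the instruction's immediate
+  // field.  The fixup sequences emitted below follow that recipe.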
+  const MCExpr *Expr = MO.getExpr();
+  int64_t Value = 0x0;
+  const MCConstantExpr *tmpExpr = MCConstantExpr::create(Value, Ctx);
+  Fixups.push_back(MCFixup::create(0, Expr,
+      MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_PCREL)));
+  switch (MI.getOpcode()) {
   default:
-    llvm_unreachable("Unhandled encodeInstruction length!");
-  case 4: {
-    uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
-    support::endian::write(OS, Bits, support::little);
+    llvm_unreachable("Unhandled reloc instruction!");
+    break;
+  case LoongArch::BEQZ:
+  case LoongArch::BEQZ32:
+  case LoongArch::BNEZ:
+  case LoongArch::BNEZ32:
+  case LoongArch::BCEQZ:
+  case LoongArch::BCNEZ:
+    Fixups.push_back(MCFixup::create(0, tmpExpr,
+        MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_0_5_10_16_S2)));
     break;
+  case LoongArch::BEQ:
+  case LoongArch::BEQ32:
+  case LoongArch::BNE:
+  case LoongArch::BNE32:
+  case LoongArch::BLT:
+  case LoongArch::BLT32:
+  case LoongArch::BGE:
+  case LoongArch::BGE32:
+  case LoongArch::BLTU:
+  case LoongArch::BLTU32:
+  case LoongArch::BGEU:
+  case LoongArch::BGEU32:
+    Fixups.push_back(MCFixup::create(0, tmpExpr,
+        MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2)));
+    break;
+  }
+  return 0;
+}
+
+/// getJumpTargetOpValue - Return binary encoding of the jump
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned LoongArchMCCodeEmitter::
+getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
+                     SmallVectorImpl<MCFixup> &Fixups,
+                     const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  // If the destination is an immediate, divide by 4.
+  if (MO.isImm()) return MO.getImm() >> 2;
+
+  assert(MO.isExpr() &&
+         "getJumpTargetOpValue expects only expressions or an immediate");
+
+  const MCExpr *Expr = MO.getExpr();
+  int64_t Value = 0x0;
+  const MCConstantExpr *tmpExpr = MCConstantExpr::create(Value, Ctx);
+  Fixups.push_back(MCFixup::create(0, Expr,
+      MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL)));
+  if (MI.getOpcode() == LoongArch::JIRL)
+    Fixups.push_back(MCFixup::create(0, tmpExpr,
+        MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2)));
+  else // B or BL
+    Fixups.push_back(MCFixup::create(0, tmpExpr,
+        MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2)));
+  return 0;
+}
+
+unsigned LoongArchMCCodeEmitter::
+getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo,
+                      SmallVectorImpl<MCFixup> &Fixups,
+                      const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm()) {
+    unsigned Value = MO.getImm();
+    return Value >> 1;
+  }
+
+  return 0;
+}
+
+unsigned LoongArchMCCodeEmitter::
+getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+                      SmallVectorImpl<MCFixup> &Fixups,
+                      const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm()) {
+    unsigned Value = MO.getImm();
+    return Value >> 2;
+  }
+
+  return 0;
+}
+
+unsigned LoongArchMCCodeEmitter::
+getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+                     SmallVectorImpl<MCFixup> &Fixups,
+                     const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm()) {
+    unsigned Value = MO.getImm();
+    return Value >> 3;
+  }
+
+  return 0;
+}
+
+unsigned LoongArchMCCodeEmitter::
+getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo,
+                     SmallVectorImpl<MCFixup> &Fixups,
+                     const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm()) {
+    unsigned Value = MO.getImm();
+    return Value >> 1;
+  }
+
+  return 0;
+}
+
+unsigned LoongArchMCCodeEmitter::
+getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+                     SmallVectorImpl<MCFixup> &Fixups,
+                     const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm()) {
+    unsigned Value = MO.getImm();
+    return Value >> 2;
+  }
+
+  return 0;
+}
+
+unsigned LoongArchMCCodeEmitter::
+getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+                     SmallVectorImpl<MCFixup> &Fixups,
+                     const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm()) {
+    unsigned Value = MO.getImm();
+    return Value >> 3;
+  }
+
+  return 0;
+}
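+
+// Lower a target-specific MCExpr operand into fixups.  Each MEK_* kind
+// expands to a fixed stack-machine recipe; e.g. MEK_ABS_LO pushes the symbol
+// value and 0xfff, ANDs them, and pops the low 12 bits into the instruction.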
+unsigned LoongArchMCCodeEmitter::
+getExprOpValue(const MCInst &MI, const MCExpr *Expr,
+               SmallVectorImpl<MCFixup> &Fixups,
+               const MCSubtargetInfo &STI) const {
+  int64_t Res;
+
+  if (Expr->evaluateAsAbsolute(Res))
+    return Res;
+
+  MCExpr::ExprKind Kind = Expr->getKind();
+  if (Kind == MCExpr::Constant) {
+    return cast<MCConstantExpr>(Expr)->getValue();
+  }
+
+  if (Kind == MCExpr::Binary) {
+    unsigned Res =
+        getExprOpValue(MI, cast<MCBinaryExpr>(Expr)->getLHS(), Fixups, STI);
+    Res += getExprOpValue(MI, cast<MCBinaryExpr>(Expr)->getRHS(), Fixups, STI);
+    return Res;
+  }
+
+  if (Kind == MCExpr::Target) {
+    int64_t Value = 0x0;
+    const LoongArchMCExpr *LoongArchExpr = cast<LoongArchMCExpr>(Expr);
+    const MCExpr *BinExpr = nullptr;
+    const MCExpr *GOTExpr = nullptr;
+    const MCSymbol *GOTSymbol =
+        Ctx.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+
+    LoongArch::Fixups FixupKind = LoongArch::Fixups(0);
+    switch (LoongArchExpr->getKind()) {
+    case LoongArchMCExpr::MEK_None:
+    case LoongArchMCExpr::MEK_Special:
+      llvm_unreachable("Unhandled fixup kind!");
+      break;
+    case LoongArchMCExpr::MEK_PLT:
+      Value = 0x0;
+      FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL;
+      Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind)));
+      if (MI.getOpcode() == LoongArch::JIRL)
+        Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx),
+            MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2)));
+      else // B or BL
+        Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx),
+            MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_0_10_10_16_S2)));
+      break;
+    case LoongArchMCExpr::MEK_CALL_HI:
+      FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL;
+      Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind)));
+
+      Value = 0x20000;
+      Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx),
+          MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE)));
+      Value = 0x0;
+      Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx),
+          MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD)));
+      Value = 0x12;
+      Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx),
+          MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE)));
+      Value = 0x0;
+      Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx),
+          MCFixupKind(LoongArch::fixup_LARCH_SOP_SR)));
+      Value = 0x0;
+      Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx),
+          MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20)));
+
+      break;
+    case LoongArchMCExpr::MEK_CALL_LO:
+      FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PLT_PCREL;
+      Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind)));
+
+      Value = 0x4;
+      Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx),
+          MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE)));
+      Value = 0x0;
+      Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx),
+          MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD)));
+
+
Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x20004; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x12; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x12; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_16_S2))); + break; + case LoongArchMCExpr::MEK_GOT_HI: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x800; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_GOT_LO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x4; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x804; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + 
MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + case LoongArchMCExpr::MEK_GOT_RRHI: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Fixups.push_back(MCFixup::create(0, GOTExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x80000000; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_GOT_RRLO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x4; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, 
MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x80000004; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0xfff; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); + break; + case LoongArchMCExpr::MEK_GOT_RRHIGHER: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x80000008; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_GOT_RRHIGHEST: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x8000000c; + BinExpr = 
MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_GPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x34; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + case LoongArchMCExpr::MEK_ABS_HI: + FixupKind = LoongArch::fixup_LARCH_MARK_LA; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_ABS_LO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0xfff; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); + break; + case LoongArchMCExpr::MEK_ABS_HIGHER: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_ABS_HIGHEST: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE; + Fixups.push_back(MCFixup::create(0, 
LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x34; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + case LoongArchMCExpr::MEK_PCREL_HI: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x800; + BinExpr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_PCREL_LO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x4; + BinExpr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x804; + BinExpr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + case LoongArchMCExpr::MEK_PCREL_RRHI: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x80000000; + BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + 
Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_PCREL_RRLO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x4; + BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x80000004; + BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0xfff; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); + break; + case LoongArchMCExpr::MEK_PCREL_RRHIGHER: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x80000008; + BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case 
LoongArchMCExpr::MEK_PCREL_RRHIGHEST: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x8000000c; + BinExpr = MCBinaryExpr::createAdd(LoongArchExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + Value = 0x34; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + case LoongArchMCExpr::MEK_TLSGD_HI: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x800; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_TLSGD_LO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x4; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x804; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, 
MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + case LoongArchMCExpr::MEK_TLSGD_RRHI: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Fixups.push_back(MCFixup::create(0, GOTExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x80000000; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_TLSGD_RRLO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x4; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x80000004; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, 
MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0xfff; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); + break; + case LoongArchMCExpr::MEK_TLSGD_RRHIGHER: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x80000008; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_TLSGD_RRHIGHEST: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x8000000c; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GD; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x34; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + 
MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + case LoongArchMCExpr::MEK_TLSIE_HI: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x800; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_TLSIE_LO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x4; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x804; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + case LoongArchMCExpr::MEK_TLSIE_RRHI: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + 
MCSymbolRefExpr::VK_None, Ctx); + Fixups.push_back(MCFixup::create(0, GOTExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x80000000; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_TLSIE_RRLO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x4; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + Value = 0x80000004; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + 
MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SUB))); + Value = 0xfff; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); + break; + case LoongArchMCExpr::MEK_TLSIE_RRHIGHER: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x80000008; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_TLSIE_RRHIGHEST: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_PCREL; + GOTExpr = MCSymbolRefExpr::create(GOTSymbol, + MCSymbolRefExpr::VK_None, Ctx); + Value = 0x8000000c; + BinExpr = MCBinaryExpr::createAdd(GOTExpr, MCConstantExpr::create(Value, Ctx), Ctx); + Fixups.push_back(MCFixup::create(0, BinExpr, MCFixupKind(FixupKind))); + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_GOT; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_ADD))); + Value = 0x34; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + case LoongArchMCExpr::MEK_TLSLE_HI: + FixupKind = 
LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + + Value = 0x20; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_TLSLE_LO: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0xfff; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_AND))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_U_10_12))); + break; + case LoongArchMCExpr::MEK_TLSLE_HIGHER: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0xc; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SL))); + Value = 0x2c; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_5_20))); + break; + case LoongArchMCExpr::MEK_TLSLE_HIGHEST: + FixupKind = LoongArch::fixup_LARCH_SOP_PUSH_TLS_TPREL; + Fixups.push_back(MCFixup::create(0, LoongArchExpr, MCFixupKind(FixupKind))); + Value = 0x34; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_PUSH_ABSOLUTE))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_SR))); + Value = 0x0; + Fixups.push_back(MCFixup::create(0, MCConstantExpr::create(Value, Ctx), + MCFixupKind(LoongArch::fixup_LARCH_SOP_POP_32_S_10_12))); + break; + } + return 0; } + + if (Kind == MCExpr::SymbolRef) { + LoongArch::Fixups FixupKind = LoongArch::Fixups(0); + + switch(cast(Expr)->getKind()) { + default: llvm_unreachable("Unknown fixup kind!"); + break; + } + Fixups.push_back(MCFixup::create(0, Expr, MCFixupKind(FixupKind))); + return 0; + } + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. 
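+/// For example, given `ld.d $a0, $a1, 8`, the two register operands return
+/// their hardware register encodings and the immediate operand returns 8
+/// directly; only expression operands (symbols and %-modified expressions)
+/// fall through to getExprOpValue, which records the required fixups.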
+unsigned LoongArchMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                  SmallVectorImpl<MCFixup> &Fixups,
+                  const MCSubtargetInfo &STI) const {
+  if (MO.isReg()) {
+    unsigned Reg = MO.getReg();
+    unsigned RegNo = Ctx.getRegisterInfo()->getEncodingValue(Reg);
+    return RegNo;
+  } else if (MO.isImm()) {
+    return static_cast<unsigned>(MO.getImm());
+  } else if (MO.isDFPImm()) {
+    return static_cast<unsigned>(bit_cast<double>(MO.getDFPImm()));
 }
+  // MO must be an Expr.
+  assert(MO.isExpr());
+  return getExprOpValue(MI, MO.getExpr(), Fixups, STI);
 }
-MCCodeEmitter *llvm::createLoongArchMCCodeEmitter(const MCInstrInfo &MCII,
-                                                  MCContext &Ctx) {
-  return new LoongArchMCCodeEmitter(Ctx, MCII);
+/// Return binary encoding of a memory-related operand.
+/// If the offset operand requires relocation, record the relocation.
+template <unsigned ShiftAmount>
+unsigned LoongArchMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
+                                                SmallVectorImpl<MCFixup> &Fixups,
+                                                const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 16-12, offset is encoded in bits 11-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 12;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI);
+
+  // Apply the scale factor if there is one.
+  OffBits >>= ShiftAmount;
+
+  return (OffBits & 0xFFF) | RegBits;
+}
+
+/// Return binary encoding of an AM* (atomic memory) operand.
+unsigned
+LoongArchMCCodeEmitter::getAMemEncoding(const MCInst &MI, unsigned OpNo,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 16-12, bits 11-0 are not used.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI)
+                     << 12;
+  return RegBits;
+}
+
+unsigned LoongArchMCCodeEmitter::getMemEncoding10l2(const MCInst &MI, unsigned OpNo,
+                                                    SmallVectorImpl<MCFixup> &Fixups,
+                                                    const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 14-10, offset is encoded in bits 9-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 10;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI);
+
+  // Apply the scale factor: the offset is in 4-byte units.
+  OffBits >>= 2;
+
+  return (OffBits & 0x3FF) | RegBits;
+}
+
+unsigned LoongArchMCCodeEmitter::getMemEncoding11l1(const MCInst &MI, unsigned OpNo,
+                                                    SmallVectorImpl<MCFixup> &Fixups,
+                                                    const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 15-11, offset is encoded in bits 10-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 11;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI);
+
+  // Apply the scale factor: the offset is in 2-byte units.
+  OffBits >>= 1;
+
+  return (OffBits & 0x7FF) | RegBits;
+}
+
+unsigned LoongArchMCCodeEmitter::getMemEncoding9l3(const MCInst &MI, unsigned OpNo,
+                                                   SmallVectorImpl<MCFixup> &Fixups,
+                                                   const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 13-9, offset is encoded in bits 8-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 9;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI);
+
+  // Apply the scale factor: the offset is in 8-byte units.
+  OffBits >>= 3;
+
+  return (OffBits & 0x1FF) | RegBits;
+}
+
+/// Return binary encoding of a simm14 memory operand, as used by LL/SC
+/// instructions.
+/// If the offset operand requires relocation, record the relocation.
+template <unsigned ShiftAmount>
+unsigned LoongArchMCCodeEmitter::getSimm14MemEncoding(const MCInst &MI, unsigned OpNo,
+                                                      SmallVectorImpl<MCFixup> &Fixups,
+                                                      const MCSubtargetInfo &STI) const {
+  // Base register is encoded in bits 18-14, offset is encoded in bits 13-0.
+  assert(MI.getOperand(OpNo).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) << 14;
+  unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI);
+
+  // Apply the scale factor if there is one.
+  OffBits >>= ShiftAmount;
+
+  return (OffBits & 0x3FFF) | RegBits;
+}
+
+unsigned
+LoongArchMCCodeEmitter::getFCMPEncoding(const MCInst &MI, unsigned OpNo,
+                                        SmallVectorImpl<MCFixup> &Fixups,
+                                        const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  switch ((LoongArch::CondCode)MO.getImm()) {
+  case LoongArch::FCOND_T:
+    return 0x0;
+  case LoongArch::FCOND_OR:
+    return 0x8;
+  case LoongArch::FCOND_UNE:
+    return 0x4;
+  case LoongArch::FCOND_ONE:
+    return 0xC;
+  case LoongArch::FCOND_UGE:
+    return 0x2;
+  case LoongArch::FCOND_OGE:
+    return 0xA;
+  case LoongArch::FCOND_UGT:
+    return 0x6;
+  case LoongArch::FCOND_OGT:
+    return 0xE;
+  case LoongArch::FCOND_ST:
+    return 0x1;
+  case LoongArch::FCOND_GLE:
+    return 0x9;
+  case LoongArch::FCOND_GL:
+    return 0xD;
+  case LoongArch::FCOND_NLT:
+    return 0x3;
+  case LoongArch::FCOND_GE:
+    return 0xB;
+  case LoongArch::FCOND_NLE:
+    return 0x7;
+  case LoongArch::FCOND_GT:
+    return 0xF;
+  default:
+    return MO.getImm();
+  }
+}
+
+template <unsigned Bits, int Offset>
+unsigned
+LoongArchMCCodeEmitter::getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo,
+                                                  SmallVectorImpl<MCFixup> &Fixups,
+                                                  const MCSubtargetInfo &STI) const {
+  assert(MI.getOperand(OpNo).isImm());
+  unsigned Value = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
+  Value -= Offset;
+  return Value;
 }
 
 #include "LoongArchGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h
new file mode 100644
index 000000000000..01634015174f
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.h
@@ -0,0 +1,146 @@
+//===- LoongArchMCCodeEmitter.h - Convert LoongArch Code to Machine Code --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoongArchMCCodeEmitter class.
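+// The emitter lowers MCInsts to 32-bit LoongArch instruction words; operands
+// that cannot be resolved at encode time are recorded as MCFixups for the
+// assembler backend to resolve or relax later.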
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H
+#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
+#include <memory>
+
+namespace llvm {
+
+class MCContext;
+class MCExpr;
+class MCFixup;
+class MCInst;
+class MCInstrInfo;
+class MCOperand;
+class MCSubtargetInfo;
+class raw_ostream;
+
+class LoongArchMCCodeEmitter : public MCCodeEmitter {
+  const MCInstrInfo &MCII;
+  MCContext &Ctx;
+
+public:
+  LoongArchMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_)
+      : MCII(mcii), Ctx(Ctx_) {}
+  LoongArchMCCodeEmitter(const LoongArchMCCodeEmitter &) = delete;
+  LoongArchMCCodeEmitter &operator=(const LoongArchMCCodeEmitter &) = delete;
+  ~LoongArchMCCodeEmitter() override = default;
+
+  void EmitByte(unsigned char C, raw_ostream &OS) const;
+
+  void EmitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI,
+                       raw_ostream &OS) const;
+
+  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups,
+                         const MCSubtargetInfo &STI) const override;
+
+  // getBinaryCodeForInstr - TableGen'erated function for getting the
+  // binary encoding for an instruction.
+  uint64_t getBinaryCodeForInstr(const MCInst &MI,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  // getJumpTargetOpValue - Return binary encoding of the jump
+  // target operand. If the machine operand requires relocation,
+  // record the relocation and return zero.
+  unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  // getBranchTargetOpValue - Return binary encoding of the branch
+  // target operand. If the machine operand requires relocation,
+  // record the relocation and return zero.
+  unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+                                  SmallVectorImpl<MCFixup> &Fixups,
+                                  const MCSubtargetInfo &STI) const;
+
+  // getMachineOpValue - Return binary encoding of operand. If the machine
+  // operand requires relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const;
+
+  template <unsigned ShiftAmount>
+  unsigned getMemEncoding(const MCInst &MI, unsigned OpNo,
+                          SmallVectorImpl<MCFixup> &Fixups,
+                          const MCSubtargetInfo &STI) const;
+
+  unsigned getAMemEncoding(const MCInst &MI, unsigned OpNo,
+                           SmallVectorImpl<MCFixup> &Fixups,
+                           const MCSubtargetInfo &STI) const;
+
+  unsigned getMemEncoding10l2(const MCInst &MI, unsigned OpNo,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
+
+  unsigned getMemEncoding11l1(const MCInst &MI, unsigned OpNo,
+                              SmallVectorImpl<MCFixup> &Fixups,
+                              const MCSubtargetInfo &STI) const;
+
+  unsigned getMemEncoding9l3(const MCInst &MI, unsigned OpNo,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const;
+
+  template <unsigned ShiftAmount>
+  unsigned getSimm14MemEncoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  unsigned getFCMPEncoding(const MCInst &MI, unsigned OpNo,
+                           SmallVectorImpl<MCFixup> &Fixups,
+                           const MCSubtargetInfo &STI) const;
+
+  /// Subtract Offset then encode as an N-bit unsigned integer.
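+  /// For example, with Offset == 1 an assembly-level immediate of 4 is
+  /// emitted as the field value 3; this suits operands whose legal range
+  /// does not start at zero.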
+  template <unsigned Bits, int Offset>
+  unsigned getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups,
+                                     const MCSubtargetInfo &STI) const;
+
+  unsigned getExprOpValue(const MCInst &MI, const MCExpr *Expr,
+                          SmallVectorImpl<MCFixup> &Fixups,
+                          const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm11Lsl1Encoding(const MCInst &MI, unsigned OpNo,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm10Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm9Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm8Lsl1Encoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm8Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+  unsigned getSImm8Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups,
+                                const MCSubtargetInfo &STI) const;
+
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCCODEEMITTER_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
new file mode 100644
index 000000000000..1af027f15547
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp
@@ -0,0 +1,158 @@
+//===-- LoongArchMCExpr.cpp - LoongArch specific MC expression classes --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchMCExpr.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarchmcexpr"
+
+const LoongArchMCExpr *LoongArchMCExpr::create(LoongArchMCExpr::LoongArchExprKind Kind,
+                                               const MCExpr *Expr, MCContext &Ctx) {
+  return new (Ctx) LoongArchMCExpr(Kind, Expr);
+}
+
+void LoongArchMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+  int64_t AbsVal;
+  if (Expr->evaluateAsAbsolute(AbsVal))
+    OS << AbsVal;
+  else
+    Expr->print(OS, MAI, true);
+}
+
+bool
+LoongArchMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
+                                           const MCAsmLayout *Layout,
+                                           const MCFixup *Fixup) const {
+  if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
+    return false;
+
+  if (Res.getRefKind() != MCSymbolRefExpr::VK_None)
+    return false;
+
+  // evaluateAsAbsolute() and evaluateAsValue() require that we evaluate the
+  // %hi/%lo/etc. here. Fixup is a null pointer when either of these is the
+  // caller.
+  if (Res.isAbsolute() && Fixup == nullptr) {
+    int64_t AbsVal = Res.getConstant();
+    switch (Kind) {
+    default:
+      break;
+    case MEK_None:
+    case MEK_Special:
+      llvm_unreachable("MEK_None and MEK_Special are invalid");
+    }
+    Res = MCValue::get(AbsVal);
+    return true;
+  }
+
+  // We want to defer it for relocatable expressions since the constant is
+  // applied to the whole symbol value.
+  //
+  // The value of getKind() that is given to MCValue is only intended to aid
+  // debugging when inspecting MCValue objects. It shouldn't be relied upon
+  // for decision making.
+  Res = MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
+
+  return true;
+}
+
+void LoongArchMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+  Streamer.visitUsedExpr(*getSubExpr());
+}
+
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+  switch (Expr->getKind()) {
+  case MCExpr::Target:
+    fixELFSymbolsInTLSFixupsImpl(cast<LoongArchMCExpr>(Expr)->getSubExpr(), Asm);
+    break;
+  case MCExpr::Constant:
+    break;
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+    fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+    fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+    break;
+  }
+  case MCExpr::SymbolRef: {
+    // We're known to be under a TLS fixup, so any symbol should be
+    // modified. There should be only one.
+    const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+    cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS);
+    break;
+  }
+  case MCExpr::Unary:
+    fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+    break;
+  }
+}
+
+void LoongArchMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+  switch (getKind()) {
+  default:
+    break;
+  case MEK_None:
+  case MEK_Special:
+    llvm_unreachable("MEK_None and MEK_Special are invalid");
+    break;
+  case MEK_CALL_HI:
+  case MEK_CALL_LO:
+  case MEK_GOT_HI:
+  case MEK_GOT_LO:
+  case MEK_GOT_RRHI:
+  case MEK_GOT_RRLO:
+  case MEK_GOT_RRHIGHER:
+  case MEK_GOT_RRHIGHEST:
+  case MEK_ABS_HI:
+  case MEK_ABS_LO:
+  case MEK_ABS_HIGHER:
+  case MEK_ABS_HIGHEST:
+  case MEK_PCREL_HI:
+  case MEK_PCREL_LO:
+  case MEK_PCREL_RRHI:
+  case MEK_PCREL_RRHIGHER:
+  case MEK_PCREL_RRHIGHEST:
+  case MEK_PCREL_RRLO:
+  case MEK_PLT:
+    // If we do have nested target-specific expressions, they will be in
+    // a consecutive chain.
+    if (const LoongArchMCExpr *E = dyn_cast<LoongArchMCExpr>(getSubExpr()))
+      E->fixELFSymbolsInTLSFixups(Asm);
+    break;
+  case MEK_TLSGD_HI:
+  case MEK_TLSGD_LO:
+  case MEK_TLSGD_RRHI:
+  case MEK_TLSGD_RRHIGHER:
+  case MEK_TLSGD_RRHIGHEST:
+  case MEK_TLSGD_RRLO:
+  case MEK_TLSLE_HI:
+  case MEK_TLSLE_HIGHER:
+  case MEK_TLSLE_HIGHEST:
+  case MEK_TLSLE_LO:
+  case MEK_TLSIE_HI:
+  case MEK_TLSIE_LO:
+  case MEK_TLSIE_RRHI:
+  case MEK_TLSIE_RRHIGHER:
+  case MEK_TLSIE_RRHIGHEST:
+  case MEK_TLSIE_RRLO:
+    fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
+    break;
+  }
+}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
new file mode 100644
index 000000000000..7851d478e913
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h
@@ -0,0 +1,97 @@
+//===- LoongArchMCExpr.h - LoongArch specific MC expression classes -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H +#define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H + +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" + +namespace llvm { + +class LoongArchMCExpr : public MCTargetExpr { +public: + enum LoongArchExprKind { + MEK_None, + MEK_CALL_HI, + MEK_CALL_LO, + MEK_GOT_HI, + MEK_GOT_LO, + MEK_GOT_RRHI, + MEK_GOT_RRHIGHER, + MEK_GOT_RRHIGHEST, + MEK_GOT_RRLO, + MEK_ABS_HI, + MEK_ABS_HIGHER, + MEK_ABS_HIGHEST, + MEK_ABS_LO, + MEK_PCREL_HI, + MEK_PCREL_LO, + MEK_PCREL_RRHI, + MEK_PCREL_RRHIGHER, + MEK_PCREL_RRHIGHEST, + MEK_PCREL_RRLO, + MEK_TLSLE_HI, + MEK_TLSLE_HIGHER, + MEK_TLSLE_HIGHEST, + MEK_TLSLE_LO, + MEK_TLSIE_HI, + MEK_TLSIE_LO, + MEK_TLSIE_RRHI, + MEK_TLSIE_RRHIGHER, + MEK_TLSIE_RRHIGHEST, + MEK_TLSIE_RRLO, + MEK_TLSGD_HI, + MEK_TLSGD_LO, + MEK_TLSGD_RRHI, + MEK_TLSGD_RRHIGHER, + MEK_TLSGD_RRHIGHEST, + MEK_TLSGD_RRLO, + MEK_PLT, + MEK_Special, + }; + +private: + const LoongArchExprKind Kind; + const MCExpr *Expr; + + explicit LoongArchMCExpr(LoongArchExprKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} + +public: + static const LoongArchMCExpr *create(LoongArchExprKind Kind, const MCExpr *Expr, + MCContext &Ctx); + static const LoongArchMCExpr *createGpOff(LoongArchExprKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + /// Get the kind of this expression. + LoongArchExprKind getKind() const { return Kind; } + + /// Get the child of this expression. + const MCExpr *getSubExpr() const { return Expr; } + + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCEXPR_H diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index 8d71235f6a81..18e3224ab82c 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- LoongArchMCTargetDesc.cpp - LoongArch Target Descriptions ---------===// +//===-- LoongArchMCTargetDesc.cpp - LoongArch Target Descriptions -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -11,35 +11,47 @@ //===----------------------------------------------------------------------===// #include "LoongArchMCTargetDesc.h" -#include "LoongArchBaseInfo.h" -#include "LoongArchInstPrinter.h" -#include "LoongArchMCAsmInfo.h" +#include "LoongArchTargetStreamer.h" +#include "MCTargetDesc/LoongArchAsmBackend.h" +#include "MCTargetDesc/LoongArchELFStreamer.h" +#include "MCTargetDesc/LoongArchInstPrinter.h" +#include "MCTargetDesc/LoongArchMCAsmInfo.h" #include "TargetInfo/LoongArchTargetInfo.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCDwarf.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/Compiler.h" + +using namespace llvm; #define GET_INSTRINFO_MC_DESC -#define ENABLE_INSTR_PREDICATE_VERIFIER #include "LoongArchGenInstrInfo.inc" -#define GET_REGINFO_MC_DESC -#include "LoongArchGenRegisterInfo.inc" - #define GET_SUBTARGETINFO_MC_DESC #include "LoongArchGenSubtargetInfo.inc" -using namespace llvm; +#define GET_REGINFO_MC_DESC +#include "LoongArchGenRegisterInfo.inc" -static MCRegisterInfo *createLoongArchMCRegisterInfo(const Triple &TT) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitLoongArchMCRegisterInfo(X, LoongArch::R1); - return X; +/// Select the LoongArch CPU for the given triple and cpu name. +/// FIXME: Merge with the copy in LoongArchSubtarget.cpp +StringRef LoongArch_MC::selectLoongArchCPU(const Triple &TT, StringRef CPU) { + if (CPU.empty() || CPU == "generic") { + if (TT.isLoongArch32()) + CPU = "loongarch32"; //FIXME + else + CPU = "la464"; + } + return CPU; } static MCInstrInfo *createLoongArchMCInstrInfo() { @@ -48,20 +60,24 @@ static MCInstrInfo *createLoongArchMCInstrInfo() { return X; } -static MCSubtargetInfo * -createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - if (CPU.empty()) - CPU = TT.isArch64Bit() ? "la464" : "generic-la32"; +static MCRegisterInfo *createLoongArchMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitLoongArchMCRegisterInfo(X, LoongArch::RA); + return X; +} + +static MCSubtargetInfo *createLoongArchMCSubtargetInfo(const Triple &TT, + StringRef CPU, StringRef FS) { + CPU = LoongArch_MC::selectLoongArchCPU(TT, CPU); return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } static MCAsmInfo *createLoongArchMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT, const MCTargetOptions &Options) { - MCAsmInfo *MAI = new LoongArchMCAsmInfo(TT); + MCAsmInfo *MAI = new LoongArchMCAsmInfo(TT, Options); - // Initial state of the frame pointer is sp(r3). 
- MCRegister SP = MRI.getDwarfRegNum(LoongArch::R3, true); + unsigned SP = MRI.getDwarfRegNum(LoongArch::SP, true); MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(nullptr, SP, 0); MAI->addInitialFrameState(Inst); @@ -76,40 +92,96 @@ static MCInstPrinter *createLoongArchMCInstPrinter(const Triple &T, return new LoongArchInstPrinter(MAI, MII, MRI); } +static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter, + bool RelaxAll) { + MCStreamer *S; + S = createLoongArchELFStreamer(Context, std::move(MAB), std::move(OW), + std::move(Emitter), RelaxAll); + return S; +} + +static MCTargetStreamer *createLoongArchAsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new LoongArchTargetAsmStreamer(S, OS); +} + +static MCTargetStreamer *createLoongArchNullTargetStreamer(MCStreamer &S) { + return new LoongArchTargetStreamer(S); +} + +static MCTargetStreamer * +createLoongArchObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new LoongArchTargetELFStreamer(S, STI); +} + namespace { class LoongArchMCInstrAnalysis : public MCInstrAnalysis { public: - explicit LoongArchMCInstrAnalysis(const MCInstrInfo *Info) - : MCInstrAnalysis(Info) {} + LoongArchMCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const override { unsigned NumOps = Inst.getNumOperands(); - if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { + if (NumOps == 0) + return false; + if (Info->get(Inst.getOpcode()).isBranch() || Inst.getOpcode() == LoongArch::BL) { + // just not jirl Target = Addr + Inst.getOperand(NumOps - 1).getImm(); return true; + } else { + return false; } - - return false; } }; +} -} // end namespace - -static MCInstrAnalysis *createLoongArchInstrAnalysis(const MCInstrInfo *Info) { +static MCInstrAnalysis *createLoongArchMCInstrAnalysis(const MCInstrInfo *Info) { return new LoongArchMCInstrAnalysis(Info); } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetMC() { for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) { - TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createLoongArchMCAsmInfo); + + // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(*T, createLoongArchMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createLoongArchMCRegisterInfo); + + // Register the elf streamer. + TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); + + // Register the asm target streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createLoongArchAsmTargetStreamer); + + TargetRegistry::RegisterNullTargetStreamer(*T, + createLoongArchNullTargetStreamer); + + // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(*T, createLoongArchMCSubtargetInfo); - TargetRegistry::RegisterMCAsmInfo(*T, createLoongArchMCAsmInfo); - TargetRegistry::RegisterMCCodeEmitter(*T, createLoongArchMCCodeEmitter); - TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); + + // Register the MC instruction analyzer. + TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchMCInstrAnalysis); + + // Register the MCInstPrinter. 
TargetRegistry::RegisterMCInstPrinter(*T, createLoongArchMCInstPrinter); - TargetRegistry::RegisterMCInstrAnalysis(*T, createLoongArchInstrAnalysis); + + TargetRegistry::RegisterObjectTargetStreamer( + *T, createLoongArchObjectTargetStreamer); + + // Register the asm backend. + TargetRegistry::RegisterMCAsmBackend(*T, createLoongArchAsmBackend); } + + // Register the MC Code Emitter + for (Target *T : {&getTheLoongArch32Target(), &getTheLoongArch64Target()}) + TargetRegistry::RegisterMCCodeEmitter(*T, createLoongArchMCCodeEmitter); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h index ab35a0096c8a..04a5c79e633a 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h @@ -1,4 +1,4 @@ -//===- LoongArchMCTargetDesc.h - LoongArch Target Descriptions --*- C++ -*-===// +//===-- LoongArchMCTargetDesc.h - LoongArch Target Descriptions -----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -13,8 +13,8 @@ #ifndef LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H #define LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H -#include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/DataTypes.h" + #include namespace llvm { @@ -25,7 +25,15 @@ class MCInstrInfo; class MCObjectTargetWriter; class MCRegisterInfo; class MCSubtargetInfo; +class MCTargetOptions; +class StringRef; class Target; +class Triple; +class raw_ostream; +class raw_pwrite_stream; + +Target &getTheLoongArch32Target(); +Target &getTheLoongArch64Target(); MCCodeEmitter *createLoongArchMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx); @@ -36,20 +44,24 @@ MCAsmBackend *createLoongArchAsmBackend(const Target &T, const MCTargetOptions &Options); std::unique_ptr -createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit); +createLoongArchELFObjectWriter(const Triple &TT, bool IsLPX32); + +namespace LoongArch_MC { +StringRef selectLoongArchCPU(const Triple &TT, StringRef CPU); +} -} // end namespace llvm +} // End llvm namespace -// Defines symbolic names for LoongArch registers. +// Defines symbolic names for LoongArch registers. This defines a mapping from +// register name to register number. #define GET_REGINFO_ENUM #include "LoongArchGenRegisterInfo.inc" -// Defines symbolic names for LoongArch instructions. +// Defines symbolic names for the LoongArch instructions. #define GET_INSTRINFO_ENUM -#define GET_INSTRINFO_MC_HELPER_DECLS #include "LoongArchGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM #include "LoongArchGenSubtargetInfo.inc" -#endif // LLVM_LIB_TARGET_LOONGARCH_MCTARGETDESC_LOONGARCHMCTARGETDESC_H +#endif diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp deleted file mode 100644 index 1509c436c810..000000000000 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp +++ /dev/null @@ -1,51 +0,0 @@ -//===- LoongArchMatInt.cpp - Immediate materialisation ---------*- C++ -*--===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "LoongArchMatInt.h" -#include "MCTargetDesc/LoongArchMCTargetDesc.h" -#include "llvm/Support/MathExtras.h" - -using namespace llvm; - -LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) { - // Val: - // | hi32 | lo32 | - // +-----------+------------------+------------------+-----------+ - // | Highest12 | Higher20 | Hi20 | Lo12 | - // +-----------+------------------+------------------+-----------+ - // 63 52 51 32 31 12 11 0 - // - const int64_t Highest12 = Val >> 52 & 0xFFF; - const int64_t Higher20 = Val >> 32 & 0xFFFFF; - const int64_t Hi20 = Val >> 12 & 0xFFFFF; - const int64_t Lo12 = Val & 0xFFF; - InstSeq Insts; - - if (Highest12 != 0 && SignExtend64<52>(Val) == 0) { - Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12))); - return Insts; - } - - if (Hi20 == 0) - Insts.push_back(Inst(LoongArch::ORI, Lo12)); - else if (SignExtend32<1>(Lo12 >> 11) == SignExtend32<20>(Hi20)) - Insts.push_back(Inst(LoongArch::ADDI_W, SignExtend64<12>(Lo12))); - else { - Insts.push_back(Inst(LoongArch::LU12I_W, SignExtend64<20>(Hi20))); - if (Lo12 != 0) - Insts.push_back(Inst(LoongArch::ORI, Lo12)); - } - - if (SignExtend32<1>(Hi20 >> 19) != SignExtend32<20>(Higher20)) - Insts.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20))); - - if (SignExtend32<1>(Higher20 >> 19) != SignExtend32<12>(Highest12)) - Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12))); - - return Insts; -} diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp new file mode 100644 index 000000000000..4d0e785a30a4 --- /dev/null +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchTargetStreamer.cpp @@ -0,0 +1,330 @@ +//===-- LoongArchTargetStreamer.cpp - LoongArch Target Streamer Methods -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides LoongArch specific target streamer methods. 
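+// Three flavours are implemented below: a do-nothing base class used for
+// null streaming, LoongArchTargetAsmStreamer for textual assembly output,
+// and LoongArchTargetELFStreamer, which maintains the ABI bits in the ELF
+// header e_flags.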
+// +//===----------------------------------------------------------------------===// + +#include "LoongArchABIInfo.h" +#include "LoongArchELFStreamer.h" +#include "LoongArchInstPrinter.h" +#include "LoongArchMCExpr.h" +#include "LoongArchMCTargetDesc.h" +#include "LoongArchTargetObjectFile.h" +#include "LoongArchTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; + +namespace { +static cl::opt RoundSectionSizes( + "loongarch-round-section-sizes", cl::init(false), + cl::desc("Round section sizes up to the section alignment"), cl::Hidden); +} // end anonymous namespace + +LoongArchTargetStreamer::LoongArchTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S), ModuleDirectiveAllowed(true) { + GPRInfoSet = FPRInfoSet = FrameInfoSet = false; +} +void LoongArchTargetStreamer::emitDirectiveOptionPic0() {} +void LoongArchTargetStreamer::emitDirectiveOptionPic2() {} +void LoongArchTargetStreamer::emitDirectiveSetArch(StringRef Arch) { + forbidModuleDirective(); +} +void LoongArchTargetStreamer::emitDirectiveSetLoongArch32() { forbidModuleDirective(); } +void LoongArchTargetStreamer::emitDirectiveSetloongarch64() { forbidModuleDirective(); } + +void LoongArchTargetStreamer::emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitRXX(unsigned Opcode, unsigned Reg0, MCOperand Op1, + MCOperand Op2, SMLoc IDLoc, const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(Op1); + TmpInst.addOperand(Op2); + TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitRRXX(unsigned Opcode, unsigned Reg0, unsigned Reg1, + MCOperand Op2, MCOperand Op3, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(Op2); + TmpInst.addOperand(Op3); + TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, + SMLoc IDLoc, const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(Op1); + TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, + SMLoc IDLoc, const MCSubtargetInfo *STI) { + emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, STI); +} + +void LoongArchTargetStreamer::emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, + SMLoc IDLoc, const MCSubtargetInfo *STI) { + emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, STI); +} + +void LoongArchTargetStreamer::emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, + SMLoc IDLoc, const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + 
TmpInst.addOperand(MCOperand::createImm(Imm1)); + TmpInst.addOperand(MCOperand::createImm(Imm2)); + TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, + MCOperand Op2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(Op2); + TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, + unsigned Reg2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, STI); +} + +void LoongArchTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, + unsigned Reg1, int32_t Imm, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); +} + +void LoongArchTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, + unsigned Reg1, int16_t Imm0, int16_t Imm1, + int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(MCOperand::createImm(Imm0)); + TmpInst.addOperand(MCOperand::createImm(Imm1)); + TmpInst.addOperand(MCOperand::createImm(Imm2)); + TmpInst.setLoc(IDLoc); + getStreamer().emitInstruction(TmpInst, *STI); +} + +void LoongArchTargetStreamer::emitAdd(unsigned DstReg, unsigned SrcReg, + unsigned TrgReg, bool Is64Bit, + const MCSubtargetInfo *STI) { + emitRRR(Is64Bit ? LoongArch::ADD_D : LoongArch::ADD_W, DstReg, SrcReg, TrgReg, SMLoc(), + STI); +} + +void LoongArchTargetStreamer::emitDSLL(unsigned DstReg, unsigned SrcReg, + int16_t ShiftAmount, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + if (ShiftAmount >= 32) { + emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount - 32, IDLoc, STI); + return; + } + + emitRRI(LoongArch::SLLI_D, DstReg, SrcReg, ShiftAmount, IDLoc, STI); +} + +void LoongArchTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { + emitRRI(LoongArch::ANDI, LoongArch::ZERO, LoongArch::ZERO, 0, IDLoc, STI); +} + +LoongArchTargetAsmStreamer::LoongArchTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS) + : LoongArchTargetStreamer(S), OS(OS) {} + +void LoongArchTargetAsmStreamer::emitDirectiveOptionPic0() { + OS << "\t.option\tpic0\n"; +} + +void LoongArchTargetAsmStreamer::emitDirectiveOptionPic2() { + OS << "\t.option\tpic2\n"; +} + +void LoongArchTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { + OS << "\t.set arch=" << Arch << "\n"; + LoongArchTargetStreamer::emitDirectiveSetArch(Arch); +} + +void LoongArchTargetAsmStreamer::emitDirectiveSetLoongArch32() { + //OS << "\t.set\tloongarch32\n"; + LoongArchTargetStreamer::emitDirectiveSetLoongArch32(); +} + +void LoongArchTargetAsmStreamer::emitDirectiveSetloongarch64() { + //OS << "\t.set\tloongarch64\n"; + LoongArchTargetStreamer::emitDirectiveSetloongarch64(); +} + +// This part is for ELF object output. 
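+// The ELF streamer derives the ABI from the target triple in its constructor
+// and, in finish(), forces at least 16-byte alignment on .text, .data and
+// .bss before writing the final LP32/LPX32/LP64 ABI flag into e_flags.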
+LoongArchTargetELFStreamer::LoongArchTargetELFStreamer(MCStreamer &S,
+                                                       const MCSubtargetInfo &STI)
+    : LoongArchTargetStreamer(S), STI(STI) {
+  MCAssembler &MCA = getStreamer().getAssembler();
+
+  // It's possible that MCObjectFileInfo isn't fully initialized at this point
+  // due to an initialization order problem where LLVMTargetMachine creates the
+  // target streamer before TargetLoweringObjectFile calls
+  // InitializeMCObjectFileInfo. There doesn't seem to be a single place that
+  // covers all cases so this statement covers most cases and direct object
+  // emission must call setPic() once MCObjectFileInfo has been initialized. The
+  // cases we don't handle here are covered by LoongArchAsmPrinter.
+  Pic = MCA.getContext().getObjectFileInfo()->isPositionIndependent();
+
+  // Set the header flags that we can in the constructor.
+  // FIXME: This is a fairly terrible hack. We set the rest
+  // of these in the destructor. The problem here is two-fold:
+  //
+  // a: Some of the eflags can be set/reset by directives.
+  // b: There aren't any usage paths that initialize the ABI
+  //    pointer until after we initialize either an assembler
+  //    or the target machine.
+  // We can fix this by making the target streamer construct
+  // the ABI, but this is fraught with wide ranging dependency
+  // issues as well.
+  unsigned EFlags = MCA.getELFHeaderEFlags();
+
+  // FIXME: Fix a dependency issue by instantiating the ABI object to some
+  // default based off the triple. The triple doesn't describe the target
+  // fully, but any external user of the API that uses the MCTargetStreamer
+  // would otherwise crash on assertion failure.
+
+  ABI = LoongArchABIInfo(
+      STI.getTargetTriple().getArch() == Triple::ArchType::loongarch32
+          ? LoongArchABIInfo::LP32()
+          : LoongArchABIInfo::LP64());
+
+  EFlags |= ELF::EF_LARCH_ABI;
+  MCA.setELFHeaderEFlags(EFlags);
+}
+
+void LoongArchTargetELFStreamer::emitLabel(MCSymbol *S) {
+  auto *Symbol = cast<MCSymbolELF>(S);
+  getStreamer().getAssembler().registerSymbol(*Symbol);
+  uint8_t Type = Symbol->getType();
+  if (Type != ELF::STT_FUNC)
+    return;
+
+}
+
+void LoongArchTargetELFStreamer::finish() {
+  MCAssembler &MCA = getStreamer().getAssembler();
+  const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo();
+
+  // .bss, .text and .data are always at least 16-byte aligned.
+  MCSection &TextSection = *OFI.getTextSection();
+  MCA.registerSection(TextSection);
+  MCSection &DataSection = *OFI.getDataSection();
+  MCA.registerSection(DataSection);
+  MCSection &BSSSection = *OFI.getBSSSection();
+  MCA.registerSection(BSSSection);
+
+  TextSection.setAlignment(Align(std::max(16u, TextSection.getAlignment())));
+  DataSection.setAlignment(Align(std::max(16u, DataSection.getAlignment())));
+  BSSSection.setAlignment(Align(std::max(16u, BSSSection.getAlignment())));
+
+  if (RoundSectionSizes) {
+    // Make section sizes a multiple of the alignment. This is useful for
+    // verifying the output of IAS against the output of other assemblers but
+    // it's not necessary to produce a correct object and increases section
+    // size.
+    MCStreamer &OS = getStreamer();
+    for (MCSection &S : MCA) {
+      MCSectionELF &Section = static_cast<MCSectionELF &>(S);
+
+      unsigned Alignment = Section.getAlignment();
+      if (Alignment) {
+        OS.switchSection(&Section);
+        if (Section.useCodeAlign())
+          OS.emitCodeAlignment(Alignment, &STI, Alignment);
+        else
+          OS.emitValueToAlignment(Alignment, 0, 1, Alignment);
+      }
+    }
+  }
+
+  // Update e_header flags. See the FIXME and comment above in
+  // the constructor for a full rundown on this.
+  unsigned EFlags = MCA.getELFHeaderEFlags();
+
+  // ABI
+  // Set the e_flags ABI bit that matches the target ABI.
+  if (getABI().IsLP32())
+    EFlags |= ELF::EF_LARCH_ABI_LP32;
+  else if (getABI().IsLPX32())
+    EFlags |= ELF::EF_LARCH_ABI_LPX32;
+  else
+    EFlags |= ELF::EF_LARCH_ABI_LP64;
+
+  MCA.setELFHeaderEFlags(EFlags);
+}
+
+MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() {
+  return static_cast<MCELFStreamer &>(Streamer);
+}
+
+void LoongArchTargetELFStreamer::emitDirectiveOptionPic0() {
+  MCAssembler &MCA = getStreamer().getAssembler();
+  unsigned Flags = MCA.getELFHeaderEFlags();
+  // This option overrides other PIC options like -KPIC.
+  Pic = false;
+  /// XXX: The LoongArch ELF headers do not define this flag yet.
+  //Flags &= ~ELF::EF_LOONGARCH_PIC;
+  MCA.setELFHeaderEFlags(Flags);
+}
+
+void LoongArchTargetELFStreamer::emitDirectiveOptionPic2() {
+  MCAssembler &MCA = getStreamer().getAssembler();
+  unsigned Flags = MCA.getELFHeaderEFlags();
+  Pic = true;
+  // NOTE: We are following the GAS behaviour here which means the directive
+  // 'pic2' also sets the CPIC bit in the ELF header. This is different from
+  // what is stated in the SYSV ABI, which considers the bits EF_LOONGARCH_PIC
+  // and EF_LOONGARCH_CPIC to be mutually exclusive.
+  /// XXX: The LoongArch ELF headers do not define these flags yet.
+  //Flags |= ELF::EF_LOONGARCH_PIC | ELF::EF_LOONGARCH_CPIC;
+  MCA.setELFHeaderEFlags(Flags);
+}
diff --git a/llvm/lib/Target/LoongArch/TargetInfo/CMakeLists.txt b/llvm/lib/Target/LoongArch/TargetInfo/CMakeLists.txt
index e14360ff555e..f53ddba40050 100644
--- a/llvm/lib/Target/LoongArch/TargetInfo/CMakeLists.txt
+++ b/llvm/lib/Target/LoongArch/TargetInfo/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_component_library(LLVMLoongArchInfo
   LoongArchTargetInfo.cpp
 
   LINK_COMPONENTS
-  MC
   Support
 
   ADD_TO_COMPONENT
diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
index 10654510032f..e6b84518017c 100644
--- a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
+++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- LoongArchTargetInfo.cpp - LoongArch Target Implementation ---------===//
+//===-- LoongArchTargetInfo.cpp - LoongArch Target Implementation -------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -21,10 +21,14 @@ Target &llvm::getTheLoongArch64Target() {
 }
 
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTargetInfo() {
-  RegisterTarget<Triple::loongarch32> X(
-      getTheLoongArch32Target(), "loongarch32", "32-bit LoongArch",
-      "LoongArch");
-  RegisterTarget<Triple::loongarch64> Y(
-      getTheLoongArch64Target(), "loongarch64", "64-bit LoongArch",
-      "LoongArch");
+#if 0
+  // TODO: support it in the future
+  RegisterTarget<Triple::loongarch32>
+      X(getTheLoongArch32Target(), "loongarch32", "LoongArch (32-bit)", "LoongArch");
+#endif
+  RegisterTarget<Triple::loongarch64>
+      A(getTheLoongArch64Target(), "loongarch64", "LoongArch (64-bit)",
+        "LoongArch");
 }
diff --git a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
index b24cf879512c..7dce2497f7d9 100644
--- a/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
+++ b/llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h
@@ -1,4 +1,4 @@
-//===-- LoongArchTargetInfo.h - LoongArch Target Implementation -*- C++ -*-===//
+//===-- LoongArchTargetInfo.h - LoongArch Target Implementation -----------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -16,6 +16,6 @@ class Target;
 Target &getTheLoongArch32Target();
 Target &getTheLoongArch64Target();
 
-} // end namespace llvm
+} // namespace llvm
 
 #endif // LLVM_LIB_TARGET_LOONGARCH_TARGETINFO_LOONGARCHTARGETINFO_H
diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
index d242083f958b..1a2d89fbbbd6 100644
--- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -27,6 +27,12 @@
 
 using namespace llvm;
 
+static cl::opt<bool>
+EnableLoongson3FixLLSC("mips-fix-loongson3-llsc", cl::Hidden,
+                       cl::desc("Work around loongson3 llsc erratum"),
+                       cl::init(true));
+
+
 #define DEBUG_TYPE "mips-pseudo"
 
 namespace {
@@ -188,6 +194,21 @@ bool MipsExpandPseudo::expandAtomicCmpSwapSubword(
         .addImm(ShiftImm);
   }
 
+  if (EnableLoongson3FixLLSC) {
+    bool Has_sync = false;
+    for (MachineBasicBlock::iterator MBBb = sinkMBB->begin(), MBBe = sinkMBB->end();
+         MBBb != MBBe; ++MBBb) {
+      Has_sync |= MBBb->getOpcode() == Mips::SYNC;
+      if (MBBb->mayLoad() || MBBb->mayStore())
+        break;
+    }
+
+    if (!Has_sync) {
+      MachineBasicBlock::iterator Pos = sinkMBB->begin();
+      BuildMI(*sinkMBB, Pos, DL, TII->get(Mips::SYNC)).addImm(0);
+    }
+  }
+
   LivePhysRegs LiveRegs;
   computeAndAddLiveIns(LiveRegs, *loop1MBB);
   computeAndAddLiveIns(LiveRegs, *loop2MBB);
@@ -289,6 +310,20 @@ bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB,
   BuildMI(loop2MBB, DL, TII->get(BEQ))
       .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB);
 
+  if (EnableLoongson3FixLLSC) {
+    bool Has_sync = false;
+    for (MachineBasicBlock::iterator MBBb = exitMBB->begin(), MBBe = exitMBB->end();
+         MBBb != MBBe; ++MBBb) {
+      Has_sync |= MBBb->getOpcode() == Mips::SYNC;
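+      // Only a SYNC that appears before the first memory access counts; the
+      // scan stops at the first load or store.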
+      if (MBBb->mayLoad() || MBBb->mayStore())
+        break;
+    }
+    if (!Has_sync) {
+      MachineBasicBlock::iterator Pos = exitMBB->begin();
+      BuildMI(*exitMBB, Pos, DL, TII->get(Mips::SYNC)).addImm(0);
+    }
+  }
+
   LivePhysRegs LiveRegs;
   computeAndAddLiveIns(LiveRegs, *loop1MBB);
   computeAndAddLiveIns(LiveRegs, *loop2MBB);
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 3274e36ab71a..fadba1472f43 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -104,6 +104,7 @@ static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52;
 static const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29;
 static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
 static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
+static const uint64_t kLoongArch64_ShadowOffset64 = 1ULL << 37;
 static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
 static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000;
 static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
@@ -481,6 +482,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
   bool IsMIPSN32ABI = TargetTriple.getEnvironment() == Triple::GNUABIN32;
   bool IsMIPS32 = TargetTriple.isMIPS32();
   bool IsMIPS64 = TargetTriple.isMIPS64();
+  bool IsLoongArch64 = TargetTriple.isLoongArch64();
   bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb();
   bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64;
   bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64;
@@ -548,7 +550,9 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
       Mapping.Offset = kWindowsShadowOffset64;
     } else if (IsMIPS64)
       Mapping.Offset = kMIPS64_ShadowOffset64;
-    else if (IsIOS)
+    else if (IsLoongArch64) {
+      Mapping.Offset = kLoongArch64_ShadowOffset64;
+    } else if (IsIOS)
       Mapping.Offset = kDynamicShadowSentinel;
     else if (IsMacOS && IsAArch64)
       Mapping.Offset = kDynamicShadowSentinel;
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4606bd5de6c3..70155a590083 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -385,6 +385,14 @@ static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
 #endif
 };
 
+// loongarch64 Linux
+static const MemoryMapParams Linux_LOONGARCH64_MemoryMapParams = {
+    0,              // AndMask (not used)
+    0x008000000000, // XorMask
+    0,              // ShadowBase (not used)
+    0x002000000000, // OriginBase
+};
+
 // mips64 Linux
 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
     0,              // AndMask (not used)
@@ -454,6 +462,11 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
     &Linux_X86_64_MemoryMapParams,
 };
 
+static const PlatformMemoryMapParams Linux_LOONGARCH_MemoryMapParams = {
+    nullptr,
+    &Linux_LOONGARCH64_MemoryMapParams,
+};
+
 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
     nullptr,
     &Linux_MIPS64_MemoryMapParams,
@@ -516,6 +529,7 @@ public:
 private:
   friend struct MemorySanitizerVisitor;
   friend struct VarArgAMD64Helper;
+  friend struct VarArgLoongArch64Helper;
   friend struct VarArgMIPS64Helper;
   friend struct VarArgAArch64Helper;
   friend struct VarArgPowerPC64Helper;
@@ -937,6 +951,9 @@ void MemorySanitizer::initializeModule(Module &M) {
     case Triple::x86:
       MapParams = Linux_X86_MemoryMapParams.bits32;
       break;
+    case Triple::loongarch64:
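+      // loongarch64 uses the XOR-based mapping defined above: with AndMask
+      // and ShadowBase unused, shadow = addr ^ 0x008000000000.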
+      MapParams = Linux_LOONGARCH_MemoryMapParams.bits64;
+      break;
     case Triple::mips64:
     case Triple::mips64el:
       MapParams = Linux_MIPS_MemoryMapParams.bits64;
@@ -4445,6 +4462,117 @@ struct VarArgAMD64Helper : public VarArgHelper {
   }
 };
 
+/// LoongArch64-specific implementation of VarArgHelper.
+struct VarArgLoongArch64Helper : public VarArgHelper {
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy = nullptr;
+  Value *VAArgSize = nullptr;
+
+  SmallVector<CallInst *, 16> VAStartInstrumentationList;
+
+  VarArgLoongArch64Helper(Function &F, MemorySanitizer &MS,
+                          MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {}
+
+  void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
+    unsigned VAArgOffset = 0;
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(),
+              End = CB.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Triple TargetTriple(F.getParent()->getTargetTriple());
+      Value *A = *ArgIt;
+      Value *Base;
+      uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+      Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
+      VAArgOffset += ArgSize;
+      VAArgOffset = alignTo(VAArgOffset, 8);
+      if (!Base)
+        continue;
+      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+    }
+
+    Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
+    // Here we reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
+    // new class member, i.e. it holds the total size of all VarArgs.
+    IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   unsigned ArgOffset, unsigned ArgSize) {
+    // Make sure we don't overflow __msan_va_arg_tls.
+    if (ArgOffset + ArgSize > kParamTLSSize)
+      return nullptr;
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, Alignment, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr, *OriginPtr;
+    const Align Alignment = Align(8);
+    std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
+        VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 8, Alignment, false);
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    IRBuilder<> IRB(MSV.FnPrologueEnd);
+    VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
+    Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0),
+                                    VAArgSize);
+
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
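+      // The backup is made at the function prologue, before any user code
+      // runs, so the shadow the caller wrote into __msan_va_arg_tls is still
+      // intact when the va_start handling below reads it back.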
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+    }
+
+    // Instrument va_start.
+    // Copy va_list shadow from the backup copy of the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+      Value *VAListTag = OrigInst->getArgOperand(0);
+      Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
+      Value *RegSaveAreaPtrPtr =
+          IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                             PointerType::get(Type::getInt64PtrTy(*MS.C), 0));
+      Value *RegSaveAreaPtr =
+          IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
+      Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
+      const Align Alignment = Align(8);
+      std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
+          MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
+                                 Alignment, /*isStore*/ true);
+      IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
+                       CopySize);
+    }
+  }
+};
+
 /// MIPS64-specific implementation of VarArgHelper.
 struct VarArgMIPS64Helper : public VarArgHelper {
   Function &F;
@@ -5344,6 +5472,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
   else if (TargetTriple.getArch() == Triple::systemz)
     return new VarArgSystemZHelper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == Triple::loongarch64)
+    return new VarArgLoongArch64Helper(Func, Msan, Visitor);
   else
     return new VarArgNoOpHelper(Func, Msan, Visitor);
 }
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 644c5c82e58e..76fd9277cbca 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
@@ -1861,10 +1862,12 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
 /// SSA value. We only can ensure this for a limited set of operations, and we
 /// don't want to do the rewrites unless we are confident that the result will
 /// be promotable, so we have an early test here.
-static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
+static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
+                                           bool isMips64) {
   // Collect the candidate types for vector-based promotion. Also track whether
   // we have different element types.
   SmallVector<VectorType *, 4> CandidateTys;
+  SetVector<Type *> LoadStoreTys;
   Type *CommonEltTy = nullptr;
   bool HaveCommonEltTy = true;
   auto CheckCandidateType = [&](Type *Ty) {
@@ -1885,15 +1888,56 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
       HaveCommonEltTy = false;
     }
   };
-  // Consider any loads or stores that are the exact size of the slice.
-  for (const Slice &S : P)
-    if (S.beginOffset() == P.beginOffset() &&
-        S.endOffset() == P.endOffset()) {
+
+  // To solve a Skia compilation issue for mips64 only, use a different
+  // implementation backported from LLVM 17.
+  if (isMips64) {
+    // Put load and store types into a set for de-duplication.
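+    // Exact-size slices still become vector candidates immediately; the
+    // de-duplicated type set feeds the widening loop that follows.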
+    for (const Slice &S : P) {
+      Type *Ty;
       if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
-        CheckCandidateType(LI->getType());
+        Ty = LI->getType();
       else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
-        CheckCandidateType(SI->getValueOperand()->getType());
+        Ty = SI->getValueOperand()->getType();
+      else
+        continue;
+      LoadStoreTys.insert(Ty);
+      // Consider any loads or stores that are the exact size of the slice.
+      if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
+        CheckCandidateType(Ty);
+    }
+    // Consider additional vector types where the element type size is a
+    // multiple of load/store element size.
+    for (Type *Ty : LoadStoreTys) {
+      if (!VectorType::isValidElementType(Ty))
+        continue;
+      unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+      // Make a copy of CandidateTys and iterate through it, because we might
+      // append to CandidateTys in the loop.
+      SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
+      for (VectorType *&VTy : CandidateTysCopy) {
+        unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+        unsigned ElementSize =
+            DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+        if (TypeSize != VectorSize && TypeSize != ElementSize &&
+            VectorSize % TypeSize == 0) {
+          VectorType *NewVTy =
+              VectorType::get(Ty, VectorSize / TypeSize, false);
+          CheckCandidateType(NewVTy);
+        }
+      }
     }
+  } else {
+    // Consider any loads or stores that are the exact size of the slice.
+    for (const Slice &S : P)
+      if (S.beginOffset() == P.beginOffset() &&
+          S.endOffset() == P.endOffset()) {
+        if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
+          CheckCandidateType(LI->getType());
+        else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
+          CheckCandidateType(SI->getValueOperand()->getType());
+      }
+  }
 
   // If we didn't find a vector type, nothing to do here.
   if (CandidateTys.empty())
@@ -4276,9 +4320,10 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   assert(DL.getTypeAllocSize(SliceTy).getFixedSize() >= P.size());
 
   bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
+  Triple TT(AI.getModule()->getTargetTriple());
 
   VectorType *VecTy =
-      IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
+      IsIntegerPromotable ?
nullptr : isVectorPromotionViable(P, DL, TT.isMIPS64());
 
   if (VecTy)
     SliceTy = VecTy;
diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp
index c60efa465bb6..fc3e6745c0a1 100644
--- a/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/llvm/lib/XRay/InstrumentationMap.cpp
@@ -61,6 +61,7 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
   if ((!ObjFile.getBinary()->isELF() && !ObjFile.getBinary()->isMachO()) ||
       !(ObjFile.getBinary()->getArch() == Triple::x86_64 ||
         ObjFile.getBinary()->getArch() == Triple::ppc64le ||
+        ObjFile.getBinary()->getArch() == Triple::loongarch64 ||
         ObjFile.getBinary()->getArch() == Triple::arm ||
         ObjFile.getBinary()->getArch() == Triple::aarch64))
     return make_error<StringError>(
diff --git a/llvm/test/CodeGen/LoongArch/1ri.mir b/llvm/test/CodeGen/LoongArch/1ri.mir
deleted file mode 100644
index c7d74b987756..000000000000
--- a/llvm/test/CodeGen/LoongArch/1ri.mir
+++ /dev/null
@@ -1,96 +0,0 @@
-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \
-# RUN:   | extract-section .text \
-# RUN:   | FileCheck %s -check-prefix=CHECK-ENC
-# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \
-# RUN:   | FileCheck %s -check-prefix=CHECK-ASM
-
-# -------------------------------------------------------------------------------------------------
-# Encoding format: 1RI20
-# -------------------------------------------------------------------------------------------------
-# ---------------------+-----------------------------------------------------------+---------------
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
-# ---------------------+-----------------------------------------------------------+---------------
-#        opcode        |                           imm20                           |      rd
-# ---------------------+-----------------------------------------------------------+---------------
-
----
-# CHECK-LABEL: test_LU12I_W:
-# CHECK-ENC: 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0
-# CHECK-ASM: lu12i.w $a0, 49
-name: test_LU12I_W
-body: |
-  bb.0:
-    $r4 = LU12I_W 49
-...
----
-# CHECK-LABEL: test_LU32I_D:
-# CHECK-ENC: 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0
-# CHECK-ASM: lu32i.d $a0, 196
-name: test_LU32I_D
-body: |
-  bb.0:
-    $r4 = LU32I_D $r4, 196
-...
----
-# CHECK-LABEL: test_PCADDI:
-# CHECK-ENC: 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 1 0 0 1 0 0
-# CHECK-ASM: pcaddi $a0, 187
-name: test_PCADDI
-body: |
-  bb.0:
-    $r4 = PCADDI 187
-...
----
-# CHECK-LABEL: test_PCALAU12I:
-# CHECK-ENC: 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 0 1 0 0
-# CHECK-ASM: pcalau12i $a0, 89
-name: test_PCALAU12I
-body: |
-  bb.0:
-    $r4 = PCALAU12I 89
-...
----
-# CHECK-LABEL: test_PCADDU12I:
-# CHECK-ENC: 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0
-# CHECK-ASM: pcaddu12i $a0, 37
-name: test_PCADDU12I
-body: |
-  bb.0:
-    $r4 = PCADDU12I 37
-...
----
-# CHECK-LABEL: test_PCADDU18I:
-# CHECK-ENC: 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0
-# CHECK-ASM: pcaddu18i $a0, 26
-name: test_PCADDU18I
-body: |
-  bb.0:
-    $r4 = PCADDU18I 26
-...
- -# ------------------------------------------------------------------------------------------------- -# Encoding format: 1RI21 -# ------------------------------------------------------------------------------------------------- -# ------------------+-----------------------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------+-----------------------------------------------+--------------+--------------- -# opcode | imm21{15-0} | rj | imm21{20-16} -# ------------------+-----------------------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_BEQZ: -# CHECK-ENC: 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: beqz $a0, 92 -name: test_BEQZ -body: | - bb.0: - BEQZ $r4, 92 -... ---- -# CHECK-LABEL: test_BNEZ: -# CHECK-ENC: 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: bnez $a0, 84 -name: test_BNEZ -body: | - bb.0: - BNEZ $r4, 84 diff --git a/llvm/test/CodeGen/LoongArch/2r.mir b/llvm/test/CodeGen/LoongArch/2r.mir deleted file mode 100644 index 488944526e58..000000000000 --- a/llvm/test/CodeGen/LoongArch/2r.mir +++ /dev/null @@ -1,230 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2R -# ------------------------------------------------------------------------------------------------- -# ------------------------------------------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------------------------------------------+--------------+--------------- -# opcode | rj | rd -# ------------------------------------------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_CLO_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: clo.w $a0, $a1 -name: test_CLO_W -body: | - bb.0: - $r4 = CLO_W $r5 -... ---- -# CHECK-LABEL: test_CLZ_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: clz.w $a0, $a1 -name: test_CLZ_W -body: | - bb.0: - $r4 = CLZ_W $r5 -... ---- -# CHECK-LABEL: test_CTO_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: cto.w $a0, $a1 -name: test_CTO_W -body: | - bb.0: - $r4 = CTO_W $r5 -... ---- -# CHECK-LABEL: test_CTZ_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ctz.w $a0, $a1 -name: test_CTZ_W -body: | - bb.0: - $r4 = CTZ_W $r5 -... ---- -# CHECK-LABEL: test_CLO_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: clo.d $a0, $a1 -name: test_CLO_D -body: | - bb.0: - $r4 = CLO_D $r5 -... ---- -# CHECK-LABEL: test_CLZ_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: clz.d $a0, $a1 -name: test_CLZ_D -body: | - bb.0: - $r4 = CLZ_D $r5 -... 
---- -# CHECK-LABEL: test_CTO_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: cto.d $a0, $a1 -name: test_CTO_D -body: | - bb.0: - $r4 = CTO_D $r5 -... ---- -# CHECK-LABEL: test_CTZ_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ctz.d $a0, $a1 -name: test_CTZ_D -body: | - bb.0: - $r4 = CTZ_D $r5 -... ---- -# CHECK-LABEL: test_REVB_2H: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revb.2h $a0, $a1 -name: test_REVB_2H -body: | - bb.0: - $r4 = REVB_2H $r5 -... ---- -# CHECK-LABEL: test_REVB_4H: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revb.4h $a0, $a1 -name: test_REVB_4H -body: | - bb.0: - $r4 = REVB_4H $r5 -... ---- -# CHECK-LABEL: test_REVB_2W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revb.2w $a0, $a1 -name: test_REVB_2W -body: | - bb.0: - $r4 = REVB_2W $r5 -... ---- -# CHECK-LABEL: test_REVB_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revb.d $a0, $a1 -name: test_REVB_D -body: | - bb.0: - $r4 = REVB_D $r5 -... ---- -# CHECK-LABEL: test_REVH_2W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revh.2w $a0, $a1 -name: test_REVH_2W -body: | - bb.0: - $r4 = REVH_2W $r5 -... ---- -# CHECK-LABEL: test_REVH_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: revh.d $a0, $a1 -name: test_REVH_D -body: | - bb.0: - $r4 = REVH_D $r5 -... ---- -# CHECK-LABEL: test_BITREV_4B: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bitrev.4b $a0, $a1 -name: test_BITREV_4B -body: | - bb.0: - $r4 = BITREV_4B $r5 -... ---- -# CHECK-LABEL: test_BITREV_8B: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bitrev.8b $a0, $a1 -name: test_BITREV_8B -body: | - bb.0: - $r4 = BITREV_8B $r5 -... ---- -# CHECK-LABEL: test_BITREV_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bitrev.w $a0, $a1 -name: test_BITREV_W -body: | - bb.0: - $r4 = BITREV_W $r5 -... ---- -# CHECK-LABEL: test_BITREV_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bitrev.d $a0, $a1 -name: test_BITREV_D -body: | - bb.0: - $r4 = BITREV_D $r5 -... ---- -# CHECK-LABEL: test_EXT_W_H: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ext.w.h $a0, $a1 -name: test_EXT_W_H -body: | - bb.0: - $r4 = EXT_W_H $r5 -... ---- -# CHECK-LABEL: test_EXT_W_B: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ext.w.b $a0, $a1 -name: test_EXT_W_B -body: | - bb.0: - $r4 = EXT_W_B $r5 -... ---- -# CHECK-LABEL: test_CPUCFG: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: cpucfg $a0, $a1 -name: test_CPUCFG -body: | - bb.0: - $r4 = CPUCFG $r5 -... ---- -# CHECK-LABEL: test_RDTIMEL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rdtimel.w $a0, $a1 -name: test_RDTIMEL_W -body: | - bb.0: - $r4, $r5 = RDTIMEL_W -... ---- -# CHECK-LABEL: test_RDTIMEH_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rdtimeh.w $a0, $a1 -name: test_RDTIMEH_W -body: | - bb.0: - $r4, $r5 = RDTIMEH_W -... 
---- -# CHECK-LABEL: test_RDTIME_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rdtime.d $a0, $a1 -name: test_RDTIME_D -body: | - bb.0: - $r4, $r5 = RDTIME_D diff --git a/llvm/test/CodeGen/LoongArch/2ri.mir b/llvm/test/CodeGen/LoongArch/2ri.mir deleted file mode 100644 index 263fed42cada..000000000000 --- a/llvm/test/CodeGen/LoongArch/2ri.mir +++ /dev/null @@ -1,432 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI5 -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------+--------------+--------------- -# opcode | imm5 | rj | rd -# ---------------------------------------------------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_SLLI_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: slli.w $a0, $a1, 0 -name: test_SLLI_W -body: | - bb.0: - $r4 = SLLI_W $r5, 0 -... ---- -# CHECK-LABEL: test_SRLI_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srli.w $a0, $a1, 30 -name: test_SRLI_W -body: | - bb.0: - $r4 = SRLI_W $r5, 30 -... ---- -# CHECK-LABEL: test_SRAI_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srai.w $a0, $a1, 24 -name: test_SRAI_W -body: | - bb.0: - $r4 = SRAI_W $r5, 24 -... ---- -# CHECK-LABEL: test_ROTRI_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rotri.w $a0, $a1, 23 -name: test_ROTRI_W -body: | - bb.0: - $r4 = ROTRI_W $r5, 23 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI6 -# ------------------------------------------------------------------------------------------------- -# ------------------------------------------------+-----------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------------------------+-----------------+--------------+--------------- -# opcode | imm6 | rj | rd -# ------------------------------------------------+-----------------+--------------+--------------- - ---- -# CHECK-LABEL: test_SLLI_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: slli.d $a0, $a1, 39 -name: test_SLLI_D -body: | - bb.0: - $r4 = SLLI_D $r5, 39 -... ---- -# CHECK-LABEL: test_SRLI_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srli.d $a0, $a1, 38 -name: test_SRLI_D -body: | - bb.0: - $r4 = SRLI_D $r5, 38 -... ---- -# CHECK-LABEL: test_SRAI_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srai.d $a0, $a1, 27 -name: test_SRAI_D -body: | - bb.0: - $r4 = SRAI_D $r5, 27 -... 
---- -# CHECK-LABEL: test_ROTRI_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rotri.d $a0, $a1, 7 -name: test_ROTRI_D -body: | - bb.0: - $r4 = ROTRI_D $r5, 7 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI12 -# ------------------------------------------------------------------------------------------------- -# ------------------------------+-----------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------+-----------------------------------+--------------+--------------- -# opcode | imm12 | rj | rd -# ------------------------------+-----------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_SLTI: -# CHECK-ENC: 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: slti $a0, $a1, 235 -name: test_SLTI -body: | - bb.0: - $r4 = SLTI $r5, 235 -... ---- -# CHECK-LABEL: test_SLTUI: -# CHECK-ENC: 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sltui $a0, $a1, 162 -name: test_SLTUI -body: | - bb.0: - $r4 = SLTUI $r5, 162 -... ---- -# CHECK-LABEL: test_ADDI_W: -# CHECK-ENC: 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 1 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: addi.w $a0, $a1, 246 -name: test_ADDI_W -body: | - bb.0: - $r4 = ADDI_W $r5, 246 -... ---- -# CHECK-LABEL: test_ADDI_D: -# CHECK-ENC: 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: addi.d $a0, $a1, 75 -name: test_ADDI_D -body: | - bb.0: - $r4 = ADDI_D $r5, 75 -... ---- -# CHECK-LABEL: test_LU52I_D: -# CHECK-ENC: 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: lu52i.d $a0, $a1, 195 -name: test_LU52I_D -body: | - bb.0: - $r4 = LU52I_D $r5, 195 -... ---- -# CHECK-LABEL: test_ANDI: -# CHECK-ENC: 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: andi $a0, $a1, 106 -name: test_ANDI -body: | - bb.0: - $r4 = ANDI $r5, 106 -... ---- -# CHECK-LABEL: test_ORI: -# CHECK-ENC: 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ori $a0, $a1, 47 -name: test_ORI -body: | - bb.0: - $r4 = ORI $r5, 47 -... ---- -# CHECK-LABEL: test_XORI: -# CHECK-ENC: 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: xori $a0, $a1, 99 -name: test_XORI -body: | - bb.0: - $r4 = XORI $r5, 99 -... ---- -# CHECK-LABEL: test_LD_B: -# CHECK-ENC: 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.b $a0, $a1, 21 -name: test_LD_B -body: | - bb.0: - $r4 = LD_B $r5, 21 -... ---- -# CHECK-LABEL: test_LD_H: -# CHECK-ENC: 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.h $a0, $a1, 80 -name: test_LD_H -body: | - bb.0: - $r4 = LD_H $r5, 80 -... ---- -# CHECK-LABEL: test_LD_W: -# CHECK-ENC: 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.w $a0, $a1, 92 -name: test_LD_W -body: | - bb.0: - $r4 = LD_W $r5, 92 -... ---- -# CHECK-LABEL: test_LD_BU: -# CHECK-ENC: 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.bu $a0, $a1, 150 -name: test_LD_BU -body: | - bb.0: - $r4 = LD_BU $r5, 150 -... ---- -# CHECK-LABEL: test_LD_HU: -# CHECK-ENC: 0 0 1 0 1 0 1 0 0 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.hu $a0, $a1, 198 -name: test_LD_HU -body: | - bb.0: - $r4 = LD_HU $r5, 198 -... 
---- -# CHECK-LABEL: test_LD_WU: -# CHECK-ENC: 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ld.wu $a0, $a1, 31 -name: test_LD_WU -body: | - bb.0: - $r4 = LD_WU $r5, 31 -... ---- -# CHECK-LABEL: test_ST_B: -# CHECK-ENC: 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: st.b $a0, $a1, 95 -name: test_ST_B -body: | - bb.0: - ST_B $r4, $r5, 95 -... ---- -# CHECK-LABEL: test_ST_H: -# CHECK-ENC: 0 0 1 0 1 0 0 1 0 1 0 0 0 0 0 1 1 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: st.h $a0, $a1, 122 -name: test_ST_H -body: | - bb.0: - ST_H $r4, $r5, 122 -... ---- -# CHECK-LABEL: test_ST_W: -# CHECK-ENC: 0 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 1 0 1 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: st.w $a0, $a1, 175 -name: test_ST_W -body: | - bb.0: - ST_W $r4, $r5, 175 -... ---- -# CHECK-LABEL: test_ST_D: -# CHECK-ENC: 0 0 1 0 1 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: st.d $a0, $a1, 60 -name: test_ST_D -body: | - bb.0: - ST_D $r4, $r5, 60 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI14 -# ------------------------------------------------------------------------------------------------- -# ------------------------+-----------------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------+-----------------------------------------+--------------+--------------- -# opcode | imm14 | rj | rd -# ------------------------+-----------------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_LDPTR_W: -# CHECK-ENC: 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldptr.w $a0, $a1, 264 -name: test_LDPTR_W -body: | - bb.0: - $r4 = LDPTR_W $r5, 264 -... ---- -# CHECK-LABEL: test_LDPTR_D: -# CHECK-ENC: 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldptr.d $a0, $a1, 224 -name: test_LDPTR_D -body: | - bb.0: - $r4 = LDPTR_D $r5, 224 -... ---- -# CHECK-LABEL: test_STPTR_W: -# CHECK-ENC: 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stptr.w $a0, $a1, 348 -name: test_STPTR_W -body: | - bb.0: - STPTR_W $r4, $r5, 348 -... ---- -# CHECK-LABEL: test_STPTR_D: -# CHECK-ENC: 0 0 1 0 0 1 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stptr.d $a0, $a1, 580 -name: test_STPTR_D -body: | - bb.0: - STPTR_D $r4, $r5, 580 -... ---- -# CHECK-LABEL: test_LL_W: -# CHECK-ENC: 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ll.w $a0, $a1, 972 -name: test_LL_W -body: | - bb.0: - $r4 = LL_W $r5, 972 -... ---- -# CHECK-LABEL: test_LL_D: -# CHECK-ENC: 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ll.d $a0, $a1, 296 -name: test_LL_D -body: | - bb.0: - $r4 = LL_D $r5, 296 -... ---- -# CHECK-LABEL: test_SC_W: -# CHECK-ENC: 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sc.w $a0, $a1, 384 -name: test_SC_W -body: | - bb.0: - $r4 = SC_W $r4, $r5, 384 -... ---- -# CHECK-LABEL: test_SC_D: -# CHECK-ENC: 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sc.d $a0, $a1, 420 -name: test_SC_D -body: | - bb.0: - $r4 = SC_D $r4, $r5, 420 -... 
- -# ------------------------------------------------------------------------------------------------- -# Encoding format: 2RI16 -# ------------------------------------------------------------------------------------------------- -# ------------------+-----------------------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------+-----------------------------------------------+--------------+--------------- -# opcode | imm16 | rj | rd -# ------------------+-----------------------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_ADDU16I_D: -# CHECK-ENC: 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: addu16i.d $a0, $a1, 23 -name: test_ADDU16I_D -body: | - bb.0: - $r4 = ADDU16I_D $r5, 23 -... ---- -# CHECK-LABEL: test_JIRL: -# CHECK-ENC: 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: jirl $a0, $a1, 196 -name: test_JIRL -body: | - bb.0: - $r4 = JIRL $r5, 196 -... ---- -# CHECK-LABEL: test_BEQ: -# CHECK-ENC: 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: beq $a0, $a1, 784 -name: test_BEQ -body: | - bb.0: - BEQ $r4, $r5, 784 -... ---- -# CHECK-LABEL: test_BNE: -# CHECK-ENC: 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bne $a0, $a1, 76 -name: test_BNE -body: | - bb.0: - BNE $r4, $r5, 76 -... ---- -# CHECK-LABEL: test_BLT: -# CHECK-ENC: 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: blt $a0, $a1, 492 -name: test_BLT -body: | - bb.0: - BLT $r4, $r5, 492 -... ---- -# CHECK-LABEL: test_BGE: -# CHECK-ENC: 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bge $a0, $a1, 48 -name: test_BGE -body: | - bb.0: - BGE $r4, $r5, 48 -... ---- -# CHECK-LABEL: test_BLTU: -# CHECK-ENC: 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bltu $a0, $a1, 68 -name: test_BLTU -body: | - bb.0: - BLTU $r4, $r5, 68 -... 
---- -# CHECK-LABEL: test_BGEU: -# CHECK-ENC: 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bgeu $a0, $a1, 352 -name: test_BGEU -body: | - bb.0: - BGEU $r4, $r5, 352 diff --git a/llvm/test/CodeGen/LoongArch/3r.mir b/llvm/test/CodeGen/LoongArch/3r.mir deleted file mode 100644 index a1b97d5637b2..000000000000 --- a/llvm/test/CodeGen/LoongArch/3r.mir +++ /dev/null @@ -1,995 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 3R -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------+--------------+--------------- -# opcode | rk | rj | rd -# ---------------------------------------------------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_ADD_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: add.w $a0, $a1, $a0 -name: test_ADD_W -body: | - bb.0: - $r4 = ADD_W $r5, $r4 -... ---- -# CHECK-LABEL: test_ADD_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: add.d $a0, $a1, $a0 -name: test_ADD_D -body: | - bb.0: - $r4 = ADD_D $r5, $r4 -... ---- -# CHECK-LABEL: test_SUB_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sub.w $a0, $a1, $a0 -name: test_SUB_W -body: | - bb.0: - $r4 = SUB_W $r5, $r4 -... ---- -# CHECK-LABEL: test_SUB_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sub.d $a0, $a1, $a0 -name: test_SUB_D -body: | - bb.0: - $r4 = SUB_D $r5, $r4 -... ---- -# CHECK-LABEL: test_SLT: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: slt $a0, $a1, $a0 -name: test_SLT -body: | - bb.0: - $r4 = SLT $r5, $r4 -... ---- -# CHECK-LABEL: test_SLTU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sltu $a0, $a1, $a0 -name: test_SLTU -body: | - bb.0: - $r4 = SLTU $r5, $r4 -... ---- -# CHECK-LABEL: test_MASKEQZ: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: maskeqz $a0, $a1, $a0 -name: test_MASKEQZ -body: | - bb.0: - $r4 = MASKEQZ $r5, $r4 -... ---- -# CHECK-LABEL: test_MASKNEZ: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: masknez $a0, $a1, $a0 -name: test_MASKNEZ -body: | - bb.0: - $r4 = MASKNEZ $r5, $r4 -... ---- -# CHECK-LABEL: test_NOR: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: nor $a0, $a1, $a0 -name: test_NOR -body: | - bb.0: - $r4 = NOR $r5, $r4 -... ---- -# CHECK-LABEL: test_AND: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: and $a0, $a1, $a0 -name: test_AND -body: | - bb.0: - $r4 = AND $r5, $r4 -... 
---- -# CHECK-LABEL: test_OR: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: or $a0, $a1, $a0 -name: test_OR -body: | - bb.0: - $r4 = OR $r5, $r4 -... ---- -# CHECK-LABEL: test_XOR: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: xor $a0, $a1, $a0 -name: test_XOR -body: | - bb.0: - $r4 = XOR $r5, $r4 -... ---- -# CHECK-LABEL: test_ORN: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: orn $a0, $a1, $a0 -name: test_ORN -body: | - bb.0: - $r4 = ORN $r5, $r4 -... ---- -# CHECK-LABEL: test_ANDN: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: andn $a0, $a1, $a0 -name: test_ANDN -body: | - bb.0: - $r4 = ANDN $r5, $r4 -... ---- -# CHECK-LABEL: test_SLL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sll.w $a0, $a1, $a0 -name: test_SLL_W -body: | - bb.0: - $r4 = SLL_W $r5, $r4 -... ---- -# CHECK-LABEL: test_SRL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srl.w $a0, $a1, $a0 -name: test_SRL_W -body: | - bb.0: - $r4 = SRL_W $r5, $r4 -... ---- -# CHECK-LABEL: test_SRA_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sra.w $a0, $a1, $a0 -name: test_SRA_W -body: | - bb.0: - $r4 = SRA_W $r5, $r4 -... ---- -# CHECK-LABEL: test_SLL_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sll.d $a0, $a1, $a0 -name: test_SLL_D -body: | - bb.0: - $r4 = SLL_D $r5, $r4 -... ---- -# CHECK-LABEL: test_SRL_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: srl.d $a0, $a1, $a0 -name: test_SRL_D -body: | - bb.0: - $r4 = SRL_D $r5, $r4 -... ---- -# CHECK-LABEL: test_SRA_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sra.d $a0, $a1, $a0 -name: test_SRA_D -body: | - bb.0: - $r4 = SRA_D $r5, $r4 -... ---- -# CHECK-LABEL: test_ROTR_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rotr.w $a0, $a1, $a0 -name: test_ROTR_W -body: | - bb.0: - $r4 = ROTR_W $r5, $r4 -... ---- -# CHECK-LABEL: test_ROTR_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: rotr.d $a0, $a1, $a0 -name: test_ROTR_D -body: | - bb.0: - $r4 = ROTR_D $r5, $r4 -... ---- -# CHECK-LABEL: test_MUL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mul.w $a0, $a1, $a0 -name: test_MUL_W -body: | - bb.0: - $r4 = MUL_W $r5, $r4 -... ---- -# CHECK-LABEL: test_MULH_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulh.w $a0, $a1, $a0 -name: test_MULH_W -body: | - bb.0: - $r4 = MULH_W $r5, $r4 -... ---- -# CHECK-LABEL: test_MULH_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulh.wu $a0, $a1, $a0 -name: test_MULH_WU -body: | - bb.0: - $r4 = MULH_WU $r5, $r4 -... ---- -# CHECK-LABEL: test_MUL_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mul.d $a0, $a1, $a0 -name: test_MUL_D -body: | - bb.0: - $r4 = MUL_D $r5, $r4 -... ---- -# CHECK-LABEL: test_MULH_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulh.d $a0, $a1, $a0 -name: test_MULH_D -body: | - bb.0: - $r4 = MULH_D $r5, $r4 -... 
---- -# CHECK-LABEL: test_MULH_DU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulh.du $a0, $a1, $a0 -name: test_MULH_DU -body: | - bb.0: - $r4 = MULH_DU $r5, $r4 -... ---- -# CHECK-LABEL: test_MULW_D_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulw.d.w $a0, $a1, $a0 -name: test_MULW_D_W -body: | - bb.0: - $r4 = MULW_D_W $r5, $r4 -... ---- -# CHECK-LABEL: test_MULW_D_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mulw.d.wu $a0, $a1, $a0 -name: test_MULW_D_WU -body: | - bb.0: - $r4 = MULW_D_WU $r5, $r4 -... ---- -# CHECK-LABEL: test_DIV_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: div.w $a0, $a1, $a0 -name: test_DIV_W -body: | - bb.0: - $r4 = DIV_W $r5, $r4 -... ---- -# CHECK-LABEL: test_MOD_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mod.w $a0, $a1, $a0 -name: test_MOD_W -body: | - bb.0: - $r4 = MOD_W $r5, $r4 -... ---- -# CHECK-LABEL: test_DIV_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: div.wu $a0, $a1, $a0 -name: test_DIV_WU -body: | - bb.0: - $r4 = DIV_WU $r5, $r4 -... ---- -# CHECK-LABEL: test_MOD_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mod.wu $a0, $a1, $a0 -name: test_MOD_WU -body: | - bb.0: - $r4 = MOD_WU $r5, $r4 -... ---- -# CHECK-LABEL: test_DIV_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: div.d $a0, $a1, $a0 -name: test_DIV_D -body: | - bb.0: - $r4 = DIV_D $r5, $r4 -... ---- -# CHECK-LABEL: test_MOD_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mod.d $a0, $a1, $a0 -name: test_MOD_D -body: | - bb.0: - $r4 = MOD_D $r5, $r4 -... ---- -# CHECK-LABEL: test_DIV_DU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: div.du $a0, $a1, $a0 -name: test_DIV_DU -body: | - bb.0: - $r4 = DIV_DU $r5, $r4 -... ---- -# CHECK-LABEL: test_MOD_DU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: mod.du $a0, $a1, $a0 -name: test_MOD_DU -body: | - bb.0: - $r4 = MOD_DU $r5, $r4 -... ---- -# CHECK-LABEL: test_CRC_W_B_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crc.w.b.w $a0, $a1, $a0 -name: test_CRC_W_B_W -body: | - bb.0: - $r4 = CRC_W_B_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRC_W_H_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crc.w.h.w $a0, $a1, $a0 -name: test_CRC_W_H_W -body: | - bb.0: - $r4 = CRC_W_H_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRC_W_W_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crc.w.w.w $a0, $a1, $a0 -name: test_CRC_W_W_W -body: | - bb.0: - $r4 = CRC_W_W_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRC_W_D_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crc.w.d.w $a0, $a1, $a0 -name: test_CRC_W_D_W -body: | - bb.0: - $r4 = CRC_W_D_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRCC_W_B_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crcc.w.b.w $a0, $a1, $a0 -name: test_CRCC_W_B_W -body: | - bb.0: - $r4 = CRCC_W_B_W $r5, $r4 -... 
---- -# CHECK-LABEL: test_CRCC_W_H_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crcc.w.h.w $a0, $a1, $a0 -name: test_CRCC_W_H_W -body: | - bb.0: - $r4 = CRCC_W_H_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRCC_W_W_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crcc.w.w.w $a0, $a1, $a0 -name: test_CRCC_W_W_W -body: | - bb.0: - $r4 = CRCC_W_W_W $r5, $r4 -... ---- -# CHECK-LABEL: test_CRCC_W_D_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: crcc.w.d.w $a0, $a1, $a0 -name: test_CRCC_W_D_W -body: | - bb.0: - $r4 = CRCC_W_D_W $r5, $r4 -... ---- -# CHECK-LABEL: test_AMSWAP_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amswap_db.w $a0, $a1, $a2 -name: test_AMSWAP_DB_W -body: | - bb.0: - $r4 = AMSWAP_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMSWAP_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amswap_db.d $a0, $a1, $a2 -name: test_AMSWAP_DB_D -body: | - bb.0: - $r4 = AMSWAP_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMADD_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amadd_db.w $a0, $a1, $a2 -name: test_AMADD_DB_W -body: | - bb.0: - $r4 = AMADD_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMADD_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amadd_db.d $a0, $a1, $a2 -name: test_AMADD_DB_D -body: | - bb.0: - $r4 = AMADD_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMAND_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amand_db.w $a0, $a1, $a2 -name: test_AMAND_DB_W -body: | - bb.0: - $r4 = AMAND_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMAND_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amand_db.d $a0, $a1, $a2 -name: test_AMAND_DB_D -body: | - bb.0: - $r4 = AMAND_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMOR_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amor_db.w $a0, $a1, $a2 -name: test_AMOR_DB_W -body: | - bb.0: - $r4 = AMOR_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMOR_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amor_db.d $a0, $a1, $a2 -name: test_AMOR_DB_D -body: | - bb.0: - $r4 = AMOR_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMXOR_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amxor_db.w $a0, $a1, $a2 -name: test_AMXOR_DB_W -body: | - bb.0: - $r4 = AMXOR_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMXOR_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amxor_db.d $a0, $a1, $a2 -name: test_AMXOR_DB_D -body: | - bb.0: - $r4 = AMXOR_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax_db.w $a0, $a1, $a2 -name: test_AMMAX_DB_W -body: | - bb.0: - $r4 = AMMAX_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax_db.d $a0, $a1, $a2 -name: test_AMMAX_DB_D -body: | - bb.0: - $r4 = AMMAX_DB_D $r5, $r6 -... 
---- -# CHECK-LABEL: test_AMMIN_DB_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin_db.w $a0, $a1, $a2 -name: test_AMMIN_DB_W -body: | - bb.0: - $r4 = AMMIN_DB_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_DB_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin_db.d $a0, $a1, $a2 -name: test_AMMIN_DB_D -body: | - bb.0: - $r4 = AMMIN_DB_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_DB_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax_db.wu $a0, $a1, $a2 -name: test_AMMAX_DB_WU -body: | - bb.0: - $r4 = AMMAX_DB_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_DB_DU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax_db.du $a0, $a1, $a2 -name: test_AMMAX_DB_DU -body: | - bb.0: - $r4 = AMMAX_DB_DU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_DB_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin_db.wu $a0, $a1, $a2 -name: test_AMMIN_DB_WU -body: | - bb.0: - $r4 = AMMIN_DB_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_DB_DU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin_db.du $a0, $a1, $a2 -name: test_AMMIN_DB_DU -body: | - bb.0: - $r4 = AMMIN_DB_DU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMSWAP_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amswap.w $a0, $a1, $a2 -name: test_AMSWAP_W -body: | - bb.0: - $r4 = AMSWAP_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMSWAP_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amswap.d $a0, $a1, $a2 -name: test_AMSWAP_D -body: | - bb.0: - $r4 = AMSWAP_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMADD_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amadd.w $a0, $a1, $a2 -name: test_AMADD_W -body: | - bb.0: - $r4 = AMADD_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMADD_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amadd.d $a0, $a1, $a2 -name: test_AMADD_D -body: | - bb.0: - $r4 = AMADD_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMAND_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amand.w $a0, $a1, $a2 -name: test_AMAND_W -body: | - bb.0: - $r4 = AMAND_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMAND_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amand.d $a0, $a1, $a2 -name: test_AMAND_D -body: | - bb.0: - $r4 = AMAND_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMOR_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amor.w $a0, $a1, $a2 -name: test_AMOR_W -body: | - bb.0: - $r4 = AMOR_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMOR_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amor.d $a0, $a1, $a2 -name: test_AMOR_D -body: | - bb.0: - $r4 = AMOR_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMXOR_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amxor.w $a0, $a1, $a2 -name: test_AMXOR_W -body: | - bb.0: - $r4 = AMXOR_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMXOR_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: amxor.d $a0, $a1, $a2 -name: test_AMXOR_D -body: | - bb.0: - $r4 = AMXOR_D $r5, $r6 -... 
---- -# CHECK-LABEL: test_AMMAX_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax.w $a0, $a1, $a2 -name: test_AMMAX_W -body: | - bb.0: - $r4 = AMMAX_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax.d $a0, $a1, $a2 -name: test_AMMAX_D -body: | - bb.0: - $r4 = AMMAX_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin.w $a0, $a1, $a2 -name: test_AMMIN_W -body: | - bb.0: - $r4 = AMMIN_W $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin.d $a0, $a1, $a2 -name: test_AMMIN_D -body: | - bb.0: - $r4 = AMMIN_D $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax.wu $a0, $a1, $a2 -name: test_AMMAX_WU -body: | - bb.0: - $r4 = AMMAX_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMAX_DU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammax.du $a0, $a1, $a2 -name: test_AMMAX_DU -body: | - bb.0: - $r4 = AMMAX_DU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin.wu $a0, $a1, $a2 -name: test_AMMIN_WU -body: | - bb.0: - $r4 = AMMIN_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_AMMIN_DU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 1 0 0 1 0 1 0 0 1 1 0 0 0 1 0 0 -# CHECK-ASM: ammin.du $a0, $a1, $a2 -name: test_AMMIN_DU -body: | - bb.0: - $r4 = AMMIN_DU $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.b $a0, $a1, $a2 -name: test_LDX_B -body: | - bb.0: - $r4 = LDX_B $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.h $a0, $a1, $a2 -name: test_LDX_H -body: | - bb.0: - $r4 = LDX_H $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.w $a0, $a1, $a2 -name: test_LDX_W -body: | - bb.0: - $r4 = LDX_W $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.d $a0, $a1, $a2 -name: test_LDX_D -body: | - bb.0: - $r4 = LDX_D $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_BU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.bu $a0, $a1, $a2 -name: test_LDX_BU -body: | - bb.0: - $r4 = LDX_BU $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_HU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.hu $a0, $a1, $a2 -name: test_LDX_HU -body: | - bb.0: - $r4 = LDX_HU $r5, $r6 -... ---- -# CHECK-LABEL: test_LDX_WU: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldx.wu $a0, $a1, $a2 -name: test_LDX_WU -body: | - bb.0: - $r4 = LDX_WU $r5, $r6 -... ---- -# CHECK-LABEL: test_LDGT_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldgt.b $a0, $a1, $a2 -name: test_LDGT_B -body: | - bb.0: - $r4 = LDGT_B $r5, $r6 -... 
---- -# CHECK-LABEL: test_LDGT_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldgt.h $a0, $a1, $a2 -name: test_LDGT_H -body: | - bb.0: - $r4 = LDGT_H $r5, $r6 -... ---- -# CHECK-LABEL: test_LDGT_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldgt.w $a0, $a1, $a2 -name: test_LDGT_W -body: | - bb.0: - $r4 = LDGT_W $r5, $r6 -... ---- -# CHECK-LABEL: test_LDGT_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldgt.d $a0, $a1, $a2 -name: test_LDGT_D -body: | - bb.0: - $r4 = LDGT_D $r5, $r6 -... ---- -# CHECK-LABEL: test_LDLE_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldle.b $a0, $a1, $a2 -name: test_LDLE_B -body: | - bb.0: - $r4 = LDLE_B $r5, $r6 -... ---- -# CHECK-LABEL: test_LDLE_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldle.h $a0, $a1, $a2 -name: test_LDLE_H -body: | - bb.0: - $r4 = LDLE_H $r5, $r6 -... ---- -# CHECK-LABEL: test_LDLE_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldle.w $a0, $a1, $a2 -name: test_LDLE_W -body: | - bb.0: - $r4 = LDLE_W $r5, $r6 -... ---- -# CHECK-LABEL: test_LDLE_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 0 1 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldle.d $a0, $a1, $a2 -name: test_LDLE_D -body: | - bb.0: - $r4 = LDLE_D $r5, $r6 -... ---- -# CHECK-LABEL: test_STX_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stx.b $a0, $a1, $a2 -name: test_STX_B -body: | - bb.0: - STX_B $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STX_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stx.h $a0, $a1, $a2 -name: test_STX_H -body: | - bb.0: - STX_H $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STX_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stx.w $a0, $a1, $a2 -name: test_STX_W -body: | - bb.0: - STX_W $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STX_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stx.d $a0, $a1, $a2 -name: test_STX_D -body: | - bb.0: - STX_D $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STGT_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stgt.b $a0, $a1, $a2 -name: test_STGT_B -body: | - bb.0: - STGT_B $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STGT_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stgt.h $a0, $a1, $a2 -name: test_STGT_H -body: | - bb.0: - STGT_H $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STGT_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stgt.w $a0, $a1, $a2 -name: test_STGT_W -body: | - bb.0: - STGT_W $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STGT_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stgt.d $a0, $a1, $a2 -name: test_STGT_D -body: | - bb.0: - STGT_D $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STLE_B: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stle.b $a0, $a1, $a2 -name: test_STLE_B -body: | - bb.0: - STLE_B $r4, $r5, $r6 -... 
---- -# CHECK-LABEL: test_STLE_H: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stle.h $a0, $a1, $a2 -name: test_STLE_H -body: | - bb.0: - STLE_H $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STLE_W: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stle.w $a0, $a1, $a2 -name: test_STLE_W -body: | - bb.0: - STLE_W $r4, $r5, $r6 -... ---- -# CHECK-LABEL: test_STLE_D: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stle.d $a0, $a1, $a2 -name: test_STLE_D -body: | - bb.0: - STLE_D $r4, $r5, $r6 diff --git a/llvm/test/CodeGen/LoongArch/3ri.mir b/llvm/test/CodeGen/LoongArch/3ri.mir deleted file mode 100644 index c86e14189617..000000000000 --- a/llvm/test/CodeGen/LoongArch/3ri.mir +++ /dev/null @@ -1,69 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: 3RI2 -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------+-----+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------+-----+--------------+--------------+--------------- -# opcode |imm2 | rk | rj | rd -# ---------------------------------------------+-----+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_ALSL_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: alsl.w $a0, $a1, $a2, 4 -name: test_ALSL_W -body: | - bb.0: - $r4 = ALSL_W $r5, $r6, 4 -... ---- -# CHECK-LABEL: test_ALSL_WU: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: alsl.wu $a0, $a1, $a2, 2 -name: test_ALSL_WU -body: | - bb.0: - $r4 = ALSL_WU $r5, $r6, 2 -... ---- -# CHECK-LABEL: test_ALSL_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: alsl.d $a0, $a1, $a2, 4 -name: test_ALSL_D -body: | - bb.0: - $r4 = ALSL_D $r5, $r6, 4 -... ---- -# CHECK-LABEL: test_BYTEPICK_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bytepick.w $a0, $a1, $a2, 0 -name: test_BYTEPICK_W -body: | - bb.0: - $r4 = BYTEPICK_W $r5, $r6, 0 -... 
- -# ------------------------------------------------------------------------------------------------- -# Encoding format: 3RI3 -# ------------------------------------------------------------------------------------------------- -# ------------------------------------------+--------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------------------+--------+--------------+--------------+--------------- -# opcode | imm3 | rk | rj | rd -# ------------------------------------------+--------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_BYTEPICK_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bytepick.d $a0, $a1, $a2, 4 -name: test_BYTEPICK_D -body: | - bb.0: - $r4 = BYTEPICK_D $r5, $r6, 4 diff --git a/llvm/test/CodeGen/LoongArch/align.ll b/llvm/test/CodeGen/LoongArch/align.ll new file mode 100644 index 000000000000..c5b08dbd458a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/align.ll @@ -0,0 +1,8 @@ +; RUN: llc -mtriple=loongarch64 %s -o - | FileCheck %s + +define void @foo() { +;CHECK: .p2align 2 +;CHECK: foo: +entry: + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll new file mode 100644 index 000000000000..795b5c6b21ce --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll @@ -0,0 +1,902 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +define void @cmpxchg_i8_acquire_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_acquire_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r4, $r4, 3 +; LA64-NEXT: ori $r8, $zero, 255 +; LA64-NEXT: sll.w $r8, $r8, $r4 +; LA64-NEXT: nor $r9, $zero, $r8 +; LA64-NEXT: andi $r5, $r5, 255 +; LA64-NEXT: sll.w $r5, $r5, $r4 +; LA64-NEXT: andi $r6, $r6, 255 +; LA64-NEXT: sll.w $r6, $r6, $r4 +; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r8 +; LA64-NEXT: bne $r12, $r5, .LBB0_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r9 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB0_1 +; LA64-NEXT: .LBB0_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: srl.w $r10, $r12, $r4 +; LA64-NEXT: ext.w.b $r10, $r10 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire + ret void +} + +define void @cmpxchg_i8_release_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_release_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r4, $r4, 3 +; LA64-NEXT: ori $r8, $zero, 255 +; LA64-NEXT: sll.w $r8, $r8, $r4 +; LA64-NEXT: nor $r9, $zero, $r8 +; LA64-NEXT: andi $r5, $r5, 255 +; LA64-NEXT: sll.w $r5, $r5, $r4 +; LA64-NEXT: andi $r6, $r6, 255 +; LA64-NEXT: sll.w $r6, $r6, $r4 +; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r8 +; LA64-NEXT: bne $r12, $r5, .LBB1_3 +; 
LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r9 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB1_1 +; LA64-NEXT: .LBB1_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: srl.w $r10, $r12, $r4 +; LA64-NEXT: ext.w.b $r10, $r10 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire + ret void +} + +;; Check that only the failure ordering is taken care of. +define void @cmpxchg_i8_acquire_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r4, $r4, 3 +; LA64-NEXT: ori $r8, $zero, 255 +; LA64-NEXT: sll.w $r8, $r8, $r4 +; LA64-NEXT: nor $r9, $zero, $r8 +; LA64-NEXT: andi $r5, $r5, 255 +; LA64-NEXT: sll.w $r5, $r5, $r4 +; LA64-NEXT: andi $r6, $r6, 255 +; LA64-NEXT: sll.w $r6, $r6, $r4 +; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r8 +; LA64-NEXT: bne $r12, $r5, .LBB2_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r9 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB2_1 +; LA64-NEXT: .LBB2_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: srl.w $r10, $r12, $r4 +; LA64-NEXT: ext.w.b $r10, $r10 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire monotonic + ret void +} + +define void @cmpxchg_i16_acquire_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_acquire_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r4, $r4, 3 +; LA64-NEXT: lu12i.w $r8, 15 +; LA64-NEXT: ori $r8, $r8, 4095 +; LA64-NEXT: sll.w $r9, $r8, $r4 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: and $r5, $r5, $r8 +; LA64-NEXT: sll.w $r5, $r5, $r4 +; LA64-NEXT: and $r6, $r6, $r8 +; LA64-NEXT: sll.w $r6, $r6, $r4 +; LA64-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r9 +; LA64-NEXT: bne $r12, $r5, .LBB3_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r10 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB3_1 +; LA64-NEXT: .LBB3_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: srl.w $r8, $r12, $r4 +; LA64-NEXT: ext.w.h $r8, $r8 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire + ret void +} + +define void @cmpxchg_i16_release_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_release_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r4, $r4, 3 +; LA64-NEXT: lu12i.w $r8, 15 +; LA64-NEXT: ori $r8, $r8, 4095 +; LA64-NEXT: sll.w $r9, $r8, $r4 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: and $r5, $r5, $r8 +; LA64-NEXT: sll.w $r5, $r5, $r4 +; LA64-NEXT: and $r6, $r6, $r8 +; LA64-NEXT: sll.w $r6, $r6, $r4 +; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +;
LA64-NEXT: and $r12, $r11, $r9 +; LA64-NEXT: bne $r12, $r5, .LBB4_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r10 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB4_1 +; LA64-NEXT: .LBB4_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: srl.w $r8, $r12, $r4 +; LA64-NEXT: ext.w.h $r8, $r8 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire + ret void +} + +;; Check that only the failure ordering is taken care of. +define void @cmpxchg_i16_acquire_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r4, $r4, 3 +; LA64-NEXT: lu12i.w $r8, 15 +; LA64-NEXT: ori $r8, $r8, 4095 +; LA64-NEXT: sll.w $r9, $r8, $r4 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: and $r5, $r5, $r8 +; LA64-NEXT: sll.w $r5, $r5, $r4 +; LA64-NEXT: and $r6, $r6, $r8 +; LA64-NEXT: sll.w $r6, $r6, $r4 +; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r9 +; LA64-NEXT: bne $r12, $r5, .LBB5_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r10 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB5_1 +; LA64-NEXT: .LBB5_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: srl.w $r8, $r12, $r4 +; LA64-NEXT: ext.w.h $r8, $r8 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire monotonic + ret void +} + +define void @cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_acquire_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB6_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.w $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB6_1 +; LA64-NEXT: .LBB6_3: +; LA64-NEXT: dbar 0 +; LA64-NEXT: jr $ra + %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire + ret void +} + +define void @cmpxchg_i32_release_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_release_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB7_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.w $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB7_1 +; LA64-NEXT: .LBB7_3: +; LA64-NEXT: dbar 0 +; LA64-NEXT: jr $ra + %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire + ret void +} + +;; Check that only the failure ordering is taken care of.
+define void @cmpxchg_i32_acquire_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB8_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.w $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB8_1 +; LA64-NEXT: .LBB8_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: jr $ra + %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire monotonic + ret void +} + +define void @cmpxchg_i64_acquire_acquire(i64* %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_acquire_acquire: +; LA64: # %bb.0: +; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB9_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.d $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB9_1 +; LA64-NEXT: .LBB9_3: +; LA64-NEXT: dbar 0 +; LA64-NEXT: jr $ra + %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire + ret void +} + +define void @cmpxchg_i64_release_acquire(i64* %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_release_acquire: +; LA64: # %bb.0: +; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB10_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.d $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB10_1 +; LA64-NEXT: .LBB10_3: +; LA64-NEXT: dbar 0 +; LA64-NEXT: jr $ra + %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire + ret void +} + +;; Check that only the failure ordering is taken care of.
+define void @cmpxchg_i64_acquire_monotonic(i64* %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB11_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.d $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB11_1 +; LA64-NEXT: .LBB11_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: jr $ra + %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire monotonic + ret void +} + +define i8 @cmpxchg_i8_acquire_acquire_reti8(i8* %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r8, $r4, 3 +; LA64-NEXT: ori $r4, $zero, 255 +; LA64-NEXT: sll.w $r9, $r4, $r8 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: andi $r4, $r5, 255 +; LA64-NEXT: sll.w $r5, $r4, $r8 +; LA64-NEXT: andi $r4, $r6, 255 +; LA64-NEXT: sll.w $r6, $r4, $r8 +; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r9 +; LA64-NEXT: bne $r12, $r5, .LBB12_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r10 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB12_1 +; LA64-NEXT: .LBB12_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: srl.w $r4, $r12, $r8 +; LA64-NEXT: ext.w.b $r4, $r4 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire + %res = extractvalue { i8, i1 } %tmp, 0 + ret i8 %res +} + +define i16 @cmpxchg_i16_acquire_acquire_reti16(i16* %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r8, $r4, 3 +; LA64-NEXT: lu12i.w $r4, 15 +; LA64-NEXT: ori $r4, $r4, 4095 +; LA64-NEXT: sll.w $r9, $r4, $r8 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: and $r5, $r5, $r4 +; LA64-NEXT: sll.w $r5, $r5, $r8 +; LA64-NEXT: and $r4, $r6, $r4 +; LA64-NEXT: sll.w $r6, $r4, $r8 +; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r9 +; LA64-NEXT: bne $r12, $r5, .LBB13_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r10 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB13_1 +; LA64-NEXT: .LBB13_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: srl.w $r4, $r12, $r8 +; LA64-NEXT: ext.w.h $r4, $r4 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire + %res = extractvalue { i16, i1 } %tmp, 0 + ret i16 %res +} + +define i32 @cmpxchg_i32_acquire_acquire_reti32(i32* %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r7, $r5, 0 +; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r5, $r4, 0 +; LA64-NEXT: bne $r5, $r7, .LBB14_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.w $r8, $r4, 0 +; LA64-NEXT: beq 
$r8, $zero, .LBB14_1 +; LA64-NEXT: .LBB14_3: +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $r4, $r5 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire + %res = extractvalue { i32, i1 } %tmp, 0 + ret i32 %res +} + +define i64 @cmpxchg_i64_acquire_acquire_reti64(i64* %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: +; LA64: # %bb.0: +; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB15_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.d $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB15_1 +; LA64-NEXT: .LBB15_3: +; LA64-NEXT: dbar 0 +; LA64-NEXT: move $r4, $r7 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire + %res = extractvalue { i64, i1 } %tmp, 0 + ret i64 %res +} + +define i1 @cmpxchg_i8_acquire_acquire_reti1(i8* %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r8, $r4, 3 +; LA64-NEXT: ori $r4, $zero, 255 +; LA64-NEXT: sll.w $r9, $r4, $r8 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: andi $r4, $r5, 255 +; LA64-NEXT: sll.w $r11, $r4, $r8 +; LA64-NEXT: andi $r4, $r6, 255 +; LA64-NEXT: sll.w $r6, $r4, $r8 +; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r12, $r7, 0 +; LA64-NEXT: and $r13, $r12, $r9 +; LA64-NEXT: bne $r13, $r11, .LBB16_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 +; LA64-NEXT: and $r12, $r12, $r10 +; LA64-NEXT: or $r12, $r12, $r6 +; LA64-NEXT: sc.w $r12, $r7, 0 +; LA64-NEXT: beq $r12, $zero, .LBB16_1 +; LA64-NEXT: .LBB16_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: srl.w $r4, $r13, $r8 +; LA64-NEXT: ext.w.b $r4, $r4 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: ext.w.b $r5, $r5 +; LA64-NEXT: xor $r4, $r4, $r5 +; LA64-NEXT: sltui $r4, $r4, 1 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire + %res = extractvalue { i8, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i16_acquire_acquire_reti1(i16* %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r8, $r4, 3 +; LA64-NEXT: lu12i.w $r4, 15 +; LA64-NEXT: ori $r4, $r4, 4095 +; LA64-NEXT: sll.w $r9, $r4, $r8 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: and $r11, $r5, $r4 +; LA64-NEXT: sll.w $r11, $r11, $r8 +; LA64-NEXT: and $r4, $r6, $r4 +; LA64-NEXT: sll.w $r6, $r4, $r8 +; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r12, $r7, 0 +; LA64-NEXT: and $r13, $r12, $r9 +; LA64-NEXT: bne $r13, $r11, .LBB17_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 +; LA64-NEXT: and $r12, $r12, $r10 +; LA64-NEXT: or $r12, $r12, $r6 +; LA64-NEXT: sc.w $r12, $r7, 0 +; LA64-NEXT: beq $r12, $zero, .LBB17_1 +; LA64-NEXT: .LBB17_3: +; LA64-NEXT: dbar 20 +; LA64-NEXT: srl.w $r4, $r13, $r8 +; LA64-NEXT: ext.w.h $r4, $r4 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: ext.w.h $r5, $r5 +; LA64-NEXT: xor $r4, $r4, $r5 +; LA64-NEXT: sltui $r4, $r4, 1 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire + %res = extractvalue { i16, i1 } %tmp, 
1 + ret i1 %res +} + +define i1 @cmpxchg_i32_acquire_acquire_reti1(i32* %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB18_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.w $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB18_1 +; LA64-NEXT: .LBB18_3: +; LA64-NEXT: dbar 0 +; LA64-NEXT: xor $r4, $r7, $r5 +; LA64-NEXT: sltui $r4, $r4, 1 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire + %res = extractvalue { i32, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i64_acquire_acquire_reti1(i64* %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: +; LA64: # %bb.0: +; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB19_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.d $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB19_1 +; LA64-NEXT: .LBB19_3: +; LA64-NEXT: dbar 0 +; LA64-NEXT: xor $r4, $r7, $r5 +; LA64-NEXT: sltui $r4, $r4, 1 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire + %res = extractvalue { i64, i1 } %tmp, 1 + ret i1 %res +} + +define void @cmpxchg_i8_monotonic_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_monotonic_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r4, $r4, 3 +; LA64-NEXT: ori $r8, $zero, 255 +; LA64-NEXT: sll.w $r8, $r8, $r4 +; LA64-NEXT: nor $r9, $zero, $r8 +; LA64-NEXT: andi $r5, $r5, 255 +; LA64-NEXT: sll.w $r5, $r5, $r4 +; LA64-NEXT: andi $r6, $r6, 255 +; LA64-NEXT: sll.w $r6, $r6, $r4 +; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r8 +; LA64-NEXT: bne $r12, $r5, .LBB20_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r9 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB20_1 +; LA64-NEXT: .LBB20_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: srl.w $r10, $r12, $r4 +; LA64-NEXT: ext.w.b $r10, $r10 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i16_monotonic_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_monotonic_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r4, $r4, 3 +; LA64-NEXT: lu12i.w $r8, 15 +; LA64-NEXT: ori $r8, $r8, 4095 +; LA64-NEXT: sll.w $r9, $r8, $r4 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: and $r5, $r5, $r8 +; LA64-NEXT: sll.w $r5, $r5, $r4 +; LA64-NEXT: and $r6, $r6, $r8 +; LA64-NEXT: sll.w $r6, $r6, $r4 +; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r9 +; LA64-NEXT: bne $r12, $r5, .LBB21_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r10 +; LA64-NEXT: or $r11, $r11, 
$r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB21_1 +; LA64-NEXT: .LBB21_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: srl.w $r8, $r12, $r4 +; LA64-NEXT: ext.w.h $r8, $r8 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i32_monotonic_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB22_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.w $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB22_1 +; LA64-NEXT: .LBB22_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: jr $ra + %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic + ret void +} + +define void @cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB23_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.d $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB23_1 +; LA64-NEXT: .LBB23_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: jr $ra + %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic + ret void +} + +define i8 @cmpxchg_i8_monotonic_monotonic_reti8(i8* %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r8, $r4, 3 +; LA64-NEXT: ori $r4, $zero, 255 +; LA64-NEXT: sll.w $r9, $r4, $r8 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: andi $r4, $r5, 255 +; LA64-NEXT: sll.w $r5, $r4, $r8 +; LA64-NEXT: andi $r4, $r6, 255 +; LA64-NEXT: sll.w $r6, $r4, $r8 +; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r9 +; LA64-NEXT: bne $r12, $r5, .LBB24_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r10 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB24_1 +; LA64-NEXT: .LBB24_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: srl.w $r4, $r12, $r8 +; LA64-NEXT: ext.w.b $r4, $r4 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic + %res = extractvalue { i8, i1 } %tmp, 0 + ret i8 %res +} + +define i16 @cmpxchg_i16_monotonic_monotonic_reti16(i16* %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r8, $r4, 3 +; LA64-NEXT: lu12i.w $r4, 15 +; LA64-NEXT: ori $r4, $r4, 4095 +; LA64-NEXT: sll.w $r9, $r4, $r8 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: and $r5, $r5, $r4 +; LA64-NEXT: sll.w $r5, $r5, $r8 +; LA64-NEXT: and $r4, $r6, $r4 +; LA64-NEXT: sll.w $r6, $r4, $r8 +; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r11, $r7, 0 +; LA64-NEXT: and $r12, $r11, $r9 +; 
LA64-NEXT: bne $r12, $r5, .LBB25_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 +; LA64-NEXT: and $r11, $r11, $r10 +; LA64-NEXT: or $r11, $r11, $r6 +; LA64-NEXT: sc.w $r11, $r7, 0 +; LA64-NEXT: beq $r11, $zero, .LBB25_1 +; LA64-NEXT: .LBB25_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: srl.w $r4, $r12, $r8 +; LA64-NEXT: ext.w.h $r4, $r4 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: jr $ra + %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic + %res = extractvalue { i16, i1 } %tmp, 0 + ret i16 %res +} + +define i32 @cmpxchg_i32_monotonic_monotonic_reti32(i32* %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r7, $r5, 0 +; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r5, $r4, 0 +; LA64-NEXT: bne $r5, $r7, .LBB26_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.w $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB26_1 +; LA64-NEXT: .LBB26_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: move $r4, $r5 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic + %res = extractvalue { i32, i1 } %tmp, 0 + ret i32 %res +} + +define i64 @cmpxchg_i64_monotonic_monotonic_reti64(i64* %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: +; LA64: # %bb.0: +; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB27_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.d $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB27_1 +; LA64-NEXT: .LBB27_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: move $r4, $r7 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic + %res = extractvalue { i64, i1 } %tmp, 0 + ret i64 %res +} + +define i1 @cmpxchg_i8_monotonic_monotonic_reti1(i8* %ptr, i8 %cmp, i8 %val) nounwind { +; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and $r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r8, $r4, 3 +; LA64-NEXT: ori $r4, $zero, 255 +; LA64-NEXT: sll.w $r9, $r4, $r8 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: andi $r4, $r5, 255 +; LA64-NEXT: sll.w $r11, $r4, $r8 +; LA64-NEXT: andi $r4, $r6, 255 +; LA64-NEXT: sll.w $r6, $r4, $r8 +; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r12, $r7, 0 +; LA64-NEXT: and $r13, $r12, $r9 +; LA64-NEXT: bne $r13, $r11, .LBB28_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 +; LA64-NEXT: and $r12, $r12, $r10 +; LA64-NEXT: or $r12, $r12, $r6 +; LA64-NEXT: sc.w $r12, $r7, 0 +; LA64-NEXT: beq $r12, $zero, .LBB28_1 +; LA64-NEXT: .LBB28_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: srl.w $r4, $r13, $r8 +; LA64-NEXT: ext.w.b $r4, $r4 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: ext.w.b $r5, $r5 +; LA64-NEXT: xor $r4, $r4, $r5 +; LA64-NEXT: sltui $r4, $r4, 1 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic + %res = extractvalue { i8, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i16_monotonic_monotonic_reti1(i16* %ptr, i16 %cmp, i16 %val) nounwind { +; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: addi.d $r7, $zero, -4 +; LA64-NEXT: and 
$r7, $r4, $r7 +; LA64-NEXT: andi $r4, $r4, 3 +; LA64-NEXT: slli.w $r8, $r4, 3 +; LA64-NEXT: lu12i.w $r4, 15 +; LA64-NEXT: ori $r4, $r4, 4095 +; LA64-NEXT: sll.w $r9, $r4, $r8 +; LA64-NEXT: nor $r10, $zero, $r9 +; LA64-NEXT: and $r11, $r5, $r4 +; LA64-NEXT: sll.w $r11, $r11, $r8 +; LA64-NEXT: and $r4, $r6, $r4 +; LA64-NEXT: sll.w $r6, $r4, $r8 +; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r12, $r7, 0 +; LA64-NEXT: and $r13, $r12, $r9 +; LA64-NEXT: bne $r13, $r11, .LBB29_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 +; LA64-NEXT: and $r12, $r12, $r10 +; LA64-NEXT: or $r12, $r12, $r6 +; LA64-NEXT: sc.w $r12, $r7, 0 +; LA64-NEXT: beq $r12, $zero, .LBB29_1 +; LA64-NEXT: .LBB29_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: srl.w $r4, $r13, $r8 +; LA64-NEXT: ext.w.h $r4, $r4 +; LA64-NEXT: # %bb.4: +; LA64-NEXT: ext.w.h $r5, $r5 +; LA64-NEXT: xor $r4, $r4, $r5 +; LA64-NEXT: sltui $r4, $r4, 1 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic + %res = extractvalue { i16, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i32_monotonic_monotonic_reti1(i32* %ptr, i32 %cmp, i32 %val) nounwind { +; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: +; LA64: # %bb.0: +; LA64-NEXT: slli.w $r6, $r6, 0 +; LA64-NEXT: slli.w $r5, $r5, 0 +; LA64-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB30_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.w $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB30_1 +; LA64-NEXT: .LBB30_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: xor $r4, $r7, $r5 +; LA64-NEXT: sltui $r4, $r4, 1 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic + %res = extractvalue { i32, i1 } %tmp, 1 + ret i1 %res +} + +define i1 @cmpxchg_i64_monotonic_monotonic_reti1(i64* %ptr, i64 %cmp, i64 %val) nounwind { +; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: +; LA64: # %bb.0: +; LA64-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.d $r7, $r4, 0 +; LA64-NEXT: bne $r7, $r5, .LBB31_3 +; LA64-NEXT: # %bb.2: # in Loop: Header=BB31_1 Depth=1 +; LA64-NEXT: move $r8, $r6 +; LA64-NEXT: sc.d $r8, $r4, 0 +; LA64-NEXT: beq $r8, $zero, .LBB31_1 +; LA64-NEXT: .LBB31_3: +; LA64-NEXT: dbar 1792 +; LA64-NEXT: xor $r4, $r7, $r5 +; LA64-NEXT: sltui $r4, $r4, 1 +; LA64-NEXT: jr $ra + %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic + %res = extractvalue { i64, i1 } %tmp, 1 + ret i1 %res +} diff --git a/llvm/test/CodeGen/LoongArch/atomic-operand-imm0.ll b/llvm/test/CodeGen/LoongArch/atomic-operand-imm0.ll new file mode 100644 index 000000000000..d1d0c0bc42f8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/atomic-operand-imm0.ll @@ -0,0 +1,17 @@ +; Test that the last immediate 0 operand of an atomic instruction is printed + +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define void @test_i32(i32* %dst, i32 %val) { +; CHECK: ammax_db.wu $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG3:[0-9]+]], 0 +entry: + %a = atomicrmw umax i32* %dst, i32 %val monotonic + ret void +} + +define void @test_i64(i64* %dst, i64 %val) { +; CHECK: ammax_db.du $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG3:[0-9]+]], 0 +entry: + %a = atomicrmw umax i64* %dst, i64 %val monotonic + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/atomic_16_8.ll b/llvm/test/CodeGen/LoongArch/atomic_16_8.ll new file mode 100644 index 000000000000..ba454ab400ff --- /dev/null +++
b/llvm/test/CodeGen/LoongArch/atomic_16_8.ll @@ -0,0 +1,785 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s + + +define void @umax_8(i8* %ptr) { +; CHECK-LABEL: umax_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umax i8* %ptr, i8 100 seq_cst + ret void +} + +define void @umax_16(i16* %ptr) { +; CHECK-LABEL: umax_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB1_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umax i16* %ptr, i16 100 seq_cst + ret void +} + +define void @max_8(i8* %ptr) { +; CHECK-LABEL: max_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB2_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw max i8* %ptr, i8 100 seq_cst + ret void +} + +define void @max_16(i16* %ptr) { +; CHECK-LABEL: max_16: +; CHECK: # %bb.0: +; 
CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB3_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw max i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @umin_8(i8* %ptr) { +; CHECK-LABEL: umin_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB4_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umin i8* %ptr, i8 100 seq_cst + ret void +} + +define void @umin_16(i16* %ptr) { +; CHECK-LABEL: umin_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB5_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw umin i16* %ptr, i16 100 seq_cst + ret void +} + +define void @min_8(i8* %ptr) { +; CHECK-LABEL: min_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor 
$r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB6_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw min i8* %ptr, i8 100 seq_cst + ret void +} + +define void @min_16(i16* %ptr) { +; CHECK-LABEL: min_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r7 +; CHECK-NEXT: and $r5, $r5, $r7 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB7_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.h $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw min i16* %ptr, i16 100 seq_cst + ret void +} + + +define void @or_8(i8* %ptr) { +; CHECK-LABEL: or_8: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: ori $r7, $zero, 255 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: or $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB8_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; CHECK-NEXT: srl.w $r9, $r9, $r4 +; CHECK-NEXT: ext.w.b $r9, $r9 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %ret = atomicrmw or i8* %ptr, i8 100 seq_cst + ret void +} + +define void @or_16(i16* %ptr) { +; CHECK-LABEL: or_16: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 100 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r4, $r4, 3 +; CHECK-NEXT: lu12i.w $r7, 15 +; CHECK-NEXT: ori $r7, $r7, 4095 +; CHECK-NEXT: sll.w $r7, $r7, $r4 +; CHECK-NEXT: nor $r8, $zero, $r7 +; CHECK-NEXT: sll.w $r5, $r5, $r4 +; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: or $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r7 +; CHECK-NEXT: and $r12, $r10, $r8 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB9_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r9, $r10, $r7 +; 
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.h $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw or i16* %ptr, i16 100 seq_cst
+ ret void
+}
+
+
+define void @add_8(i8* %ptr) {
+; CHECK-LABEL: add_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: ori $r7, $zero, 255
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: add.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB10_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.b $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw add i8* %ptr, i8 100 seq_cst
+ ret void
+}
+
+define void @add_16(i16* %ptr) {
+; CHECK-LABEL: add_16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: lu12i.w $r7, 15
+; CHECK-NEXT: ori $r7, $r7, 4095
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: add.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB11_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.h $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw add i16* %ptr, i16 100 seq_cst
+ ret void
+}
+
+
+define void @sub_8(i8* %ptr) {
+; CHECK-LABEL: sub_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: ori $r7, $zero, 255
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: sub.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB12_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.b $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw sub i8* %ptr, i8 100 seq_cst
+ ret void
+}
+
+define void @sub_16(i16* %ptr) {
+; CHECK-LABEL: sub_16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: lu12i.w $r7, 15
+; CHECK-NEXT: ori $r7, $r7, 4095
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: sub.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB13_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.h $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw sub i16* %ptr, i16 100 seq_cst
+ ret void
+}
+
+
+define void @and_8(i8* %ptr) {
+; CHECK-LABEL: and_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: ori $r7, $zero, 255
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB14_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.b $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw and i8* %ptr, i8 100 seq_cst
+ ret void
+}
+
+define void @and_16(i16* %ptr) {
+; CHECK-LABEL: and_16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: lu12i.w $r7, 15
+; CHECK-NEXT: ori $r7, $r7, 4095
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB15_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.h $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw and i16* %ptr, i16 100 seq_cst
+ ret void
+}
+
+
+define void @nand_8(i8* %ptr) {
+; CHECK-LABEL: nand_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: ori $r7, $zero, 255
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: nor $r11, $zero, $r11
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB16_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.b $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw nand i8* %ptr, i8 100 seq_cst
+ ret void
+}
+
+define void @nand_16(i16* %ptr) {
+; CHECK-LABEL: nand_16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: lu12i.w $r7, 15
+; CHECK-NEXT: ori $r7, $r7, 4095
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: nor $r11, $zero, $r11
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB17_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.h $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw nand i16* %ptr, i16 100 seq_cst
+ ret void
+}
+
+
+define void @xor_8(i8* %ptr) {
+; CHECK-LABEL: xor_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: ori $r7, $zero, 255
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: xor $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB18_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.b $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw xor i8* %ptr, i8 100 seq_cst
+ ret void
+}
+
+define void @xor_16(i16* %ptr) {
+; CHECK-LABEL: xor_16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: lu12i.w $r7, 15
+; CHECK-NEXT: ori $r7, $r7, 4095
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: xor $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB19_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.h $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw xor i16* %ptr, i16 100 seq_cst
+ ret void
+}
+
+
+define void @xchg_8(i8* %ptr) {
+; CHECK-LABEL: xchg_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: ori $r7, $zero, 255
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB20_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.b $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw xchg i8* %ptr, i8 100 seq_cst
+ ret void
+}
+
+define void @xchg_16(i16* %ptr) {
+; CHECK-LABEL: xchg_16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: lu12i.w $r7, 15
+; CHECK-NEXT: ori $r7, $r7, 4095
+; CHECK-NEXT: sll.w $r7, $r7, $r4
+; CHECK-NEXT: nor $r8, $zero, $r7
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r7
+; CHECK-NEXT: and $r12, $r10, $r8
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB21_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r9, $r10, $r7
+; CHECK-NEXT: srl.w $r9, $r9, $r4
+; CHECK-NEXT: ext.w.h $r9, $r9
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw xchg i16* %ptr, i16 100 seq_cst
+ ret void
+}
+
+define void @cmpxchg_8(i8* %ptr) {
+; CHECK-LABEL: cmpxchg_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 1
+; CHECK-NEXT: ori $r6, $zero, 100
+; CHECK-NEXT: addi.d $r7, $zero, -4
+; CHECK-NEXT: and $r7, $r4, $r7
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: ori $r8, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r8, $r4
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: andi $r6, $r6, 255
+; CHECK-NEXT: sll.w $r6, $r6, $r4
+; CHECK-NEXT: andi $r5, $r5, 255
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r11, $r7, 0
+; CHECK-NEXT: and $r12, $r11, $r8
+; CHECK-NEXT: bne $r12, $r6, .LBB22_3
+; CHECK-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; CHECK-NEXT: and $r11, $r11, $r9
+; CHECK-NEXT: or $r11, $r11, $r5
+; CHECK-NEXT: sc.w $r11, $r7, 0
+; CHECK-NEXT: beq $r11, $zero, .LBB22_1
+; CHECK-NEXT: .LBB22_3:
+; CHECK-NEXT: dbar 20
+; CHECK-NEXT: srl.w $r10, $r12, $r4
+; CHECK-NEXT: ext.w.b $r10, $r10
+; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: jr $ra
+ %ret = cmpxchg i8* %ptr, i8 100, i8 1 seq_cst seq_cst
+ ret void
+}
+
+define void @cmpxchg_16(i16* %ptr) {
+; CHECK-LABEL: cmpxchg_16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 1
+; CHECK-NEXT: ori $r6, $zero, 100
+; CHECK-NEXT: addi.d $r7, $zero, -4
+; CHECK-NEXT: and $r7, $r4, $r7
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r4, $r4, 3
+; CHECK-NEXT: lu12i.w $r8, 15
+; CHECK-NEXT: ori $r8, $r8, 4095
+; CHECK-NEXT: sll.w $r9, $r8, $r4
+; CHECK-NEXT: nor $r10, $zero, $r9
+; CHECK-NEXT: and $r6, $r6, $r8
+; CHECK-NEXT: sll.w $r6, $r6, $r4
+; CHECK-NEXT: and $r5, $r5, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r4
+; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r11, $r7, 0
+; CHECK-NEXT: and $r12, $r11, $r9
+; CHECK-NEXT: bne $r12, $r6, .LBB23_3
+; CHECK-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; CHECK-NEXT: and $r11, $r11, $r10
+; CHECK-NEXT: or $r11, $r11, $r5
+; CHECK-NEXT: sc.w $r11, $r7, 0
+; CHECK-NEXT: beq $r11, $zero, .LBB23_1
+; CHECK-NEXT: .LBB23_3:
+; CHECK-NEXT: dbar 20
+; CHECK-NEXT: srl.w $r8, $r12, $r4
+; CHECK-NEXT: ext.w.h $r8, $r8
+; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: jr $ra
+ %ret = cmpxchg i16* %ptr, i16 100, i16 1 seq_cst seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/atomic_64_32.ll b/llvm/test/CodeGen/LoongArch/atomic_64_32.ll
new file mode 100644
index 000000000000..61a24cd5d479
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/atomic_64_32.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=loongarch64 --verify-machineinstrs -o - %s | FileCheck %s
+
+
+define void @umax_32(i32* %ptr) {
+; CHECK-LABEL: umax_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammax_db.wu $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw umax i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @umax_64(i64* %ptr) {
+; CHECK-LABEL: umax_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw umax i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+define void @max_32(i32* %ptr) {
+; CHECK-LABEL: max_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammax_db.w $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw max i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @max_64(i64* %ptr) {
+; CHECK-LABEL: max_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw max i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+
+define void @umin_32(i32* %ptr) {
+; CHECK-LABEL: umin_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammin_db.wu $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw umin i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @umin_64(i64* %ptr) {
+; CHECK-LABEL: umin_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw umin i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+define void @min_32(i32* %ptr) {
+; CHECK-LABEL: min_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammin_db.w $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw min i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @min_64(i64* %ptr) {
+; CHECK-LABEL: min_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw min i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+
+define void @or_32(i32* %ptr) {
+; CHECK-LABEL: or_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amor_db.w $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw or i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @or_64(i64* %ptr) {
+; CHECK-LABEL: or_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw or i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+
+define void @add_32(i32* %ptr) {
+; CHECK-LABEL: add_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amadd_db.w $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw add i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @add_64(i64* %ptr) {
+; CHECK-LABEL: add_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw add i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+
+define void @sub_32(i32* %ptr) {
+; CHECK-LABEL: sub_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: sub.w $r7, $zero, $r5
+; CHECK-NEXT: amadd_db.w $r6, $r7, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw sub i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @sub_64(i64* %ptr) {
+; CHECK-LABEL: sub_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: sub.d $r7, $zero, $r5
+; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw sub i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+
+define void @and_32(i32* %ptr) {
+; CHECK-LABEL: and_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amand_db.w $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw and i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @and_64(i64* %ptr) {
+; CHECK-LABEL: and_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw and i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+
+define void @nand_32(i32* %ptr) {
+; CHECK-LABEL: nand_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r6, $r4, 0
+; CHECK-NEXT: and $r7, $r6, $r5
+; CHECK-NEXT: nor $r7, $zero, $r7
+; CHECK-NEXT: sc.w $r7, $r4, 0
+; CHECK-NEXT: beq $r7, $zero, .LBB16_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw nand i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @nand_64(i64* %ptr) {
+; CHECK-LABEL: nand_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.d $r6, $r4, 0
+; CHECK-NEXT: and $r7, $r6, $r5
+; CHECK-NEXT: nor $r7, $zero, $r7
+; CHECK-NEXT: sc.d $r7, $r4, 0
+; CHECK-NEXT: beq $r7, $zero, .LBB17_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw nand i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+
+define void @xor_32(i32* %ptr) {
+; CHECK-LABEL: xor_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amxor_db.w $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw xor i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @xor_64(i64* %ptr) {
+; CHECK-LABEL: xor_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw xor i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+
+define void @xchg_32(i32* %ptr) {
+; CHECK-LABEL: xchg_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amswap_db.w $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw xchg i32* %ptr, i32 100 seq_cst
+ ret void
+}
+
+define void @xchg_64(i64* %ptr) {
+; CHECK-LABEL: xchg_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 100
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: jr $ra
+ %ret = atomicrmw xchg i64* %ptr, i64 100 seq_cst
+ ret void
+}
+
+define void @cmpxchg_32(i32* %ptr) {
+; CHECK-LABEL: cmpxchg_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 1
+; CHECK-NEXT: ori $r6, $zero, 100
+; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB22_3
+; CHECK-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB22_1
+; CHECK-NEXT: .LBB22_3:
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: jr $ra
+ %ret = cmpxchg i32* %ptr, i32 100, i32 1 seq_cst seq_cst
+ ret void
+}
+
+define void @cmpxchg_64(i64* %ptr) {
+; CHECK-LABEL: cmpxchg_64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r5, $zero, 1
+; CHECK-NEXT: addi.d $r6, $zero, 100
+; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.d $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB23_3
+; CHECK-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.d $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB23_1
+; CHECK-NEXT: .LBB23_3:
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: jr $ra
+ %ret = cmpxchg i64* %ptr, i64 100, i64 1 seq_cst seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll
new file mode 100644
index 000000000000..7ef963cc6c19
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll
@@ -0,0 +1,1776 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
+
+define float @float_fadd_acquire(ptr %p) nounwind {
+; CHECK-LABEL: float_fadd_acquire:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: addi.w $r5, $zero, 1
+; CHECK-NEXT: movgr2fr.w $f1, $r5
+; CHECK-NEXT: ffint.s.w $f1, $f1
+; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB0_2 Depth 2
+; CHECK-NEXT: fadd.s $f2, $f0, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f2
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB0_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB0_2
+; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB0_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4
+ ret float %v
+}
+
+define float @float_fsub_acquire(ptr %p) nounwind {
+; CHECK-LABEL: float_fsub_acquire:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB1_2 Depth 2
+; CHECK-NEXT: lu12i.w $r5, .LCPI1_0
+; CHECK-NEXT: ori $r5, $r5, .LCPI1_0
+; CHECK-NEXT: lu32i.d $r5, .LCPI1_0
+; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI1_0
+; CHECK-NEXT: fld.s $f1, $r5, 0
+; CHECK-NEXT: fadd.s $f1, $f0, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f1
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB1_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB1_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB1_2
+; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB1_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4
+ ret float %v
+}
+
+define float @float_fmin_acquire(ptr %p) nounwind {
+; CHECK-LABEL: float_fmin_acquire:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: addi.w $r5, $zero, 1
+; CHECK-NEXT: movgr2fr.w $f1, $r5
+; CHECK-NEXT: ffint.s.w $f1, $f1
+; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB2_2 Depth 2
+; CHECK-NEXT: fmax.s $f2, $f0, $f0
+; CHECK-NEXT: fmin.s $f2, $f2, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f2
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB2_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB2_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB2_2
+; CHECK-NEXT: .LBB2_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB2_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4
+ ret float %v
+}
+
+define float @float_fmax_acquire(ptr %p) nounwind {
+; CHECK-LABEL: float_fmax_acquire:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: addi.w $r5, $zero, 1
+; CHECK-NEXT: movgr2fr.w $f1, $r5
+; CHECK-NEXT: ffint.s.w $f1, $f1
+; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB3_2 Depth 2
+; CHECK-NEXT: fmax.s $f2, $f0, $f0
+; CHECK-NEXT: fmax.s $f2, $f2, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f2
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB3_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB3_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB3_2
+; CHECK-NEXT: .LBB3_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB3_1 Depth=1
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB3_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4
+ ret float %v
+}
+
+define double @double_fadd_acquire(ptr %p) nounwind {
+; CHECK-LABEL: double_fadd_acquire:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r4, $zero, 1
+; CHECK-NEXT: movgr2fr.d $f1, $r4
+; CHECK-NEXT: ffint.d.l $f24, $f1
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 16
+; CHECK-NEXT: addi.d $r26, $sp, 8
+; CHECK-NEXT: addi.d $r27, $zero, 2
+; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 16
+; CHECK-NEXT: fadd.d $f0, $f0, $f24
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r27
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 16
+; CHECK-NEXT: beqz $r4, .LBB4_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload
+; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fadd ptr %p, double 1.0 acquire, align 4
+ ret double %v
+}
+
+define double @double_fsub_acquire(ptr %p) nounwind {
+; CHECK-LABEL: double_fsub_acquire:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 8
+; CHECK-NEXT: addi.d $r26, $sp, 0
+; CHECK-NEXT: addi.d $r27, $zero, 2
+; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: lu12i.w $r4, .LCPI5_0
+; CHECK-NEXT: ori $r4, $r4, .LCPI5_0
+; CHECK-NEXT: lu32i.d $r4, .LCPI5_0
+; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI5_0
+; CHECK-NEXT: fld.d $f1, $r4, 0
+; CHECK-NEXT: fadd.d $f0, $f0, $f1
+; CHECK-NEXT: fst.d $f0, $sp, 0
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r27
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 8
+; CHECK-NEXT: beqz $r4, .LBB5_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fsub ptr %p, double 1.0 acquire, align 4
+ ret double %v
+}
+
+define double @double_fmin_acquire(ptr %p) nounwind {
+; CHECK-LABEL: double_fmin_acquire:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r4, $zero, 1
+; CHECK-NEXT: movgr2fr.d $f1, $r4
+; CHECK-NEXT: ffint.d.l $f24, $f1
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 16
+; CHECK-NEXT: addi.d $r26, $sp, 8
+; CHECK-NEXT: addi.d $r27, $zero, 2
+; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 16
+; CHECK-NEXT: fmax.d $f0, $f0, $f0
+; CHECK-NEXT: fmin.d $f0, $f0, $f24
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r27
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 16
+; CHECK-NEXT: beqz $r4, .LBB6_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload
+; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmin ptr %p, double 1.0 acquire, align 4
+ ret double %v
+}
+
+define double @double_fmax_acquire(ptr %p) nounwind {
+; CHECK-LABEL: double_fmax_acquire:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r4, $zero, 1
+; CHECK-NEXT: movgr2fr.d $f1, $r4
+; CHECK-NEXT: ffint.d.l $f24, $f1
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 16
+; CHECK-NEXT: addi.d $r26, $sp, 8
+; CHECK-NEXT: addi.d $r27, $zero, 2
+; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 16
+; CHECK-NEXT: fmax.d $f0, $f0, $f0
+; CHECK-NEXT: fmax.d $f0, $f0, $f24
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r27
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 16
+; CHECK-NEXT: beqz $r4, .LBB7_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload
+; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4
+ ret double %v
+}
+
+define float @float_fadd_release(ptr %p) nounwind {
+; CHECK-LABEL: float_fadd_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: addi.w $r5, $zero, 1
+; CHECK-NEXT: movgr2fr.w $f1, $r5
+; CHECK-NEXT: ffint.s.w $f1, $f1
+; CHECK-NEXT: .LBB8_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB8_2 Depth 2
+; CHECK-NEXT: fadd.s $f2, $f0, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f2
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB8_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB8_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB8_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB8_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB8_2
+; CHECK-NEXT: .LBB8_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB8_1 Depth=1
+; CHECK-NEXT: dbar 1792
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB8_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fadd ptr %p, float 1.0 release, align 4
+ ret float %v
+}
+
+define float @float_fsub_release(ptr %p) nounwind {
+; CHECK-LABEL: float_fsub_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: .LBB9_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB9_2 Depth 2
+; CHECK-NEXT: lu12i.w $r5, .LCPI9_0
+; CHECK-NEXT: ori $r5, $r5, .LCPI9_0
+; CHECK-NEXT: lu32i.d $r5, .LCPI9_0
+; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI9_0
+; CHECK-NEXT: fld.s $f1, $r5, 0
+; CHECK-NEXT: fadd.s $f1, $f0, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f1
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB9_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB9_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB9_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB9_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB9_2
+; CHECK-NEXT: .LBB9_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB9_1 Depth=1
+; CHECK-NEXT: dbar 1792
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB9_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fsub ptr %p, float 1.0 release, align 4
+ ret float %v
+}
+
+define float @float_fmin_release(ptr %p) nounwind {
+; CHECK-LABEL: float_fmin_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: addi.w $r5, $zero, 1
+; CHECK-NEXT: movgr2fr.w $f1, $r5
+; CHECK-NEXT: ffint.s.w $f1, $f1
+; CHECK-NEXT: .LBB10_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB10_2 Depth 2
+; CHECK-NEXT: fmax.s $f2, $f0, $f0
+; CHECK-NEXT: fmin.s $f2, $f2, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f2
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB10_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB10_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB10_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB10_2
+; CHECK-NEXT: .LBB10_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1
+; CHECK-NEXT: dbar 1792
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB10_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmin ptr %p, float 1.0 release, align 4
+ ret float %v
+}
+
+define float @float_fmax_release(ptr %p) nounwind {
+; CHECK-LABEL: float_fmax_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: addi.w $r5, $zero, 1
+; CHECK-NEXT: movgr2fr.w $f1, $r5
+; CHECK-NEXT: ffint.s.w $f1, $f1
+; CHECK-NEXT: .LBB11_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB11_2 Depth 2
+; CHECK-NEXT: fmax.s $f2, $f0, $f0
+; CHECK-NEXT: fmax.s $f2, $f2, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f2
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB11_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB11_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB11_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB11_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB11_2
+; CHECK-NEXT: .LBB11_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB11_1 Depth=1
+; CHECK-NEXT: dbar 1792
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB11_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmax ptr %p, float 1.0 release, align 4
+ ret float %v
+}
+
+define double @double_fadd_release(ptr %p) nounwind {
+; CHECK-LABEL: double_fadd_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r4, $zero, 1
+; CHECK-NEXT: movgr2fr.d $f1, $r4
+; CHECK-NEXT: ffint.d.l $f24, $f1
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 8
+; CHECK-NEXT: addi.d $r26, $sp, 0
+; CHECK-NEXT: addi.d $r27, $zero, 3
+; CHECK-NEXT: addi.d $r28, $zero, 0
+; CHECK-NEXT: .LBB12_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: fadd.d $f0, $f0, $f24
+; CHECK-NEXT: fst.d $f0, $sp, 0
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r28
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 8
+; CHECK-NEXT: beqz $r4, .LBB12_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload
+; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fadd ptr %p, double 1.0 release, align 4
+ ret double %v
+}
+
+define double @double_fsub_release(ptr %p) nounwind {
+; CHECK-LABEL: double_fsub_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r28, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 16
+; CHECK-NEXT: addi.d $r26, $sp, 8
+; CHECK-NEXT: addi.d $r27, $zero, 3
+; CHECK-NEXT: addi.d $r28, $zero, 0
+; CHECK-NEXT: .LBB13_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 16
+; CHECK-NEXT: lu12i.w $r4, .LCPI13_0
+; CHECK-NEXT: ori $r4, $r4, .LCPI13_0
+; CHECK-NEXT: lu32i.d $r4, .LCPI13_0
+; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI13_0
+; CHECK-NEXT: fld.d $f1, $r4, 0
+; CHECK-NEXT: fadd.d $f0, $f0, $f1
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r28
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 16
+; CHECK-NEXT: beqz $r4, .LBB13_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r28, $sp, 64 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fsub ptr %p, double 1.0 release, align 4
+ ret double %v
+}
+
+define double @double_fmin_release(ptr %p) nounwind {
+; CHECK-LABEL: double_fmin_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r4, $zero, 1
+; CHECK-NEXT: movgr2fr.d $f1, $r4
+; CHECK-NEXT: ffint.d.l $f24, $f1
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 8
+; CHECK-NEXT: addi.d $r26, $sp, 0
+; CHECK-NEXT: addi.d $r27, $zero, 3
+; CHECK-NEXT: addi.d $r28, $zero, 0
+; CHECK-NEXT: .LBB14_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: fmax.d $f0, $f0, $f0
+; CHECK-NEXT: fmin.d $f0, $f0, $f24
+; CHECK-NEXT: fst.d $f0, $sp, 0
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r28
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 8
+; CHECK-NEXT: beqz $r4, .LBB14_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload
+; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmin ptr %p, double 1.0 release, align 4
+ ret double %v
+}
+
+define double @double_fmax_release(ptr %p) nounwind {
+; CHECK-LABEL: double_fmax_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r4, $zero, 1
+; CHECK-NEXT: movgr2fr.d $f1, $r4
+; CHECK-NEXT: ffint.d.l $f24, $f1
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 8
+; CHECK-NEXT: addi.d $r26, $sp, 0
+; CHECK-NEXT: addi.d $r27, $zero, 3
+; CHECK-NEXT: addi.d $r28, $zero, 0
+; CHECK-NEXT: .LBB15_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: fmax.d $f0, $f0, $f0
+; CHECK-NEXT: fmax.d $f0, $f0, $f24
+; CHECK-NEXT: fst.d $f0, $sp, 0
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r28
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 8
+; CHECK-NEXT: beqz $r4, .LBB15_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload
+; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmax ptr %p, double 1.0 release, align 4
+ ret double %v
+}
+
+define float @float_fadd_acq_rel(ptr %p) nounwind {
+; CHECK-LABEL: float_fadd_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: addi.w $r5, $zero, 1
+; CHECK-NEXT: movgr2fr.w $f1, $r5
+; CHECK-NEXT: ffint.s.w $f1, $f1
+; CHECK-NEXT: .LBB16_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB16_2 Depth 2
+; CHECK-NEXT: fadd.s $f2, $f0, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f2
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB16_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB16_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB16_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB16_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB16_2
+; CHECK-NEXT: .LBB16_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB16_1 Depth=1
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB16_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4
+ ret float %v
+}
+
+define float @float_fsub_acq_rel(ptr %p) nounwind {
+; CHECK-LABEL: float_fsub_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: .LBB17_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB17_2 Depth 2
+; CHECK-NEXT: lu12i.w $r5, .LCPI17_0
+; CHECK-NEXT: ori $r5, $r5, .LCPI17_0
+; CHECK-NEXT: lu32i.d $r5, .LCPI17_0
+; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI17_0
+; CHECK-NEXT: fld.s $f1, $r5, 0
+; CHECK-NEXT: fadd.s $f1, $f0, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f1
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB17_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB17_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB17_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB17_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB17_2
+; CHECK-NEXT: .LBB17_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB17_1 Depth=1
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB17_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4
+ ret float %v
+}
+
+define float @float_fmin_acq_rel(ptr %p) nounwind {
+; CHECK-LABEL: float_fmin_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: addi.w $r5, $zero, 1
+; CHECK-NEXT: movgr2fr.w $f1, $r5
+; CHECK-NEXT: ffint.s.w $f1, $f1
+; CHECK-NEXT: .LBB18_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB18_2 Depth 2
+; CHECK-NEXT: fmax.s $f2, $f0, $f0
+; CHECK-NEXT: fmin.s $f2, $f2, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f2
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB18_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB18_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB18_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB18_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB18_2
+; CHECK-NEXT: .LBB18_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB18_1 Depth=1
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB18_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4
+ ret float %v
+}
+
+define float @float_fmax_acq_rel(ptr %p) nounwind {
+; CHECK-LABEL: float_fmax_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fld.s $f0, $r4, 0
+; CHECK-NEXT: addi.w $r5, $zero, 1
+; CHECK-NEXT: movgr2fr.w $f1, $r5
+; CHECK-NEXT: ffint.s.w $f1, $f1
+; CHECK-NEXT: .LBB19_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB19_2 Depth 2
+; CHECK-NEXT: fmax.s $f2, $f0, $f0
+; CHECK-NEXT: fmax.s $f2, $f2, $f1
+; CHECK-NEXT: movfr2gr.s $r5, $f2
+; CHECK-NEXT: movfr2gr.s $r6, $f0
+; CHECK-NEXT: .LBB19_2: # %atomicrmw.start
+; CHECK-NEXT: # Parent Loop BB19_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: ll.w $r7, $r4, 0
+; CHECK-NEXT: bne $r7, $r6, .LBB19_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB19_2 Depth=2
+; CHECK-NEXT: move $r8, $r5
+; CHECK-NEXT: sc.w $r8, $r4, 0
+; CHECK-NEXT: beq $r8, $zero, .LBB19_2
+; CHECK-NEXT: .LBB19_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB19_1 Depth=1
+; CHECK-NEXT: dbar 0
+; CHECK-NEXT: movgr2fr.w $f0, $r7
+; CHECK-NEXT: bne $r7, $r6, .LBB19_1
+; CHECK-NEXT: # %bb.5: # %atomicrmw.end
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4
+ ret float %v
+}
+
+define double @double_fadd_acq_rel(ptr %p) nounwind {
+; CHECK-LABEL: double_fadd_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r4, $zero, 1
+; CHECK-NEXT: movgr2fr.d $f1, $r4
+; CHECK-NEXT: ffint.d.l $f24, $f1
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 8
+; CHECK-NEXT: addi.d $r26, $sp, 0
+; CHECK-NEXT: addi.d $r27, $zero, 4
+; CHECK-NEXT: addi.d $r28, $zero, 2
+; CHECK-NEXT: .LBB20_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: fadd.d $f0, $f0, $f24
+; CHECK-NEXT: fst.d $f0, $sp, 0
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r28
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 8
+; CHECK-NEXT: beqz $r4, .LBB20_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload
+; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4
+ ret double %v
+}
+
+define double @double_fsub_acq_rel(ptr %p) nounwind {
+; CHECK-LABEL: double_fsub_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r28, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: move $r23, $r4
+; CHECK-NEXT: fld.d $f0, $r4, 0
+; CHECK-NEXT: addi.d $r24, $zero, 8
+; CHECK-NEXT: addi.d $r25, $sp, 16
+; CHECK-NEXT: addi.d $r26, $sp, 8
+; CHECK-NEXT: addi.d $r27, $zero, 4
+; CHECK-NEXT: addi.d $r28, $zero, 2
+; CHECK-NEXT: .LBB21_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: fst.d $f0, $sp, 16
+; CHECK-NEXT: lu12i.w $r4, .LCPI21_0
+; CHECK-NEXT: ori $r4, $r4, .LCPI21_0
+; CHECK-NEXT: lu32i.d $r4, .LCPI21_0
+; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI21_0
+; CHECK-NEXT: fld.d $f1, $r4, 0
+; CHECK-NEXT: fadd.d $f0, $f0, $f1
+; CHECK-NEXT: fst.d $f0, $sp, 8
+; CHECK-NEXT: move $r4, $r24
+; CHECK-NEXT: move $r5, $r23
+; CHECK-NEXT: move $r6, $r25
+; CHECK-NEXT: move $r7, $r26
+; CHECK-NEXT: move $r8, $r27
+; CHECK-NEXT: move $r9, $r28
+; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange
+; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange
+; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange
+; CHECK-NEXT: jirl $ra, $ra, 0
+; CHECK-NEXT: fld.d $f0, $sp, 16
+; CHECK-NEXT: beqz $r4, .LBB21_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $r28, $sp, 64 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4
+ ret double %v
+}
+
+define double @double_fmin_acq_rel(ptr %p) nounwind {
+; CHECK-LABEL: double_fmin_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r4, $zero, 1 +; CHECK-NEXT: movgr2fr.d $f1, $r4 +; CHECK-NEXT: ffint.d.l $f24, $f1 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 8 +; CHECK-NEXT: addi.d $r26, $sp, 0 +; CHECK-NEXT: addi.d $r27, $zero, 4 +; CHECK-NEXT: addi.d $r28, $zero, 2 +; CHECK-NEXT: .LBB22_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: fmax.d $f0, $f0, $f0 +; CHECK-NEXT: fmin.d $f0, $f0, $f24 +; CHECK-NEXT: fst.d $f0, $sp, 0 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r28 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d $f0, $sp, 8 +; CHECK-NEXT: beqz $r4, .LBB22_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: jr $ra + %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4 + ret double %v +} + +define double @double_fmax_acq_rel(ptr %p) nounwind { +; CHECK-LABEL: double_fmax_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r4, $zero, 1 +; CHECK-NEXT: movgr2fr.d $f1, $r4 +; CHECK-NEXT: ffint.d.l $f24, $f1 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 8 +; CHECK-NEXT: addi.d $r26, $sp, 0 +; CHECK-NEXT: addi.d $r27, $zero, 4 +; CHECK-NEXT: addi.d $r28, $zero, 2 +; CHECK-NEXT: .LBB23_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: fmax.d $f0, $f0, $f0 +; CHECK-NEXT: fmax.d $f0, $f0, $f24 +; CHECK-NEXT: fst.d $f0, $sp, 0 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r28 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d $f0, $sp, 8 +; CHECK-NEXT: beqz $r4, .LBB23_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; 
CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: jr $ra + %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4 + ret double %v +} + +define float @float_fadd_seq_cst(ptr %p) nounwind { +; CHECK-LABEL: float_fadd_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.s $f0, $r4, 0 +; CHECK-NEXT: addi.w $r5, $zero, 1 +; CHECK-NEXT: movgr2fr.w $f1, $r5 +; CHECK-NEXT: ffint.s.w $f1, $f1 +; CHECK-NEXT: .LBB24_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB24_2 Depth 2 +; CHECK-NEXT: fadd.s $f2, $f0, $f1 +; CHECK-NEXT: movfr2gr.s $r5, $f2 +; CHECK-NEXT: movfr2gr.s $r6, $f0 +; CHECK-NEXT: .LBB24_2: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB24_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ll.w $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB24_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB24_2 Depth=2 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.w $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB24_2 +; CHECK-NEXT: .LBB24_4: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB24_1 Depth=1 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: movgr2fr.w $f0, $r7 +; CHECK-NEXT: bne $r7, $r6, .LBB24_1 +; CHECK-NEXT: # %bb.5: # %atomicrmw.end +; CHECK-NEXT: jr $ra + %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 + ret float %v +} + +define float @float_fsub_seq_cst(ptr %p) nounwind { +; CHECK-LABEL: float_fsub_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.s $f0, $r4, 0 +; CHECK-NEXT: .LBB25_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB25_2 Depth 2 +; CHECK-NEXT: lu12i.w $r5, .LCPI25_0 +; CHECK-NEXT: ori $r5, $r5, .LCPI25_0 +; CHECK-NEXT: lu32i.d $r5, .LCPI25_0 +; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI25_0 +; CHECK-NEXT: fld.s $f1, $r5, 0 +; CHECK-NEXT: fadd.s $f1, $f0, $f1 +; CHECK-NEXT: movfr2gr.s $r5, $f1 +; CHECK-NEXT: movfr2gr.s $r6, $f0 +; CHECK-NEXT: .LBB25_2: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB25_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ll.w $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB25_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB25_2 Depth=2 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.w $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB25_2 +; CHECK-NEXT: .LBB25_4: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB25_1 Depth=1 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: movgr2fr.w $f0, $r7 +; CHECK-NEXT: bne $r7, $r6, .LBB25_1 +; CHECK-NEXT: # %bb.5: # %atomicrmw.end +; CHECK-NEXT: jr $ra + %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 + ret float %v +} + +define float @float_fmin_seq_cst(ptr %p) nounwind { +; CHECK-LABEL: float_fmin_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.s $f0, $r4, 0 +; CHECK-NEXT: addi.w $r5, $zero, 1 +; CHECK-NEXT: movgr2fr.w $f1, $r5 +; CHECK-NEXT: ffint.s.w $f1, $f1 +; CHECK-NEXT: .LBB26_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB26_2 Depth 2 +; CHECK-NEXT: fmax.s $f2, $f0, $f0 +; CHECK-NEXT: fmin.s $f2, $f2, $f1 +; 
CHECK-NEXT: movfr2gr.s $r5, $f2 +; CHECK-NEXT: movfr2gr.s $r6, $f0 +; CHECK-NEXT: .LBB26_2: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB26_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ll.w $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB26_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB26_2 Depth=2 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.w $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB26_2 +; CHECK-NEXT: .LBB26_4: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB26_1 Depth=1 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: movgr2fr.w $f0, $r7 +; CHECK-NEXT: bne $r7, $r6, .LBB26_1 +; CHECK-NEXT: # %bb.5: # %atomicrmw.end +; CHECK-NEXT: jr $ra + %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 + ret float %v +} + +define float @float_fmax_seq_cst(ptr %p) nounwind { +; CHECK-LABEL: float_fmax_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.s $f0, $r4, 0 +; CHECK-NEXT: addi.w $r5, $zero, 1 +; CHECK-NEXT: movgr2fr.w $f1, $r5 +; CHECK-NEXT: ffint.s.w $f1, $f1 +; CHECK-NEXT: .LBB27_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB27_2 Depth 2 +; CHECK-NEXT: fmax.s $f2, $f0, $f0 +; CHECK-NEXT: fmax.s $f2, $f2, $f1 +; CHECK-NEXT: movfr2gr.s $r5, $f2 +; CHECK-NEXT: movfr2gr.s $r6, $f0 +; CHECK-NEXT: .LBB27_2: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB27_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ll.w $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB27_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB27_2 Depth=2 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.w $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB27_2 +; CHECK-NEXT: .LBB27_4: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB27_1 Depth=1 +; CHECK-NEXT: dbar 0 +; CHECK-NEXT: movgr2fr.w $f0, $r7 +; CHECK-NEXT: bne $r7, $r6, .LBB27_1 +; CHECK-NEXT: # %bb.5: # %atomicrmw.end +; CHECK-NEXT: jr $ra + %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 + ret float %v +} + +define double @double_fadd_seq_cst(ptr %p) nounwind { +; CHECK-LABEL: double_fadd_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r4, $zero, 1 +; CHECK-NEXT: movgr2fr.d $f1, $r4 +; CHECK-NEXT: ffint.d.l $f24, $f1 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 16 +; CHECK-NEXT: addi.d $r26, $sp, 8 +; CHECK-NEXT: addi.d $r27, $zero, 5 +; CHECK-NEXT: .LBB28_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 16 +; CHECK-NEXT: fadd.d $f0, $f0, $f24 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r27 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d 
$f0, $sp, 16 +; CHECK-NEXT: beqz $r4, .LBB28_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: jr $ra + %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4 + ret double %v +} + +define double @double_fsub_seq_cst(ptr %p) nounwind { +; CHECK-LABEL: double_fsub_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 8 +; CHECK-NEXT: addi.d $r26, $sp, 0 +; CHECK-NEXT: addi.d $r27, $zero, 5 +; CHECK-NEXT: .LBB29_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: lu12i.w $r4, .LCPI29_0 +; CHECK-NEXT: ori $r4, $r4, .LCPI29_0 +; CHECK-NEXT: lu32i.d $r4, .LCPI29_0 +; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI29_0 +; CHECK-NEXT: fld.d $f1, $r4, 0 +; CHECK-NEXT: fadd.d $f0, $f0, $f1 +; CHECK-NEXT: fst.d $f0, $sp, 0 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r27 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d $f0, $sp, 8 +; CHECK-NEXT: beqz $r4, .LBB29_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: jr $ra + %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4 + ret double %v +} + +define double @double_fmin_seq_cst(ptr %p) nounwind { +; CHECK-LABEL: double_fmin_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r4, $zero, 1 +; CHECK-NEXT: movgr2fr.d $f1, $r4 +; CHECK-NEXT: ffint.d.l $f24, $f1 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 16 +; CHECK-NEXT: addi.d $r26, $sp, 8 +; CHECK-NEXT: 
addi.d $r27, $zero, 5 +; CHECK-NEXT: .LBB30_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 16 +; CHECK-NEXT: fmax.d $f0, $f0, $f0 +; CHECK-NEXT: fmin.d $f0, $f0, $f24 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r27 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d $f0, $sp, 16 +; CHECK-NEXT: beqz $r4, .LBB30_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: jr $ra + %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4 + ret double %v +} + +define double @double_fmax_seq_cst(ptr %p) nounwind { +; CHECK-LABEL: double_fmax_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r4, $zero, 1 +; CHECK-NEXT: movgr2fr.d $f1, $r4 +; CHECK-NEXT: ffint.d.l $f24, $f1 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 16 +; CHECK-NEXT: addi.d $r26, $sp, 8 +; CHECK-NEXT: addi.d $r27, $zero, 5 +; CHECK-NEXT: .LBB31_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 16 +; CHECK-NEXT: fmax.d $f0, $f0, $f0 +; CHECK-NEXT: fmax.d $f0, $f0, $f24 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r27 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d $f0, $sp, 16 +; CHECK-NEXT: beqz $r4, .LBB31_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: jr $ra + %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4 + ret double %v +} + +define float @float_fadd_monotonic(ptr %p) nounwind 
{ +; CHECK-LABEL: float_fadd_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.s $f0, $r4, 0 +; CHECK-NEXT: addi.w $r5, $zero, 1 +; CHECK-NEXT: movgr2fr.w $f1, $r5 +; CHECK-NEXT: ffint.s.w $f1, $f1 +; CHECK-NEXT: .LBB32_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB32_2 Depth 2 +; CHECK-NEXT: fadd.s $f2, $f0, $f1 +; CHECK-NEXT: movfr2gr.s $r5, $f2 +; CHECK-NEXT: movfr2gr.s $r6, $f0 +; CHECK-NEXT: .LBB32_2: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB32_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ll.w $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB32_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB32_2 Depth=2 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.w $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB32_2 +; CHECK-NEXT: .LBB32_4: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB32_1 Depth=1 +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: movgr2fr.w $f0, $r7 +; CHECK-NEXT: bne $r7, $r6, .LBB32_1 +; CHECK-NEXT: # %bb.5: # %atomicrmw.end +; CHECK-NEXT: jr $ra + %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 + ret float %v +} + +define float @float_fsub_monotonic(ptr %p) nounwind { +; CHECK-LABEL: float_fsub_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.s $f0, $r4, 0 +; CHECK-NEXT: .LBB33_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB33_2 Depth 2 +; CHECK-NEXT: lu12i.w $r5, .LCPI33_0 +; CHECK-NEXT: ori $r5, $r5, .LCPI33_0 +; CHECK-NEXT: lu32i.d $r5, .LCPI33_0 +; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI33_0 +; CHECK-NEXT: fld.s $f1, $r5, 0 +; CHECK-NEXT: fadd.s $f1, $f0, $f1 +; CHECK-NEXT: movfr2gr.s $r5, $f1 +; CHECK-NEXT: movfr2gr.s $r6, $f0 +; CHECK-NEXT: .LBB33_2: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB33_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ll.w $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB33_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB33_2 Depth=2 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.w $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB33_2 +; CHECK-NEXT: .LBB33_4: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB33_1 Depth=1 +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: movgr2fr.w $f0, $r7 +; CHECK-NEXT: bne $r7, $r6, .LBB33_1 +; CHECK-NEXT: # %bb.5: # %atomicrmw.end +; CHECK-NEXT: jr $ra + %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 + ret float %v +} + +define float @float_fmin_monotonic(ptr %p) nounwind { +; CHECK-LABEL: float_fmin_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.s $f0, $r4, 0 +; CHECK-NEXT: addi.w $r5, $zero, 1 +; CHECK-NEXT: movgr2fr.w $f1, $r5 +; CHECK-NEXT: ffint.s.w $f1, $f1 +; CHECK-NEXT: .LBB34_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB34_2 Depth 2 +; CHECK-NEXT: fmax.s $f2, $f0, $f0 +; CHECK-NEXT: fmin.s $f2, $f2, $f1 +; CHECK-NEXT: movfr2gr.s $r5, $f2 +; CHECK-NEXT: movfr2gr.s $r6, $f0 +; CHECK-NEXT: .LBB34_2: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB34_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ll.w $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB34_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB34_2 Depth=2 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.w $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB34_2 +; CHECK-NEXT: .LBB34_4: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB34_1 Depth=1 +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: movgr2fr.w $f0, $r7 
+; CHECK-NEXT: bne $r7, $r6, .LBB34_1 +; CHECK-NEXT: # %bb.5: # %atomicrmw.end +; CHECK-NEXT: jr $ra + %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 + ret float %v +} + +define float @float_fmax_monotonic(ptr %p) nounwind { +; CHECK-LABEL: float_fmax_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.s $f0, $r4, 0 +; CHECK-NEXT: addi.w $r5, $zero, 1 +; CHECK-NEXT: movgr2fr.w $f1, $r5 +; CHECK-NEXT: ffint.s.w $f1, $f1 +; CHECK-NEXT: .LBB35_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB35_2 Depth 2 +; CHECK-NEXT: fmax.s $f2, $f0, $f0 +; CHECK-NEXT: fmax.s $f2, $f2, $f1 +; CHECK-NEXT: movfr2gr.s $r5, $f2 +; CHECK-NEXT: movfr2gr.s $r6, $f0 +; CHECK-NEXT: .LBB35_2: # %atomicrmw.start +; CHECK-NEXT: # Parent Loop BB35_1 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ll.w $r7, $r4, 0 +; CHECK-NEXT: bne $r7, $r6, .LBB35_4 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB35_2 Depth=2 +; CHECK-NEXT: move $r8, $r5 +; CHECK-NEXT: sc.w $r8, $r4, 0 +; CHECK-NEXT: beq $r8, $zero, .LBB35_2 +; CHECK-NEXT: .LBB35_4: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB35_1 Depth=1 +; CHECK-NEXT: dbar 1792 +; CHECK-NEXT: movgr2fr.w $f0, $r7 +; CHECK-NEXT: bne $r7, $r6, .LBB35_1 +; CHECK-NEXT: # %bb.5: # %atomicrmw.end +; CHECK-NEXT: jr $ra + %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 + ret float %v +} + +define double @double_fadd_monotonic(ptr %p) nounwind { +; CHECK-LABEL: double_fadd_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r4, $zero, 1 +; CHECK-NEXT: movgr2fr.d $f1, $r4 +; CHECK-NEXT: ffint.d.l $f24, $f1 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 16 +; CHECK-NEXT: addi.d $r26, $sp, 8 +; CHECK-NEXT: addi.d $r27, $zero, 0 +; CHECK-NEXT: .LBB36_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 16 +; CHECK-NEXT: fadd.d $f0, $f0, $f24 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r27 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d $f0, $sp, 16 +; CHECK-NEXT: beqz $r4, .LBB36_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: jr $ra + %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4 + 
ret double %v +} + +define double @double_fsub_monotonic(ptr %p) nounwind { +; CHECK-LABEL: double_fsub_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 8 +; CHECK-NEXT: addi.d $r26, $sp, 0 +; CHECK-NEXT: addi.d $r27, $zero, 0 +; CHECK-NEXT: .LBB37_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: lu12i.w $r4, .LCPI37_0 +; CHECK-NEXT: ori $r4, $r4, .LCPI37_0 +; CHECK-NEXT: lu32i.d $r4, .LCPI37_0 +; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI37_0 +; CHECK-NEXT: fld.d $f1, $r4, 0 +; CHECK-NEXT: fadd.d $f0, $f0, $f1 +; CHECK-NEXT: fst.d $f0, $sp, 0 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r27 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d $f0, $sp, 8 +; CHECK-NEXT: beqz $r4, .LBB37_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: jr $ra + %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4 + ret double %v +} + +define double @double_fmin_monotonic(ptr %p) nounwind { +; CHECK-LABEL: double_fmin_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r4, $zero, 1 +; CHECK-NEXT: movgr2fr.d $f1, $r4 +; CHECK-NEXT: ffint.d.l $f24, $f1 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 16 +; CHECK-NEXT: addi.d $r26, $sp, 8 +; CHECK-NEXT: addi.d $r27, $zero, 0 +; CHECK-NEXT: .LBB38_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 16 +; CHECK-NEXT: fmax.d $f0, $f0, $f0 +; CHECK-NEXT: fmin.d $f0, $f0, $f24 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r27 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, 
__atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d $f0, $sp, 16 +; CHECK-NEXT: beqz $r4, .LBB38_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: jr $ra + %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4 + ret double %v +} + +define double @double_fmax_monotonic(ptr %p) nounwind { +; CHECK-LABEL: double_fmax_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: fld.d $f0, $r4, 0 +; CHECK-NEXT: addi.d $r4, $zero, 1 +; CHECK-NEXT: movgr2fr.d $f1, $r4 +; CHECK-NEXT: ffint.d.l $f24, $f1 +; CHECK-NEXT: addi.d $r24, $zero, 8 +; CHECK-NEXT: addi.d $r25, $sp, 16 +; CHECK-NEXT: addi.d $r26, $sp, 8 +; CHECK-NEXT: addi.d $r27, $zero, 0 +; CHECK-NEXT: .LBB39_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: fst.d $f0, $sp, 16 +; CHECK-NEXT: fmax.d $f0, $f0, $f0 +; CHECK-NEXT: fmax.d $f0, $f0, $f24 +; CHECK-NEXT: fst.d $f0, $sp, 8 +; CHECK-NEXT: move $r4, $r24 +; CHECK-NEXT: move $r5, $r23 +; CHECK-NEXT: move $r6, $r25 +; CHECK-NEXT: move $r7, $r26 +; CHECK-NEXT: move $r8, $r27 +; CHECK-NEXT: move $r9, $r27 +; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange +; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange +; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange +; CHECK-NEXT: jirl $ra, $ra, 0 +; CHECK-NEXT: fld.d $f0, $sp, 16 +; CHECK-NEXT: beqz $r4, .LBB39_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: jr $ra + %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4 + ret double %v +} diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll new file mode 100644 index 000000000000..3e04fc53c0f3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll @@ -0,0 +1,1882 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s + +define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: 
and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB1_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, 
$r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB4_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB5_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB8_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, 
$r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB9_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB12_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, 
$r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB13_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i8_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB16_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i16_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB17_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i32_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.wu $r5, $r6, 
$r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i64_release: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i8_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB20_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i16_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB21_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i32_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i64_release: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: 
atomicrmw_max_i8_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB24_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i16_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB25_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i32_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i64_release: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i8_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, 
$r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB28_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i16_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB29_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i16 %b release + ret i16 %1 +} + +define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i32_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i32 %b release + ret i32 %1 +} + +define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i64_release: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i64 %b release + ret i64 %1 +} + +define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i8_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB32_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i16_acq_rel: +; 
CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB33_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i32_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i64_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i8_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB36_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i16_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; 
CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB37_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i32_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i64_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i8_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB40_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i16_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB41_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: 
atomicrmw_max_i32_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i64_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i8_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB44_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i16_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB45_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i32_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i32 %b acq_rel + ret i32 %1 +} + +define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i64_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr 
%a, i64 %b acq_rel + ret i64 %1 +} + +define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i8_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB48_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i16_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB49_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i32_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i64_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i8_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 
0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB52_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i16_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB53_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i32_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i64_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i8_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB56_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i8 
%b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i16_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB57_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i32_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i64_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i8_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB60_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i16_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w 
$r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB61_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i32_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i32 %b seq_cst + ret i32 %1 +} + +define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i64_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw min ptr %a, i64 %b seq_cst + ret i64 %1 +} + +define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB64_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB65_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw 
umax ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umax_i64_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umax ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB68_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: sltu $r13, $r14, $r5 +; CHECK-NEXT: maskeqz $r11, $r14, $r13 +; CHECK-NEXT: masknez $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB69_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_umin_i64_monotonic: 
+; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw umin ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB72_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r14, $r10, $r8 +; CHECK-NEXT: and $r5, $r5, $r8 +; CHECK-NEXT: slt $r13, $r14, $r5 +; CHECK-NEXT: masknez $r11, $r14, $r13 +; CHECK-NEXT: maskeqz $r13, $r5, $r13 +; CHECK-NEXT: or $r11, $r11, $r13 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB73_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_max_i64_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw max ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_min_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 
255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r14, $r10, $r8
+; CHECK-NEXT: and $r5, $r5, $r8
+; CHECK-NEXT: slt $r13, $r14, $r5
+; CHECK-NEXT: maskeqz $r11, $r14, $r13
+; CHECK-NEXT: masknez $r13, $r5, $r13
+; CHECK-NEXT: or $r11, $r11, $r13
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB76_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw min ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_min_i16_monotonic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r14, $r10, $r8
+; CHECK-NEXT: and $r5, $r5, $r8
+; CHECK-NEXT: slt $r13, $r14, $r5
+; CHECK-NEXT: maskeqz $r11, $r14, $r13
+; CHECK-NEXT: masknez $r13, $r5, $r13
+; CHECK-NEXT: or $r11, $r11, $r13
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB77_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw min ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_min_i32_monotonic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw min ptr %a, i32 %b monotonic
+ ret i32 %1
+}
+
+define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_min_i64_monotonic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw min ptr %a, i64 %b monotonic
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/atomicrmw.ll
new file mode 100644
index 000000000000..4732ec0fa189
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/atomicrmw.ll
@@ -0,0 +1,3652 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
+
+define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i8_acquire:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10,
$r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i8 %b acquire + ret i8 %1 +} + +define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { +; CHECK-LABEL: atomicrmw_xchg_0_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB1_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i8 0 acquire + ret i8 %1 +} + +define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { +; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r5, $zero, -1 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB2_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i8 -1 acquire + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_xchg_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB3_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i16 %b acquire + ret i16 %1 +} + +define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { +; CHECK-LABEL: atomicrmw_xchg_0_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: 
slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB4_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i16 0 acquire + ret i16 %1 +} + +define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { +; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r5, $zero, -1 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB5_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i16 -1 acquire + ret i16 %1 +} + +define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_xchg_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_xchg_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_add_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: add.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB8_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw add ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_add_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 
+; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: add.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB9_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw add ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_add_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw add ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_add_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw add ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_sub_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: sub.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB12_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw sub ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_sub_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: sub.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB13_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw sub ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) 
nounwind { +; CHECK-LABEL: atomicrmw_sub_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sub.w $r7, $zero, $r6 +; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw sub ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_sub_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sub.d $r7, $zero, $r5 +; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw sub ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_nand_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: nor $r11, $zero, $r11 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB16_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw nand ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_nand_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: nor $r11, $zero, $r11 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB17_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw nand ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_nand_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r5, $r4, 0 +; CHECK-NEXT: and $r7, $r5, $r6 +; CHECK-NEXT: nor $r7, $zero, $r7 +; CHECK-NEXT: sc.w $r7, $r4, 0 +; CHECK-NEXT: beq $r7, $zero, .LBB18_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw nand ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_nand_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.d $r6, $r4, 0 +; CHECK-NEXT: 
and $r7, $r6, $r5 +; CHECK-NEXT: nor $r7, $zero, $r7 +; CHECK-NEXT: sc.d $r7, $r4, 0 +; CHECK-NEXT: beq $r7, $zero, .LBB19_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw nand ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_and_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB20_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw and ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_and_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB21_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw and ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_and_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw and ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_and_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw and ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_or_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: or $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: 
and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB24_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw or ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_or_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: or $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB25_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw or ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_or_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw or ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_or_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw or ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_xor_i8_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: xor $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB28_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xor ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_xor_i16_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB29_1: # =>This Inner Loop Header: 
Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: xor $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB29_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xor ptr %a, i16 %b acquire + ret i16 %1 +} + +define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_xor_i32_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw xor ptr %a, i32 %b acquire + ret i32 %1 +} + +define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_xor_i64_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw xor ptr %a, i64 %b acquire + ret i64 %1 +} + +define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_xchg_i8_release: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB32_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i8 %b release + ret i8 %1 +} + +define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { +; CHECK-LABEL: atomicrmw_xchg_0_i8_release: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r5, $zero, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB33_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i8 0 release + ret i8 %1 +} + +define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { +; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_release: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r5, $zero, -1 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB34_1: # =>This Inner Loop 
Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB34_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 -1 release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i16_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB35_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_0_i16_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB36_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 0 release
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r5, $zero, -1
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB37_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 -1 release
+ ret i16 %1
+}
+
+define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i32_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i64_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i64 %b release
+ ret i64 %1
+}
+
+define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i8_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: add.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB40_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i16_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: add.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB41_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i32_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i64_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i64 %b release
+ ret i64 %1
+}
+
+define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i8_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: sub.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB44_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i16_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: sub.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB45_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i32_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: sub.w $r7, $zero, $r6
+; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i64_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: sub.d $r7, $zero, $r5
+; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i64 %b release
+ ret i64 %1
+}
+
+define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i8_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: nor $r11, $zero, $r11
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB48_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i16_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: nor $r11, $zero, $r11
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB49_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i32_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r5, $r4, 0
+; CHECK-NEXT: and $r7, $r5, $r6
+; CHECK-NEXT: nor $r7, $zero, $r7
+; CHECK-NEXT: sc.w $r7, $r4, 0
+; CHECK-NEXT: beq $r7, $zero, .LBB50_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i64_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.d $r6, $r4, 0
+; CHECK-NEXT: and $r7, $r6, $r5
+; CHECK-NEXT: nor $r7, $zero, $r7
+; CHECK-NEXT: sc.d $r7, $r4, 0
+; CHECK-NEXT: beq $r7, $zero, .LBB51_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i64 %b release
+ ret i64 %1
+}
+
+define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i8_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB52_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i16_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB53_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i32_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i64 @atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i64_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i64 %b release
+ ret i64 %1
+}
+
+define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i8_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: or $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB56_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i16_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: or $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB57_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i32_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i64_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i64 %b release
+ ret i64 %1
+}
+
+define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i8_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: xor $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB60_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i8 %b release
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i16_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: xor $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB61_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i16 %b release
+ ret i16 %1
+}
+
+define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i32_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i32 %b release
+ ret i32 %1
+}
+
+define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i64_release:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i64 %b release
+ ret i64 %1
+}
+
+define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i8_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB64_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB65_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 0 acq_rel
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r5, $zero, -1
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB66_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 -1 acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i16_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB67_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB68_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 0 acq_rel
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r5, $zero, -1
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB69_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 -1 acq_rel
+ ret i16 %1
+}
+
+define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i32_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i64_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i64 %b acq_rel
+ ret i64 %1
+}
+
+define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i8_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: add.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB72_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i16_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: add.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB73_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i32_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i64_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i64 %b acq_rel
+ ret i64 %1
+}
+
+define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i8_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: sub.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB76_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i16_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: sub.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB77_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i32_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: sub.w $r7, $zero, $r6
+; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i64 @atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i64_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: sub.d $r7, $zero, $r5
+; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i64 %b acq_rel
+ ret i64 %1
+}
+
+define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i8_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: nor $r11, $zero, $r11
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB80_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i16_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: nor $r11, $zero, $r11
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB81_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i32_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r5, $r4, 0
+; CHECK-NEXT: and $r7, $r5, $r6
+; CHECK-NEXT: nor $r7, $zero, $r7
+; CHECK-NEXT: sc.w $r7, $r4, 0
+; CHECK-NEXT: beq $r7, $zero, .LBB82_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i64_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.d $r6, $r4, 0
+; CHECK-NEXT: and $r7, $r6, $r5
+; CHECK-NEXT: nor $r7, $zero, $r7
+; CHECK-NEXT: sc.d $r7, $r4, 0
+; CHECK-NEXT: beq $r7, $zero, .LBB83_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i64 %b acq_rel
+ ret i64 %1
+}
+
+define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i8_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB84_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i16_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB85_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i32_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i64_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i64 %b acq_rel
+ ret i64 %1
+}
+
+define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i8_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: or $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB88_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i16_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: or $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB89_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i32_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i64_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i64 %b acq_rel
+ ret i64 %1
+}
+
+define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i8_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: xor $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB92_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i8 %b acq_rel
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i16_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: xor $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB93_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i16 %b acq_rel
+ ret i16 %1
+}
+
+define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i32_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i32 %b acq_rel
+ ret i32 %1
+}
+
+define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i64_acq_rel:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i64 %b acq_rel
+ ret i64 %1
+}
+
+define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i8_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB96_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB97_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 0 seq_cst
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r5, $zero, -1
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB98_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 -1 seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i16_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB99_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB100_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 0 seq_cst
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r5, $zero, -1
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB101_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 -1 seq_cst
+ ret i16 %1
+}
+
+define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i32_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i64_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i64 %b seq_cst
+ ret i64 %1
+}
+
+define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i8_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: add.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB104_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i16_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: add.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB105_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i32_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_add_i64_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw add ptr %a, i64 %b seq_cst
+ ret i64 %1
+}
+
+define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i8_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: sub.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB108_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i16_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: sub.w $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB109_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i32_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: sub.w $r7, $zero, $r6
+; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_sub_i64_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: sub.d $r7, $zero, $r5
+; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw sub ptr %a, i64 %b seq_cst
+ ret i64 %1
+}
+
+define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i8_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: nor $r11, $zero, $r11
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB112_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i16_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: nor $r11, $zero, $r11
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB113_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i32_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r5, $r4, 0
+; CHECK-NEXT: and $r7, $r5, $r6
+; CHECK-NEXT: nor $r7, $zero, $r7
+; CHECK-NEXT: sc.w $r7, $r4, 0
+; CHECK-NEXT: beq $r7, $zero, .LBB114_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_nand_i64_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.d $r6, $r4, 0
+; CHECK-NEXT: and $r7, $r6, $r5
+; CHECK-NEXT: nor $r7, $zero, $r7
+; CHECK-NEXT: sc.d $r7, $r4, 0
+; CHECK-NEXT: beq $r7, $zero, .LBB115_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw nand ptr %a, i64 %b seq_cst
+ ret i64 %1
+}
+
+define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i8_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB116_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i16_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB117_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i32_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_and_i64_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw and ptr %a, i64 %b seq_cst
+ ret i64 %1
+}
+
+define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i8_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: or $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB120_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i16_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: or $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB121_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i32_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_or_i64_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw or ptr %a, i64 %b seq_cst
+ ret i64 %1
+}
+
+define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i8_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: xor $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB124_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i8 %b seq_cst
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i16_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: xor $r11, $r10, $r5
+; CHECK-NEXT: and $r11, $r11, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB125_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i16 %b seq_cst
+ ret i16 %1
+}
+
+define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i32_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r6, $r5, 0
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r5
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xor_i64_seq_cst:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xor ptr %a, i64 %b seq_cst
+ ret i64 %1
+}
+
+define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i8_monotonic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB128_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 %b monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB129_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 0 monotonic
+ ret i8 %1
+}
+
+define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r5, $zero, -1
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: ori $r4, $zero, 255
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB130_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.b $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i8 -1 monotonic
+ ret i8 %1
+}
+
+define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_i16_monotonic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r5, $r5, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+; CHECK-NEXT: nor $r9, $zero, $r8
+; CHECK-NEXT: sll.w $r5, $r5, $r7
+; CHECK-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ll.w $r10, $r6, 0
+; CHECK-NEXT: and $r11, $r5, $r8
+; CHECK-NEXT: and $r12, $r10, $r9
+; CHECK-NEXT: or $r12, $r12, $r11
+; CHECK-NEXT: sc.w $r12, $r6, 0
+; CHECK-NEXT: beq $r12, $zero, .LBB131_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: and $r4, $r10, $r8
+; CHECK-NEXT: srl.w $r4, $r4, $r7
+; CHECK-NEXT: ext.w.h $r4, $r4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: jr $ra
+ %1 = atomicrmw xchg ptr %a, i16 %b monotonic
+ ret i16 %1
+}
+
+define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
+; CHECK-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r5, $zero, 0
+; CHECK-NEXT: addi.d $r6, $zero, -4
+; CHECK-NEXT: and $r6, $r4, $r6
+; CHECK-NEXT: andi $r4, $r4, 3
+; CHECK-NEXT: slli.w $r7, $r4, 3
+; CHECK-NEXT: lu12i.w $r4, 15
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: sll.w $r8, $r4, $r7
+;
CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB132_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i16 0 monotonic + ret i16 %1 +} + +define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { +; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r5, $zero, -1 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r5, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB133_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i16 -1 monotonic + ret i16 %1 +} + +define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_xchg_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_xchg_i64_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw xchg ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_add_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: add.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB136_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw add ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_add_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: 
slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: add.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB137_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw add ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_add_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw add ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_add_i64_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw add ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_sub_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: sub.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB140_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw sub ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_sub_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: sub.w $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB141_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw sub ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: 
atomicrmw_sub_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sub.w $r7, $zero, $r6 +; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw sub ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_sub_i64_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: sub.d $r7, $zero, $r5 +; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw sub ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_nand_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: nor $r11, $zero, $r11 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB144_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw nand ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_nand_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: nor $r11, $zero, $r11 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB145_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw nand ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_nand_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r5, $r4, 0 +; CHECK-NEXT: and $r7, $r5, $r6 +; CHECK-NEXT: nor $r7, $zero, $r7 +; CHECK-NEXT: sc.w $r7, $r4, 0 +; CHECK-NEXT: beq $r7, $zero, .LBB146_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw nand ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_nand_i64_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.d $r6, $r4, 0 +; 
CHECK-NEXT: and $r7, $r6, $r5 +; CHECK-NEXT: nor $r7, $zero, $r7 +; CHECK-NEXT: sc.d $r7, $r4, 0 +; CHECK-NEXT: beq $r7, $zero, .LBB147_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw nand ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_and_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB148_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw and ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_and_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: and $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB149_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw and ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_and_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw and ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_and_i64_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw and ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_or_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: or $r11, $r10, $r5 +; 
CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB152_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw or ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_or_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: or $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB153_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw or ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_or_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw or ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_or_i64_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw or ptr %a, i64 %b monotonic + ret i64 %1 +} + +define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { +; CHECK-LABEL: atomicrmw_xor_i8_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: ori $r4, $zero, 255 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: xor $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB156_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.b $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xor ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { +; CHECK-LABEL: atomicrmw_xor_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: addi.d $r6, $zero, -4 +; CHECK-NEXT: and $r6, $r4, $r6 +; CHECK-NEXT: andi $r4, $r4, 3 +; CHECK-NEXT: slli.w $r7, $r4, 3 +; CHECK-NEXT: lu12i.w $r4, 15 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: sll.w $r8, $r4, $r7 +; CHECK-NEXT: nor $r9, $zero, $r8 +; 
CHECK-NEXT: sll.w $r5, $r5, $r7 +; CHECK-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ll.w $r10, $r6, 0 +; CHECK-NEXT: xor $r11, $r10, $r5 +; CHECK-NEXT: and $r11, $r11, $r8 +; CHECK-NEXT: and $r12, $r10, $r9 +; CHECK-NEXT: or $r12, $r12, $r11 +; CHECK-NEXT: sc.w $r12, $r6, 0 +; CHECK-NEXT: beq $r12, $zero, .LBB157_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: and $r4, $r10, $r8 +; CHECK-NEXT: srl.w $r4, $r4, $r7 +; CHECK-NEXT: ext.w.h $r4, $r4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: jr $ra + %1 = atomicrmw xor ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { +; CHECK-LABEL: atomicrmw_xor_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r6, $r5, 0 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r5 +; CHECK-NEXT: jr $ra + %1 = atomicrmw xor ptr %a, i32 %b monotonic + ret i32 %1 +} + +define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { +; CHECK-LABEL: atomicrmw_xor_i64_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: move $r4, $r6 +; CHECK-NEXT: jr $ra + %1 = atomicrmw xor ptr %a, i64 %b monotonic + ret i64 %1 +} diff --git a/llvm/test/CodeGen/LoongArch/bss.ll b/llvm/test/CodeGen/LoongArch/bss.ll new file mode 100644 index 000000000000..cfc30b3a7594 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/bss.ll @@ -0,0 +1,5 @@ +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +; CHECK: .section .bss,"aw",@nobits +; CHECK: .globl a +@a = global i32 0, align 4 diff --git a/llvm/test/CodeGen/LoongArch/bstrins_d.ll b/llvm/test/CodeGen/LoongArch/bstrins_d.ll index 342e044c7a7b..819bfdbb34e2 100644 --- a/llvm/test/CodeGen/LoongArch/bstrins_d.ll +++ b/llvm/test/CodeGen/LoongArch/bstrins_d.ll @@ -1,207 +1,53 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s - -;; Test generation of the bstrins.d instruction. -;; There are 8 patterns that can be matched to bstrins.d. See performORCombine -;; for details. 
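+;; bstrins.d $rd, $rj, msb, lsb writes the low (msb - lsb + 1) bits of $rj
+;; into bits [msb, lsb] of $rd and leaves the other bits of $rd unchanged,
+;; which is why the or/and/shl chains below can fold to a single instruction.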
-
-;; Pattern 1
-;; R = or (and X, mask0), (and (shl Y, lsb), mask1)
-;; =>
-;; R = BSTRINS X, Y, msb, lsb
-define i64 @pat1(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: pat1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
- %shl = shl i64 %b, 16
- %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000
- %or = or i64 %and1, %and2
- ret i64 %or
-}
-
-define i64 @pat1_swap(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: pat1_swap:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
- %shl = shl i64 %b, 16
- %and2 = and i64 %shl, 1099511562240 ; 0x000000ffffff0000
- %or = or i64 %and2, %and1
- ret i64 %or
-}
-
-;; Pattern 2
-;; R = or (and X, mask0), (shl (and Y, mask1), lsb)
-;; =>
-;; R = BSTRINS X, Y, msb, lsb
-define i64 @pat2(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: pat2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
- %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff
- %shl = shl i64 %and2, 16
- %or = or i64 %and1, %shl
- ret i64 %or
-}
-
-define i64 @pat2_swap(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: pat2_swap:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i64 %a, -1099511562241 ; 0xffffff000000ffff
- %and2 = and i64 %b, 16777215 ; 0x0000000000ffffff
- %shl = shl i64 %and2, 16
- %or = or i64 %shl, %and1
- ret i64 %or
-}
-
-;; Pattern 3
-;; R = or (and X, mask0), (and Y, mask1)
-;; =>
-;; R = BSTRINS X, (srl (and Y, mask1), lsb), msb, lsb
-define i64 @pat3(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: pat3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: andi $a1, $a1, 288
-; CHECK-NEXT: srli.d $a1, $a1, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 11, 4
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f
- %and2 = and i64 %b, 288 ; 0x0000000000000120
- %or = or i64 %and1, %and2
- ret i64 %or
-}
-
-define i64 @pat3_swap(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: pat3_swap:
-; CHECK: # %bb.0:
-; CHECK-NEXT: andi $a1, $a1, 288
-; CHECK-NEXT: srli.d $a1, $a1, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 11, 4
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i64 %a, -4081 ; 0xfffffffffffff00f
- %and2 = and i64 %b, 288 ; 0x0000000000000120
- %or = or i64 %and2, %and1
- ret i64 %or
-}
-
-;; Pattern 4
-;; R = or (and X, mask), (shl Y, shamt)
-;; =>
-;; R = BSTRINS X, Y, 63, shamt
-define i64 @pat4(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: pat4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 8
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i64 %a, 255
- %shl = shl i64 %b, 8
+; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s
+
+define void @bstrinsd_63_27(i64* nocapture %d) nounwind {
+; CHECK-LABEL: bstrinsd_63_27:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $r5, $r4, 0
+; CHECK-NEXT: addi.d $r6, $zero, 123
+; CHECK-NEXT: bstrins.d $r5, $r6, 63, 27
+; CHECK-NEXT: st.d $r5, $r4, 0
+; CHECK-NEXT: jr $ra
+entry:
+ %tmp = load i64, i64* %d, align 8
+ %and5 = and i64 %tmp, 134217727
+ %or = or i64 %and5, 16508780544
+ store i64 %or, i64* %d, align 8
+ ret void
+}
+
+define void @bstrinsd_33_28(i64* nocapture %d) nounwind {
+; CHECK-LABEL: bstrinsd_33_28:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $r5, $r4, 0
+; CHECK-NEXT: addi.d $r6, $zero, 4
+; CHECK-NEXT: bstrins.d $r5, $r6, 33, 28
+; CHECK-NEXT: st.d $r5, $r4, 0
+; CHECK-NEXT: jr $ra
+entry:
+ %tmp = load i64, i64* %d, align 8
+ %and5 = and i64 %tmp, -16911433729
+ %or = or i64 %and5, 1073741824
+ store i64 %or, i64* %d, align 8
+ ret void
+}
+
+define void @bstrinsd_49_34(i64* nocapture %d) nounwind {
+; CHECK-LABEL: bstrinsd_49_34:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $r5, $r4, 0
+; CHECK-NEXT: srli.d $r6, $r5, 50
+; CHECK-NEXT: bstrins.d $r5, $r6, 49, 34
+; CHECK-NEXT: st.d $r5, $r4, 0
+; CHECK-NEXT: jr $ra
+entry:
+ %tmp0 = load i64, i64* %d, align 8
+ %lshr = lshr i64 %tmp0, 50
+ %tmp1 = load i64, i64* %d, align 8
+ %shl = shl nuw nsw i64 %lshr, 34
+ %and = and i64 %tmp1, -1125882726973441
 %or = or i64 %and, %shl
- ret i64 %or
-}
-
-define i64 @pat4_swap(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: pat4_swap:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.d $a0, $a1, 63, 8
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i64 %a, 255
- %shl = shl i64 %b, 8
- %or = or i64 %shl, %and
- ret i64 %or
-}
-
-;; Pattern 5
-;; R = or (and X, mask0), const
-;; =>
-;; R = BSTRINS X, (const >> lsb), msb, lsb
-define i64 @pat5(i64 %a) nounwind {
-; CHECK-LABEL: pat5:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a1, 74565
-; CHECK-NEXT: ori $a1, $a1, 1656
-; CHECK-NEXT: bstrins.d $a0, $a1, 47, 16
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff
- %or = or i64 %and, 20015998304256 ; 0x0000123456780000
- ret i64 %or
-}
-
-;; Pattern 6: a = b | ((c & mask) << shamt)
-;; In this testcase b is 0x123456000000789a, but in fact we do not require b
-;; being a constant. As long as all positions in b to be overwritten by the
-;; incoming bits are known to be zero, the pattern could be matched.
-define i64 @pat6(i64 %c) nounwind {
-; CHECK-LABEL: pat6:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a1, 7
-; CHECK-NEXT: ori $a1, $a1, 2202
-; CHECK-NEXT: lu32i.d $a1, 284160
-; CHECK-NEXT: lu52i.d $a1, $a1, 291
-; CHECK-NEXT: bstrins.d $a1, $a0, 39, 16
-; CHECK-NEXT: move $a0, $a1
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i64 %c, 16777215 ; 0x0000000000ffffff
- %shl = shl i64 %and, 16
- %or = or i64 %shl, 1311767949471676570 ; 0x123456000000789a
- ret i64 %or
-}
-
-;; Pattern 7: a = b | ((c << shamt) & shifted_mask)
-;; Similar to pattern 6.
-define i64 @pat7(i64 %c) nounwind {
-; CHECK-LABEL: pat7:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a1, 7
-; CHECK-NEXT: ori $a1, $a1, 2202
-; CHECK-NEXT: lu32i.d $a1, 284160
-; CHECK-NEXT: lu52i.d $a1, $a1, 291
-; CHECK-NEXT: bstrins.d $a1, $a0, 39, 16
-; CHECK-NEXT: move $a0, $a1
-; CHECK-NEXT: jirl $zero, $ra, 0
- %shl = shl i64 %c, 16
- %and = and i64 %shl, 1099511562240 ; 0x000000ffffff0000
- %or = or i64 %and, 1311767949471676570 ; 0x123456000000789a
- ret i64 %or
-}
-
-;; Pattern 8: a = b | (c & shifted_mask)
-;; Similar to pattern 7 but without shift to c.
-define i64 @pat8(i64 %c) nounwind {
-; CHECK-LABEL: pat8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli.d $a1, $a0, 16
-; CHECK-NEXT: lu12i.w $a0, 7
-; CHECK-NEXT: ori $a0, $a0, 2202
-; CHECK-NEXT: lu32i.d $a0, 284160
-; CHECK-NEXT: lu52i.d $a0, $a0, 291
-; CHECK-NEXT: bstrins.d $a0, $a1, 39, 16
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i64 %c, 1099511562240 ; 0x000000ffffff0000
- %or = or i64 %and, 1311767949471676570 ; 0x123456000000789a
- ret i64 %or
-}
-
-;; Test that bstrins.d is not generated because constant OR operand
-;; doesn't fit into bits cleared by constant AND operand.
-define i64 @no_bstrins_d(i64 %a) nounwind {
-; CHECK-LABEL: no_bstrins_d:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a1, 354185
-; CHECK-NEXT: lu32i.d $a1, 4660
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: lu12i.w $a1, 354191
-; CHECK-NEXT: ori $a1, $a1, 4095
-; CHECK-NEXT: lu32i.d $a1, -60876
-; CHECK-NEXT: and $a0, $a0, $a1
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i64 %a, 18446462598732906495 ; 0xffff00000000ffff
- %or = or i64 %and, 20015998341120 ; 0x0000123456789000
- ret i64 %or
+ store i64 %or, i64* %d, align 8
+ ret void
}
diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
index 47c4d826c2ee..3b62a760e81b 100644
--- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll
+++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll
@@ -1,212 +1,28 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s
-
-;; Test generation of the bstrins.w instruction.
-;; There are 8 patterns that can be matched to bstrins.w. See performORCombine
-;; for details.
-
-;; Pattern 1
-;; R = or (and X, mask0), (and (shl Y, lsb), mask1)
-;; =>
-;; R = BSTRINS X, Y, msb, lsb
-define i32 @pat1(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: pat1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i32 %a, -1048321 ; 0xfff000ff
- %shl = shl i32 %b, 8
- %and2 = and i32 %shl, 1048320 ; 0x000fff00
- %or = or i32 %and1, %and2
- ret i32 %or
-}
-
-define i32 @pat1_swap(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: pat1_swap:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i32 %a, -1048321 ; 0xfff000ff
- %shl = shl i32 %b, 8
- %and2 = and i32 %shl, 1048320 ; 0x000fff00
- %or = or i32 %and2, %and1
- ret i32 %or
-}
-
-;; Pattern 2
-;; R = or (and X, mask0), (shl (and Y, mask1), lsb)
-;; =>
-;; R = BSTRINS X, Y, msb, lsb
-define i32 @pat2(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: pat2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i32 %a, -1048321 ; 0xfff000ff
- %and2 = and i32 %b, 4095 ; 0x00000fff
- %shl = shl i32 %and2, 8
- %or = or i32 %and1, %shl
- ret i32 %or
-}
-
-define i32 @pat2_swap(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: pat2_swap:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.w $a0, $a1, 19, 8
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i32 %a, -1048321 ; 0xfff000ff
- %and2 = and i32 %b, 4095 ; 0x00000fff
- %shl = shl i32 %and2, 8
- %or = or i32 %shl, %and1
- ret i32 %or
-}
-
-;; Pattern 3
-;; R = or (and X, mask0), (and Y, mask1)
-;; =>
-;; R = BSTRINS X, (srl (and Y, mask1), lsb), msb, lsb
-define i32 @pat3(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: pat3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: andi $a1, $a1, 288
-; CHECK-NEXT: srli.w $a1, $a1, 4
-; CHECK-NEXT: bstrins.w $a0, $a1, 11, 4
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i32 %a, -4081 ; 0xfffff00f
- %and2 = and i32 %b, 288 ; 0x00000120
- %or = or i32 %and1, %and2
- ret i32 %or
-}
-
-define i32 @pat3_swap(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: pat3_swap:
-; CHECK: # %bb.0:
-; CHECK-NEXT: andi $a1, $a1, 288
-; CHECK-NEXT: srli.w $a1, $a1, 4
-; CHECK-NEXT: bstrins.w $a0, $a1, 11, 4
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i32 %a, -4081 ; 0xfffff00f
- %and2 = and i32 %b, 288 ; 0x00000120
- %or = or i32 %and2, %and1
- ret i32 %or
-}
-
-define i32 @pat3_positive_mask0(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: pat3_positive_mask0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli.w $a1, $a1, 28
-; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and1 = and i32 %a, 268435455 ; 0x0fffffff
- %and2 = and i32 %b, 4026531840 ; 0xf0000000
- %or = or i32 %and1, %and2
- ret i32 %or
-}
-
-;; Pattern 4
-;; R = or (and X, mask), (shl Y, shamt)
-;; =>
-;; R = BSTRINS X, Y, 31, shamt
-define i32 @pat4(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: pat4:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i32 %a, 268435455 ; 0x0fffffff
- %shl = shl i32 %b, 28
- %or = or i32 %and, %shl
- ret i32 %or
-}
-
-define i32 @pat4_swap(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: pat4_swap:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrins.w $a0, $a1, 31, 28
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i32 %a, 268435455 ; 0x0fffffff
- %shl = shl i32 %b, 28
- %or = or i32 %shl, %and
- ret i32 %or
-}
-
-;; Pattern 5
-;; R = or (and X, mask), const
-;; =>
-;; R = BSTRINS X, (const >> lsb), msb, lsb
-define i32 @pat5(i32 %a) nounwind {
-; CHECK-LABEL: pat5:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a1, 1
-; CHECK-NEXT: ori $a1, $a1, 564
-; CHECK-NEXT: bstrins.w $a0, $a1, 23, 8
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i32 %a, 4278190335 ; 0xff0000ff
- %or = or i32 %and, 1192960 ; 0x00123400
- ret i32 %or
-}
-
-;; Pattern 6: a = b | ((c & mask) << shamt)
-;; In this testcase b is 0x10000002, but in fact we do not require b being a
-;; constant. As long as all positions in b to be overwritten by the incoming
-;; bits are known to be zero, the pattern could be matched.
-define i32 @pat6(i32 %c) nounwind {
-; CHECK-LABEL: pat6:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a1, 65536
-; CHECK-NEXT: ori $a1, $a1, 2
-; CHECK-NEXT: bstrins.w $a1, $a0, 27, 4
-; CHECK-NEXT: move $a0, $a1
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i32 %c, 16777215 ; 0x00ffffff
- %shl = shl i32 %and, 4
- %or = or i32 %shl, 268435458 ; 0x10000002
- ret i32 %or
-}
-
-;; Pattern 7: a = b | ((c << shamt) & shifted_mask)
-;; Similar to pattern 6.
-define i32 @pat7(i32 %c) nounwind {
-; CHECK-LABEL: pat7:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a1, 65536
-; CHECK-NEXT: ori $a1, $a1, 2
-; CHECK-NEXT: bstrins.w $a1, $a0, 27, 4
-; CHECK-NEXT: move $a0, $a1
-; CHECK-NEXT: jirl $zero, $ra, 0
- %shl = shl i32 %c, 4
- %and = and i32 %shl, 268435440 ; 0x0ffffff0
- %or = or i32 %and, 268435458 ; 0x10000002
- ret i32 %or
-}
-
-;; Pattern 8: a = b | (c & shifted_mask)
-;; Similar to pattern 7 but without shift to c.
-define i32 @pat8(i32 %c) nounwind {
-; CHECK-LABEL: pat8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli.w $a1, $a0, 4
-; CHECK-NEXT: lu12i.w $a0, 65536
-; CHECK-NEXT: ori $a0, $a0, 2
-; CHECK-NEXT: bstrins.w $a0, $a1, 27, 4
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i32 %c, 268435440 ; 0x0ffffff0
- %or = or i32 %and, 268435458 ; 0x10000002
- ret i32 %or
-}
-
-;; Test that bstrins.w is not generated because constant OR operand
-;; doesn't fit into bits cleared by constant AND operand.
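+;; bstrins.w is the 32-bit counterpart of bstrins.d: it inserts into bits
+;; [msb, lsb] of the low word, with msb below 32.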
-define i32 @no_bstrins_w(i32 %a) nounwind {
-; CHECK-LABEL: no_bstrins_w:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a1, 291
-; CHECK-NEXT: ori $a1, $a1, 1104
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: lu12i.w $a1, -3805
-; CHECK-NEXT: ori $a1, $a1, 1279
-; CHECK-NEXT: and $a0, $a0, $a1
-; CHECK-NEXT: jirl $zero, $ra, 0
- %and = and i32 %a, 4278190335 ; 0xff0000ff
- %or = or i32 %and, 1193040 ; 0x00123450
- ret i32 %or
+; RUN: llc -march=loongarch64 -o - %s | FileCheck %s
+
+define void @bstrins_w(i32 %s, i32* nocapture %d) nounwind {
+; CHECK-LABEL: bstrins_w:
+; CHECK: bstrins.w $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]], 13, 5
+entry:
+ %and = shl i32 %s, 5
+ %shl = and i32 %and, 16352
+ %tmp3 = load i32, i32* %d, align 4
+ %and5 = and i32 %tmp3, -16353
+ %or = or i32 %and5, %shl
+ store i32 %or, i32* %d, align 4
+ ret void
+}
+
+define i32 @no_bstrinsw(i32* nocapture %d) {
+; CHECK-LABEL: no_bstrinsw:
+; CHECK: addi.w $r[[REG2:[0-9]+]], $zero, -4
+; CHECK: and $r[[REG1:[0-9]+]], $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]]
+; CHECK: ori $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]], 8
+; CHECK-NOT: bstrins.w {{[[:space:]].*}}
+entry:
+ %tmp = load volatile i32, i32* %d, align 4
+ %and = and i32 %tmp, -4
+ %or = or i32 %and, 8
+ store volatile i32 %or, i32* %d, align 4
+ ret i32 %and
 }
diff --git a/llvm/test/CodeGen/LoongArch/bstrpick_d.ll b/llvm/test/CodeGen/LoongArch/bstrpick_d.ll
index 51d4967dc3f5..e1169cb21fc1 100644
--- a/llvm/test/CodeGen/LoongArch/bstrpick_d.ll
+++ b/llvm/test/CodeGen/LoongArch/bstrpick_d.ll
@@ -1,97 +1,64 @@
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
+; RUN: llc -march=loongarch64 -o - %s | FileCheck %s
+
+define i64 @bstrpickd_add_zext(i32 signext %n) {
+entry:
+ %add = add i32 %n, 1
+ %res = zext i32 %add to i64
+ ret i64 %res
+
+; CHECK-LABEL: bstrpickd_add_zext:
+; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 31, 0
-define i64 @lshr40_and255(i64 %a) {
-; CHECK-LABEL: lshr40_and255:
-; CHECK: # %bb.0:
-; CHECK-NEXT: bstrpick.d $a0, $a0, 47, 40
-; CHECK-NEXT: jirl $zero, $ra, 0
- %shr = lshr i64 %a, 40
- %and = and i64 %shr, 255
- ret i64 %and
 }
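+; bstrpick.d $rd, $rj, msb, lsb extracts bits [msb, lsb] of $rj and
+; zero-extends them into $rd, so zext and lshr+and idioms select it.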
-define i64 @and4095(i64 %a) { -; CHECK-LABEL: and4095: -; CHECK: # %bb.0: -; CHECK-NEXT: andi $a0, $a0, 4095 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i64 %a, 4095 +define i64 @bstrpickd_lsr_and8(i64 zeroext %a) { +entry: + %shr = lshr i64 %a, 40 + %and = and i64 %shr, 255 ret i64 %and -} -;; (srl (and a, 0xff0), 4) => (BSTRPICK a, 11, 4) -define i64 @and0xff0_lshr4(i64 %a) { -; CHECK-LABEL: and0xff0_lshr4: -; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.d $a0, $a0, 11, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i64 %a, 4080 - %shr = lshr i64 %and, 4 - ret i64 %shr -} +; CHECK-LABEL: bstrpickd_lsr_and8: +; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 47, 40 -;; (sra (and a, 0xff0), 5) can also be combined to (BSTRPICK a, 11, 5). -;; This is because (sra (and a, 0xff0)) would be combined to (srl (and a, 0xff0), 5) -;; firstly by DAGCombiner::SimplifyDemandedBits. -define i64 @and4080_ashr5(i64 %a) { -; CHECK-LABEL: and4080_ashr5: -; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.d $a0, $a0, 11, 5 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i64 %a, 4080 - %shr = ashr i64 %and, 5 - ret i64 %shr } -;; Negative test: the second operand of AND is not a shifted mask -define i64 @and0xf30_lshr4(i64 %a) { -; CHECK-LABEL: and0xf30_lshr4: -; CHECK: # %bb.0: -; CHECK-NEXT: andi $a0, $a0, 3888 -; CHECK-NEXT: srli.d $a0, $a0, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i64 %a, 3888 - %shr = lshr i64 %and, 4 - ret i64 %shr +define i64 @bstrpickd_zext(i32 signext %a) { +entry: + %conv = zext i32 %a to i64 + ret i64 %conv + +; CHECK-LABEL: bstrpickd_zext: +; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 31, 0 + } -;; Negative test: Shamt < MaskIdx -define i64 @and0xff0_lshr3(i64 %a) { -; CHECK-LABEL: and0xff0_lshr3: -; CHECK: # %bb.0: -; CHECK-NEXT: andi $a0, $a0, 4080 -; CHECK-NEXT: srli.d $a0, $a0, 3 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i64 %a, 4080 - %shr = lshr i64 %and, 3 +define i64 @bstrpickd_and_lsr(i64 zeroext %n) { +entry: + %and = lshr i64 %n, 8 + %shr = and i64 %and, 4095 ret i64 %shr + +; CHECK-LABEL: bstrpickd_and_lsr: +; CHECK: bstrpick.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 19, 8 + } diff --git a/llvm/test/CodeGen/LoongArch/bstrpick_w.ll b/llvm/test/CodeGen/LoongArch/bstrpick_w.ll index 92d79019a7e3..e60de4737806 100644 --- a/llvm/test/CodeGen/LoongArch/bstrpick_w.ll +++ b/llvm/test/CodeGen/LoongArch/bstrpick_w.ll @@ -1,97 +1,18 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s -define i32 @lshr10_and255(i32 %a) { -; CHECK-LABEL: lshr10_and255: -; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.w $a0, $a0, 17, 10 -; CHECK-NEXT: jirl $zero, $ra, 0 - %shr = lshr i32 %a, 10 - %and = and i32 %shr, 255 +define i32 @bstrpickw_and24(i32 signext %a) { +; CHECK-LABEL: bstrpickw_and24: +; CHECK: bstrpick.w $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 23, 0 +entry: + %and = and i32 %a, 16777215 ret i32 %and } -define i32 @ashr20_and511(i32 %a) { -; CHECK-LABEL: ashr20_and511: -; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.w $a0, $a0, 28, 20 -; CHECK-NEXT: jirl $zero, $ra, 0 - %shr = ashr i32 %a, 20 +define i32 @bstrpickw_lshr_and(i32 %s, i32 %pos, i32 %sz) nounwind readnone { +; CHECK-LABEL: bstrpickw_lshr_and: +; CHECK: bstrpick.w $r[[REG:[0-9]+]], $r[[REG:[0-9]+]], 13, 5 +entry: + %shr = lshr i32 %s, 5 %and = and i32 %shr, 511 ret i32 %and } - -define i32 @zext_i16_to_i32(i16 %a) { -; CHECK-LABEL: zext_i16_to_i32: -; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.w $a0, $a0, 15, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 - %res = zext i16 
%a to i32 - ret i32 %res -} - -define i32 @and8191(i32 %a) { -; CHECK-LABEL: and8191: -; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.w $a0, $a0, 12, 0 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i32 %a, 8191 - ret i32 %and -} - -;; Check that andi but not bstrpick.d is generated. -define i32 @and4095(i32 %a) { -; CHECK-LABEL: and4095: -; CHECK: # %bb.0: -; CHECK-NEXT: andi $a0, $a0, 4095 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i32 %a, 4095 - ret i32 %and -} - -;; (srl (and a, 0xff0), 4) => (BSTRPICK a, 11, 4) -define i32 @and0xff0_lshr4(i32 %a) { -; CHECK-LABEL: and0xff0_lshr4: -; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.w $a0, $a0, 11, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i32 %a, 4080 - %shr = lshr i32 %and, 4 - ret i32 %shr -} - -;; (sra (and a, 0xff0), 5) can also be combined to (BSTRPICK a, 11, 5). -;; This is because (sra (and a, 0xff0)) would be combined to (srl (and a, 0xff0), 5) -;; firstly by DAGCombiner::SimplifyDemandedBits. -define i32 @and4080_ashr5(i32 %a) { -; CHECK-LABEL: and4080_ashr5: -; CHECK: # %bb.0: -; CHECK-NEXT: bstrpick.w $a0, $a0, 11, 5 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i32 %a, 4080 - %shr = ashr i32 %and, 5 - ret i32 %shr -} - -;; Negative test: the second operand of AND is not a shifted mask -define i32 @and0xf30_lshr4(i32 %a) { -; CHECK-LABEL: and0xf30_lshr4: -; CHECK: # %bb.0: -; CHECK-NEXT: andi $a0, $a0, 3888 -; CHECK-NEXT: srli.w $a0, $a0, 4 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i32 %a, 3888 - %shr = lshr i32 %and, 4 - ret i32 %shr -} - -;; Negative test: Shamt < MaskIdx -define i32 @and0xff0_lshr3(i32 %a) { -; CHECK-LABEL: and0xff0_lshr3: -; CHECK: # %bb.0: -; CHECK-NEXT: andi $a0, $a0, 4080 -; CHECK-NEXT: srli.w $a0, $a0, 3 -; CHECK-NEXT: jirl $zero, $ra, 0 - %and = and i32 %a, 4080 - %shr = lshr i32 %and, 3 - ret i32 %shr -} diff --git a/llvm/test/CodeGen/LoongArch/builtins-loongarch-base.ll b/llvm/test/CodeGen/LoongArch/builtins-loongarch-base.ll new file mode 100644 index 000000000000..d6c8e72f2ed7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/builtins-loongarch-base.ll @@ -0,0 +1,752 @@ +; Test the base intrinsics. 
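+; Each test loads its operands from stack slots, calls one llvm.loongarch.*
+; intrinsic, and stores the result, so the checks can match the emitted
+; instruction against the surrounding ld/st sequence.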
+; RUN: llc -march=loongarch64 -o - %s | FileCheck %s
+
+define void @cpucfg() {
+entry:
+ %u32_r = alloca i32, align 4
+ %u32_a = alloca i32, align 4
+ %0 = load i32, i32* %u32_a, align 4
+ %1 = call i32 @llvm.loongarch.cpucfg(i32 %0)
+ store i32 %1, i32* %u32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.cpucfg(i32)
+
+; CHECK-LABEL: cpucfg:
+; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 8
+; CHECK: cpucfg $r[[REG:[0-9]+]], $r[[REG:[0-9]+]]
+; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: jr $ra
+;
+
+define void @csrrd_w() {
+entry:
+ %u32_r = alloca i32, align 4
+ %0 = call i32 @llvm.loongarch.csrrd.w(i32 1)
+ store i32 %0, i32* %u32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.csrrd.w(i32)
+
+; CHECK-LABEL: csrrd_w:
+; CHECK: csrrd $r[[REG:[0-9]+]], 1
+; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: jr $ra
+;
+
+define void @csrrd_d() {
+entry:
+ %u64_r = alloca i64, align 8
+ %0 = call i64 @llvm.loongarch.csrrd.d(i64 1)
+ store i64 %0, i64* %u64_r, align 8
+ ret void
+}
+
+declare i64 @llvm.loongarch.csrrd.d(i64)
+
+; CHECK-LABEL: csrrd_d:
+; CHECK: csrrd $r[[REG:[0-9]+]], 1
+; CHECK: st.d $r[[REG:[0-9]+]], $sp, 8
+; CHECK: jr $ra
+;
+
+define void @csrwr_w() {
+entry:
+ %u32_r = alloca i32, align 4
+ %u32_a = alloca i32, align 4
+ %0 = load i32, i32* %u32_a, align 4
+ %1 = call i32 @llvm.loongarch.csrwr.w(i32 %0, i32 1)
+ store i32 %1, i32* %u32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.csrwr.w(i32, i32)
+
+; CHECK-LABEL: csrwr_w:
+; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 8
+; CHECK: csrwr $r[[REG:[0-9]+]], 1
+; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: jr $ra
+;
+
+define void @csrwr_d() {
+entry:
+ %u64_r = alloca i64, align 8
+ %u64_a = alloca i64, align 8
+ %0 = load i64, i64* %u64_a, align 8
+ %1 = call i64 @llvm.loongarch.csrwr.d(i64 %0, i64 1)
+ store i64 %1, i64* %u64_r, align 8
+ ret void
+}
+
+declare i64 @llvm.loongarch.csrwr.d(i64, i64)
+
+; CHECK-LABEL: csrwr_d:
+; CHECK: ld.d $r[[REG:[0-9]+]], $sp, 0
+; CHECK: csrwr $r[[REG:[0-9]+]], 1
+; CHECK: st.d $r[[REG:[0-9]+]], $sp, 8
+; CHECK: jr $ra
+;
+
+define void @csrxchg_w() {
+entry:
+ %u32_r = alloca i32, align 4
+ %u32_a = alloca i32, align 4
+ %u32_b = alloca i32, align 4
+ %0 = load i32, i32* %u32_a, align 4
+ %1 = load i32, i32* %u32_b, align 4
+ %2 = call i32 @llvm.loongarch.csrxchg.w(i32 %0, i32 %1, i32 1)
+ store i32 %2, i32* %u32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.csrxchg.w(i32, i32, i32)
+
+; CHECK-LABEL: csrxchg_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 4
+; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 8
+; CHECK: csrxchg $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], 1
+; CHECK: st.w $r[[REG1:[0-9]+]], $sp, 12
+; CHECK: jr $ra
+;
+
+define void @csrxchg_d() {
+entry:
+ %u64_r = alloca i64, align 8
+ %u64_a = alloca i64, align 8
+ %u64_b = alloca i64, align 8
+ %0 = load i64, i64* %u64_a, align 8
+ %1 = load i64, i64* %u64_b, align 8
+ %2 = call i64 @llvm.loongarch.csrxchg.d(i64 %0, i64 %1, i64 1)
+ store i64 %2, i64* %u64_r, align 8
+ ret void
+}
+
+declare i64 @llvm.loongarch.csrxchg.d(i64, i64, i64)
+
+; CHECK-LABEL: csrxchg_d:
+; CHECK: ld.d $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 16
+; CHECK: csrxchg $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], 1
+; CHECK: st.d $r[[REG1:[0-9]+]], $sp, 24
+; CHECK: jr $ra
+;
+
+define void @iocsrrd_b() {
+entry:
+ %u32_a = alloca i32, align 4
+ %u8_r = alloca i8, align 1
+ %0 = load i32, i32* %u32_a, align 4
+ %1 = call i32 @llvm.loongarch.iocsrrd.b(i32 %0)
+ %conv = trunc i32 %1 to i8
+ store i8 %conv, i8* %u8_r, align 1
+ ret void
+}
+
+declare i32 @llvm.loongarch.iocsrrd.b(i32)
+
+; CHECK-LABEL: iocsrrd_b:
+; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: iocsrrd.b $r[[REG:[0-9]+]], $r[[REG:[0-9]+]]
+; CHECK: st.b $r[[REG:[0-9]+]], $sp, 8
+; CHECK: jr $ra
+;
+
+define void @iocsrrd_h() {
+entry:
+ %u32_a = alloca i32, align 4
+ %u16_r = alloca i16, align 2
+ %0 = load i32, i32* %u32_a, align 4
+ %1 = call i32 @llvm.loongarch.iocsrrd.h(i32 %0)
+ %conv = trunc i32 %1 to i16
+ store i16 %conv, i16* %u16_r, align 2
+ ret void
+}
+
+declare i32 @llvm.loongarch.iocsrrd.h(i32)
+
+; CHECK-LABEL: iocsrrd_h:
+; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: iocsrrd.h $r[[REG:[0-9]+]], $r[[REG:[0-9]+]]
+; CHECK: st.h $r[[REG:[0-9]+]], $sp, 8
+; CHECK: jr $ra
+;
+
+define void @iocsrrd_w() {
+entry:
+ %u32_r = alloca i32, align 4
+ %u32_a = alloca i32, align 4
+ %0 = load i32, i32* %u32_a, align 4
+ %1 = call i32 @llvm.loongarch.iocsrrd.w(i32 %0)
+ store i32 %1, i32* %u32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.iocsrrd.w(i32)
+
+; CHECK-LABEL: iocsrrd_w:
+; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 8
+; CHECK: iocsrrd.w $r[[REG:[0-9]+]], $r[[REG:[0-9]+]]
+; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: jr $ra
+;
+
+define void @iocsrrd_d() {
+entry:
+ %u32_a = alloca i32, align 4
+ %u64_r = alloca i64, align 8
+ %0 = load i32, i32* %u32_a, align 4
+ %1 = call i64 @llvm.loongarch.iocsrrd.d(i32 %0)
+ store i64 %1, i64* %u64_r, align 8
+ ret void
+}
+
+declare i64 @llvm.loongarch.iocsrrd.d(i32)
+
+; CHECK-LABEL: iocsrrd_d:
+; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: iocsrrd.d $r[[REG:[0-9]+]], $r[[REG:[0-9]+]]
+; CHECK: st.d $r[[REG:[0-9]+]], $sp, 0
+; CHECK: jr $ra
+;
+
+define void @iocsrwr_b() {
+entry:
+ %u32_a = alloca i32, align 4
+ %u8_a = alloca i8, align 1
+ %0 = load i8, i8* %u8_a, align 1
+ %conv = zext i8 %0 to i32
+ %1 = load i32, i32* %u32_a, align 4
+ call void @llvm.loongarch.iocsrwr.b(i32 %conv, i32 %1)
+ ret void
+}
+
+declare void @llvm.loongarch.iocsrwr.b(i32, i32)
+
+; CHECK-LABEL: iocsrwr_b:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 12
+; CHECK: ld.bu $r[[REG2:[0-9]+]], $sp, 8
+; CHECK: iocsrwr.b $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @iocsrwr_h() {
+entry:
+ %u32_a = alloca i32, align 4
+ %u16_a = alloca i16, align 2
+ %0 = load i16, i16* %u16_a, align 2
+ %conv = zext i16 %0 to i32
+ %1 = load i32, i32* %u32_a, align 4
+ call void @llvm.loongarch.iocsrwr.h(i32 %conv, i32 %1)
+ ret void
+}
+
+declare void @llvm.loongarch.iocsrwr.h(i32, i32)
+
+; CHECK-LABEL: iocsrwr_h:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 12
+; CHECK: ld.hu $r[[REG2:[0-9]+]], $sp, 8
+; CHECK: iocsrwr.h $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @iocsrwr_w() {
+entry:
+ %u32_a = alloca i32, align 4
+ %u32_b = alloca i32, align 4
+ %0 = load i32, i32* %u32_a, align 4
+ %1 = load i32, i32* %u32_b, align 4
+ call void @llvm.loongarch.iocsrwr.w(i32 %0, i32 %1)
+ ret void
+}
+
+declare void @llvm.loongarch.iocsrwr.w(i32, i32)
+
+; CHECK-LABEL: iocsrwr_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 12
+; CHECK: iocsrwr.w $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @iocsrwr_d() {
+entry:
+ %u32_a = alloca i32, align 4
+ %u64_a = alloca i64, align 8
+ %0 = load i64, i64* %u64_a, align 8
+ %1 = load i32, i32* %u32_a, align 4
+ call void @llvm.loongarch.iocsrwr.d(i64 %0, i32 %1)
+ ret void
+}
+
+declare void @llvm.loongarch.iocsrwr.d(i64, i32)
+
+; CHECK-LABEL: iocsrwr_d:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 12
+; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 0
+; CHECK: iocsrwr.d $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @cacop_w() {
+entry:
+ %i32_a = alloca i32, align 4
+ %0 = load i32, i32* %i32_a, align 4
+ call void @llvm.loongarch.cacop.w(i32 1, i32 %0, i32 2)
+ ret void
+}
+
+declare void @llvm.loongarch.cacop.w(i32, i32, i32)
+
+; CHECK-LABEL: cacop_w:
+; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: cacop 1, $r[[REG:[0-9]+]], 2
+; CHECK: jr $ra
+;
+
+define void @cacop_d() {
+entry:
+ %i64_a = alloca i64, align 8
+ %0 = load i64, i64* %i64_a, align 8
+ call void @llvm.loongarch.cacop.d(i32 1, i64 %0, i64 2)
+ ret void
+}
+
+declare void @llvm.loongarch.cacop.d(i32, i64, i64)
+
+; CHECK-LABEL: cacop_d:
+; CHECK: ld.d $r[[REG:[0-9]+]], $sp, 8
+; CHECK: cacop 1, $r[[REG:[0-9]+]], 2
+; CHECK: jr $ra
+;
+
+define void @rdtime_d() {
+entry:
+ %value = alloca i64, align 8
+ %timeid = alloca i64, align 8
+ %0 = call { i64, i64 } asm sideeffect "rdtime.d\09$0,$1\0A\09", "=&r,=&r"() nounwind
+ %asmresult0 = extractvalue { i64, i64 } %0, 0
+ %asmresult1 = extractvalue { i64, i64 } %0, 1
+ store i64 %asmresult0, i64* %value, align 8
+ store i64 %asmresult1, i64* %timeid, align 8
+ ret void
+}
+
+; CHECK-LABEL: rdtime_d:
+; CHECK: rdtime.d $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]]
+; CHECK: st.d $r[[REG2:[0-9]+]], $sp, 8
+; CHECK: st.d $r[[REG1:[0-9]+]], $sp, 0
+; CHECK: jr $ra
+;
+
+define void @rdtimeh_w() {
+entry:
+ %value = alloca i32, align 4
+ %timeid = alloca i32, align 4
+ %0 = call { i32, i32 } asm sideeffect "rdtimeh.w\09$0,$1\0A\09", "=&r,=&r"() nounwind
+ %asmresult0 = extractvalue { i32, i32 } %0, 0
+ %asmresult1 = extractvalue { i32, i32 } %0, 1
+ store i32 %asmresult0, i32* %value, align 4
+ store i32 %asmresult1, i32* %timeid, align 4
+ ret void
+}
+
+; CHECK-LABEL: rdtimeh_w:
+; CHECK: rdtimeh.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]]
+; CHECK: st.w $r[[REG2:[0-9]+]], $sp, 12
+; CHECK: st.w $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: jr $ra
+;
+
+define void @rdtimel_w() {
+entry:
+ %value = alloca i32, align 4
+ %timeid = alloca i32, align 4
+ %0 = call { i32, i32 } asm sideeffect "rdtimel.w\09$0,$1\0A\09", "=&r,=&r"() nounwind
+ %asmresult0 = extractvalue { i32, i32 } %0, 0
+ %asmresult1 = extractvalue { i32, i32 } %0, 1
+ store i32 %asmresult0, i32* %value, align 4
+ store i32 %asmresult1, i32* %timeid, align 4
+ ret void
+}
+
+; CHECK-LABEL: rdtimel_w:
+; CHECK: rdtimel.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]]
+; CHECK: st.w $r[[REG2:[0-9]+]], $sp, 12
+; CHECK: st.w $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: jr $ra
+;
+
+define void @crc_w_b_w() {
+entry:
+ %i32_r = alloca i32, align 4
+ %i32_a = alloca i32, align 4
+ %i8_a = alloca i8, align 1
+ %0 = load i8, i8* %i8_a, align 1
+ %conv = sext i8 %0 to i32
+ %1 = load i32, i32* %i32_a, align 4
+ %2 = call i32 @llvm.loongarch.crc.w.b.w(i32 %conv, i32 %1)
+ store i32 %2, i32* %i32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.crc.w.b.w(i32, i32)
+
+; CHECK-LABEL: crc_w_b_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: ld.b $r[[REG2:[0-9]+]], $sp, 4
+; CHECK: crc.w.b.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @crc_w_h_w() {
+entry:
+ %i32_r = alloca i32, align 4
+ %i32_a = alloca i32, align 4
+ %i16_a = alloca i16, align 2
+ %0 = load i16, i16* %i16_a, align 2
+ %conv = sext i16 %0 to i32
+ %1 = load i32, i32* %i32_a, align 4
+ %2 = call i32 @llvm.loongarch.crc.w.h.w(i32 %conv, i32 %1)
+ store i32 %2, i32* %i32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.crc.w.h.w(i32, i32)
+
+; CHECK-LABEL: crc_w_h_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: ld.h $r[[REG2:[0-9]+]], $sp, 4
+; CHECK: crc.w.h.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @crc_w_w_w() {
+entry:
+ %i32_r = alloca i32, align 4
+ %i32_a = alloca i32, align 4
+ %i32_b = alloca i32, align 4
+ %0 = load i32, i32* %i32_a, align 4
+ %1 = load i32, i32* %i32_b, align 4
+ %2 = call i32 @llvm.loongarch.crc.w.w.w(i32 %0, i32 %1)
+ store i32 %2, i32* %i32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.crc.w.w.w(i32, i32)
+
+; CHECK-LABEL: crc_w_w_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 4
+; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 8
+; CHECK: crc.w.w.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @crc_w_d_w() {
+entry:
+ %i32_r = alloca i32, align 4
+ %i32_a = alloca i32, align 4
+ %i64_a = alloca i64, align 8
+ %0 = load i64, i64* %i64_a, align 8
+ %1 = load i32, i32* %i32_a, align 4
+ %2 = call i32 @llvm.loongarch.crc.w.d.w(i64 %0, i32 %1)
+ store i32 %2, i32* %i32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.crc.w.d.w(i64, i32)
+
+; CHECK-LABEL: crc_w_d_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 0
+; CHECK: crc.w.d.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @crcc_w_b_w() {
+entry:
+ %i32_r = alloca i32, align 4
+ %i32_a = alloca i32, align 4
+ %i8_a = alloca i8, align 1
+ %0 = load i8, i8* %i8_a, align 1
+ %conv = sext i8 %0 to i32
+ %1 = load i32, i32* %i32_a, align 4
+ %2 = call i32 @llvm.loongarch.crcc.w.b.w(i32 %conv, i32 %1)
+ store i32 %2, i32* %i32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.crcc.w.b.w(i32, i32)
+
+; CHECK-LABEL: crcc_w_b_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: ld.b $r[[REG2:[0-9]+]], $sp, 4
+; CHECK: crcc.w.b.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @crcc_w_h_w() {
+entry:
+ %i32_r = alloca i32, align 4
+ %i32_a = alloca i32, align 4
+ %i16_a = alloca i16, align 2
+ %0 = load i16, i16* %i16_a, align 2
+ %conv = sext i16 %0 to i32
+ %1 = load i32, i32* %i32_a, align 4
+ %2 = call i32 @llvm.loongarch.crcc.w.h.w(i32 %conv, i32 %1)
+ store i32 %2, i32* %i32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.crcc.w.h.w(i32, i32)
+
+; CHECK-LABEL: crcc_w_h_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: ld.h $r[[REG2:[0-9]+]], $sp, 4
+; CHECK: crcc.w.h.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @crcc_w_w_w() {
+entry:
+ %i32_r = alloca i32, align 4
+ %i32_a = alloca i32, align 4
+ %i32_b = alloca i32, align 4
+ %0 = load i32, i32* %i32_a, align 4
+ %1 = load i32, i32* %i32_b, align 4
+ %2 = call i32 @llvm.loongarch.crcc.w.w.w(i32 %0, i32 %1)
+ store i32 %2, i32* %i32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.crcc.w.w.w(i32, i32)
+
+; CHECK-LABEL: crcc_w_w_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 4
+; CHECK: ld.w $r[[REG2:[0-9]+]], $sp, 8
+; CHECK: crcc.w.w.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @crcc_w_d_w() {
+entry:
+ %i32_r = alloca i32, align 4
+ %i32_a = alloca i32, align 4
+ %i64_a = alloca i64, align 8
+ %0 = load i64, i64* %i64_a, align 8
+ %1 = load i32, i32* %i32_a, align 4
+ %2 = call i32 @llvm.loongarch.crcc.w.d.w(i64 %0, i32 %1)
+ store i32 %2, i32* %i32_r, align 4
+ ret void
+}
+
+declare i32 @llvm.loongarch.crcc.w.d.w(i64, i32)
+
+; CHECK-LABEL: crcc_w_d_w:
+; CHECK: ld.w $r[[REG1:[0-9]+]], $sp, 8
+; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 0
+; CHECK: crcc.w.d.w $r[[REG1:[0-9]+]], $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @tlbclr() {
+entry:
+ call void @llvm.loongarch.tlbclr()
+ ret void
+}
+
+declare void @llvm.loongarch.tlbclr()
+
+; CHECK-LABEL: tlbclr:
+; CHECK: tlbclr
+; CHECK: jr $ra
+;
+
+define void @tlbflush() {
+entry:
+ call void @llvm.loongarch.tlbflush()
+ ret void
+}
+
+declare void @llvm.loongarch.tlbflush()
+
+; CHECK-LABEL: tlbflush:
+; CHECK: tlbflush
+; CHECK: jr $ra
+;
+
+define void @tlbfill() {
+entry:
+ call void @llvm.loongarch.tlbfill()
+ ret void
+}
+
+declare void @llvm.loongarch.tlbfill()
+
+; CHECK-LABEL: tlbfill:
+; CHECK: tlbfill
+; CHECK: jr $ra
+;
+
+define void @tlbrd() {
+entry:
+ call void @llvm.loongarch.tlbrd()
+ ret void
+}
+
+declare void @llvm.loongarch.tlbrd()
+
+; CHECK-LABEL: tlbrd:
+; CHECK: tlbrd
+; CHECK: jr $ra
+;
+
+define void @tlbwr() {
+entry:
+ call void @llvm.loongarch.tlbwr()
+ ret void
+}
+
+declare void @llvm.loongarch.tlbwr()
+
+; CHECK-LABEL: tlbwr:
+; CHECK: tlbwr
+; CHECK: jr $ra
+;
+
+define void @tlbsrch() {
+entry:
+ call void @llvm.loongarch.tlbsrch()
+ ret void
+}
+
+declare void @llvm.loongarch.tlbsrch()
+
+; CHECK-LABEL: tlbsrch:
+; CHECK: tlbsrch
+; CHECK: jr $ra
+;
+
+define void @syscall() {
+entry:
+ call void @llvm.loongarch.syscall(i64 1)
+ ret void
+}
+
+declare void @llvm.loongarch.syscall(i64)
+
+; CHECK-LABEL: syscall:
+; CHECK: syscall 1
+; CHECK: jr $ra
+;
+
+define void @break_builtin() {
+entry:
+ call void @llvm.loongarch.break(i64 1)
+ ret void
+}
+
+declare void @llvm.loongarch.break(i64)
+
+; CHECK-LABEL: break_builtin:
+; CHECK: break 1
+; CHECK: jr $ra
+;
+
+define void @asrtle_d() {
+entry:
+ %i64_a = alloca i64, align 8
+ %i64_b = alloca i64, align 8
+ %0 = load i64, i64* %i64_a, align 8
+ %1 = load i64, i64* %i64_b, align 8
+ call void @llvm.loongarch.asrtle.d(i64 %0, i64 %1)
+ ret void
+}
+
+declare void @llvm.loongarch.asrtle.d(i64, i64)
+
+; CHECK-LABEL: asrtle_d:
+; CHECK: ld.d $r[[REG1:[0-9]+]], $sp, 0
+; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 8
+; CHECK: asrtle.d $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @asrtgt_d() {
+entry:
+ %i64_a = alloca i64, align 8
+ %i64_b = alloca i64, align 8
+ %0 = load i64, i64* %i64_a, align 8
+ %1 = load i64, i64* %i64_b, align 8
+ call void @llvm.loongarch.asrtgt.d(i64 %0, i64 %1)
+ ret void
+}
+
+declare void @llvm.loongarch.asrtgt.d(i64, i64)
+
+; CHECK-LABEL: asrtgt_d:
+; CHECK: ld.d $r[[REG1:[0-9]+]], $sp, 0
+; CHECK: ld.d $r[[REG2:[0-9]+]], $sp, 8
+; CHECK: asrtgt.d $r[[REG2:[0-9]+]], $r[[REG1:[0-9]+]]
+; CHECK: jr $ra
+;
+
+define void @dbar() {
+entry:
+ call void @llvm.loongarch.dbar(i64 0)
+ ret void
+}
+
+declare void @llvm.loongarch.dbar(i64)
+
+; CHECK-LABEL: dbar:
+; CHECK: dbar 0
+; CHECK: jr $ra
+;
+
+define void @ibar() {
+entry:
+ call void @llvm.loongarch.ibar(i64 0)
+ ret void
+}
+
+declare void @llvm.loongarch.ibar(i64)
+
+; CHECK-LABEL: ibar:
+; CHECK: ibar 0
+; CHECK: jr $ra
+;
+
+define void @movfcsr2gr() {
+entry:
+ %u32_r = alloca i32, align 4
+ %rd = alloca i32, align 4
+ %0 = call i32 asm sideeffect "movfcsr2gr $0, $$fcsr0", "=&r"()
+ store i32 %0, i32* %rd, align 4
+ %1 = load i32, i32* %rd, align 4
+ store i32 %1, i32* %u32_r, align 4
+ ret void
+}
+
+; CHECK-LABEL: movfcsr2gr:
+; CHECK: movfcsr2gr $r[[REG:[0-9]+]], $fcsr[[REG:[0-9]+]]
+; CHECK: st.w $r[[REG:[0-9]+]], $sp, 8
+; CHECK: st.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: jr $ra
+;
+
+define void @movgr2fcsr() {
+entry:
+ %u32_a = alloca i32, align 4
+ %0 = load i32, i32* %u32_a, align 4
+ call void asm sideeffect "movgr2fcsr $$fcsr0, $0", "r"(i32 %0)
+ ret void
+}
+
+; CHECK-LABEL: movgr2fcsr:
+; CHECK: ld.w $r[[REG:[0-9]+]], $sp, 12
+; CHECK: movgr2fcsr $fcsr[[REG:[0-9]+]], $r[[REG:[0-9]+]]
+; CHECK: jr $ra
+;
diff --git a/llvm/test/CodeGen/LoongArch/const-mult.ll b/llvm/test/CodeGen/LoongArch/const-mult.ll
new file mode 100644
index 000000000000..955e162684a9
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/const-mult.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=loongarch64-linux-gnu < %s | FileCheck %s
+
+
+; This test is copied from Mips, except for mul2730_32 and mul2730_64.
+
+define i32 @mul5_32(i32 signext %a) {
+; CHECK-LABEL: mul5_32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: alsl.w $r4, $r4, $r4, 2
+; CHECK-NEXT: jr $ra
+entry:
+ %mul = mul nsw i32 %a, 5
+ ret i32 %mul
+}
+
+define i32 @mul27_32(i32 signext %a) {
+; CHECK-LABEL: mul27_32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: alsl.w $r5, $r4, $r4, 2
+; CHECK-NEXT: slli.w $r4, $r4, 5
+; CHECK-NEXT: sub.w $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %mul = mul nsw i32 %a, 27
+ ret i32 %mul
+}
+
+define i32 @muln2147483643_32(i32 signext %a) {
+; CHECK-LABEL: muln2147483643_32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: alsl.w $r5, $r4, $r4, 2
+; CHECK-NEXT: slli.w $r4, $r4, 31
+; CHECK-NEXT: add.w $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %mul = mul nsw i32 %a, -2147483643
+ ret i32 %mul
+}
+
+define i64 @muln9223372036854775805_64(i64 signext %a) {
+; CHECK-LABEL: muln9223372036854775805_64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: alsl.d $r5, $r4, $r4, 1
+; CHECK-NEXT: slli.d $r4, $r4, 63
+; CHECK-NEXT: add.d $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %mul = mul nsw i64 %a, -9223372036854775805
+ ret i64 %mul
+}
+
+define i128 @muln170141183460469231731687303715884105725_128(i128 signext %a) {
+; CHECK-LABEL: muln170141183460469231731687303715884105725_128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: srli.d $r6, $r4, 63
+; CHECK-NEXT: slli.d $r7, $r5, 1
+; CHECK-NEXT: or $r6, $r7, $r6
+; CHECK-NEXT: add.d $r5, $r6, $r5
+; CHECK-NEXT: slli.d $r7, $r4, 1
+; CHECK-NEXT: alsl.d $r6, $r4, $r4, 1
+; CHECK-NEXT: sltu $r7, $r6, $r7
+; CHECK-NEXT: bstrpick.d $r7, $r7, 31, 0
+; CHECK-NEXT: add.d $r5, $r5, $r7
+; CHECK-NEXT: slli.d $r4, $r4, 63
+; CHECK-NEXT: add.d $r5, $r4, $r5
+; CHECK-NEXT: move $r4, $r6
+; CHECK-NEXT: jr $ra
+entry:
+ %mul = mul nsw i128 %a, -170141183460469231731687303715884105725
+ ret i128 %mul
+}
+
+define i128 @mul170141183460469231731687303715884105723_128(i128 signext %a) {
+; CHECK-LABEL: mul170141183460469231731687303715884105723_128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: srli.d $r6, $r4, 62
+; CHECK-NEXT: slli.d $r7, $r5, 2
+; CHECK-NEXT: or $r6, $r7, $r6
+; CHECK-NEXT: add.d $r5, $r6, $r5
+; CHECK-NEXT: slli.d $r6, $r4, 2
+; CHECK-NEXT: alsl.d $r7, $r4, $r4, 2
+; CHECK-NEXT: sltu $r6, $r7, $r6
+; CHECK-NEXT: bstrpick.d $r6, $r6, 31, 0
+; CHECK-NEXT: add.d $r5, $r5, $r6
+; CHECK-NEXT: slli.d $r4, $r4, 63
+; CHECK-NEXT: sub.d $r4, $r4, $r5
+; CHECK-NEXT: sltu $r5, $zero, $r7
+; CHECK-NEXT: bstrpick.d $r5, $r5, 31, 0
+; CHECK-NEXT: sub.d $r5, $r4, $r5
+; CHECK-NEXT: addi.d $r4, $zero, 0
+; CHECK-NEXT: sub.d $r4, $r4, $r7
+; CHECK-NEXT: jr $ra
+entry:
+ %mul = mul nsw i128 %a, 170141183460469231731687303715884105723
+ ret i128 %mul
+}
+
+define i32 @mul42949673_32(i32 %a) {
+; CHECK-LABEL: mul42949673_32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r5, 10485
+; CHECK-NEXT: ori $r5, $r5, 3113
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: mul.w $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+ %b = mul i32 %a, 42949673
+ ret i32 %b
+}
+
+define i64 @mul42949673_64(i64 %a) {
+; CHECK-LABEL: mul42949673_64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $r5, 10485
+; CHECK-NEXT: ori $r5, $r5, 3113
+; CHECK-NEXT: mul.d $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i64 %a, 42949673
+ ret i64 %b
+}
+
+define i32 @mul22224078_32(i32 %a) {
+; CHECK-LABEL: mul22224078_32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $r5, 5425
+; CHECK-NEXT: ori $r5, $r5, 3278
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: mul.w $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i32 %a, 22224078
+ ret i32 %b
+}
+
+define i64 @mul22224078_64(i64 %a) {
+; CHECK-LABEL: mul22224078_64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $r5, 5425
+; CHECK-NEXT: ori $r5, $r5, 3278
+; CHECK-NEXT: mul.d $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i64 %a, 22224078
+ ret i64 %b
+}
+
+define i32 @mul22245375_32(i32 %a) {
+; CHECK-LABEL: mul22245375_32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $r5, 5430
+; CHECK-NEXT: ori $r5, $r5, 4095
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: mul.w $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i32 %a, 22245375
+ ret i32 %b
+}
+
+define i64 @mul22245375_64(i64 %a) {
+; CHECK-LABEL: mul22245375_64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $r5, 5430
+; CHECK-NEXT: ori $r5, $r5, 4095
+; CHECK-NEXT: mul.d $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i64 %a, 22245375
+ ret i64 %b
+}
+
+define i32 @mul25165824_32(i32 %a) {
+; CHECK-LABEL: mul25165824_32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $r5, 5430
+; CHECK-NEXT: ori $r5, $r5, 4095
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: mul.w $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i32 %a, 22245375
+ ret i32 %b
+}
+
+define i64 @mul25165824_64(i64 %a) {
+; CHECK-LABEL: mul25165824_64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli.d $r5, $r4, 23
+; CHECK-NEXT: slli.d $r4, $r4, 24
+; CHECK-NEXT: add.d $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i64 %a, 25165824
+ ret i64 %b
+}
+
+define i32 @mul33554432_32(i32 %a) {
+; CHECK-LABEL: mul33554432_32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $r5, 5430
+; CHECK-NEXT: ori $r5, $r5, 4095
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: mul.w $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i32 %a, 22245375
+ ret i32 %b
+}
+
+define i64 @mul33554432_64(i64 %a) {
+; CHECK-LABEL: mul33554432_64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli.d $r4, $r4, 25
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i64 %a, 33554432
+ ret i64 %b
+}
+
+define i32 @mul2730_32(i32 %a) {
+; CHECK-LABEL: mul2730_32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: ori $r5, $zero, 2730
+; CHECK-NEXT: mul.w $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i32 %a, 2730
+ ret i32 %b
+}
+
+define i64 @mul2730_64(i64 %a) {
+; CHECK-LABEL: mul2730_64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ori $r5, $zero, 2730
+; CHECK-NEXT: mul.d $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+ %b = mul i64 %a, 2730
+ ret i64 %b
+}
diff --git a/llvm/test/CodeGen/LoongArch/disable-tail-calls.ll b/llvm/test/CodeGen/LoongArch/disable-tail-calls.ll
new file mode 100644
index 000000000000..586daca23c93
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/disable-tail-calls.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -relocation-model=pic < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK1
+; RUN: llc -march=loongarch64 -relocation-model=pic -disable-tail-calls < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK2
+; RUN: llc -march=loongarch64 -relocation-model=pic -disable-tail-calls=false < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK3
+
+; Function with attribute #0 = { "disable-tail-calls"="true" }
+define i32 @caller1(i32 %a) #0 {
+; CHECK1-LABEL: caller1:
+; CHECK1: # %bb.0: # %entry
+; CHECK1-NEXT: addi.d $sp, $sp, -16
+; CHECK1-NEXT: .cfi_def_cfa_offset 16
+; CHECK1-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK1-NEXT: .cfi_offset 1, -8
+; CHECK1-NEXT: bl callee
+; CHECK1-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK1-NEXT: addi.d $sp, $sp, 16
+; CHECK1-NEXT: jr $ra
+;
+; CHECK2-LABEL: caller1:
+; CHECK2: # %bb.0: # %entry
+; CHECK2-NEXT: addi.d $sp, $sp, -16
+; CHECK2-NEXT: .cfi_def_cfa_offset 16
+; CHECK2-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK2-NEXT: .cfi_offset 1, -8
+; CHECK2-NEXT: bl callee
+; CHECK2-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK2-NEXT: addi.d $sp, $sp, 16
+; CHECK2-NEXT: jr $ra
+;
+; CHECK3-LABEL: caller1:
+; CHECK3: # %bb.0: # %entry
+; CHECK3-NEXT: b callee
+entry:
+ %call = tail call i32 @callee(i32 %a)
+ ret i32 %call
+}
+
+
+; Function with attribute #1 = { "disable-tail-calls"="false" }
+define i32 @caller2(i32 %a) #1 {
+; CHECK1-LABEL: caller2:
+; CHECK1: # %bb.0: # %entry
+; CHECK1-NEXT: b callee
+;
+; CHECK2-LABEL: caller2:
+; CHECK2: # %bb.0: # %entry
+; CHECK2-NEXT: addi.d $sp, $sp, -16
+; CHECK2-NEXT: .cfi_def_cfa_offset 16
+; CHECK2-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK2-NEXT: .cfi_offset 1, -8
+; CHECK2-NEXT: bl callee
+; CHECK2-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK2-NEXT: addi.d $sp, $sp, 16
+; CHECK2-NEXT: jr $ra
+;
+; CHECK3-LABEL: caller2:
+; CHECK3: # %bb.0: # %entry
+; CHECK3-NEXT: b callee
+entry:
+ %call = tail call i32 @callee(i32 %a)
+ ret i32 %call
+}
+
+define i32 @caller3(i32 %a) {
+; CHECK1-LABEL: caller3:
+; CHECK1: # %bb.0: # %entry
+; CHECK1-NEXT: b callee
+;
+; CHECK2-LABEL: caller3:
+; CHECK2: # %bb.0: # %entry
+; CHECK2-NEXT: addi.d $sp, $sp, -16
+; CHECK2-NEXT: .cfi_def_cfa_offset 16
+; CHECK2-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK2-NEXT: .cfi_offset 1, -8
+; CHECK2-NEXT: bl callee
+; CHECK2-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK2-NEXT: addi.d $sp, $sp, 16
+; CHECK2-NEXT: jr $ra
+;
+; CHECK3-LABEL: caller3:
+; CHECK3: # %bb.0: # %entry
+; CHECK3-NEXT: b callee
+entry:
+ %call = tail call i32 @callee(i32 %a)
+ ret i32 %call
+}
+
+declare i32 @callee(i32)
+
+attributes #0 = { "disable-tail-calls"="true" }
+attributes #1 = { "disable-tail-calls"="false" }
diff --git a/llvm/test/CodeGen/LoongArch/divrem.ll b/llvm/test/CodeGen/LoongArch/divrem.ll
new file mode 100644
index 000000000000..34293a83c25c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/divrem.ll
@@ -0,0 +1,68 @@
+; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s -check-prefixes=CHECK,CHECK-TRAP
+
+; RUN: llc -march=loongarch64 -mnocheck-zero-division -relocation-model=pic < %s | FileCheck %s -check-prefixes=CHECK,NOCHECK
+
+; FileCheck Prefixes:
+; CHECK-TRAP - Division by zero is detected and traps
+; NOCHECK - Division by zero will not be detected
+
+define i32 @sdiv1(i32 signext %a0, i32 signext %a1) nounwind readnone {
+entry:
+; CHECK-LABEL: sdiv1:
+
+; CHECK: div.w $r4, $r4, $r5
+; CHECK-TRAP: bne $r5, $zero, 8
+; CHECK-TRAP: break 7
+
+; NOCHECK-NOT: bne
+; NOCHECK-NOT: break
+
+ %div = sdiv i32 %a0, %a1
+ ret i32 %div
+}
+
+define i32 @srem1(i32 signext %a0, i32 signext %a1) nounwind readnone {
+entry:
+; CHECK-LABEL: srem1:
+
+; CHECK: mod.w $r4, $r4, $r5
+; CHECK-TRAP: bne $r5, $zero, 8
+; CHECK-TRAP: break 7
+
+; NOCHECK-NOT: bne
+; NOCHECK-NOT: break
+
+ %rem = srem i32 %a0, %a1
+ ret i32 %rem
+}
+
+define i32 @udiv1(i32 signext %a0, i32 signext %a1) nounwind readnone {
+entry:
+; CHECK-LABEL: udiv1:
+
+; CHECK: div.wu $r4, $r4, $r5
+; CHECK-TRAP: bne $r5, $zero, 8
+; CHECK-TRAP: break 7
+
+; NOCHECK-NOT: bne
+; NOCHECK-NOT: break
+
+ %div = udiv i32 %a0, %a1
+ ret i32 %div
+}
+
+define i32 @urem1(i32 signext %a0, i32 signext %a1) nounwind readnone {
+entry:
+; CHECK-LABEL: urem1:
+
+
+; CHECK: mod.wu $r4, $r4, $r5
+; CHECK-TRAP: bne $r5, $zero, 8
+; CHECK-TRAP: break 7
+
+; NOCHECK-NOT: bne
+; NOCHECK-NOT: break
+
+ %rem = urem i32 %a0, %a1
+ ret i32 %rem
+}
diff --git a/llvm/test/CodeGen/LoongArch/double-imm.ll b/llvm/test/CodeGen/LoongArch/double-imm.ll
deleted file mode 100644
index a7782cf85954..000000000000
--- a/llvm/test/CodeGen/LoongArch/double-imm.ll
+++ /dev/null
@@ -1,89 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
-
-define double @f64_positive_zero() nounwind {
-; LA32-LABEL: f64_positive_zero:
-; LA32: # %bb.0:
-; LA32-NEXT: movgr2fr.w $fa0, $zero
-; LA32-NEXT: movgr2frh.w $fa0, $zero
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_positive_zero:
-; LA64: # %bb.0:
-; LA64-NEXT: movgr2fr.d $fa0, $zero
-; LA64-NEXT: jirl $zero, $ra, 0
- ret double 0.0
-}
-
-define double @f64_negative_zero() nounwind {
-; LA32-LABEL: f64_negative_zero:
-; LA32: # %bb.0:
-; LA32-NEXT: movgr2fr.w $fa0, $zero
-; LA32-NEXT: movgr2frh.w $fa0, $zero
-; LA32-NEXT: fneg.d $fa0, $fa0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_negative_zero:
-; LA64: # %bb.0:
-; LA64-NEXT: movgr2fr.d $fa0, $zero
-; LA64-NEXT: fneg.d $fa0, $fa0
-; LA64-NEXT: jirl $zero, $ra, 0
- ret double -0.0
-}
-
-define double @f64_constant_pi() nounwind {
-; LA32-LABEL: f64_constant_pi:
-; LA32: # %bb.0:
-; LA32-NEXT: pcalau12i $a0, .LCPI2_0
-; LA32-NEXT: addi.w $a0, $a0, .LCPI2_0
-; LA32-NEXT: fld.d $fa0, $a0, 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_constant_pi:
-; LA64: # %bb.0:
-; LA64-NEXT: pcalau12i $a0, .LCPI2_0
-; LA64-NEXT: addi.d $a0, $a0, .LCPI2_0
-; LA64-NEXT: fld.d $fa0, $a0, 0
-; LA64-NEXT: jirl $zero, $ra, 0
- ret double 3.1415926535897931159979634685441851615905761718750
-}
-
-define double @f64_add_fimm1(double %a) nounwind {
-; LA32-LABEL: f64_add_fimm1:
-; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $zero, 1
-; LA32-NEXT: movgr2fr.w $fa1, $a0
-; LA32-NEXT: ffint.s.w $fa1, $fa1
-; LA32-NEXT: fcvt.d.s $fa1, $fa1
-; LA32-NEXT: fadd.d $fa0, $fa0, $fa1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_add_fimm1:
-; LA64: # %bb.0:
-; LA64-NEXT: addi.d $a0, $zero, 1
-; LA64-NEXT: movgr2fr.d $fa1, $a0
-; LA64-NEXT: ffint.d.l $fa1, $fa1
-; LA64-NEXT: fadd.d $fa0, $fa0, $fa1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = fadd double %a, 1.0
- ret double %1
-}
-
-define double @f64_positive_fimm1() nounwind {
-; LA32-LABEL: f64_positive_fimm1:
-; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $zero, 1
-; LA32-NEXT: movgr2fr.w $fa0, $a0
-; LA32-NEXT: ffint.s.w $fa0, $fa0
-; LA32-NEXT: fcvt.d.s $fa0, $fa0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_positive_fimm1:
-; LA64: # %bb.0:
-; LA64-NEXT: addi.d $a0, $zero, 1
-; LA64-NEXT: movgr2fr.d $fa0, $a0
-; LA64-NEXT: ffint.d.l $fa0, $fa0
-; LA64-NEXT: jirl $zero, $ra, 0
- ret double 1.0
-}
diff --git a/llvm/test/CodeGen/LoongArch/dup-tail.ll b/llvm/test/CodeGen/LoongArch/dup-tail.ll
new file mode 100644
index 000000000000..cad67e98c0b4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/dup-tail.ll
@@ -0,0 +1,45 @@
+; RUN: llc --mtriple=loongarch64 --relocation-model=pic -o - %s | FileCheck %s
+
+;; Duplicate returns to enable tail call optimizations.
+declare i32 @test()
+declare i32 @test1()
+declare i32 @test2()
+declare i32 @test3()
+define i32 @duplicate_returns(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: duplicate_returns:
+; CHECK: b test2
+; CHECK: b test
+; CHECK: b test1
+; CHECK: b test3
+entry:
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %call = tail call i32 @test()
+ br label %return
+
+if.else: ; preds = %entry
+ %cmp1 = icmp eq i32 %b, 0
+ br i1 %cmp1, label %if.then2, label %if.else4
+
+if.then2: ; preds = %if.else
+ %call3 = tail call i32 @test1()
+ br label %return
+
+if.else4: ; preds = %if.else
+ %cmp5 = icmp sgt i32 %a, %b
+ br i1 %cmp5, label %if.then6, label %if.else8
+
+if.then6: ; preds = %if.else4
+ %call7 = tail call i32 @test2()
+ br label %return
+
+if.else8: ; preds = %if.else4
+ %call9 = tail call i32 @test3()
+ br label %return
+
+return: ; preds = %if.else8, %if.then6, %if.then2, %if.then
+ %retval = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %call9, %if.else8 ]
+ ret i32 %retval
+}
diff --git a/llvm/test/CodeGen/LoongArch/eliminateFI.ll b/llvm/test/CodeGen/LoongArch/eliminateFI.ll
new file mode 100644
index 000000000000..0272c95bdb9e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/eliminateFI.ll
@@ -0,0 +1,109 @@
+; Check whether LoongArchSERegisterInfo::eliminateFI works well.
+; Note: ldptr/stptr encode a signed 14-bit offset that is scaled by 4
+; (si14 << 2), so any non-zero offset must be 4-byte aligned; unaligned
+; accesses fall back to an explicitly computed address with offset 0.
+; RUN: llc -march=loongarch64 -o - %s | FileCheck %s
+
+define signext i32 @ldptr_w_unaligned() {
+; CHECK-LABEL: ldptr_w_unaligned:
+; CHECK: # %bb.0: # %entry
+entry:
+ %array = alloca [6000 x i8], align 1
+ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001
+ %0 = bitcast i8* %arrayidx to i32*
+; the offset MUST be 0
+; CHECK: ldptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, 0
+ %1 = load i32, i32* %0, align 1
+ ret i32 %1
+}
+
+define signext i32 @ldptr_w_aligned() {
+; CHECK-LABEL: ldptr_w_aligned:
+; CHECK: # %bb.0: # %entry
+entry:
+ %array = alloca [6000 x i8], align 1
+ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000
+ %0 = bitcast i8* %arrayidx to i32*
+; the offset may not be 0, but MUST be 4-byte aligned
+; CHECK: ldptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}}
+ %1 = load i32, i32* %0, align 1
+ ret i32 %1
+}
+
+define signext i64 @ldptr_d_unaligned() {
+; CHECK-LABEL: ldptr_d_unaligned:
+; CHECK: # %bb.0: # %entry
+entry:
+ %array = alloca [6000 x i8], align 1
+ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001
+ %0 = bitcast i8* %arrayidx to i64*
+; the offset MUST be 0
+; CHECK: ldptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, 0
+ %1 = load i64, i64* %0, align 1
+ ret i64 %1
+}
+
+define signext i64 @ldptr_d_aligned() {
+; CHECK-LABEL: ldptr_d_aligned:
+; CHECK: # %bb.0: # %entry
+entry:
+ %array = alloca [6000 x i8], align 1
+ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000
+ %0 = bitcast i8* %arrayidx to i64*
+; the offset may not be 0, but MUST be 4-byte aligned
+; CHECK: ldptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}}
+ %1 = load i64, i64* %0, align 1
+ ret i64 %1
+}
+
+define void @stptr_w_unaligned(i32 signext %val) {
+; CHECK-LABEL: stptr_w_unaligned:
+; CHECK: # %bb.0: # %entry
+entry:
+ %array = alloca [6000 x i8], align 1
+ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001
+ %0 = bitcast i8* %arrayidx to i32*
+; the offset MUST be 0
+; CHECK: stptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, 0
+ store i32 %val, i32* %0, align 1
+ ret void
+}
+
+define void @stptr_w_aligned(i32 signext %val) {
+; CHECK-LABEL: stptr_w_aligned:
+; CHECK: # %bb.0: # %entry
+entry:
+ %array = alloca [6000 x i8], align 1
+ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000
+ %0 = bitcast i8* %arrayidx to i32*
+; the offset may not be 0, but MUST be 4-byte aligned
+; CHECK: stptr.w $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}}
+ store i32 %val, i32* %0, align 1
+ ret void
+}
+
+define void @stptr_d_unaligned(i64 %val) {
+; CHECK-LABEL: stptr_d_unaligned:
+; CHECK: # %bb.0: # %entry
+entry:
+ %array = alloca [6000 x i8], align 1
+ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5001
+ %0 = bitcast i8* %arrayidx to i64*
+; the offset MUST be 0
+; CHECK: stptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, 0
+ store i64 %val, i64* %0, align 1
+ ret void
+}
+
+define void @stptr_d_aligned(i64 %val) {
+; CHECK-LABEL: stptr_d_aligned:
+; CHECK: # %bb.0: # %entry
+entry:
+ %array = alloca [6000 x i8], align 1
+ %arrayidx = getelementptr inbounds [6000 x i8], [6000 x i8]* %array, i64 0, i64 5000
+ %0 = bitcast i8* %arrayidx to i64*
+; the offset may not be 0, but MUST be 4-byte aligned
+; CHECK: stptr.d $r{{[0-9]+}}, $r{{[0-9]+}}, {{[0-9]+}}
+ store i64 %val, i64* %0, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll
new file mode 100644
index 000000000000..80fa7a85550d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 -O0 < %s | FileCheck %s
+
+@var = external global i32
+
+define void @func() {
+; CHECK-LABEL: func:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -2048
+; CHECK-NEXT: addi.d $sp, $sp, -2048
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 4112
+; CHECK-NEXT: lu12i.w $r5, var
+; CHECK-NEXT: ori $r5, $r5, var
+; CHECK-NEXT: lu32i.d $r5, var
+; CHECK-NEXT: lu52i.d $r5, $r5, var
+; CHECK-NEXT: ld.w $r20, $r5, 0
+; CHECK-NEXT: ld.w $r19, $r5, 0
+; CHECK-NEXT: ld.w $r18, $r5, 0
+; CHECK-NEXT: ld.w $r17, $r5, 0
+; CHECK-NEXT: ld.w $r16, $r5, 0
+; CHECK-NEXT: ld.w $r15, $r5, 0
+; CHECK-NEXT: ld.w $r14, $r5, 0
+; CHECK-NEXT: ld.w $r13, $r5, 0
+; CHECK-NEXT: ld.w $r12, $r5, 0
+; CHECK-NEXT: ld.w $r11, $r5, 0
+; CHECK-NEXT: ld.w $r10, $r5, 0
+; CHECK-NEXT: ld.w $r9, $r5, 0
+; CHECK-NEXT: ld.w $r8, $r5, 0
+; CHECK-NEXT: ld.w $r7, $r5, 0
+; CHECK-NEXT: ld.w $r6, $r5, 0
+; CHECK-NEXT: ld.w $r4, $r5, 0
+; CHECK-NEXT: st.d $r23, $sp, 0
+; CHECK-NEXT: lu12i.w $r23, 1
+; CHECK-NEXT: ori $r23, $r23, 12
+; CHECK-NEXT: add.d $r23, $sp, $r23
+; CHECK-NEXT: st.w $r20, $r23, 0
+; CHECK-NEXT: ld.d $r23, $sp, 0
+; CHECK-NEXT: st.w $r20, $r5, 0
+; CHECK-NEXT: st.w $r19, $r5, 0
+; CHECK-NEXT: st.w $r18, $r5, 0
+; CHECK-NEXT: st.w $r17, $r5, 0
+; CHECK-NEXT: st.w $r16, $r5, 0
+; CHECK-NEXT: st.w $r15, $r5, 0
+; CHECK-NEXT: st.w $r14, $r5, 0
+; CHECK-NEXT: st.w $r13, $r5, 0
+; CHECK-NEXT: st.w $r12, $r5, 0
+; CHECK-NEXT: st.w $r11, $r5, 0
+; CHECK-NEXT: st.w $r10, $r5, 0
+; CHECK-NEXT: st.w $r9, $r5, 0
+; CHECK-NEXT: st.w $r8, $r5, 0
+; CHECK-NEXT: st.w $r7, $r5, 0
+; CHECK-NEXT: st.w $r6, $r5, 0
+; CHECK-NEXT: st.w $r4, $r5, 0
+; CHECK-NEXT: lu12i.w $r4, 1
+; CHECK-NEXT: ori $r4, $r4, 16
+; CHECK-NEXT: add.d $sp, $sp, $r4
+; CHECK-NEXT: jr $ra
+ %space = alloca i32, align 4
+ %stackspace = alloca [1024 x i32], align 4
+
+ ;; Load values to increase register pressure.
+ %v0 = load volatile i32, i32* @var
+ %v1 = load volatile i32, i32* @var
+ %v2 = load volatile i32, i32* @var
+ %v3 = load volatile i32, i32* @var
+ %v4 = load volatile i32, i32* @var
+ %v5 = load volatile i32, i32* @var
+ %v6 = load volatile i32, i32* @var
+ %v7 = load volatile i32, i32* @var
+ %v8 = load volatile i32, i32* @var
+ %v9 = load volatile i32, i32* @var
+ %v10 = load volatile i32, i32* @var
+ %v11 = load volatile i32, i32* @var
+ %v12 = load volatile i32, i32* @var
+ %v13 = load volatile i32, i32* @var
+ %v14 = load volatile i32, i32* @var
+ %v15 = load volatile i32, i32* @var
+
+ ;; Computing a stack-relative value needs an additional register.
+ ;; We should get an emergency spill/reload for this.
+ store volatile i32 %v0, i32* %space
+
+ ;; Store values so they are used.
+ store volatile i32 %v0, i32* @var
+ store volatile i32 %v1, i32* @var
+ store volatile i32 %v2, i32* @var
+ store volatile i32 %v3, i32* @var
+ store volatile i32 %v4, i32* @var
+ store volatile i32 %v5, i32* @var
+ store volatile i32 %v6, i32* @var
+ store volatile i32 %v7, i32* @var
+ store volatile i32 %v8, i32* @var
+ store volatile i32 %v9, i32* @var
+ store volatile i32 %v10, i32* @var
+ store volatile i32 %v11, i32* @var
+ store volatile i32 %v12, i32* @var
+ store volatile i32 %v13, i32* @var
+ store volatile i32 %v14, i32* @var
+ store volatile i32 %v15, i32* @var
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/fcopysign.ll b/llvm/test/CodeGen/LoongArch/fcopysign.ll
new file mode 100644
index 000000000000..c16413715ffd
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/fcopysign.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=loongarch64 -o - %s | FileCheck %s
+
+define float @fcopysign_s(float %a, float %b) {
+; CHECK-LABEL: fcopysign_s:
+; CHECK: fcopysign.s $f0, $f0, $f1
+ %ret = call float @llvm.copysign.f32(float %a, float %b)
+ ret float %ret
+}
+declare float @llvm.copysign.f32(float %a, float %b)
+
+define double @fcopysign_d(double %a, double %b) {
+; CHECK-LABEL: fcopysign_d:
+; CHECK: fcopysign.d $f0, $f0, $f1
+ %ret = call double @llvm.copysign.f64(double %a, double %b)
+ ret double %ret
+}
+declare double @llvm.copysign.f64(double %a, double %b)
diff --git a/llvm/test/CodeGen/LoongArch/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/fence-singlethread.ll
new file mode 100644
index 000000000000..f4d1a396570d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/fence-singlethread.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
+
+define void @fence_singlethread() {
+; CHECK-LABEL: fence_singlethread:
+; CHECK: # %bb.0:
+; CHECK-NEXT: dbar 16
+; CHECK-NEXT: jr $ra
+ fence syncscope("singlethread") seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll b/llvm/test/CodeGen/LoongArch/fence.ll
similarity index 42%
rename from llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll
rename to llvm/test/CodeGen/LoongArch/fence.ll
index f8c98bbc7138..05e2639ca632 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fence.ll
+++ b/llvm/test/CodeGen/LoongArch/fence.ll
@@ -1,58 +1,38 @@
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
 
 define void @fence_acquire() nounwind {
-; LA32-LABEL: fence_acquire:
-; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
 ; LA64-LABEL: fence_acquire:
 ; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: jirl $zero, $ra, 0
+; LA64-NEXT: dbar 20
+; LA64-NEXT: jr $ra
 fence acquire
 ret void
 }
 
 define void @fence_release() nounwind {
-; LA32-LABEL: fence_release:
-; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
 ; LA64-LABEL: fence_release:
 ; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: jirl $zero, $ra, 0
+; LA64-NEXT: dbar 18
+; LA64-NEXT: jr $ra
 fence release
 ret void
 }
 
 define void @fence_acq_rel() nounwind {
-; LA32-LABEL: fence_acq_rel:
-; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
 ; LA64-LABEL: fence_acq_rel:
 ; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: jirl $zero, $ra, 0
+; LA64-NEXT: dbar 16
+; LA64-NEXT: jr $ra
 fence acq_rel
 ret void
 }
 
 define void @fence_seq_cst() nounwind {
-; LA32-LABEL: fence_seq_cst:
-; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
 ; LA64-LABEL: fence_seq_cst:
 ; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: jirl $zero, $ra, 0
+; LA64-NEXT: dbar 16
+; LA64-NEXT: jr $ra
 fence seq_cst
 ret void
 }
diff --git a/llvm/test/CodeGen/LoongArch/float-imm.ll b/llvm/test/CodeGen/LoongArch/float-imm.ll
deleted file mode 100644
index a6b542c29ed7..000000000000
--- a/llvm/test/CodeGen/LoongArch/float-imm.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64
-
-define float @f32_positive_zero() nounwind {
-; LA32-LABEL: f32_positive_zero:
-; LA32: # %bb.0:
-; LA32-NEXT: movgr2fr.w $fa0, $zero
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_positive_zero:
-; LA64: # %bb.0:
-; LA64-NEXT: movgr2fr.w $fa0, $zero
-; LA64-NEXT: jirl $zero, $ra, 0
- ret float 0.0
-}
-
-define float @f32_negative_zero() nounwind {
-; LA32-LABEL: f32_negative_zero:
-; LA32: # %bb.0:
-; LA32-NEXT: movgr2fr.w $fa0, $zero
-; LA32-NEXT: fneg.s $fa0, $fa0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_negative_zero:
-; LA64: # %bb.0:
-; LA64-NEXT: movgr2fr.w $fa0, $zero
-; LA64-NEXT: fneg.s $fa0, $fa0
-; LA64-NEXT: jirl $zero, $ra, 0
- ret float -0.0
-}
-
-define float @f32_constant_pi() nounwind {
-; LA32-LABEL: f32_constant_pi:
-; LA32: # %bb.0:
-; LA32-NEXT: pcalau12i $a0, .LCPI2_0
-; LA32-NEXT: addi.w $a0, $a0, .LCPI2_0
-; LA32-NEXT: fld.s $fa0, $a0, 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_constant_pi:
-; LA64: # %bb.0:
-; LA64-NEXT: pcalau12i $a0, .LCPI2_0
-; LA64-NEXT: addi.d $a0, $a0, .LCPI2_0
-; LA64-NEXT: fld.s $fa0, $a0, 0
-; LA64-NEXT: jirl $zero, $ra, 0
- ret float 3.14159274101257324218750
-}
-
-define float @f32_add_fimm1(float %a) nounwind {
-; LA32-LABEL: f32_add_fimm1:
-; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $zero, 1
-; LA32-NEXT: movgr2fr.w $fa1, $a0
-; LA32-NEXT: ffint.s.w $fa1, $fa1
-; LA32-NEXT: fadd.s $fa0, $fa0, $fa1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_add_fimm1:
-; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a0, $zero, 1
-; LA64-NEXT: movgr2fr.w $fa1, $a0
-; LA64-NEXT: ffint.s.w $fa1, $fa1
-; LA64-NEXT: fadd.s $fa0, $fa0, $fa1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = fadd float %a, 1.0
- ret float %1
-}
-
-define float @f32_positive_fimm1() nounwind {
-; LA32-LABEL: f32_positive_fimm1:
-; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $zero, 1
-; LA32-NEXT: movgr2fr.w $fa0, $a0
-; LA32-NEXT: ffint.s.w $fa0, $fa0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_positive_fimm1:
-; LA64: # %bb.0:
-; LA64-NEXT: addi.w $a0, $zero, 1
-; LA64-NEXT: movgr2fr.w $fa0, $a0
-; LA64-NEXT: ffint.s.w $fa0, $fa0
-; LA64-NEXT: jirl $zero, $ra, 0
- ret float 1.0
-}
diff --git a/llvm/test/CodeGen/LoongArch/frame-info.ll b/llvm/test/CodeGen/LoongArch/frame-info.ll
new file mode 100644
index 000000000000..eb4fc69fa785
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/frame-info.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -relocation-model=pic -mtriple=loongarch64 -frame-pointer=all < %s | FileCheck %s
+
+define void @trivial() {
+; CHECK-LABEL: trivial:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: st.d $r22, $sp, 8 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 22, -8
+; CHECK-NEXT: addi.d $r22, $sp, 16
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: ld.d $r22, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: jr $ra
+ ret void
+}
+
+define void @stack_alloc(i32 signext %size) {
+; CHECK-LABEL: stack_alloc:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r22, $sp, 16 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $r22, $sp, 32
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: addi.w $r5, $zero, -16
+; CHECK-NEXT: lu32i.d $r5, 1
+; CHECK-NEXT: bstrpick.d $r4, $r4, 31, 0
+; CHECK-NEXT: addi.d $r4, $r4, 15
+; CHECK-NEXT: and $r4, $r4, $r5
+; CHECK-NEXT: sub.d $r4, $sp, $r4
+; CHECK-NEXT: move $sp, $r4
+; CHECK-NEXT: bl callee_with_args
+; CHECK-NEXT: addi.d $sp, $r22, -32
+; CHECK-NEXT: ld.d $r22, $sp, 16 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 32
+; CHECK-NEXT: jr $ra
+entry:
+ %0 = alloca i8, i32 %size, align 16
+ call void @callee_with_args(i8* nonnull %0)
+ ret void
+}
+
+define void @branch_and_tail_call(i1 %a) {
+; CHECK-LABEL: branch_and_tail_call:
+; CHECK: # %bb.0:
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: andi $r4, $r4, 1
+; CHECK-NEXT: beqz $r4, .LBB2_2
+; CHECK-NEXT: # %bb.1: # %blue_pill
+; CHECK-NEXT: b callee1
+; CHECK-NEXT: .LBB2_2: # %red_pill
+; CHECK-NEXT: addi.d $sp, $sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r22, $sp, 0 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $r22, $sp, 16
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bl callee2
+; CHECK-NEXT: ld.d $r22, $sp, 0 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: jr $ra
+ br i1 %a, label %blue_pill, label %red_pill
+blue_pill:
+ tail call void @callee1()
+ ret void
+red_pill:
+ call void @callee2()
+ ret void
+}
+
+define void @big_frame() {
+; CHECK-LABEL: big_frame:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -2032
+; CHECK-NEXT: .cfi_def_cfa_offset 2032
+; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $r22, $sp, 2016 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $r22, $sp, 2032
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: addi.d $sp, $sp, -48
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: ori $r4, $r4, 2016
+; CHECK-NEXT: add.d $r4, $r22, $r4
+; CHECK-NEXT: addi.d $r4, $r4, 0
+; CHECK-NEXT: bl callee_with_args
+; CHECK-NEXT: addi.d $sp, $sp, 48
+; CHECK-NEXT: ld.d $r22, $sp, 2016 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 2032
+; CHECK-NEXT: jr $ra
+entry:
+ %0 = alloca i8, i32 2048, align 16
+ call void @callee_with_args(i8* nonnull %0)
+ ret void
+}
+
+define void @varargs_frame(i32 %i, ...) {
+; CHECK-LABEL: varargs_frame:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -80
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: st.d $r22, $sp, 8 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 22, -72
+; CHECK-NEXT: addi.d $r22, $sp, 16
+; CHECK-NEXT: .cfi_def_cfa 22, 64
+; CHECK-NEXT: st.d $r11, $r22, 56
+; CHECK-NEXT: st.d $r10, $r22, 48
+; CHECK-NEXT: st.d $r9, $r22, 40
+; CHECK-NEXT: st.d $r8, $r22, 32
+; CHECK-NEXT: st.d $r7, $r22, 24
+; CHECK-NEXT: st.d $r6, $r22, 16
+; CHECK-NEXT: st.d $r5, $r22, 8
+; CHECK-NEXT: ld.d $r22, $sp, 8 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: jr $ra
+ ret void
+}
+
+declare void @callee1()
+declare void @callee2()
+declare void @callee_with_args(i8*)
diff --git a/llvm/test/CodeGen/LoongArch/frame.ll b/llvm/test/CodeGen/LoongArch/frame.ll
deleted file mode 100644
index e0aa7db13f72..000000000000
--- a/llvm/test/CodeGen/LoongArch/frame.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
-
-%struct.key_t = type { i32, [16 x i8] }
-
-define i32 @test() nounwind {
-; CHECK-LABEL: test:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -32
-; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; CHECK-NEXT: st.w $zero, $sp, 16
-; CHECK-NEXT: st.d $zero, $sp, 8
-; CHECK-NEXT: st.d $zero, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 0
-; CHECK-NEXT: ori $a0, $a0, 4
-; CHECK-NEXT: bl test1
-; CHECK-NEXT: move $a0, $zero
-; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 32
-; CHECK-NEXT: jirl $zero, $ra, 0
- %key = alloca %struct.key_t, align 4
- call void @llvm.memset.p0i8.i64(ptr %key, i8 0, i64 20, i1 false)
- %1 = getelementptr inbounds %struct.key_t, ptr %key, i64 0, i32 1, i64 0
- call void @test1(ptr %1)
- ret i32 0
-}
-
-declare void @llvm.memset.p0i8.i64(ptr, i8, i64, i1)
-
-declare void @test1(ptr)
diff --git a/llvm/test/CodeGen/LoongArch/fsel.ll b/llvm/test/CodeGen/LoongArch/fsel.ll
new file mode 100644
index 000000000000..f41ee08c0954
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/fsel.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -o - %s | FileCheck %s
+
+
+define double @olt_f64(double %a, double %b) {
+; CHECK-LABEL: olt_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcmp.clt.d $fcc0, $f0, $f1
+; CHECK-NEXT: fsel $f0, $f1, $f0, $fcc0
+; CHECK-NEXT: jr $ra
+ %cond = fcmp olt double %a, %b
+ %ret = select i1 %cond, double %a, double %b
+ ret double %ret
+}
+
+define double @ogt_f64(double %a, double %b) {
+; CHECK-LABEL: ogt_f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcmp.cule.d $fcc0, $f0, $f1
+; CHECK-NEXT: fsel $f0, $f0, $f1, $fcc0
+; CHECK-NEXT: jr $ra
+ %cond = fcmp ogt double %a, %b
+ %ret = select i1 %cond, double %a, double %b
+ ret double %ret
+}
+
+define float @olt_f32(float %a, float %b) {
+; CHECK-LABEL: olt_f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcmp.clt.s $fcc0, $f0, $f1
+; CHECK-NEXT: fsel $f0, $f1, $f0, $fcc0
+; CHECK-NEXT: jr $ra
+ %cond = fcmp olt float %a, %b
+ %ret = select i1 %cond, float %a, float %b
+ ret float %ret
+}
+
+define float @ogt_f32(float %a, float %b) {
+; CHECK-LABEL: ogt_f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fcmp.cule.s $fcc0, $f0, $f1
+; CHECK-NEXT: fsel $f0, $f0, $f1, $fcc0
+; CHECK-NEXT: jr $ra
+ %cond = fcmp ogt float %a, %b
+ %ret = select i1 %cond, float %a, float %b
+ ret float %ret
+}
diff --git a/llvm/test/CodeGen/LoongArch/imm.ll b/llvm/test/CodeGen/LoongArch/imm.ll
deleted file mode 100644
index fb0dcf21f231..000000000000
--- a/llvm/test/CodeGen/LoongArch/imm.ll
+++ /dev/null
@@ -1,165 +0,0 @@
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
-
-define i64 @imm0() {
-; CHECK-LABEL: imm0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: move $a0, $zero
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 0
-}
-
-define i64 @imm7ff0000000000000() {
-; CHECK-LABEL: imm7ff0000000000000:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu52i.d $a0, $zero, 2047
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 9218868437227405312
-}
-
-define i64 @imm0000000000000fff() {
-; CHECK-LABEL: imm0000000000000fff:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ori $a0, $zero, 4095
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 4095
-}
-
-define i64 @imm0007ffff00000800() {
-; CHECK-LABEL: imm0007ffff00000800:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ori $a0, $zero, 2048
-; CHECK-NEXT: lu32i.d $a0, 524287
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 2251795518720000
-}
-
-define i64 @immfff0000000000fff() {
-; CHECK-LABEL: immfff0000000000fff:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ori $a0, $zero, 4095
-; CHECK-NEXT: lu52i.d $a0, $a0, -1
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 -4503599627366401
-}
-
-define i64 @imm0008000000000fff() {
-; CHECK-LABEL: imm0008000000000fff:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ori $a0, $zero, 4095
-; CHECK-NEXT: lu32i.d $a0, -524288
-; CHECK-NEXT: lu52i.d $a0, $a0, 0
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 2251799813689343
-}
-
-define i64 @immfffffffffffff800() {
-; CHECK-LABEL: immfffffffffffff800:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi.w $a0, $zero, -2048
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 -2048
-}
-
-define i64 @imm00000000fffff800() {
-; CHECK-LABEL: imm00000000fffff800:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi.w $a0, $zero, -2048
-; CHECK-NEXT: lu32i.d $a0, 0
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 4294965248
-}
-
-define i64 @imm000ffffffffff800() {
-; CHECK-LABEL: imm000ffffffffff800:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi.w $a0, $zero, -2048
-; CHECK-NEXT: lu52i.d $a0, $a0, 0
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 4503599627368448
-}
-
-define i64 @imm00080000fffff800() {
-; CHECK-LABEL: imm00080000fffff800:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi.w $a0, $zero, -2048
-; CHECK-NEXT: lu32i.d $a0, -524288
-; CHECK-NEXT: lu52i.d $a0, $a0, 0
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 2251804108650496
-}
-
-define i64 @imm000000007ffff000() {
-; CHECK-LABEL: imm000000007ffff000:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, 524287
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 2147479552
-}
-
-define i64 @imm0000000080000000() {
-; CHECK-LABEL: imm0000000080000000:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, -524288
-; CHECK-NEXT: lu32i.d $a0, 0
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 2147483648
-}
-
-define i64 @imm000ffffffffff000() {
-; CHECK-LABEL: imm000ffffffffff000:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, -1
-; CHECK-NEXT: lu52i.d $a0, $a0, 0
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 4503599627366400
-}
-
-define i64 @imm7ff0000080000000() {
-; CHECK-LABEL: imm7ff0000080000000:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, -524288
-; CHECK-NEXT: lu32i.d $a0, 0
-; CHECK-NEXT: lu52i.d $a0, $a0, 2047
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 9218868439374888960
-}
-
-define i64 @immffffffff80000800() {
-; CHECK-LABEL: immffffffff80000800:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, -524288
-; CHECK-NEXT: ori $a0, $a0, 2048
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 -2147481600
-}
-
-define i64 @immffffffff7ffff800() {
-; CHECK-LABEL: immffffffff7ffff800:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, 524287
-; CHECK-NEXT: ori $a0, $a0, 2048
-; CHECK-NEXT: lu32i.d $a0, -1
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 -2147485696
-}
-
-define i64 @imm7fffffff800007ff() {
-; CHECK-LABEL: imm7fffffff800007ff:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, -524288
-; CHECK-NEXT: ori $a0, $a0, 2047
-; CHECK-NEXT: lu52i.d $a0, $a0, 2047
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 9223372034707294207
-}
-
-define i64 @imm0008000080000800() {
-; CHECK-LABEL: imm0008000080000800:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lu12i.w $a0, -524288
-; CHECK-NEXT: ori $a0, $a0, 2048
-; CHECK-NEXT: lu32i.d $a0, -524288
-; CHECK-NEXT: lu52i.d $a0, $a0, 0
-; CHECK-NEXT: jirl $zero, $ra, 0
- ret i64 2251801961170944
-}
diff --git a/llvm/test/CodeGen/LoongArch/immediate.ll b/llvm/test/CodeGen/LoongArch/immediate.ll
new file mode 100644
index 000000000000..1de3ef0fc9f1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/immediate.ll
@@ -0,0 +1,2542 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=loongarch64 < %s | FileCheck %s
+define i64 @li0000000000000000() {
+; CHECK-LABEL: li0000000000000000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r4, $zero, 0
+; CHECK-NEXT: jr $ra
+ ret i64 0
+}
+
+define i64 @li00000000000007ff() {
+; CHECK-LABEL: li00000000000007ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $r4, $zero, 2047
+; CHECK-NEXT: jr $ra
+ ret i64 2047
+}
+
+define i64 @li0000000000000800() {
+; CHECK-LABEL: li0000000000000800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 2048
+; CHECK-NEXT: jr $ra
+ ret i64 2048
+}
+
+define i64 @li0000000000000fff() {
+; CHECK-LABEL: li0000000000000fff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 4095
+; CHECK-NEXT: jr $ra
+ ret i64 4095
+}
+
+define i64 @li000000007ffff000() {
+; CHECK-LABEL: li000000007ffff000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2147479552
+}
+
+define i64 @li000000007ffff7ff() {
+; CHECK-LABEL: li000000007ffff7ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: jr $ra
+ ret i64 2147481599
+}
+
+define i64 @li000000007ffff800() {
+; CHECK-LABEL: li000000007ffff800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: jr $ra
+ ret i64 2147481600
+}
+
+define i64 @li000000007fffffff() {
+; CHECK-LABEL: li000000007fffffff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: jr $ra
+ ret i64 2147483647
+}
+
+define i64 @li0000000080000000() {
+; CHECK-LABEL: li0000000080000000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2147483648
+}
+
+define i64 @li00000000800007ff() {
+; CHECK-LABEL: li00000000800007ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2147485695
+}
+
+define i64 @li0000000080000800() {
+; CHECK-LABEL: li0000000080000800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2147485696
+}
+
+define i64 @li0000000080000fff() {
+; CHECK-LABEL: li0000000080000fff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2147487743
+}
+
+define i64 @li00000000fffff000() {
+; CHECK-LABEL: li00000000fffff000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4294963200
+}
+
+define i64 @li00000000fffff7ff() {
+; CHECK-LABEL: li00000000fffff7ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4294965247
+}
+
+define i64 @li00000000fffff800() {
+; CHECK-LABEL: li00000000fffff800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -2048
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4294965248
+}
+
+define i64 @li00000000ffffffff() {
+; CHECK-LABEL: li00000000ffffffff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -1
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4294967295
+}
+
+define i64 @li0007ffff00000000() {
+; CHECK-LABEL: li0007ffff00000000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 0
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251795518717952
+}
+
+define i64 @li0007ffff000007ff() {
+; CHECK-LABEL: li0007ffff000007ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 2047
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251795518719999
+}
+
+define i64 @li0007ffff00000800() {
+; CHECK-LABEL: li0007ffff00000800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 2048
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251795518720000
+}
+
+define i64 @li0007ffff00000fff() {
+; CHECK-LABEL: li0007ffff00000fff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 4095
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251795518722047
+}
+
+define i64 @li0007ffff7ffff000() {
+; CHECK-LABEL: li0007ffff7ffff000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251797666197504
+}
+
+define i64 @li0007ffff7ffff7ff() {
+; CHECK-LABEL: li0007ffff7ffff7ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251797666199551
+}
+
+define i64 @li0007ffff7ffff800() {
+; CHECK-LABEL: li0007ffff7ffff800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251797666199552
+}
+
+define i64 @li0007ffff7fffffff() {
+; CHECK-LABEL: li0007ffff7fffffff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251797666201599
+}
+
+define i64 @li0007ffff80000000() {
+; CHECK-LABEL: li0007ffff80000000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251797666201600
+}
+
+define i64 @li0007ffff800007ff() {
+; CHECK-LABEL: li0007ffff800007ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251797666203647
+}
+
+define i64 @li0007ffff80000800() {
+; CHECK-LABEL: li0007ffff80000800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251797666203648
+}
+
+define i64 @li0007ffff80000fff() {
+; CHECK-LABEL: li0007ffff80000fff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251797666205695
+}
+
+define i64 @li0007fffffffff000() {
+; CHECK-LABEL: li0007fffffffff000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251799813681152
+}
+
+define i64 @li0007fffffffff7ff() {
+; CHECK-LABEL: li0007fffffffff7ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251799813683199
+}
+
+define i64 @li0007fffffffff800() {
+; CHECK-LABEL: li0007fffffffff800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -2048
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251799813683200
+}
+
+define i64 @li0007ffffffffffff() {
+; CHECK-LABEL: li0007ffffffffffff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -1
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: jr $ra
+ ret i64 2251799813685247
+}
+
+define i64 @li0008000000000000() {
+; CHECK-LABEL: li0008000000000000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 0
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251799813685248
+}
+
+define i64 @li00080000000007ff() {
+; CHECK-LABEL: li00080000000007ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 2047
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251799813687295
+}
+
+define i64 @li0008000000000800() {
+; CHECK-LABEL: li0008000000000800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 2048
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251799813687296
+}
+
+define i64 @li0008000000000fff() {
+; CHECK-LABEL: li0008000000000fff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 4095
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251799813689343
+}
+
+define i64 @li000800007ffff000() {
+; CHECK-LABEL: li000800007ffff000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251801961164800
+}
+
+define i64 @li000800007ffff7ff() {
+; CHECK-LABEL: li000800007ffff7ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251801961166847
+}
+
+define i64 @li000800007ffff800() {
+; CHECK-LABEL: li000800007ffff800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251801961166848
+}
+
+define i64 @li000800007fffffff() {
+; CHECK-LABEL: li000800007fffffff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251801961168895
+}
+
+define i64 @li0008000080000000() {
+; CHECK-LABEL: li0008000080000000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251801961168896
+}
+
+define i64 @li00080000800007ff() {
+; CHECK-LABEL: li00080000800007ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251801961170943
+}
+
+define i64 @li0008000080000800() {
+; CHECK-LABEL: li0008000080000800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251801961170944
+}
+
+define i64 @li0008000080000fff() {
+; CHECK-LABEL: li0008000080000fff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251801961172991
+}
+
+define i64 @li00080000fffff000() {
+; CHECK-LABEL: li00080000fffff000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251804108648448
+}
+
+define i64 @li00080000fffff7ff() {
+; CHECK-LABEL: li00080000fffff7ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251804108650495
+}
+
+define i64 @li00080000fffff800() {
+; CHECK-LABEL: li00080000fffff800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -2048
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251804108650496
+}
+
+define i64 @li00080000ffffffff() {
+; CHECK-LABEL: li00080000ffffffff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -1
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 2251804108652543
+}
+
+define i64 @li000fffff00000000() {
+; CHECK-LABEL: li000fffff00000000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 0
+; CHECK-NEXT: lu32i.d $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503595332403200
+}
+
+define i64 @li000fffff000007ff() {
+; CHECK-LABEL: li000fffff000007ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 2047
+; CHECK-NEXT: lu32i.d $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503595332405247
+}
+
+define i64 @li000fffff00000800() {
+; CHECK-LABEL: li000fffff00000800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 2048
+; CHECK-NEXT: lu32i.d $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503595332405248
+}
+
+define i64 @li000fffff00000fff() {
+; CHECK-LABEL: li000fffff00000fff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 4095
+; CHECK-NEXT: lu32i.d $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503595332407295
+}
+
+define i64 @li000fffff7ffff000() {
+; CHECK-LABEL: li000fffff7ffff000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: lu32i.d $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503597479882752
+}
+
+define i64 @li000fffff7ffff7ff() {
+; CHECK-LABEL: li000fffff7ffff7ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu32i.d $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503597479884799
+}
+
+define i64 @li000fffff7ffff800() {
+; CHECK-LABEL: li000fffff7ffff800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: lu32i.d $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503597479884800
+}
+
+define i64 @li000fffff7fffffff() {
+; CHECK-LABEL: li000fffff7fffffff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: lu32i.d $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503597479886847
+}
+
+define i64 @li000fffff80000000() {
+; CHECK-LABEL: li000fffff80000000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503597479886848
+}
+
+define i64 @li000fffff800007ff() {
+; CHECK-LABEL: li000fffff800007ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503597479888895
+}
+
+define i64 @li000fffff80000800() {
+; CHECK-LABEL: li000fffff80000800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503597479888896
+}
+
+define i64 @li000fffff80000fff() {
+; CHECK-LABEL: li000fffff80000fff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 4095
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503597479890943
+}
+
+define i64 @li000ffffffffff000() {
+; CHECK-LABEL: li000ffffffffff000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503599627366400
+}
+
+define i64 @li000ffffffffff7ff() {
+; CHECK-LABEL: li000ffffffffff7ff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503599627368447
+}
+
+define i64 @li000ffffffffff800() {
+; CHECK-LABEL: li000ffffffffff800:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -2048
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503599627368448
+}
+
+define i64 @li000fffffffffffff() {
+; CHECK-LABEL: li000fffffffffffff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: jr $ra
+ ret i64 4503599627370495
+}
+
+define i64 @li7ff0000000000000() {
+; CHECK-LABEL: li7ff0000000000000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu52i.d $r4, $zero, 2047
+; CHECK-NEXT: jr $ra
+ ret i64 9218868437227405312
+}
+
+define i64 @li7ff00000000007ff() {
+; CHECK-LABEL: li7ff00000000007ff:
+; CHECK: 
# %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868437227407359 +} + +define i64 @li7ff0000000000800() { +; CHECK-LABEL: li7ff0000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868437227407360 +} + +define i64 @li7ff0000000000fff() { +; CHECK-LABEL: li7ff0000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868437227409407 +} + +define i64 @li7ff000007ffff000() { +; CHECK-LABEL: li7ff000007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374884864 +} + +define i64 @li7ff000007ffff7ff() { +; CHECK-LABEL: li7ff000007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374886911 +} + +define i64 @li7ff000007ffff800() { +; CHECK-LABEL: li7ff000007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374886912 +} + +define i64 @li7ff000007fffffff() { +; CHECK-LABEL: li7ff000007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374888959 +} + +define i64 @li7ff0000080000000() { +; CHECK-LABEL: li7ff0000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374888960 +} + +define i64 @li7ff00000800007ff() { +; CHECK-LABEL: li7ff00000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374891007 +} + +define i64 @li7ff0000080000800() { +; CHECK-LABEL: li7ff0000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374891008 +} + +define i64 @li7ff0000080000fff() { +; CHECK-LABEL: li7ff0000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868439374893055 +} + +define i64 @li7ff00000fffff000() { +; CHECK-LABEL: li7ff00000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868441522368512 +} + +define i64 @li7ff00000fffff7ff() { +; CHECK-LABEL: li7ff00000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868441522370559 +} + +define i64 @li7ff00000fffff800() { +; CHECK-LABEL: li7ff00000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868441522370560 +} + +define i64 @li7ff00000ffffffff() { +; CHECK-LABEL: li7ff00000ffffffff: +; CHECK: # %bb.0: 
+; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9218868441522372607 +} + +define i64 @li7ff7ffff00000000() { +; CHECK-LABEL: li7ff7ffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120232746123264 +} + +define i64 @li7ff7ffff000007ff() { +; CHECK-LABEL: li7ff7ffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120232746125311 +} + +define i64 @li7ff7ffff00000800() { +; CHECK-LABEL: li7ff7ffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120232746125312 +} + +define i64 @li7ff7ffff00000fff() { +; CHECK-LABEL: li7ff7ffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120232746127359 +} + +define i64 @li7ff7ffff7ffff000() { +; CHECK-LABEL: li7ff7ffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893602816 +} + +define i64 @li7ff7ffff7ffff7ff() { +; CHECK-LABEL: li7ff7ffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893604863 +} + +define i64 @li7ff7ffff7ffff800() { +; CHECK-LABEL: li7ff7ffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893604864 +} + +define i64 @li7ff7ffff7fffffff() { +; CHECK-LABEL: li7ff7ffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893606911 +} + +define i64 @li7ff7ffff80000000() { +; CHECK-LABEL: li7ff7ffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893606912 +} + +define i64 @li7ff7ffff800007ff() { +; CHECK-LABEL: li7ff7ffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893608959 +} + +define i64 @li7ff7ffff80000800() { +; CHECK-LABEL: li7ff7ffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893608960 +} + +define i64 @li7ff7ffff80000fff() { +; CHECK-LABEL: li7ff7ffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120234893611007 +} + +define i64 @li7ff7fffffffff000() { +; CHECK-LABEL: li7ff7fffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: 
lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041086464 +} + +define i64 @li7ff7fffffffff7ff() { +; CHECK-LABEL: li7ff7fffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041088511 +} + +define i64 @li7ff7fffffffff800() { +; CHECK-LABEL: li7ff7fffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041088512 +} + +define i64 @li7ff7ffffffffffff() { +; CHECK-LABEL: li7ff7ffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041090559 +} + +define i64 @li7ff8000000000000() { +; CHECK-LABEL: li7ff8000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041090560 +} + +define i64 @li7ff80000000007ff() { +; CHECK-LABEL: li7ff80000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041092607 +} + +define i64 @li7ff8000000000800() { +; CHECK-LABEL: li7ff8000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041092608 +} + +define i64 @li7ff8000000000fff() { +; CHECK-LABEL: li7ff8000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120237041094655 +} + +define i64 @li7ff800007ffff000() { +; CHECK-LABEL: li7ff800007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188570112 +} + +define i64 @li7ff800007ffff7ff() { +; CHECK-LABEL: li7ff800007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188572159 +} + +define i64 @li7ff800007ffff800() { +; CHECK-LABEL: li7ff800007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188572160 +} + +define i64 @li7ff800007fffffff() { +; CHECK-LABEL: li7ff800007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188574207 +} + +define i64 @li7ff8000080000000() { +; CHECK-LABEL: li7ff8000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188574208 +} + +define i64 @li7ff80000800007ff() { +; CHECK-LABEL: li7ff80000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d 
$r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188576255 +} + +define i64 @li7ff8000080000800() { +; CHECK-LABEL: li7ff8000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188576256 +} + +define i64 @li7ff8000080000fff() { +; CHECK-LABEL: li7ff8000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120239188578303 +} + +define i64 @li7ff80000fffff000() { +; CHECK-LABEL: li7ff80000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120241336053760 +} + +define i64 @li7ff80000fffff7ff() { +; CHECK-LABEL: li7ff80000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120241336055807 +} + +define i64 @li7ff80000fffff800() { +; CHECK-LABEL: li7ff80000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120241336055808 +} + +define i64 @li7ff80000ffffffff() { +; CHECK-LABEL: li7ff80000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9221120241336057855 +} + +define i64 @li7fffffff00000000() { +; CHECK-LABEL: li7fffffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372032559808512 +} + +define i64 @li7fffffff000007ff() { +; CHECK-LABEL: li7fffffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372032559810559 +} + +define i64 @li7fffffff00000800() { +; CHECK-LABEL: li7fffffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372032559810560 +} + +define i64 @li7fffffff00000fff() { +; CHECK-LABEL: li7fffffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372032559812607 +} + +define i64 @li7fffffff7ffff000() { +; CHECK-LABEL: li7fffffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707288064 +} + +define i64 @li7fffffff7ffff7ff() { +; CHECK-LABEL: li7fffffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707290111 +} + +define i64 @li7fffffff7ffff800() { +; CHECK-LABEL: li7fffffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + 
ret i64 9223372034707290112 +} + +define i64 @li7fffffff7fffffff() { +; CHECK-LABEL: li7fffffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707292159 +} + +define i64 @li7fffffff80000000() { +; CHECK-LABEL: li7fffffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707292160 +} + +define i64 @li7fffffff800007ff() { +; CHECK-LABEL: li7fffffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707294207 +} + +define i64 @li7fffffff80000800() { +; CHECK-LABEL: li7fffffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707294208 +} + +define i64 @li7fffffff80000fff() { +; CHECK-LABEL: li7fffffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372034707296255 +} + +define i64 @li7ffffffffffff000() { +; CHECK-LABEL: li7ffffffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372036854771712 +} + +define i64 @li7ffffffffffff7ff() { +; CHECK-LABEL: li7ffffffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372036854773759 +} + +define i64 @li7ffffffffffff800() { +; CHECK-LABEL: li7ffffffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372036854773760 +} + +define i64 @li7fffffffffffffff() { +; CHECK-LABEL: li7fffffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 9223372036854775807 +} + +define i64 @li8000000000000000() { +; CHECK-LABEL: li8000000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu52i.d $r4, $zero, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372036854775808 +} + +define i64 @li80000000000007ff() { +; CHECK-LABEL: li80000000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372036854773761 +} + +define i64 @li8000000000000800() { +; CHECK-LABEL: li8000000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372036854773760 +} + +define i64 @li8000000000000fff() { +; CHECK-LABEL: li8000000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372036854771713 +} + +define i64 @li800000007ffff000() { +; CHECK-LABEL: li800000007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707296256 +} + +define i64 @li800000007ffff7ff() { +; CHECK-LABEL: li800000007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 
-9223372034707294209 +} + +define i64 @li800000007ffff800() { +; CHECK-LABEL: li800000007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707294208 +} + +define i64 @li800000007fffffff() { +; CHECK-LABEL: li800000007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707292161 +} + +define i64 @li8000000080000000() { +; CHECK-LABEL: li8000000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707292160 +} + +define i64 @li80000000800007ff() { +; CHECK-LABEL: li80000000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707290113 +} + +define i64 @li8000000080000800() { +; CHECK-LABEL: li8000000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707290112 +} + +define i64 @li8000000080000fff() { +; CHECK-LABEL: li8000000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372034707288065 +} + +define i64 @li80000000fffff000() { +; CHECK-LABEL: li80000000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372032559812608 +} + +define i64 @li80000000fffff7ff() { +; CHECK-LABEL: li80000000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372032559810561 +} + +define i64 @li80000000fffff800() { +; CHECK-LABEL: li80000000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372032559810560 +} + +define i64 @li80000000ffffffff() { +; CHECK-LABEL: li80000000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9223372032559808513 +} + +define i64 @li8007ffff00000000() { +; CHECK-LABEL: li8007ffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120241336057856 +} + +define i64 @li8007ffff000007ff() { +; CHECK-LABEL: li8007ffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120241336055809 +} + +define i64 @li8007ffff00000800() { +; CHECK-LABEL: li8007ffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120241336055808 +} + +define i64 @li8007ffff00000fff() { +; CHECK-LABEL: li8007ffff00000fff: +; 
CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120241336053761 +} + +define i64 @li8007ffff7ffff000() { +; CHECK-LABEL: li8007ffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188578304 +} + +define i64 @li8007ffff7ffff7ff() { +; CHECK-LABEL: li8007ffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188576257 +} + +define i64 @li8007ffff7ffff800() { +; CHECK-LABEL: li8007ffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188576256 +} + +define i64 @li8007ffff7fffffff() { +; CHECK-LABEL: li8007ffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188574209 +} + +define i64 @li8007ffff80000000() { +; CHECK-LABEL: li8007ffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188574208 +} + +define i64 @li8007ffff800007ff() { +; CHECK-LABEL: li8007ffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188572161 +} + +define i64 @li8007ffff80000800() { +; CHECK-LABEL: li8007ffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188572160 +} + +define i64 @li8007ffff80000fff() { +; CHECK-LABEL: li8007ffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120239188570113 +} + +define i64 @li8007fffffffff000() { +; CHECK-LABEL: li8007fffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041094656 +} + +define i64 @li8007fffffffff7ff() { +; CHECK-LABEL: li8007fffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041092609 +} + +define i64 @li8007fffffffff800() { +; CHECK-LABEL: li8007fffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041092608 +} + +define i64 @li8007ffffffffffff() { +; CHECK-LABEL: li8007ffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041090561 +} + +define i64 @li8008000000000000() 
{ +; CHECK-LABEL: li8008000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041090560 +} + +define i64 @li80080000000007ff() { +; CHECK-LABEL: li80080000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041088513 +} + +define i64 @li8008000000000800() { +; CHECK-LABEL: li8008000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041088512 +} + +define i64 @li8008000000000fff() { +; CHECK-LABEL: li8008000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120237041086465 +} + +define i64 @li800800007ffff000() { +; CHECK-LABEL: li800800007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893611008 +} + +define i64 @li800800007ffff7ff() { +; CHECK-LABEL: li800800007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893608961 +} + +define i64 @li800800007ffff800() { +; CHECK-LABEL: li800800007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893608960 +} + +define i64 @li800800007fffffff() { +; CHECK-LABEL: li800800007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893606913 +} + +define i64 @li8008000080000000() { +; CHECK-LABEL: li8008000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893606912 +} + +define i64 @li80080000800007ff() { +; CHECK-LABEL: li80080000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893604865 +} + +define i64 @li8008000080000800() { +; CHECK-LABEL: li8008000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893604864 +} + +define i64 @li8008000080000fff() { +; CHECK-LABEL: li8008000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120234893602817 +} + +define i64 @li80080000fffff000() { +; CHECK-LABEL: li80080000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120232746127360 +} + +define i64 
@li80080000fffff7ff() { +; CHECK-LABEL: li80080000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120232746125313 +} + +define i64 @li80080000fffff800() { +; CHECK-LABEL: li80080000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120232746125312 +} + +define i64 @li80080000ffffffff() { +; CHECK-LABEL: li80080000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9221120232746123265 +} + +define i64 @li800fffff00000000() { +; CHECK-LABEL: li800fffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868441522372608 +} + +define i64 @li800fffff000007ff() { +; CHECK-LABEL: li800fffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868441522370561 +} + +define i64 @li800fffff00000800() { +; CHECK-LABEL: li800fffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868441522370560 +} + +define i64 @li800fffff00000fff() { +; CHECK-LABEL: li800fffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868441522368513 +} + +define i64 @li800fffff7ffff000() { +; CHECK-LABEL: li800fffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374893056 +} + +define i64 @li800fffff7ffff7ff() { +; CHECK-LABEL: li800fffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374891009 +} + +define i64 @li800fffff7ffff800() { +; CHECK-LABEL: li800fffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374891008 +} + +define i64 @li800fffff7fffffff() { +; CHECK-LABEL: li800fffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374888961 +} + +define i64 @li800fffff80000000() { +; CHECK-LABEL: li800fffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374888960 +} + +define i64 @li800fffff800007ff() { +; CHECK-LABEL: li800fffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374886913 +} + +define i64 @li800fffff80000800() { +; CHECK-LABEL: li800fffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; 
CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374886912 +} + +define i64 @li800fffff80000fff() { +; CHECK-LABEL: li800fffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868439374884865 +} + +define i64 @li800ffffffffff000() { +; CHECK-LABEL: li800ffffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868437227409408 +} + +define i64 @li800ffffffffff7ff() { +; CHECK-LABEL: li800ffffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868437227407361 +} + +define i64 @li800ffffffffff800() { +; CHECK-LABEL: li800ffffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868437227407360 +} + +define i64 @li800fffffffffffff() { +; CHECK-LABEL: li800fffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, -2048 +; CHECK-NEXT: jr $ra + ret i64 -9218868437227405313 +} + +define i64 @lifff0000000000000() { +; CHECK-LABEL: lifff0000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu52i.d $r4, $zero, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503599627370496 +} + +define i64 @lifff00000000007ff() { +; CHECK-LABEL: lifff00000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503599627368449 +} + +define i64 @lifff0000000000800() { +; CHECK-LABEL: lifff0000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503599627368448 +} + +define i64 @lifff0000000000fff() { +; CHECK-LABEL: lifff0000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503599627366401 +} + +define i64 @lifff000007ffff000() { +; CHECK-LABEL: lifff000007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479890944 +} + +define i64 @lifff000007ffff7ff() { +; CHECK-LABEL: lifff000007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479888897 +} + +define i64 @lifff000007ffff800() { +; CHECK-LABEL: lifff000007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479888896 +} + +define i64 @lifff000007fffffff() { +; CHECK-LABEL: lifff000007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479886849 +} + +define i64 @lifff0000080000000() { +; CHECK-LABEL: lifff0000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479886848 +} + +define i64 @lifff00000800007ff() { +; CHECK-LABEL: lifff00000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d 
$r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479884801 +} + +define i64 @lifff0000080000800() { +; CHECK-LABEL: lifff0000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479884800 +} + +define i64 @lifff0000080000fff() { +; CHECK-LABEL: lifff0000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503597479882753 +} + +define i64 @lifff00000fffff000() { +; CHECK-LABEL: lifff00000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503595332407296 +} + +define i64 @lifff00000fffff7ff() { +; CHECK-LABEL: lifff00000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503595332405249 +} + +define i64 @lifff00000fffff800() { +; CHECK-LABEL: lifff00000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503595332405248 +} + +define i64 @lifff00000ffffffff() { +; CHECK-LABEL: lifff00000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4503595332403201 +} + +define i64 @lifff7ffff00000000() { +; CHECK-LABEL: lifff7ffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251804108652544 +} + +define i64 @lifff7ffff000007ff() { +; CHECK-LABEL: lifff7ffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251804108650497 +} + +define i64 @lifff7ffff00000800() { +; CHECK-LABEL: lifff7ffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251804108650496 +} + +define i64 @lifff7ffff00000fff() { +; CHECK-LABEL: lifff7ffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251804108648449 +} + +define i64 @lifff7ffff7ffff000() { +; CHECK-LABEL: lifff7ffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961172992 +} + +define i64 @lifff7ffff7ffff7ff() { +; CHECK-LABEL: lifff7ffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961170945 +} + +define i64 @lifff7ffff7ffff800() { +; CHECK-LABEL: lifff7ffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961170944 +} + +define i64 @lifff7ffff7fffffff() { +; 
CHECK-LABEL: lifff7ffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961168897 +} + +define i64 @lifff7ffff80000000() { +; CHECK-LABEL: lifff7ffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961168896 +} + +define i64 @lifff7ffff800007ff() { +; CHECK-LABEL: lifff7ffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961166849 +} + +define i64 @lifff7ffff80000800() { +; CHECK-LABEL: lifff7ffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961166848 +} + +define i64 @lifff7ffff80000fff() { +; CHECK-LABEL: lifff7ffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251801961164801 +} + +define i64 @lifff7fffffffff000() { +; CHECK-LABEL: lifff7fffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251799813689344 +} + +define i64 @lifff7fffffffff7ff() { +; CHECK-LABEL: lifff7fffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251799813687297 +} + +define i64 @lifff7fffffffff800() { +; CHECK-LABEL: lifff7fffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251799813687296 +} + +define i64 @lifff7ffffffffffff() { +; CHECK-LABEL: lifff7ffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2251799813685249 +} + +define i64 @lifff8000000000000() { +; CHECK-LABEL: lifff8000000000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251799813685248 +} + +define i64 @lifff80000000007ff() { +; CHECK-LABEL: lifff80000000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251799813683201 +} + +define i64 @lifff8000000000800() { +; CHECK-LABEL: lifff8000000000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251799813683200 +} + +define i64 @lifff8000000000fff() { +; CHECK-LABEL: lifff8000000000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251799813681153 +} + +define i64 @lifff800007ffff000() { +; CHECK-LABEL: lifff800007ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666205696 +} + +define i64 @lifff800007ffff7ff() { +; CHECK-LABEL: 
lifff800007ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666203649 +} + +define i64 @lifff800007ffff800() { +; CHECK-LABEL: lifff800007ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666203648 +} + +define i64 @lifff800007fffffff() { +; CHECK-LABEL: lifff800007fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666201601 +} + +define i64 @lifff8000080000000() { +; CHECK-LABEL: lifff8000080000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666201600 +} + +define i64 @lifff80000800007ff() { +; CHECK-LABEL: lifff80000800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666199553 +} + +define i64 @lifff8000080000800() { +; CHECK-LABEL: lifff8000080000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666199552 +} + +define i64 @lifff8000080000fff() { +; CHECK-LABEL: lifff8000080000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251797666197505 +} + +define i64 @lifff80000fffff000() { +; CHECK-LABEL: lifff80000fffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251795518722048 +} + +define i64 @lifff80000fffff7ff() { +; CHECK-LABEL: lifff80000fffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251795518720001 +} + +define i64 @lifff80000fffff800() { +; CHECK-LABEL: lifff80000fffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251795518720000 +} + +define i64 @lifff80000ffffffff() { +; CHECK-LABEL: lifff80000ffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2251795518717953 +} + +define i64 @liffffffff00000000() { +; CHECK-LABEL: liffffffff00000000: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4294967296 +} + +define i64 @liffffffff000007ff() { +; CHECK-LABEL: liffffffff000007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4294965249 +} + +define i64 @liffffffff00000800() { +; CHECK-LABEL: liffffffff00000800: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4294965248 +} + +define i64 @liffffffff00000fff() { +; CHECK-LABEL: liffffffff00000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4294963201 +} + +define i64 @liffffffff7ffff000() { +; CHECK-LABEL: liffffffff7ffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: lu32i.d $r4, 
-1 +; CHECK-NEXT: jr $ra + ret i64 -2147487744 +} + +define i64 @liffffffff7ffff7ff() { +; CHECK-LABEL: liffffffff7ffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2147485697 +} + +define i64 @liffffffff7ffff800() { +; CHECK-LABEL: liffffffff7ffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2147485696 +} + +define i64 @liffffffff7fffffff() { +; CHECK-LABEL: liffffffff7fffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -2147483649 +} + +define i64 @liffffffff80000000() { +; CHECK-LABEL: liffffffff80000000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: jr $ra + ret i64 -2147483648 +} + +define i64 @liffffffff800007ff() { +; CHECK-LABEL: liffffffff800007ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 -2147481601 +} + +define i64 @liffffffff80000800() { +; CHECK-LABEL: liffffffff80000800: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: jr $ra + ret i64 -2147481600 +} + +define i64 @liffffffff80000fff() { +; CHECK-LABEL: liffffffff80000fff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 4095 +; CHECK-NEXT: jr $ra + ret i64 -2147479553 +} + +define i64 @lifffffffffffff000() { +; CHECK-LABEL: lifffffffffffff000: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: jr $ra + ret i64 -4096 +} + +define i64 @lifffffffffffff7ff() { +; CHECK-LABEL: lifffffffffffff7ff: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: jr $ra + ret i64 -2049 +} + +define i64 @lifffffffffffff800() { +; CHECK-LABEL: lifffffffffffff800: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r4, $zero, -2048 +; CHECK-NEXT: jr $ra + ret i64 -2048 +} + +define i64 @liffffffffffffffff() { +; CHECK-LABEL: liffffffffffffffff: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $r4, $zero, -1 +; CHECK-NEXT: jr $ra + ret i64 -1 +} diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/extra-code.ll b/llvm/test/CodeGen/LoongArch/inlineasm/extra-code.ll new file mode 100644 index 000000000000..986e27e2aab1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inlineasm/extra-code.ll @@ -0,0 +1,8 @@ +; RUN: llc -march=loongarch64 -no-integrated-as -o - %s | FileCheck %s + +define i64 @test(i64 %a) { +; CHECK: add.d $r4, $r4, $r0 +entry: + %0 = tail call i64 asm sideeffect "add.d $0, $1, ${2:z} \0A", "=r,r,Jr"(i64 %a, i64 0) + ret i64 %0 +} diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll b/llvm/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll new file mode 100644 index 000000000000..94e3306732d9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inlineasm/floating-point-in-gpr.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=loongarch64 -target-abi=lp64 -o - %s 2>&1 | FileCheck %s + +;; Test that floating-point bits can be stored in GPR. 
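+;; An 'r' (GPR) constraint on a float or double operand is expected to be satisfied
+;; by bitcasting the value out of the FPR with movfr2gr.s/movfr2gr.d rather than by
+;; a stack round-trip; the checks below pin down that instruction choice.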
+ +define void @reg_float(float %x) { +; CHECK-LABEL: reg_float: +; CHECK: movfr2gr.s $r{{[0-9]+}}, $f0 + call void asm "", "r"(float %x) + ret void +} + +define void @r10_float(float %x) { +; CHECK-LABEL: r10_float: +; CHECK: movfr2gr.s $r10, $f0 + call void asm "", "{$r10}"(float %x) + ret void +} + +define void @reg_double(double %x) { +; CHECK-LABEL: reg_double: +; CHECK: movfr2gr.d $r{{[0-9]+}}, $f0 + call void asm "", "r"(double %x) + ret void +} + +define void @r10_double(double %x) { +; CHECK-LABEL: r10_double: +; CHECK: movfr2gr.d $r10, $f0 + call void asm "", "{$r10}"(double %x) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll b/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll new file mode 100644 index 000000000000..7f58ea2ee98a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers-error.ll @@ -0,0 +1,8 @@ +; RUN: not llc -march=loongarch64 %s 2>&1 | FileCheck %s + +define void @test_i128() { +; CHECK: error: couldn't allocate input reg for constraint '{$r20}' +start: + call void asm "", "{$r20}"(i128 5) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll b/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll new file mode 100644 index 000000000000..d18a184ab1e3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inlineasm/non-native-value-type-registers.ll @@ -0,0 +1,42 @@ +; RUN: llc -march=loongarch64 -o - %s 2>&1 | FileCheck %s + +;; Test that non-native value types can be parsed. + +define void @test_i1() { +; CHECK-LABEL: test_i1: +; CHECK: ori $r6, $zero, 0 +; CHECK: jr $ra +start: + call void asm "", "{$r6}"(i1 0) + ret void +} + +;; Note: non-simple values like `i3` are only allowed in newer LLVM versions (>= 12). +;; In older LLVM versions (<= 11), SelectionDAGBuilder::visitInlineAsm asserts that simple +;; values must be used. For details, please see https://reviews.llvm.org/D91710.
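+;; Each narrow input below ends up in a full 64-bit GPR, so the only code to match
+;; is the plain ori that materializes the constant into the requested register.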
+define void @test_i3() { +; CHECK-LABEL: test_i3: +; CHECK: ori $r7, $zero, 0 +; CHECK: jr $ra +start: + call void asm "", "{$r7}"(i3 0) + ret void +} + +define void @test_i8() { +; CHECK-LABEL: test_i8: +; CHECK: ori $r5, $zero, 0 +; CHECK: jr $ra +start: + call void asm "", "{$r5}"(i8 0) + ret void +} + +define void @test_i16() { +; CHECK-LABEL: test_i16: +; CHECK: ori $r20, $zero, 5 +; CHECK: jr $ra +start: + call void asm "", "{$r20}"(i16 5) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/inlineasm/preld.ll b/llvm/test/CodeGen/LoongArch/inlineasm/preld.ll new file mode 100644 index 000000000000..8dbbed99f16f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/inlineasm/preld.ll @@ -0,0 +1,8 @@ +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define void @preld(i32* %p) { +entry: + ; CHECK: preld 10, $r4, 23 + tail call void asm sideeffect "preld 10, $0, 23 \0A\09", "r"(i32* %p) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll deleted file mode 100644 index bfa1a59756b8..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/add.ll +++ /dev/null @@ -1,183 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 - -;; Exercise the 'add' LLVM IR: https://llvm.org/docs/LangRef.html#add-instruction - -define i1 @add_i1(i1 %x, i1 %y) { -; LA32-LABEL: add_i1: -; LA32: # %bb.0: -; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i1: -; LA64: # %bb.0: -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i1 %x, %y - ret i1 %add -} - -define i8 @add_i8(i8 %x, i8 %y) { -; LA32-LABEL: add_i8: -; LA32: # %bb.0: -; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i8: -; LA64: # %bb.0: -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i8 %x, %y - ret i8 %add -} - -define i16 @add_i16(i16 %x, i16 %y) { -; LA32-LABEL: add_i16: -; LA32: # %bb.0: -; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i16: -; LA64: # %bb.0: -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i16 %x, %y - ret i16 %add -} - -define i32 @add_i32(i32 %x, i32 %y) { -; LA32-LABEL: add_i32: -; LA32: # %bb.0: -; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i32: -; LA64: # %bb.0: -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i32 %x, %y - ret i32 %add -} - -;; Match the pattern: -;; def : PatGprGpr_32; -define signext i32 @add_i32_sext(i32 %x, i32 %y) { -; LA32-LABEL: add_i32_sext: -; LA32: # %bb.0: -; LA32-NEXT: add.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i32_sext: -; LA64: # %bb.0: -; LA64-NEXT: add.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i32 %x, %y - ret i32 %add -} - -define i64 @add_i64(i64 %x, i64 %y) { -; LA32-LABEL: add_i64: -; LA32: # %bb.0: -; LA32-NEXT: add.w $a1, $a1, $a3 -; LA32-NEXT: add.w $a2, $a0, $a2 -; LA32-NEXT: sltu $a0, $a2, $a0 -; LA32-NEXT: add.w $a1, $a1, $a0 -; LA32-NEXT: move $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i64: -; LA64: # %bb.0: -; LA64-NEXT: add.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i64 %x, %y - ret i64 %add -} - -define i1 @add_i1_3(i1 %x) { -; LA32-LABEL: add_i1_3: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: jirl 
$zero, $ra, 0 -; -; LA64-LABEL: add_i1_3: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i1 %x, 3 - ret i1 %add -} - -define i8 @add_i8_3(i8 %x) { -; LA32-LABEL: add_i8_3: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i8_3: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i8 %x, 3 - ret i8 %add -} - -define i16 @add_i16_3(i16 %x) { -; LA32-LABEL: add_i16_3: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i16_3: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i16 %x, 3 - ret i16 %add -} - -define i32 @add_i32_3(i32 %x) { -; LA32-LABEL: add_i32_3: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i32_3: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i32 %x, 3 - ret i32 %add -} - -;; Match the pattern: -;; def : PatGprImm_32; -define signext i32 @add_i32_3_sext(i32 %x) { -; LA32-LABEL: add_i32_3_sext: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i32_3_sext: -; LA64: # %bb.0: -; LA64-NEXT: addi.w $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i32 %x, 3 - ret i32 %add -} - -define i64 @add_i64_3(i64 %x) { -; LA32-LABEL: add_i64_3: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $a2, $a0, 3 -; LA32-NEXT: sltu $a0, $a2, $a0 -; LA32-NEXT: add.w $a1, $a1, $a0 -; LA32-NEXT: move $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: add_i64_3: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = add i64 %x, 3 - ret i64 %add -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll deleted file mode 100644 index e5c9da58c757..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/and.ll +++ /dev/null @@ -1,266 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 - -;; Exercise the 'and' LLVM IR: https://llvm.org/docs/LangRef.html#and-instruction - -define i1 @and_i1(i1 %a, i1 %b) { -; LA32-LABEL: and_i1: -; LA32: # %bb.0: # %entry -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i1: -; LA64: # %bb.0: # %entry -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i1 %a, %b - ret i1 %r -} - -define i8 @and_i8(i8 %a, i8 %b) { -; LA32-LABEL: and_i8: -; LA32: # %bb.0: # %entry -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i8: -; LA64: # %bb.0: # %entry -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i8 %a, %b - ret i8 %r -} - -define i16 @and_i16(i16 %a, i16 %b) { -; LA32-LABEL: and_i16: -; LA32: # %bb.0: # %entry -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i16: -; LA64: # %bb.0: # %entry -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i16 %a, %b - ret i16 %r -} - -define i32 @and_i32(i32 %a, i32 %b) { -; LA32-LABEL: and_i32: -; LA32: # %bb.0: # %entry -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = 
and i32 %a, %b - ret i32 %r -} - -define i64 @and_i64(i64 %a, i64 %b) { -; LA32-LABEL: and_i64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: and $a0, $a0, $a2 -; LA32-NEXT: and $a1, $a1, $a3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i64 %a, %b - ret i64 %r -} - -define i1 @and_i1_0(i1 %b) { -; LA32-LABEL: and_i1_0: -; LA32: # %bb.0: # %entry -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i1_0: -; LA64: # %bb.0: # %entry -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i1 4, %b - ret i1 %r -} - -define i1 @and_i1_5(i1 %b) { -; LA32-LABEL: and_i1_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i1_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i1 5, %b - ret i1 %r -} - -define i8 @and_i8_5(i8 %b) { -; LA32-LABEL: and_i8_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: andi $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i8_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: andi $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i8 5, %b - ret i8 %r -} - -define i8 @and_i8_257(i8 %b) { -; LA32-LABEL: and_i8_257: -; LA32: # %bb.0: # %entry -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i8_257: -; LA64: # %bb.0: # %entry -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i8 257, %b - ret i8 %r -} - -define i16 @and_i16_5(i16 %b) { -; LA32-LABEL: and_i16_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: andi $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i16_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: andi $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i16 5, %b - ret i16 %r -} - -define i16 @and_i16_0x1000(i16 %b) { -; LA32-LABEL: and_i16_0x1000: -; LA32: # %bb.0: # %entry -; LA32-NEXT: lu12i.w $a1, 1 -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i16_0x1000: -; LA64: # %bb.0: # %entry -; LA64-NEXT: lu12i.w $a1, 1 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i16 4096, %b - ret i16 %r -} - -define i16 @and_i16_0x10001(i16 %b) { -; LA32-LABEL: and_i16_0x10001: -; LA32: # %bb.0: # %entry -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i16_0x10001: -; LA64: # %bb.0: # %entry -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i16 65537, %b - ret i16 %r -} - -define i32 @and_i32_5(i32 %b) { -; LA32-LABEL: and_i32_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: andi $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i32_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: andi $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i32 5, %b - ret i32 %r -} - -define i32 @and_i32_0x1000(i32 %b) { -; LA32-LABEL: and_i32_0x1000: -; LA32: # %bb.0: # %entry -; LA32-NEXT: lu12i.w $a1, 1 -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i32_0x1000: -; LA64: # %bb.0: # %entry -; LA64-NEXT: lu12i.w $a1, 1 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i32 4096, %b - ret i32 %r -} - -define i32 @and_i32_0x100000001(i32 %b) { -; LA32-LABEL: and_i32_0x100000001: -; LA32: # %bb.0: # %entry -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i32_0x100000001: -; LA64: # 
%bb.0: # %entry -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i32 4294967297, %b - ret i32 %r -} - -define i64 @and_i64_5(i64 %b) { -; LA32-LABEL: and_i64_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: andi $a0, $a0, 5 -; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i64_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: andi $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i64 5, %b - ret i64 %r -} - -define i64 @and_i64_0x1000(i64 %b) { -; LA32-LABEL: and_i64_0x1000: -; LA32: # %bb.0: # %entry -; LA32-NEXT: lu12i.w $a1, 1 -; LA32-NEXT: and $a0, $a0, $a1 -; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: and_i64_0x1000: -; LA64: # %bb.0: # %entry -; LA64-NEXT: lu12i.w $a1, 1 -; LA64-NEXT: and $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = and i64 4096, %b - ret i64 %r -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll deleted file mode 100644 index 1b7e8085185a..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/ashr.ll +++ /dev/null @@ -1,168 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 - -;; Exercise the 'ashr' LLVM IR: https://llvm.org/docs/LangRef.html#ashr-instruction - -define i1 @ashr_i1(i1 %x, i1 %y) { -; LA32-LABEL: ashr_i1: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i1: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i1 %x, %y - ret i1 %ashr -} - -define i8 @ashr_i8(i8 %x, i8 %y) { -; LA32-LABEL: ashr_i8: -; LA32: # %bb.0: -; LA32-NEXT: ext.w.b $a0, $a0 -; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i8: -; LA64: # %bb.0: -; LA64-NEXT: ext.w.b $a0, $a0 -; LA64-NEXT: sra.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i8 %x, %y - ret i8 %ashr -} - -define i16 @ashr_i16(i16 %x, i16 %y) { -; LA32-LABEL: ashr_i16: -; LA32: # %bb.0: -; LA32-NEXT: ext.w.h $a0, $a0 -; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i16: -; LA64: # %bb.0: -; LA64-NEXT: ext.w.h $a0, $a0 -; LA64-NEXT: sra.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i16 %x, %y - ret i16 %ashr -} - -define i32 @ashr_i32(i32 %x, i32 %y) { -; LA32-LABEL: ashr_i32: -; LA32: # %bb.0: -; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i32: -; LA64: # %bb.0: -; LA64-NEXT: sra.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i32 %x, %y - ret i32 %ashr -} - -define i64 @ashr_i64(i64 %x, i64 %y) { -; LA32-LABEL: ashr_i64: -; LA32: # %bb.0: -; LA32-NEXT: srai.w $a3, $a1, 31 -; LA32-NEXT: addi.w $a4, $a2, -32 -; LA32-NEXT: slti $a5, $a4, 0 -; LA32-NEXT: masknez $a3, $a3, $a5 -; LA32-NEXT: sra.w $a6, $a1, $a2 -; LA32-NEXT: maskeqz $a6, $a6, $a5 -; LA32-NEXT: or $a3, $a6, $a3 -; LA32-NEXT: srl.w $a0, $a0, $a2 -; LA32-NEXT: xori $a2, $a2, 31 -; LA32-NEXT: slli.w $a6, $a1, 1 -; LA32-NEXT: sll.w $a2, $a6, $a2 -; LA32-NEXT: or $a0, $a0, $a2 -; LA32-NEXT: sra.w $a1, $a1, $a4 -; LA32-NEXT: maskeqz $a0, $a0, $a5 -; LA32-NEXT: masknez $a1, $a1, $a5 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: move $a1, $a3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i64: -; LA64: # %bb.0: -; LA64-NEXT: sra.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i64 %x, %y - ret i64 %ashr -} - -define i1 @ashr_i1_3(i1 
%x) { -; LA32-LABEL: ashr_i1_3: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i1_3: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i1 %x, 3 - ret i1 %ashr -} - -define i8 @ashr_i8_3(i8 %x) { -; LA32-LABEL: ashr_i8_3: -; LA32: # %bb.0: -; LA32-NEXT: ext.w.b $a0, $a0 -; LA32-NEXT: srai.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i8_3: -; LA64: # %bb.0: -; LA64-NEXT: ext.w.b $a0, $a0 -; LA64-NEXT: srai.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i8 %x, 3 - ret i8 %ashr -} - -define i16 @ashr_i16_3(i16 %x) { -; LA32-LABEL: ashr_i16_3: -; LA32: # %bb.0: -; LA32-NEXT: ext.w.h $a0, $a0 -; LA32-NEXT: srai.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i16_3: -; LA64: # %bb.0: -; LA64-NEXT: ext.w.h $a0, $a0 -; LA64-NEXT: srai.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i16 %x, 3 - ret i16 %ashr -} - -define i32 @ashr_i32_3(i32 %x) { -; LA32-LABEL: ashr_i32_3: -; LA32: # %bb.0: -; LA32-NEXT: srai.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i32_3: -; LA64: # %bb.0: -; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: srai.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i32 %x, 3 - ret i32 %ashr -} - -define i64 @ashr_i64_3(i64 %x) { -; LA32-LABEL: ashr_i64_3: -; LA32: # %bb.0: -; LA32-NEXT: srli.w $a0, $a0, 3 -; LA32-NEXT: slli.w $a2, $a1, 29 -; LA32-NEXT: or $a0, $a0, $a2 -; LA32-NEXT: srai.w $a1, $a1, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: ashr_i64_3: -; LA64: # %bb.0: -; LA64-NEXT: srai.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %ashr = ashr i64 %x, 3 - ret i64 %ashr -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll deleted file mode 100644 index f46eca268aae..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/br.ll +++ /dev/null @@ -1,358 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefixes=ALL,LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefixes=ALL,LA64 - -define void @foo() noreturn nounwind { -; ALL-LABEL: foo: -; ALL: # %bb.0: # %entry -; ALL-NEXT: .LBB0_1: # %loop -; ALL-NEXT: # =>This Inner Loop Header: Depth=1 -; ALL-NEXT: b .LBB0_1 -entry: - br label %loop -loop: - br label %loop -} - -define void @foo_br_eq(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_eq: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: beq $a2, $a0, .LBB1_2 -; LA32-NEXT: b .LBB1_1 -; LA32-NEXT: .LBB1_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB1_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_eq: -; LA64: # %bb.0: -; LA64-NEXT: ld.wu $a2, $a1, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: beq $a2, $a0, .LBB1_2 -; LA64-NEXT: b .LBB1_1 -; LA64-NEXT: .LBB1_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB1_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp eq i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - -end: - ret void -} - -define void @foo_br_ne(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_ne: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: bne $a2, $a0, .LBB2_2 -; LA32-NEXT: b .LBB2_1 -; LA32-NEXT: .LBB2_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB2_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_ne: -; LA64: # %bb.0: -; LA64-NEXT: ld.wu $a2, $a1, 0 -; LA64-NEXT: bstrpick.d $a0, 
$a0, 31, 0 -; LA64-NEXT: bne $a2, $a0, .LBB2_2 -; LA64-NEXT: b .LBB2_1 -; LA64-NEXT: .LBB2_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB2_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp ne i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - -end: - ret void -} - -define void @foo_br_slt(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_slt: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: blt $a2, $a0, .LBB3_2 -; LA32-NEXT: b .LBB3_1 -; LA32-NEXT: .LBB3_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB3_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_slt: -; LA64: # %bb.0: -; LA64-NEXT: ld.w $a2, $a1, 0 -; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: blt $a2, $a0, .LBB3_2 -; LA64-NEXT: b .LBB3_1 -; LA64-NEXT: .LBB3_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB3_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp slt i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - -end: - ret void -} - -define void @foo_br_sge(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_sge: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: bge $a2, $a0, .LBB4_2 -; LA32-NEXT: b .LBB4_1 -; LA32-NEXT: .LBB4_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB4_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_sge: -; LA64: # %bb.0: -; LA64-NEXT: ld.w $a2, $a1, 0 -; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: bge $a2, $a0, .LBB4_2 -; LA64-NEXT: b .LBB4_1 -; LA64-NEXT: .LBB4_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB4_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp sge i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - -end: - ret void -} - -define void @foo_br_ult(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_ult: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: bltu $a2, $a0, .LBB5_2 -; LA32-NEXT: b .LBB5_1 -; LA32-NEXT: .LBB5_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB5_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_ult: -; LA64: # %bb.0: -; LA64-NEXT: ld.wu $a2, $a1, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: bltu $a2, $a0, .LBB5_2 -; LA64-NEXT: b .LBB5_1 -; LA64-NEXT: .LBB5_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB5_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp ult i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - -end: - ret void -} - -define void @foo_br_uge(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_uge: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: bgeu $a2, $a0, .LBB6_2 -; LA32-NEXT: b .LBB6_1 -; LA32-NEXT: .LBB6_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB6_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_uge: -; LA64: # %bb.0: -; LA64-NEXT: ld.wu $a2, $a1, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: bgeu $a2, $a0, .LBB6_2 -; LA64-NEXT: b .LBB6_1 -; LA64-NEXT: .LBB6_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB6_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp uge i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - 
-end: - ret void -} - -;; Check for condition codes that don't have a matching instruction. -define void @foo_br_sgt(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_sgt: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: blt $a0, $a2, .LBB7_2 -; LA32-NEXT: b .LBB7_1 -; LA32-NEXT: .LBB7_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB7_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_sgt: -; LA64: # %bb.0: -; LA64-NEXT: ld.w $a2, $a1, 0 -; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: blt $a0, $a2, .LBB7_2 -; LA64-NEXT: b .LBB7_1 -; LA64-NEXT: .LBB7_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB7_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp sgt i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - -end: - ret void -} - -define void @foo_br_sle(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_sle: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: bge $a0, $a2, .LBB8_2 -; LA32-NEXT: b .LBB8_1 -; LA32-NEXT: .LBB8_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB8_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_sle: -; LA64: # %bb.0: -; LA64-NEXT: ld.w $a2, $a1, 0 -; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: bge $a0, $a2, .LBB8_2 -; LA64-NEXT: b .LBB8_1 -; LA64-NEXT: .LBB8_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB8_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp sle i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - -end: - ret void -} - -define void @foo_br_ugt(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_ugt: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: bltu $a0, $a2, .LBB9_2 -; LA32-NEXT: b .LBB9_1 -; LA32-NEXT: .LBB9_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB9_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_ugt: -; LA64: # %bb.0: -; LA64-NEXT: ld.wu $a2, $a1, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: bltu $a0, $a2, .LBB9_2 -; LA64-NEXT: b .LBB9_1 -; LA64-NEXT: .LBB9_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB9_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp ugt i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - -end: - ret void -} - -define void @foo_br_ule(i32 %a, ptr %b) nounwind { -; LA32-LABEL: foo_br_ule: -; LA32: # %bb.0: -; LA32-NEXT: ld.w $a2, $a1, 0 -; LA32-NEXT: bgeu $a0, $a2, .LBB10_2 -; LA32-NEXT: b .LBB10_1 -; LA32-NEXT: .LBB10_1: # %test -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: .LBB10_2: # %end -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: foo_br_ule: -; LA64: # %bb.0: -; LA64-NEXT: ld.wu $a2, $a1, 0 -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: bgeu $a0, $a2, .LBB10_2 -; LA64-NEXT: b .LBB10_1 -; LA64-NEXT: .LBB10_1: # %test -; LA64-NEXT: ld.w $a0, $a1, 0 -; LA64-NEXT: .LBB10_2: # %end -; LA64-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %b - %cc = icmp ule i32 %val, %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %b - br label %end - -end: - ret void -} - -;; Check the case of a branch where the condition was generated in another -;; function. 
-define void @foo_br_cc(ptr %a, i1 %cc) nounwind { -; ALL-LABEL: foo_br_cc: -; ALL: # %bb.0: -; ALL-NEXT: ld.w $a2, $a0, 0 -; ALL-NEXT: andi $a1, $a1, 1 -; ALL-NEXT: bnez $a1, .LBB11_2 -; ALL-NEXT: b .LBB11_1 -; ALL-NEXT: .LBB11_1: # %test -; ALL-NEXT: ld.w $a0, $a0, 0 -; ALL-NEXT: .LBB11_2: # %end -; ALL-NEXT: jirl $zero, $ra, 0 - %val = load volatile i32, ptr %a - br i1 %cc, label %end, label %test -test: - %tmp = load volatile i32, ptr %a - br label %end - -end: - ret void -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll deleted file mode 100644 index 596ea22e5854..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/call.ll +++ /dev/null @@ -1,88 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck --check-prefix=LA32 %s -; RUN: llc --mtriple=loongarch64 < %s | FileCheck --check-prefix=LA64 %s - -declare i32 @external_function(i32) - -define i32 @test_call_external(i32 %a) nounwind { -; LA32-LABEL: test_call_external: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl external_function -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: test_call_external: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: bl external_function -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = call i32 @external_function(i32 %a) - ret i32 %1 -} - -define i32 @defined_function(i32 %a) nounwind { -; LA32-LABEL: defined_function: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: defined_function: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = add i32 %a, 1 - ret i32 %1 -} - -define i32 @test_call_defined(i32 %a) nounwind { -; LA32-LABEL: test_call_defined: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl defined_function -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: test_call_defined: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: bl defined_function -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = call i32 @defined_function(i32 %a) nounwind - ret i32 %1 -} - -define i32 @test_call_indirect(ptr %a, i32 %b) nounwind { -; LA32-LABEL: test_call_indirect: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: move $a2, $a0 -; LA32-NEXT: move $a0, $a1 -; LA32-NEXT: jirl $ra, $a2, 0 -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: test_call_indirect: -; LA64: # %bb.0: -; LA64-NEXT: addi.d $sp, $sp, -16 -; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; LA64-NEXT: move $a2, $a0 -; LA64-NEXT: move $a0, $a1 -; LA64-NEXT: jirl $ra, $a2, 0 -; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 16 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = call i32 %a(i32 %b) - ret i32 %1 -} diff --git 
a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll deleted file mode 100644 index 33f6dbee748e..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll +++ /dev/null @@ -1,329 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 - -define float @convert_double_to_float(double %a) nounwind { -; LA32-LABEL: convert_double_to_float: -; LA32: # %bb.0: -; LA32-NEXT: fcvt.s.d $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_double_to_float: -; LA64: # %bb.0: -; LA64-NEXT: fcvt.s.d $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = fptrunc double %a to float - ret float %1 -} - -define double @convert_float_to_double(float %a) nounwind { -; LA32-LABEL: convert_float_to_double: -; LA32: # %bb.0: -; LA32-NEXT: fcvt.d.s $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_float_to_double: -; LA64: # %bb.0: -; LA64-NEXT: fcvt.d.s $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = fpext float %a to double - ret double %1 -} - -define double @convert_i8_to_double(i8 signext %a) nounwind { -; LA32-LABEL: convert_i8_to_double: -; LA32: # %bb.0: -; LA32-NEXT: movgr2fr.w $fa0, $a0 -; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_i8_to_double: -; LA64: # %bb.0: -; LA64-NEXT: movgr2fr.w $fa0, $a0 -; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sitofp i8 %a to double - ret double %1 -} - -define double @convert_i16_to_double(i16 signext %a) nounwind { -; LA32-LABEL: convert_i16_to_double: -; LA32: # %bb.0: -; LA32-NEXT: movgr2fr.w $fa0, $a0 -; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_i16_to_double: -; LA64: # %bb.0: -; LA64-NEXT: movgr2fr.w $fa0, $a0 -; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sitofp i16 %a to double - ret double %1 -} - -define double @convert_i32_to_double(i32 %a) nounwind { -; LA32-LABEL: convert_i32_to_double: -; LA32: # %bb.0: -; LA32-NEXT: movgr2fr.w $fa0, $a0 -; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_i32_to_double: -; LA64: # %bb.0: -; LA64-NEXT: movgr2fr.w $fa0, $a0 -; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sitofp i32 %a to double - ret double %1 -} - -define double @convert_i64_to_double(i64 %a) nounwind { -; LA32-LABEL: convert_i64_to_double: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl __floatdidf -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_i64_to_double: -; LA64: # %bb.0: -; LA64-NEXT: movgr2fr.d $fa0, $a0 -; LA64-NEXT: ffint.d.l $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sitofp i64 %a to double - ret double %1 -} - -define i32 @convert_double_to_i32(double %a) nounwind { -; LA32-LABEL: convert_double_to_i32: -; LA32: # %bb.0: -; LA32-NEXT: ftintrz.w.d $fa0, $fa0 -; LA32-NEXT: movfr2gr.s $a0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_double_to_i32: -; LA64: # %bb.0: -; LA64-NEXT: ftintrz.w.d $fa0, $fa0 -; LA64-NEXT: movfr2gr.s $a0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = fptosi double %a to i32 - 
ret i32 %1 -} - -define i32 @convert_double_to_u32(double %a) nounwind { -; LA32-LABEL: convert_double_to_u32: -; LA32: # %bb.0: -; LA32-NEXT: pcalau12i $a0, .LCPI7_0 -; LA32-NEXT: addi.w $a0, $a0, .LCPI7_0 -; LA32-NEXT: fld.d $fa1, $a0, 0 -; LA32-NEXT: fsub.d $fa2, $fa0, $fa1 -; LA32-NEXT: ftintrz.w.d $fa2, $fa2 -; LA32-NEXT: movfr2gr.s $a0, $fa2 -; LA32-NEXT: lu12i.w $a1, -524288 -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a1, $fcc0 -; LA32-NEXT: masknez $a0, $a0, $a1 -; LA32-NEXT: ftintrz.w.d $fa0, $fa0 -; LA32-NEXT: movfr2gr.s $a2, $fa0 -; LA32-NEXT: maskeqz $a1, $a2, $a1 -; LA32-NEXT: or $a0, $a1, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_double_to_u32: -; LA64: # %bb.0: -; LA64-NEXT: ftintrz.l.d $fa0, $fa0 -; LA64-NEXT: movfr2gr.d $a0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = fptoui double %a to i32 - ret i32 %1 -} - -define i64 @convert_double_to_i64(double %a) nounwind { -; LA32-LABEL: convert_double_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl __fixdfdi -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_double_to_i64: -; LA64: # %bb.0: -; LA64-NEXT: ftintrz.l.d $fa0, $fa0 -; LA64-NEXT: movfr2gr.d $a0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = fptosi double %a to i64 - ret i64 %1 -} - -define i64 @convert_double_to_u64(double %a) nounwind { -; LA32-LABEL: convert_double_to_u64: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl __fixunsdfdi -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_double_to_u64: -; LA64: # %bb.0: -; LA64-NEXT: pcalau12i $a0, .LCPI9_0 -; LA64-NEXT: addi.d $a0, $a0, .LCPI9_0 -; LA64-NEXT: fld.d $fa1, $a0, 0 -; LA64-NEXT: fsub.d $fa2, $fa0, $fa1 -; LA64-NEXT: ftintrz.l.d $fa2, $fa2 -; LA64-NEXT: movfr2gr.d $a0, $fa2 -; LA64-NEXT: lu52i.d $a1, $zero, -2048 -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a1, $fcc0 -; LA64-NEXT: masknez $a0, $a0, $a1 -; LA64-NEXT: ftintrz.l.d $fa0, $fa0 -; LA64-NEXT: movfr2gr.d $a2, $fa0 -; LA64-NEXT: maskeqz $a1, $a2, $a1 -; LA64-NEXT: or $a0, $a1, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = fptoui double %a to i64 - ret i64 %1 -} - -define double @convert_u8_to_double(i8 zeroext %a) nounwind { -; LA32-LABEL: convert_u8_to_double: -; LA32: # %bb.0: -; LA32-NEXT: movgr2fr.w $fa0, $a0 -; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_u8_to_double: -; LA64: # %bb.0: -; LA64-NEXT: movgr2fr.w $fa0, $a0 -; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = uitofp i8 %a to double - ret double %1 -} - -define double @convert_u16_to_double(i16 zeroext %a) nounwind { -; LA32-LABEL: convert_u16_to_double: -; LA32: # %bb.0: -; LA32-NEXT: movgr2fr.w $fa0, $a0 -; LA32-NEXT: ffint.d.w $fa0, $fa0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_u16_to_double: -; LA64: # %bb.0: -; LA64-NEXT: movgr2fr.w $fa0, $a0 -; LA64-NEXT: ffint.d.w $fa0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = uitofp i16 %a to double - ret double %1 -} - -define double @convert_u32_to_double(i32 %a) nounwind { -; LA32-LABEL: convert_u32_to_double: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: 
addi.w $a1, $sp, 8 -; LA32-NEXT: ori $a1, $a1, 4 -; LA32-NEXT: lu12i.w $a2, 275200 -; LA32-NEXT: st.w $a2, $a1, 0 -; LA32-NEXT: st.w $a0, $sp, 8 -; LA32-NEXT: pcalau12i $a0, .LCPI12_0 -; LA32-NEXT: addi.w $a0, $a0, .LCPI12_0 -; LA32-NEXT: fld.d $fa0, $a0, 0 -; LA32-NEXT: fld.d $fa1, $sp, 8 -; LA32-NEXT: fsub.d $fa0, $fa1, $fa0 -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_u32_to_double: -; LA64: # %bb.0: -; LA64-NEXT: lu52i.d $a1, $zero, 1107 -; LA64-NEXT: movgr2fr.d $fa0, $a1 -; LA64-NEXT: pcalau12i $a1, .LCPI12_0 -; LA64-NEXT: addi.d $a1, $a1, .LCPI12_0 -; LA64-NEXT: fld.d $fa1, $a1, 0 -; LA64-NEXT: fsub.d $fa0, $fa0, $fa1 -; LA64-NEXT: lu12i.w $a1, 275200 -; LA64-NEXT: bstrins.d $a0, $a1, 63, 32 -; LA64-NEXT: movgr2fr.d $fa1, $a0 -; LA64-NEXT: fadd.d $fa0, $fa1, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = uitofp i32 %a to double - ret double %1 -} - -define double @convert_u64_to_double(i64 %a) nounwind { -; LA32-LABEL: convert_u64_to_double: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill -; LA32-NEXT: bl __floatundidf -; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: convert_u64_to_double: -; LA64: # %bb.0: -; LA64-NEXT: srli.d $a1, $a0, 32 -; LA64-NEXT: lu52i.d $a2, $zero, 1107 -; LA64-NEXT: or $a1, $a1, $a2 -; LA64-NEXT: movgr2fr.d $fa0, $a1 -; LA64-NEXT: pcalau12i $a1, .LCPI13_0 -; LA64-NEXT: addi.d $a1, $a1, .LCPI13_0 -; LA64-NEXT: fld.d $fa1, $a1, 0 -; LA64-NEXT: fsub.d $fa0, $fa0, $fa1 -; LA64-NEXT: lu12i.w $a1, 275200 -; LA64-NEXT: bstrins.d $a0, $a1, 63, 32 -; LA64-NEXT: movgr2fr.d $fa1, $a0 -; LA64-NEXT: fadd.d $fa0, $fa1, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = uitofp i64 %a to double - ret double %1 -} - -define double @bitcast_i64_to_double(i64 %a, i64 %b) nounwind { -; LA32-LABEL: bitcast_i64_to_double: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: addi.w $a2, $sp, 8 -; LA32-NEXT: ori $a2, $a2, 4 -; LA32-NEXT: st.w $a1, $a2, 0 -; LA32-NEXT: st.w $a0, $sp, 8 -; LA32-NEXT: fld.d $fa0, $sp, 8 -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: bitcast_i64_to_double: -; LA64: # %bb.0: -; LA64-NEXT: movgr2fr.d $fa0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = bitcast i64 %a to double - ret double %1 -} - -define i64 @bitcast_double_to_i64(double %a) nounwind { -; LA32-LABEL: bitcast_double_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -16 -; LA32-NEXT: fst.d $fa0, $sp, 8 -; LA32-NEXT: addi.w $a0, $sp, 8 -; LA32-NEXT: ori $a0, $a0, 4 -; LA32-NEXT: ld.w $a1, $a0, 0 -; LA32-NEXT: ld.w $a0, $sp, 8 -; LA32-NEXT: addi.w $sp, $sp, 16 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: bitcast_double_to_i64: -; LA64: # %bb.0: -; LA64-NEXT: movfr2gr.d $a0, $fa0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = bitcast double %a to i64 - ret i64 %1 -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll deleted file mode 100644 index 15e1118d2e56..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fadd.ll +++ /dev/null @@ -1,32 +0,0 @@ -; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 - -;; Exercise the 'fadd' LLVM IR: https://llvm.org/docs/LangRef.html#fadd-instruction - -define float @fadd_s(float %x, float %y) { -; LA32-LABEL: fadd_s: -; 
LA32: # %bb.0: -; LA32-NEXT: fadd.s $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fadd_s: -; LA64: # %bb.0: -; LA64-NEXT: fadd.s $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = fadd float %x, %y - ret float %add -} - -define double @fadd_d(double %x, double %y) { -; LA32-LABEL: fadd_d: -; LA32: # %bb.0: -; LA32-NEXT: fadd.d $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fadd_d: -; LA64: # %bb.0: -; LA64-NEXT: fadd.d $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 - %add = fadd double %x, %y - ret double %add -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll deleted file mode 100644 index bb35405abc01..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll +++ /dev/null @@ -1,257 +0,0 @@ -; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 - -;; Test the 'fcmp' LLVM IR: https://llvm.org/docs/LangRef.html#fcmp-instruction -;; over double values. - -define i1 @fcmp_false(double %a, double %b) { -; LA32-LABEL: fcmp_false: -; LA32: # %bb.0: -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_false: -; LA64: # %bb.0: -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp false double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_oeq(double %a, double %b) { -; LA32-LABEL: fcmp_oeq: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_oeq: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp oeq double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ogt(double %a, double %b) { -; LA32-LABEL: fcmp_ogt: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ogt: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ogt double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_oge(double %a, double %b) { -; LA32-LABEL: fcmp_oge: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_oge: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp oge double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_olt(double %a, double %b) { -; LA32-LABEL: fcmp_olt: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_olt: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp olt double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ole(double %a, double %b) { -; LA32-LABEL: fcmp_ole: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ole: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ole double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_one(double %a, double %b) { -; LA32-LABEL: fcmp_one: -; LA32: # 
%bb.0: -; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_one: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp one double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ord(double %a, double %b) { -; LA32-LABEL: fcmp_ord: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ord: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ord double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ueq(double %a, double %b) { -; LA32-LABEL: fcmp_ueq: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ueq: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ueq double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ugt(double %a, double %b) { -; LA32-LABEL: fcmp_ugt: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ugt: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ugt double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_uge(double %a, double %b) { -; LA32-LABEL: fcmp_uge: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_uge: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp uge double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ult(double %a, double %b) { -; LA32-LABEL: fcmp_ult: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ult: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ult double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ule(double %a, double %b) { -; LA32-LABEL: fcmp_ule: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ule: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ule double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_une(double %a, double %b) { -; LA32-LABEL: fcmp_une: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_une: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp une double %a, %b - ret i1 %cmp -} - -define i1 @fcmp_uno(double %a, double %b) { -; LA32-LABEL: fcmp_uno: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_uno: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp uno double %a, 
%b - ret i1 %cmp -} - -define i1 @fcmp_true(double %a, double %b) { -; LA32-LABEL: fcmp_true: -; LA32: # %bb.0: -; LA32-NEXT: ori $a0, $zero, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_true: -; LA64: # %bb.0: -; LA64-NEXT: ori $a0, $zero, 1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp true double %a, %b - ret i1 %cmp -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll deleted file mode 100644 index 33bdd0b50bd4..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll +++ /dev/null @@ -1,257 +0,0 @@ -; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 - -;; Test the 'fcmp' LLVM IR: https://llvm.org/docs/LangRef.html#fcmp-instruction -;; over float values. - -define i1 @fcmp_false(float %a, float %b) { -; LA32-LABEL: fcmp_false: -; LA32: # %bb.0: -; LA32-NEXT: move $a0, $zero -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_false: -; LA64: # %bb.0: -; LA64-NEXT: move $a0, $zero -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp false float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_oeq(float %a, float %b) { -; LA32-LABEL: fcmp_oeq: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_oeq: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp oeq float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ogt(float %a, float %b) { -; LA32-LABEL: fcmp_ogt: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ogt: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ogt float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_oge(float %a, float %b) { -; LA32-LABEL: fcmp_oge: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_oge: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp oge float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_olt(float %a, float %b) { -; LA32-LABEL: fcmp_olt: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_olt: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp olt float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ole(float %a, float %b) { -; LA32-LABEL: fcmp_ole: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ole: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ole float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_one(float %a, float %b) { -; LA32-LABEL: fcmp_one: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_one: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; 
LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp one float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ord(float %a, float %b) { -; LA32-LABEL: fcmp_ord: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ord: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ord float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ueq(float %a, float %b) { -; LA32-LABEL: fcmp_ueq: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ueq: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ueq float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ugt(float %a, float %b) { -; LA32-LABEL: fcmp_ugt: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ugt: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ugt float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_uge(float %a, float %b) { -; LA32-LABEL: fcmp_uge: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_uge: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp uge float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ult(float %a, float %b) { -; LA32-LABEL: fcmp_ult: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ult: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ult float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_ule(float %a, float %b) { -; LA32-LABEL: fcmp_ule: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_ule: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp ule float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_une(float %a, float %b) { -; LA32-LABEL: fcmp_une: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_une: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp une float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_uno(float %a, float %b) { -; LA32-LABEL: fcmp_uno: -; LA32: # %bb.0: -; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 -; LA32-NEXT: movcf2gr $a0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_uno: -; LA64: # %bb.0: -; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1 -; LA64-NEXT: movcf2gr $a0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cmp = fcmp uno float %a, %b - ret i1 %cmp -} - -define i1 @fcmp_true(float %a, float %b) { -; LA32-LABEL: fcmp_true: -; LA32: # %bb.0: -; LA32-NEXT: ori $a0, $zero, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fcmp_true: -; LA64: # %bb.0: -; LA64-NEXT: ori $a0, $zero, 1 -; LA64-NEXT: 
jirl $zero, $ra, 0 - %cmp = fcmp true float %a, %b - ret i1 %cmp -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll deleted file mode 100644 index 9c3f85950d5d..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fdiv.ll +++ /dev/null @@ -1,32 +0,0 @@ -; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64 - -;; Exercise the 'fdiv' LLVM IR: https://llvm.org/docs/LangRef.html#fdiv-instruction - -define float @fdiv_s(float %x, float %y) { -; LA32-LABEL: fdiv_s: -; LA32: # %bb.0: -; LA32-NEXT: fdiv.s $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fdiv_s: -; LA64: # %bb.0: -; LA64-NEXT: fdiv.s $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 - %div = fdiv float %x, %y - ret float %div -} - -define double @fdiv_d(double %x, double %y) { -; LA32-LABEL: fdiv_d: -; LA32: # %bb.0: -; LA32-NEXT: fdiv.d $fa0, $fa0, $fa1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: fdiv_d: -; LA64: # %bb.0: -; LA64-NEXT: fdiv.d $fa0, $fa0, $fa1 -; LA64-NEXT: jirl $zero, $ra, 0 - %div = fdiv double %x, %y - ret double %div -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll deleted file mode 100644 index 30e0045a1467..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll +++ /dev/null @@ -1,650 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F -; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D -; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F -; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D - -define signext i8 @convert_float_to_i8(float %a) nounwind { -; LA32F-LABEL: convert_float_to_i8: -; LA32F: # %bb.0: -; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 -; LA32F-NEXT: movfr2gr.s $a0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 -; -; LA32D-LABEL: convert_float_to_i8: -; LA32D: # %bb.0: -; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 -; LA32D-NEXT: movfr2gr.s $a0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 -; -; LA64F-LABEL: convert_float_to_i8: -; LA64F: # %bb.0: -; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 -; LA64F-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 -; -; LA64D-LABEL: convert_float_to_i8: -; LA64D: # %bb.0: -; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 -; LA64D-NEXT: movfr2gr.d $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 - %1 = fptosi float %a to i8 - ret i8 %1 -} - -define signext i16 @convert_float_to_i16(float %a) nounwind { -; LA32F-LABEL: convert_float_to_i16: -; LA32F: # %bb.0: -; LA32F-NEXT: ftintrz.w.s $fa0, $fa0 -; LA32F-NEXT: movfr2gr.s $a0, $fa0 -; LA32F-NEXT: jirl $zero, $ra, 0 -; -; LA32D-LABEL: convert_float_to_i16: -; LA32D: # %bb.0: -; LA32D-NEXT: ftintrz.w.s $fa0, $fa0 -; LA32D-NEXT: movfr2gr.s $a0, $fa0 -; LA32D-NEXT: jirl $zero, $ra, 0 -; -; LA64F-LABEL: convert_float_to_i16: -; LA64F: # %bb.0: -; LA64F-NEXT: ftintrz.w.s $fa0, $fa0 -; LA64F-NEXT: movfr2gr.s $a0, $fa0 -; LA64F-NEXT: jirl $zero, $ra, 0 -; -; LA64D-LABEL: convert_float_to_i16: -; LA64D: # %bb.0: -; LA64D-NEXT: ftintrz.l.s $fa0, $fa0 -; LA64D-NEXT: movfr2gr.d $a0, $fa0 -; LA64D-NEXT: jirl $zero, $ra, 0 - %1 = fptosi float %a to i16 - ret i16 %1 -} - -define i32 @convert_float_to_i32(float %a) 
nounwind {
-; LA32F-LABEL: convert_float_to_i32:
-; LA32F: # %bb.0:
-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA32F-NEXT: movfr2gr.s $a0, $fa0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_float_to_i32:
-; LA32D: # %bb.0:
-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0
-; LA32D-NEXT: movfr2gr.s $a0, $fa0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_float_to_i32:
-; LA64F: # %bb.0:
-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA64F-NEXT: movfr2gr.s $a0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_float_to_i32:
-; LA64D: # %bb.0:
-; LA64D-NEXT: ftintrz.w.s $fa0, $fa0
-; LA64D-NEXT: movfr2gr.s $a0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = fptosi float %a to i32
- ret i32 %1
-}
-
-define i64 @convert_float_to_i64(float %a) nounwind {
-; LA32F-LABEL: convert_float_to_i64:
-; LA32F: # %bb.0:
-; LA32F-NEXT: addi.w $sp, $sp, -16
-; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT: bl __fixsfdi
-; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT: addi.w $sp, $sp, 16
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_float_to_i64:
-; LA32D: # %bb.0:
-; LA32D-NEXT: addi.w $sp, $sp, -16
-; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-NEXT: bl __fixsfdi
-; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32D-NEXT: addi.w $sp, $sp, 16
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_float_to_i64:
-; LA64F: # %bb.0:
-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA64F-NEXT: movfr2gr.s $a0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_float_to_i64:
-; LA64D: # %bb.0:
-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0
-; LA64D-NEXT: movfr2gr.d $a0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = fptosi float %a to i64
- ret i64 %1
-}
-
-define zeroext i8 @convert_float_to_u8(float %a) nounwind {
-; LA32F-LABEL: convert_float_to_u8:
-; LA32F: # %bb.0:
-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA32F-NEXT: movfr2gr.s $a0, $fa0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_float_to_u8:
-; LA32D: # %bb.0:
-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0
-; LA32D-NEXT: movfr2gr.s $a0, $fa0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_float_to_u8:
-; LA64F: # %bb.0:
-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA64F-NEXT: movfr2gr.s $a0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_float_to_u8:
-; LA64D: # %bb.0:
-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0
-; LA64D-NEXT: movfr2gr.d $a0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = fptoui float %a to i8
- ret i8 %1
-}
-
-define zeroext i16 @convert_float_to_u16(float %a) nounwind {
-; LA32F-LABEL: convert_float_to_u16:
-; LA32F: # %bb.0:
-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA32F-NEXT: movfr2gr.s $a0, $fa0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_float_to_u16:
-; LA32D: # %bb.0:
-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0
-; LA32D-NEXT: movfr2gr.s $a0, $fa0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_float_to_u16:
-; LA64F: # %bb.0:
-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA64F-NEXT: movfr2gr.s $a0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_float_to_u16:
-; LA64D: # %bb.0:
-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0
-; LA64D-NEXT: movfr2gr.d $a0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = fptoui float %a to i16
- ret i16 %1
-}
-
-define i32 @convert_float_to_u32(float %a) nounwind {
-; LA32F-LABEL: convert_float_to_u32:
-; LA32F: # %bb.0:
-; LA32F-NEXT: pcalau12i $a0, .LCPI6_0
-; LA32F-NEXT: addi.w $a0, $a0, .LCPI6_0
-; LA32F-NEXT: fld.s $fa1, $a0, 0
-; LA32F-NEXT: fsub.s $fa2, $fa0, $fa1
-; LA32F-NEXT: ftintrz.w.s $fa2, $fa2
-; LA32F-NEXT: movfr2gr.s $a0, $fa2
-; LA32F-NEXT: lu12i.w $a1, -524288
-; LA32F-NEXT: xor $a0, $a0, $a1
-; LA32F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
-; LA32F-NEXT: movcf2gr $a1, $fcc0
-; LA32F-NEXT: masknez $a0, $a0, $a1
-; LA32F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA32F-NEXT: movfr2gr.s $a2, $fa0
-; LA32F-NEXT: maskeqz $a1, $a2, $a1
-; LA32F-NEXT: or $a0, $a1, $a0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_float_to_u32:
-; LA32D: # %bb.0:
-; LA32D-NEXT: pcalau12i $a0, .LCPI6_0
-; LA32D-NEXT: addi.w $a0, $a0, .LCPI6_0
-; LA32D-NEXT: fld.s $fa1, $a0, 0
-; LA32D-NEXT: fsub.s $fa2, $fa0, $fa1
-; LA32D-NEXT: ftintrz.w.s $fa2, $fa2
-; LA32D-NEXT: movfr2gr.s $a0, $fa2
-; LA32D-NEXT: lu12i.w $a1, -524288
-; LA32D-NEXT: xor $a0, $a0, $a1
-; LA32D-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
-; LA32D-NEXT: movcf2gr $a1, $fcc0
-; LA32D-NEXT: masknez $a0, $a0, $a1
-; LA32D-NEXT: ftintrz.w.s $fa0, $fa0
-; LA32D-NEXT: movfr2gr.s $a2, $fa0
-; LA32D-NEXT: maskeqz $a1, $a2, $a1
-; LA32D-NEXT: or $a0, $a1, $a0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_float_to_u32:
-; LA64F: # %bb.0:
-; LA64F-NEXT: pcalau12i $a0, .LCPI6_0
-; LA64F-NEXT: addi.d $a0, $a0, .LCPI6_0
-; LA64F-NEXT: fld.s $fa1, $a0, 0
-; LA64F-NEXT: fsub.s $fa2, $fa0, $fa1
-; LA64F-NEXT: ftintrz.w.s $fa2, $fa2
-; LA64F-NEXT: movfr2gr.s $a0, $fa2
-; LA64F-NEXT: lu12i.w $a1, -524288
-; LA64F-NEXT: xor $a0, $a0, $a1
-; LA64F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
-; LA64F-NEXT: movcf2gr $a1, $fcc0
-; LA64F-NEXT: masknez $a0, $a0, $a1
-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA64F-NEXT: movfr2gr.s $a2, $fa0
-; LA64F-NEXT: maskeqz $a1, $a2, $a1
-; LA64F-NEXT: or $a0, $a1, $a0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_float_to_u32:
-; LA64D: # %bb.0:
-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0
-; LA64D-NEXT: movfr2gr.d $a0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = fptoui float %a to i32
- ret i32 %1
-}
-
-define i64 @convert_float_to_u64(float %a) nounwind {
-; LA32F-LABEL: convert_float_to_u64:
-; LA32F: # %bb.0:
-; LA32F-NEXT: addi.w $sp, $sp, -16
-; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT: bl __fixunssfdi
-; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT: addi.w $sp, $sp, 16
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_float_to_u64:
-; LA32D: # %bb.0:
-; LA32D-NEXT: addi.w $sp, $sp, -16
-; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-NEXT: bl __fixunssfdi
-; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32D-NEXT: addi.w $sp, $sp, 16
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_float_to_u64:
-; LA64F: # %bb.0:
-; LA64F-NEXT: pcalau12i $a0, .LCPI7_0
-; LA64F-NEXT: addi.d $a0, $a0, .LCPI7_0
-; LA64F-NEXT: fld.s $fa1, $a0, 0
-; LA64F-NEXT: fsub.s $fa2, $fa0, $fa1
-; LA64F-NEXT: ftintrz.w.s $fa2, $fa2
-; LA64F-NEXT: movfr2gr.s $a0, $fa2
-; LA64F-NEXT: lu52i.d $a1, $zero, -2048
-; LA64F-NEXT: xor $a0, $a0, $a1
-; LA64F-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
-; LA64F-NEXT: movcf2gr $a1, $fcc0
-; LA64F-NEXT: masknez $a0, $a0, $a1
-; LA64F-NEXT: ftintrz.w.s $fa0, $fa0
-; LA64F-NEXT: movfr2gr.s $a2, $fa0
-; LA64F-NEXT: maskeqz $a1, $a2, $a1
-; LA64F-NEXT: or $a0, $a1, $a0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_float_to_u64:
-; LA64D: # %bb.0:
-; LA64D-NEXT: pcalau12i $a0, .LCPI7_0
-; LA64D-NEXT: addi.d $a0, $a0, .LCPI7_0
-; LA64D-NEXT: fld.s $fa1, $a0, 0
-; LA64D-NEXT: fsub.s $fa2, $fa0, $fa1
-; LA64D-NEXT: ftintrz.l.s $fa2, $fa2
-; LA64D-NEXT: movfr2gr.d $a0, $fa2
-; LA64D-NEXT: lu52i.d $a1, $zero, -2048
-; LA64D-NEXT: xor $a0, $a0, $a1
-; LA64D-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
-; LA64D-NEXT: movcf2gr $a1, $fcc0
-; LA64D-NEXT: masknez $a0, $a0, $a1
-; LA64D-NEXT: ftintrz.l.s $fa0, $fa0
-; LA64D-NEXT: movfr2gr.d $a2, $fa0
-; LA64D-NEXT: maskeqz $a1, $a2, $a1
-; LA64D-NEXT: or $a0, $a1, $a0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = fptoui float %a to i64
- ret i64 %1
-}
-
-define float @convert_i8_to_float(i8 signext %a) nounwind {
-; LA32F-LABEL: convert_i8_to_float:
-; LA32F: # %bb.0:
-; LA32F-NEXT: movgr2fr.w $fa0, $a0
-; LA32F-NEXT: ffint.s.w $fa0, $fa0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_i8_to_float:
-; LA32D: # %bb.0:
-; LA32D-NEXT: movgr2fr.w $fa0, $a0
-; LA32D-NEXT: ffint.s.w $fa0, $fa0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_i8_to_float:
-; LA64F: # %bb.0:
-; LA64F-NEXT: movgr2fr.w $fa0, $a0
-; LA64F-NEXT: ffint.s.w $fa0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_i8_to_float:
-; LA64D: # %bb.0:
-; LA64D-NEXT: movgr2fr.w $fa0, $a0
-; LA64D-NEXT: ffint.s.w $fa0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = sitofp i8 %a to float
- ret float %1
-}
-
-define float @convert_i16_to_float(i16 signext %a) nounwind {
-; LA32F-LABEL: convert_i16_to_float:
-; LA32F: # %bb.0:
-; LA32F-NEXT: movgr2fr.w $fa0, $a0
-; LA32F-NEXT: ffint.s.w $fa0, $fa0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_i16_to_float:
-; LA32D: # %bb.0:
-; LA32D-NEXT: movgr2fr.w $fa0, $a0
-; LA32D-NEXT: ffint.s.w $fa0, $fa0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_i16_to_float:
-; LA64F: # %bb.0:
-; LA64F-NEXT: movgr2fr.w $fa0, $a0
-; LA64F-NEXT: ffint.s.w $fa0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_i16_to_float:
-; LA64D: # %bb.0:
-; LA64D-NEXT: movgr2fr.w $fa0, $a0
-; LA64D-NEXT: ffint.s.w $fa0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = sitofp i16 %a to float
- ret float %1
-}
-
-define float @convert_i32_to_float(i32 %a) nounwind {
-; LA32F-LABEL: convert_i32_to_float:
-; LA32F: # %bb.0:
-; LA32F-NEXT: movgr2fr.w $fa0, $a0
-; LA32F-NEXT: ffint.s.w $fa0, $fa0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_i32_to_float:
-; LA32D: # %bb.0:
-; LA32D-NEXT: movgr2fr.w $fa0, $a0
-; LA32D-NEXT: ffint.s.w $fa0, $fa0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_i32_to_float:
-; LA64F: # %bb.0:
-; LA64F-NEXT: addi.w $a0, $a0, 0
-; LA64F-NEXT: movgr2fr.w $fa0, $a0
-; LA64F-NEXT: ffint.s.w $fa0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_i32_to_float:
-; LA64D: # %bb.0:
-; LA64D-NEXT: addi.w $a0, $a0, 0
-; LA64D-NEXT: movgr2fr.w $fa0, $a0
-; LA64D-NEXT: ffint.s.w $fa0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = sitofp i32 %a to float
- ret float %1
-}
-
-define float @convert_i64_to_float(i64 %a) nounwind {
-; LA32F-LABEL: convert_i64_to_float:
-; LA32F: # %bb.0:
-; LA32F-NEXT: addi.w $sp, $sp, -16
-; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT: bl __floatdisf
-; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT: addi.w $sp, $sp, 16
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_i64_to_float:
-; LA32D: # %bb.0:
-; LA32D-NEXT: addi.w $sp, $sp, -16
-; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-NEXT: bl __floatdisf
-; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32D-NEXT: addi.w $sp, $sp, 16
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_i64_to_float:
-; LA64F: # %bb.0:
-; LA64F-NEXT: movgr2fr.w $fa0, $a0
-; LA64F-NEXT: ffint.s.w $fa0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_i64_to_float:
-; LA64D: # %bb.0:
-; LA64D-NEXT: movgr2fr.w $fa0, $a0
-; LA64D-NEXT: ffint.s.w $fa0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = sitofp i64 %a to float
- ret float %1
-}
-
-define float @convert_u8_to_float(i8 zeroext %a) nounwind {
-; LA32F-LABEL: convert_u8_to_float:
-; LA32F: # %bb.0:
-; LA32F-NEXT: movgr2fr.w $fa0, $a0
-; LA32F-NEXT: ffint.s.w $fa0, $fa0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_u8_to_float:
-; LA32D: # %bb.0:
-; LA32D-NEXT: movgr2fr.w $fa0, $a0
-; LA32D-NEXT: ffint.s.w $fa0, $fa0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_u8_to_float:
-; LA64F: # %bb.0:
-; LA64F-NEXT: movgr2fr.w $fa0, $a0
-; LA64F-NEXT: ffint.s.w $fa0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_u8_to_float:
-; LA64D: # %bb.0:
-; LA64D-NEXT: movgr2fr.w $fa0, $a0
-; LA64D-NEXT: ffint.s.w $fa0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = uitofp i8 %a to float
- ret float %1
-}
-
-define float @convert_u16_to_float(i16 zeroext %a) nounwind {
-; LA32F-LABEL: convert_u16_to_float:
-; LA32F: # %bb.0:
-; LA32F-NEXT: movgr2fr.w $fa0, $a0
-; LA32F-NEXT: ffint.s.w $fa0, $fa0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_u16_to_float:
-; LA32D: # %bb.0:
-; LA32D-NEXT: movgr2fr.w $fa0, $a0
-; LA32D-NEXT: ffint.s.w $fa0, $fa0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_u16_to_float:
-; LA64F: # %bb.0:
-; LA64F-NEXT: movgr2fr.w $fa0, $a0
-; LA64F-NEXT: ffint.s.w $fa0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_u16_to_float:
-; LA64D: # %bb.0:
-; LA64D-NEXT: movgr2fr.w $fa0, $a0
-; LA64D-NEXT: ffint.s.w $fa0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = uitofp i16 %a to float
- ret float %1
-}
-
-define float @convert_u32_to_float(i32 %a) nounwind {
-; LA32F-LABEL: convert_u32_to_float:
-; LA32F: # %bb.0:
-; LA32F-NEXT: srli.w $a1, $a0, 1
-; LA32F-NEXT: andi $a2, $a0, 1
-; LA32F-NEXT: or $a1, $a2, $a1
-; LA32F-NEXT: movgr2fr.w $fa0, $a1
-; LA32F-NEXT: ffint.s.w $fa0, $fa0
-; LA32F-NEXT: fadd.s $fa0, $fa0, $fa0
-; LA32F-NEXT: slti $a1, $a0, 0
-; LA32F-NEXT: movgr2cf $fcc0, $a1
-; LA32F-NEXT: movgr2fr.w $fa1, $a0
-; LA32F-NEXT: ffint.s.w $fa1, $fa1
-; LA32F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_u32_to_float:
-; LA32D: # %bb.0:
-; LA32D-NEXT: addi.w $sp, $sp, -16
-; LA32D-NEXT: addi.w $a1, $sp, 8
-; LA32D-NEXT: ori $a1, $a1, 4
-; LA32D-NEXT: lu12i.w $a2, 275200
-; LA32D-NEXT: st.w $a2, $a1, 0
-; LA32D-NEXT: st.w $a0, $sp, 8
-; LA32D-NEXT: pcalau12i $a0, .LCPI14_0
-; LA32D-NEXT: addi.w $a0, $a0, .LCPI14_0
-; LA32D-NEXT: fld.d $fa0, $a0, 0
-; LA32D-NEXT: fld.d $fa1, $sp, 8
-; LA32D-NEXT: fsub.d $fa0, $fa1, $fa0
-; LA32D-NEXT: fcvt.s.d $fa0, $fa0
-; LA32D-NEXT: addi.w $sp, $sp, 16
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_u32_to_float:
-; LA64F: # %bb.0:
-; LA64F-NEXT: bstrpick.d $a1, $a0, 31, 1
-; LA64F-NEXT: andi $a2, $a0, 1
-; LA64F-NEXT: or $a1, $a2, $a1
-; LA64F-NEXT: movgr2fr.w $fa0, $a1
-; LA64F-NEXT: ffint.s.w $fa0, $fa0
-; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0
-; LA64F-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64F-NEXT: slti $a1, $a0, 0
-; LA64F-NEXT: movgr2cf $fcc0, $a1
-; LA64F-NEXT: movgr2fr.w $fa1, $a0
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
-; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_u32_to_float:
-; LA64D: # %bb.0:
-; LA64D-NEXT: bstrpick.d $a1, $a0, 31, 1
-; LA64D-NEXT: andi $a2, $a0, 1
-; LA64D-NEXT: or $a1, $a2, $a1
-; LA64D-NEXT: movgr2fr.w $fa0, $a1
-; LA64D-NEXT: ffint.s.w $fa0, $fa0
-; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0
-; LA64D-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64D-NEXT: slti $a1, $a0, 0
-; LA64D-NEXT: movgr2cf $fcc0, $a1
-; LA64D-NEXT: movgr2fr.w $fa1, $a0
-; LA64D-NEXT: ffint.s.w $fa1, $fa1
-; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = uitofp i32 %a to float
- ret float %1
-}
-
-define float @convert_u64_to_float(i64 %a) nounwind {
-; LA32F-LABEL: convert_u64_to_float:
-; LA32F: # %bb.0:
-; LA32F-NEXT: addi.w $sp, $sp, -16
-; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT: bl __floatundisf
-; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT: addi.w $sp, $sp, 16
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: convert_u64_to_float:
-; LA32D: # %bb.0:
-; LA32D-NEXT: addi.w $sp, $sp, -16
-; LA32D-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-NEXT: bl __floatundisf
-; LA32D-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32D-NEXT: addi.w $sp, $sp, 16
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: convert_u64_to_float:
-; LA64F: # %bb.0:
-; LA64F-NEXT: srli.d $a1, $a0, 1
-; LA64F-NEXT: andi $a2, $a0, 1
-; LA64F-NEXT: or $a1, $a2, $a1
-; LA64F-NEXT: movgr2fr.w $fa0, $a1
-; LA64F-NEXT: ffint.s.w $fa0, $fa0
-; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0
-; LA64F-NEXT: slti $a1, $a0, 0
-; LA64F-NEXT: movgr2cf $fcc0, $a1
-; LA64F-NEXT: movgr2fr.w $fa1, $a0
-; LA64F-NEXT: ffint.s.w $fa1, $fa1
-; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: convert_u64_to_float:
-; LA64D: # %bb.0:
-; LA64D-NEXT: srli.d $a1, $a0, 1
-; LA64D-NEXT: andi $a2, $a0, 1
-; LA64D-NEXT: or $a1, $a2, $a1
-; LA64D-NEXT: movgr2fr.w $fa0, $a1
-; LA64D-NEXT: ffint.s.w $fa0, $fa0
-; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0
-; LA64D-NEXT: slti $a1, $a0, 0
-; LA64D-NEXT: movgr2cf $fcc0, $a1
-; LA64D-NEXT: movgr2fr.w $fa1, $a0
-; LA64D-NEXT: ffint.s.w $fa1, $fa1
-; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = uitofp i64 %a to float
- ret float %1
-}
-
-define i32 @bitcast_float_to_i32(float %a) nounwind {
-; LA32F-LABEL: bitcast_float_to_i32:
-; LA32F: # %bb.0:
-; LA32F-NEXT: movfr2gr.s $a0, $fa0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: bitcast_float_to_i32:
-; LA32D: # %bb.0:
-; LA32D-NEXT: movfr2gr.s $a0, $fa0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: bitcast_float_to_i32:
-; LA64F: # %bb.0:
-; LA64F-NEXT: movfr2gr.s $a0, $fa0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: bitcast_float_to_i32:
-; LA64D: # %bb.0:
-; LA64D-NEXT: movfr2gr.s $a0, $fa0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = bitcast float %a to i32
- ret i32 %1
-}
-
-define float @bitcast_i32_to_float(i32 %a) nounwind {
-; LA32F-LABEL: bitcast_i32_to_float:
-; LA32F: # %bb.0:
-; LA32F-NEXT: movgr2fr.w $fa0, $a0
-; LA32F-NEXT: jirl $zero, $ra, 0
-;
-; LA32D-LABEL: bitcast_i32_to_float:
-; LA32D: # %bb.0:
-; LA32D-NEXT: movgr2fr.w $fa0, $a0
-; LA32D-NEXT: jirl $zero, $ra, 0
-;
-; LA64F-LABEL: bitcast_i32_to_float:
-; LA64F: # %bb.0:
-; LA64F-NEXT: movgr2fr.w $fa0, $a0
-; LA64F-NEXT: jirl $zero, $ra, 0
-;
-; LA64D-LABEL: bitcast_i32_to_float:
-; LA64D: # %bb.0:
-; LA64D-NEXT: movgr2fr.w $fa0, $a0
-; LA64D-NEXT: jirl $zero, $ra, 0
- %1 = bitcast i32 %a to float
- ret float %1
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll
deleted file mode 100644
index 78ee031c1301..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fmul.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
-
-;; Exercise the 'fmul' LLVM IR: https://llvm.org/docs/LangRef.html#fmul-instruction
-
-define float @fmul_s(float %x, float %y) {
-; LA32-LABEL: fmul_s:
-; LA32: # %bb.0:
-; LA32-NEXT: fmul.s $fa0, $fa0, $fa1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fmul_s:
-; LA64: # %bb.0:
-; LA64-NEXT: fmul.s $fa0, $fa0, $fa1
-; LA64-NEXT: jirl $zero, $ra, 0
- %mul = fmul float %x, %y
- ret float %mul
-}
-
-define double @fmul_d(double %x, double %y) {
-; LA32-LABEL: fmul_d:
-; LA32: # %bb.0:
-; LA32-NEXT: fmul.d $fa0, $fa0, $fa1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fmul_d:
-; LA64: # %bb.0:
-; LA64-NEXT: fmul.d $fa0, $fa0, $fa1
-; LA64-NEXT: jirl $zero, $ra, 0
- %mul = fmul double %x, %y
- ret double %mul
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll
deleted file mode 100644
index 3a8a4127d8e7..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fneg.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
-
-;; Exercise the 'fneg' LLVM IR: https://llvm.org/docs/LangRef.html#fneg-instruction
-
-define float @fneg_s(float %x) {
-; LA32-LABEL: fneg_s:
-; LA32: # %bb.0:
-; LA32-NEXT: fneg.s $fa0, $fa0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fneg_s:
-; LA64: # %bb.0:
-; LA64-NEXT: fneg.s $fa0, $fa0
-; LA64-NEXT: jirl $zero, $ra, 0
- %neg = fneg float %x
- ret float %neg
-}
-
-define double @fneg_d(double %x) {
-; LA32-LABEL: fneg_d:
-; LA32: # %bb.0:
-; LA32-NEXT: fneg.d $fa0, $fa0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fneg_d:
-; LA64: # %bb.0:
-; LA64-NEXT: fneg.d $fa0, $fa0
-; LA64-NEXT: jirl $zero, $ra, 0
- %neg = fneg double %x
- ret double %neg
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll
deleted file mode 100644
index 9ddf583d999c..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fsub.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
-
-;; Exercise the 'fsub' LLVM IR: https://llvm.org/docs/LangRef.html#fsub-instruction
-
-define float @fsub_s(float %x, float %y) {
-; LA32-LABEL: fsub_s:
-; LA32: # %bb.0:
-; LA32-NEXT: fsub.s $fa0, $fa0, $fa1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fsub_s:
-; LA64: # %bb.0:
-; LA64-NEXT: fsub.s $fa0, $fa0, $fa1
-; LA64-NEXT: jirl $zero, $ra, 0
- %sub = fsub float %x, %y
- ret float %sub
-}
-
-define double @fsub_d(double %x, double %y) {
-; LA32-LABEL: fsub_d:
-; LA32: # %bb.0:
-; LA32-NEXT: fsub.d $fa0, $fa0, $fa1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fsub_d:
-; LA64: # %bb.0:
-; LA64-NEXT: fsub.d $fa0, $fa0, $fa1
-; LA64-NEXT: jirl $zero, $ra, 0
- %sub = fsub double %x, %y
- ret double %sub
-}
-
-define float @fneg_s(float %x) {
-; LA32-LABEL: fneg_s:
-; LA32: # %bb.0:
-; LA32-NEXT: fneg.s $fa0, $fa0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fneg_s:
-; LA64: # %bb.0:
-; LA64-NEXT: fneg.s $fa0, $fa0
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = fsub float -0.0, %x
- ret float %res
-}
-
-define double @fneg_d(double %x) {
-; LA32-LABEL: fneg_d:
-; LA32: # %bb.0:
-; LA32-NEXT: fneg.d $fa0, $fa0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fneg_d:
-; LA64: # %bb.0:
-; LA64-NEXT: fneg.d $fa0, $fa0
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = fsub double -0.0, %x
- ret double %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll
deleted file mode 100644
index 947886e6b9dc..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/icmp.ll
+++ /dev/null
@@ -1,244 +0,0 @@
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
-
-;; Exercise the 'icmp' LLVM IR: https://llvm.org/docs/LangRef.html#icmp-instruction
-
-define i1 @icmp_eq(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_eq:
-; LA32: # %bb.0:
-; LA32-NEXT: xor $a0, $a0, $a1
-; LA32-NEXT: sltui $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_eq:
-; LA64: # %bb.0:
-; LA64-NEXT: xor $a0, $a0, $a1
-; LA64-NEXT: sltui $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp eq i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_ne(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_ne:
-; LA32: # %bb.0:
-; LA32-NEXT: xor $a0, $a0, $a1
-; LA32-NEXT: sltu $a0, $zero, $a0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_ne:
-; LA64: # %bb.0:
-; LA64-NEXT: xor $a0, $a0, $a1
-; LA64-NEXT: sltu $a0, $zero, $a0
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp ne i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_ugt(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_ugt:
-; LA32: # %bb.0:
-; LA32-NEXT: sltu $a0, $a1, $a0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_ugt:
-; LA64: # %bb.0:
-; LA64-NEXT: sltu $a0, $a1, $a0
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp ugt i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_uge(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_uge:
-; LA32: # %bb.0:
-; LA32-NEXT: sltu $a0, $a0, $a1
-; LA32-NEXT: xori $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_uge:
-; LA64: # %bb.0:
-; LA64-NEXT: sltu $a0, $a0, $a1
-; LA64-NEXT: xori $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp uge i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_ult(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_ult:
-; LA32: # %bb.0:
-; LA32-NEXT: sltu $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_ult:
-; LA64: # %bb.0:
-; LA64-NEXT: sltu $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp ult i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_ule(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_ule:
-; LA32: # %bb.0:
-; LA32-NEXT: sltu $a0, $a1, $a0
-; LA32-NEXT: xori $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_ule:
-; LA64: # %bb.0:
-; LA64-NEXT: sltu $a0, $a1, $a0
-; LA64-NEXT: xori $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp ule i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_sgt(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_sgt:
-; LA32: # %bb.0:
-; LA32-NEXT: slt $a0, $a1, $a0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_sgt:
-; LA64: # %bb.0:
-; LA64-NEXT: slt $a0, $a1, $a0
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp sgt i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_sge(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_sge:
-; LA32: # %bb.0:
-; LA32-NEXT: slt $a0, $a0, $a1
-; LA32-NEXT: xori $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_sge:
-; LA64: # %bb.0:
-; LA64-NEXT: slt $a0, $a0, $a1
-; LA64-NEXT: xori $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp sge i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_slt(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_slt:
-; LA32: # %bb.0:
-; LA32-NEXT: slt $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_slt:
-; LA64: # %bb.0:
-; LA64-NEXT: slt $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp slt i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_sle(i32 signext %a, i32 signext %b) {
-; LA32-LABEL: icmp_sle:
-; LA32: # %bb.0:
-; LA32-NEXT: slt $a0, $a1, $a0
-; LA32-NEXT: xori $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_sle:
-; LA64: # %bb.0:
-; LA64-NEXT: slt $a0, $a1, $a0
-; LA64-NEXT: xori $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp sle i32 %a, %b
- ret i1 %res
-}
-
-define i1 @icmp_slt_3(i32 signext %a) {
-; LA32-LABEL: icmp_slt_3:
-; LA32: # %bb.0:
-; LA32-NEXT: slti $a0, $a0, 3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_slt_3:
-; LA64: # %bb.0:
-; LA64-NEXT: slti $a0, $a0, 3
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp slt i32 %a, 3
- ret i1 %res
-}
-
-define i1 @icmp_ult_3(i32 signext %a) {
-; LA32-LABEL: icmp_ult_3:
-; LA32: # %bb.0:
-; LA32-NEXT: sltui $a0, $a0, 3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_ult_3:
-; LA64: # %bb.0:
-; LA64-NEXT: sltui $a0, $a0, 3
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp ult i32 %a, 3
- ret i1 %res
-}
-
-define i1 @icmp_eq_0(i32 signext %a) {
-; LA32-LABEL: icmp_eq_0:
-; LA32: # %bb.0:
-; LA32-NEXT: sltui $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_eq_0:
-; LA64: # %bb.0:
-; LA64-NEXT: sltui $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp eq i32 %a, 0
- ret i1 %res
-}
-
-define i1 @icmp_eq_3(i32 signext %a) {
-; LA32-LABEL: icmp_eq_3:
-; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $a0, -3
-; LA32-NEXT: sltui $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_eq_3:
-; LA64: # %bb.0:
-; LA64-NEXT: addi.d $a0, $a0, -3
-; LA64-NEXT: sltui $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp eq i32 %a, 3
- ret i1 %res
-}
-
-define i1 @icmp_ne_0(i32 signext %a) {
-; LA32-LABEL: icmp_ne_0:
-; LA32: # %bb.0:
-; LA32-NEXT: sltu $a0, $zero, $a0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_ne_0:
-; LA64: # %bb.0:
-; LA64-NEXT: sltu $a0, $zero, $a0
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp ne i32 %a, 0
- ret i1 %res
-}
-
-define i1 @icmp_ne_3(i32 signext %a) {
-; LA32-LABEL: icmp_ne_3:
-; LA32: # %bb.0:
-; LA32-NEXT: addi.w $a0, $a0, -3
-; LA32-NEXT: sltu $a0, $zero, $a0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: icmp_ne_3:
-; LA64: # %bb.0:
-; LA64-NEXT: addi.d $a0, $a0, -3
-; LA64-NEXT: sltu $a0, $zero, $a0
-; LA64-NEXT: jirl $zero, $ra, 0
- %res = icmp ne i32 %a, 3
- ret i1 %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll
deleted file mode 100644
index abbd700f44f7..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/indirectbr.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
-
-define i32 @indirectbr(ptr %target) nounwind {
-; CHECK-LABEL: indirectbr:
-; CHECK: # %bb.0:
-; CHECK-NEXT: jirl $zero, $a0, 0
-; CHECK-NEXT: .LBB0_1: # %test_label
-; CHECK-NEXT: move $a0, $zero
-; CHECK-NEXT: jirl $zero, $ra, 0
- indirectbr ptr %target, [label %test_label]
-test_label:
- br label %ret
-ret:
- ret i32 0
-}
-
-define i32 @indirectbr_with_offset(ptr %a) nounwind {
-; CHECK-LABEL: indirectbr_with_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: jirl $zero, $a0, 1380
-; CHECK-NEXT: .LBB1_1: # %test_label
-; CHECK-NEXT: move $a0, $zero
-; CHECK-NEXT: jirl $zero, $ra, 0
- %target = getelementptr inbounds i8, ptr %a, i32 1380
- indirectbr ptr %target, [label %test_label]
-test_label:
- br label %ret
-ret:
- ret i32 0
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
deleted file mode 100644
index 1f06c818acf2..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
+++ /dev/null
@@ -1,143 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
-
-define i8 @load_acquire_i8(ptr %ptr) {
-; LA32-LABEL: load_acquire_i8:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.b $a0, $a0, 0
-; LA32-NEXT: dbar 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: load_acquire_i8:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.b $a0, $a0, 0
-; LA64-NEXT: dbar 0
-; LA64-NEXT: jirl $zero, $ra, 0
- %val = load atomic i8, ptr %ptr acquire, align 1
- ret i8 %val
-}
-
-define i16 @load_acquire_i16(ptr %ptr) {
-; LA32-LABEL: load_acquire_i16:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.h $a0, $a0, 0
-; LA32-NEXT: dbar 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: load_acquire_i16:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.h $a0, $a0, 0
-; LA64-NEXT: dbar 0
-; LA64-NEXT: jirl $zero, $ra, 0
- %val = load atomic i16, ptr %ptr acquire, align 2
- ret i16 %val
-}
-
-define i32 @load_acquire_i32(ptr %ptr) {
-; LA32-LABEL: load_acquire_i32:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a0, $a0, 0
-; LA32-NEXT: dbar 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: load_acquire_i32:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.w $a0, $a0, 0
-; LA64-NEXT: dbar 0
-; LA64-NEXT: jirl $zero, $ra, 0
- %val = load atomic i32, ptr %ptr acquire, align 4
- ret i32 %val
-}
-
-define i64 @load_acquire_i64(ptr %ptr) {
-; LA32-LABEL: load_acquire_i64:
-; LA32: # %bb.0:
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: .cfi_def_cfa_offset 16
-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: .cfi_offset 1, -4
-; LA32-NEXT: ori $a1, $zero, 2
-; LA32-NEXT: bl __atomic_load_8
-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: load_acquire_i64:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.d $a0, $a0, 0
-; LA64-NEXT: dbar 0
-; LA64-NEXT: jirl $zero, $ra, 0
- %val = load atomic i64, ptr %ptr acquire, align 8
- ret i64 %val
-}
-
-define void @store_release_i8(ptr %ptr, i8 signext %v) {
-; LA32-LABEL: store_release_i8:
-; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
-; LA32-NEXT: st.b $a0, $a1, 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: store_release_i8:
-; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: st.b $a0, $a1, 0
-; LA64-NEXT: jirl $zero, $ra, 0
- store atomic i8 %v, ptr %ptr release, align 1
- ret void
-}
-
-define void @store_release_i16(ptr %ptr, i16 signext %v) {
-; LA32-LABEL: store_release_i16:
-; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
-; LA32-NEXT: st.h $a0, $a1, 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: store_release_i16:
-; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: st.h $a0, $a1, 0
-; LA64-NEXT: jirl $zero, $ra, 0
- store atomic i16 %v, ptr %ptr release, align 2
- ret void
-}
-
-define void @store_release_i32(ptr %ptr, i32 signext %v) {
-; LA32-LABEL: store_release_i32:
-; LA32: # %bb.0:
-; LA32-NEXT: dbar 0
-; LA32-NEXT: st.w $a0, $a1, 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: store_release_i32:
-; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: st.w $a0, $a1, 0
-; LA64-NEXT: jirl $zero, $ra, 0
- store atomic i32 %v, ptr %ptr release, align 4
- ret void
-}
-
-define void @store_release_i64(ptr %ptr, i64 %v) {
-; LA32-LABEL: store_release_i64:
-; LA32: # %bb.0:
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: .cfi_def_cfa_offset 16
-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: .cfi_offset 1, -4
-; LA32-NEXT: ori $a3, $zero, 3
-; LA32-NEXT: bl __atomic_store_8
-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: store_release_i64:
-; LA64: # %bb.0:
-; LA64-NEXT: dbar 0
-; LA64-NEXT: st.d $a0, $a1, 0
-; LA64-NEXT: jirl $zero, $ra, 0
- store atomic i64 %v, ptr %ptr release, align 8
- ret void
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
deleted file mode 100644
index 8894e3cac3fb..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
+++ /dev/null
@@ -1,406 +0,0 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+d --relocation-model=static < %s | FileCheck %s --check-prefixes=ALL,LA32NOPIC,LA32
-; RUN: llc --mtriple=loongarch32 --mattr=+d --relocation-model=pic < %s | FileCheck %s --check-prefixes=ALL,LA32PIC,LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d --relocation-model=static < %s | FileCheck %s --check-prefixes=ALL,LA64NOPIC,LA64
-; RUN: llc --mtriple=loongarch64 --mattr=+d --relocation-model=pic < %s | FileCheck %s --check-prefixes=ALL,LA64PIC,LA64
-
-;; Check load from and store to global variables.
-@G = dso_local global i32 zeroinitializer, align 4
-@arr = dso_local global [10 x i32] zeroinitializer, align 4
-
-define i32 @load_store_global() nounwind {
-; ALL-LABEL: load_store_global:
-; ALL: # %bb.0:
-
-; LA32NOPIC-NEXT: pcalau12i $a0, G
-; LA32NOPIC-NEXT: addi.w $a1, $a0, G
-; LA32PIC-NEXT: pcalau12i $a0, .LG$local
-; LA32PIC-NEXT: addi.w $a1, $a0, .LG$local
-; LA32-NEXT: ld.w $a0, $a1, 0
-; LA32-NEXT: addi.w $a0, $a0, 1
-; LA32-NEXT: st.w $a0, $a1, 0
-
-; LA64NOPIC-NEXT: pcalau12i $a0, G
-; LA64NOPIC-NEXT: addi.d $a1, $a0, G
-; LA64PIC-NEXT: pcalau12i $a0, .LG$local
-; LA64PIC-NEXT: addi.d $a1, $a0, .LG$local
-; LA64-NEXT: ld.w $a0, $a1, 0
-; LA64-NEXT: addi.d $a0, $a0, 1
-; LA64-NEXT: st.w $a0, $a1, 0
-
-; ALL-NEXT: jirl $zero, $ra, 0
-
- %v = load i32, ptr @G
- %sum = add i32 %v, 1
- store i32 %sum, ptr @G
- ret i32 %sum
-}
-
-define i32 @load_store_global_array(i32 %a) nounwind {
-; ALL-LABEL: load_store_global_array:
-; ALL: # %bb.0:
-
-; LA32NOPIC-NEXT: pcalau12i $a1, arr
-; LA32NOPIC-NEXT: addi.w $a2, $a1, arr
-; LA32PIC-NEXT: pcalau12i $a1, .Larr$local
-; LA32PIC-NEXT: addi.w $a2, $a1, .Larr$local
-; LA32-NEXT: ld.w $a1, $a2, 0
-; LA32-NEXT: st.w $a0, $a2, 0
-; LA32NOPIC-NEXT: ld.w $a3, $a2, 0
-; LA32NOPIC-NEXT: st.w $a0, $a2, 0
-; LA32PIC-NEXT: ld.w $a3, $a2, 36
-; LA32PIC-NEXT: st.w $a0, $a2, 36
-
-; LA64NOPIC-NEXT: pcalau12i $a1, arr
-; LA64NOPIC-NEXT: addi.d $a2, $a1, arr
-; LA64PIC-NEXT: pcalau12i $a1, .Larr$local
-; LA64PIC-NEXT: addi.d $a2, $a1, .Larr$local
-; LA64-NEXT: ld.w $a1, $a2, 0
-; LA64-NEXT: st.w $a0, $a2, 0
-; LA64NOPIC-NEXT: ld.w $a3, $a2, 0
-; LA64NOPIC-NEXT: st.w $a0, $a2, 0
-; LA64PIC-NEXT: ld.w $a3, $a2, 36
-; LA64PIC-NEXT: st.w $a0, $a2, 36
-
-; ALL-NEXT: move $a0, $a1
-; ALL-NEXT: jirl $zero, $ra, 0
-
- %1 = load volatile i32, ptr @arr, align 4
- store i32 %a, ptr @arr, align 4
- %2 = getelementptr [10 x i32], ptr @arr, i32 0, i32 9
- %3 = load volatile i32, ptr %2, align 4
- store i32 %a, ptr %2, align 4
- ret i32 %1
-}
-
-;; Check indexed and unindexed, sext, zext and anyext loads.
-
-define i64 @ld_b(ptr %a) nounwind {
-; LA32-LABEL: ld_b:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.b $a1, $a0, 0
-; LA32-NEXT: ld.b $a0, $a0, 1
-; LA32-NEXT: srai.w $a1, $a0, 31
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: ld_b:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.b $a1, $a0, 0
-; LA64-NEXT: ld.b $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr i8, ptr %a, i64 1
- %2 = load i8, ptr %1
- %3 = sext i8 %2 to i64
- %4 = load volatile i8, ptr %a
- ret i64 %3
-}
-
-define i64 @ld_h(ptr %a) nounwind {
-; LA32-LABEL: ld_h:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.h $a1, $a0, 0
-; LA32-NEXT: ld.h $a0, $a0, 4
-; LA32-NEXT: srai.w $a1, $a0, 31
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: ld_h:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.h $a1, $a0, 0
-; LA64-NEXT: ld.h $a0, $a0, 4
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr i16, ptr %a, i64 2
- %2 = load i16, ptr %1
- %3 = sext i16 %2 to i64
- %4 = load volatile i16, ptr %a
- ret i64 %3
-}
-
-define i64 @ld_w(ptr %a) nounwind {
-; LA32-LABEL: ld_w:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a1, $a0, 0
-; LA32-NEXT: ld.w $a0, $a0, 12
-; LA32-NEXT: srai.w $a1, $a0, 31
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: ld_w:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.w $a1, $a0, 0
-; LA64-NEXT: ld.w $a0, $a0, 12
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr i32, ptr %a, i64 3
- %2 = load i32, ptr %1
- %3 = sext i32 %2 to i64
- %4 = load volatile i32, ptr %a
- ret i64 %3
-}
-
-define i64 @ld_d(ptr %a) nounwind {
-; LA32-LABEL: ld_d:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a1, $a0, 4
-; LA32-NEXT: ld.w $a1, $a0, 0
-; LA32-NEXT: ld.w $a1, $a0, 28
-; LA32-NEXT: ld.w $a0, $a0, 24
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: ld_d:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.d $a1, $a0, 0
-; LA64-NEXT: ld.d $a0, $a0, 24
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr i64, ptr %a, i64 3
- %2 = load i64, ptr %1
- %3 = load volatile i64, ptr %a
- ret i64 %2
-}
-
-define i64 @ld_bu(ptr %a) nounwind {
-; LA32-LABEL: ld_bu:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.bu $a1, $a0, 0
-; LA32-NEXT: ld.bu $a2, $a0, 4
-; LA32-NEXT: add.w $a0, $a2, $a1
-; LA32-NEXT: sltu $a1, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: ld_bu:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.bu $a1, $a0, 0
-; LA64-NEXT: ld.bu $a0, $a0, 4
-; LA64-NEXT: add.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr i8, ptr %a, i64 4
- %2 = load i8, ptr %1
- %3 = zext i8 %2 to i64
- %4 = load volatile i8, ptr %a
- %5 = zext i8 %4 to i64
- %6 = add i64 %3, %5
- ret i64 %6
-}
-
-define i64 @ld_hu(ptr %a) nounwind {
-; LA32-LABEL: ld_hu:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.hu $a1, $a0, 0
-; LA32-NEXT: ld.hu $a2, $a0, 10
-; LA32-NEXT: add.w $a0, $a2, $a1
-; LA32-NEXT: sltu $a1, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: ld_hu:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.hu $a1, $a0, 0
-; LA64-NEXT: ld.hu $a0, $a0, 10
-; LA64-NEXT: add.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr i16, ptr %a, i64 5
- %2 = load i16, ptr %1
- %3 = zext i16 %2 to i64
- %4 = load volatile i16, ptr %a
- %5 = zext i16 %4 to i64
- %6 = add i64 %3, %5
- ret i64 %6
-}
-
-define i64 @ld_wu(ptr %a) nounwind {
-; LA32-LABEL: ld_wu:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.w $a1, $a0, 0
-; LA32-NEXT: ld.w $a2, $a0, 20
-; LA32-NEXT: add.w $a0, $a2, $a1
-; LA32-NEXT: sltu $a1, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: ld_wu:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.wu $a1, $a0, 0
-; LA64-NEXT: ld.wu $a0, $a0, 20
-; LA64-NEXT: add.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr i32, ptr %a, i64 5
- %2 = load i32, ptr %1
- %3 = zext i32 %2 to i64
- %4 = load volatile i32, ptr %a
- %5 = zext i32 %4 to i64
- %6 = add i64 %3, %5
- ret i64 %6
-}
-
-;; Check indexed and unindexed stores.
-
-define void @st_b(ptr %a, i8 %b) nounwind {
-; ALL-LABEL: st_b:
-; ALL: # %bb.0:
-; ALL-NEXT: st.b $a1, $a0, 6
-; ALL-NEXT: st.b $a1, $a0, 0
-; ALL-NEXT: jirl $zero, $ra, 0
- store i8 %b, ptr %a
- %1 = getelementptr i8, ptr %a, i64 6
- store i8 %b, ptr %1
- ret void
-}
-
-define void @st_h(ptr %a, i16 %b) nounwind {
-; ALL-LABEL: st_h:
-; ALL: # %bb.0:
-; ALL-NEXT: st.h $a1, $a0, 14
-; ALL-NEXT: st.h $a1, $a0, 0
-; ALL-NEXT: jirl $zero, $ra, 0
- store i16 %b, ptr %a
- %1 = getelementptr i16, ptr %a, i64 7
- store i16 %b, ptr %1
- ret void
-}
-
-define void @st_w(ptr %a, i32 %b) nounwind {
-; ALL-LABEL: st_w:
-; ALL: # %bb.0:
-; ALL-NEXT: st.w $a1, $a0, 28
-; ALL-NEXT: st.w $a1, $a0, 0
-; ALL-NEXT: jirl $zero, $ra, 0
- store i32 %b, ptr %a
- %1 = getelementptr i32, ptr %a, i64 7
- store i32 %b, ptr %1
- ret void
-}
-
-define void @st_d(ptr %a, i64 %b) nounwind {
-; LA32-LABEL: st_d:
-; LA32: # %bb.0:
-; LA32-NEXT: st.w $a2, $a0, 68
-; LA32-NEXT: st.w $a2, $a0, 4
-; LA32-NEXT: st.w $a1, $a0, 64
-; LA32-NEXT: st.w $a1, $a0, 0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: st_d:
-; LA64: # %bb.0:
-; LA64-NEXT: st.d $a1, $a0, 64
-; LA64-NEXT: st.d $a1, $a0, 0
-; LA64-NEXT: jirl $zero, $ra, 0
- store i64 %b, ptr %a
- %1 = getelementptr i64, ptr %a, i64 8
- store i64 %b, ptr %1
- ret void
-}
-
-;; Check load from and store to an i1 location.
-define i64 @load_sext_zext_anyext_i1(ptr %a) nounwind {
- ;; sextload i1
-; LA32-LABEL: load_sext_zext_anyext_i1:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.b $a1, $a0, 0
-; LA32-NEXT: ld.bu $a1, $a0, 1
-; LA32-NEXT: ld.bu $a2, $a0, 2
-; LA32-NEXT: sub.w $a0, $a2, $a1
-; LA32-NEXT: sltu $a1, $a2, $a1
-; LA32-NEXT: sub.w $a1, $zero, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: load_sext_zext_anyext_i1:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.b $a1, $a0, 0
-; LA64-NEXT: ld.bu $a1, $a0, 1
-; LA64-NEXT: ld.bu $a0, $a0, 2
-; LA64-NEXT: sub.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr i1, ptr %a, i64 1
- %2 = load i1, ptr %1
- %3 = sext i1 %2 to i64
- ;; zextload i1
 - %4 = getelementptr i1, ptr %a, i64 2
- %5 = load i1, ptr %4
- %6 = zext i1 %5 to i64
- %7 = add i64 %3, %6
- ;; extload i1 (anyext). Produced as the load is unused.
- %8 = load volatile i1, ptr %a
- ret i64 %7
-}
-
-define i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
- ;; sextload i1
-; LA32-LABEL: load_sext_zext_anyext_i1_i16:
-; LA32: # %bb.0:
-; LA32-NEXT: ld.b $a1, $a0, 0
-; LA32-NEXT: ld.bu $a1, $a0, 1
-; LA32-NEXT: ld.bu $a0, $a0, 2
-; LA32-NEXT: sub.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: load_sext_zext_anyext_i1_i16:
-; LA64: # %bb.0:
-; LA64-NEXT: ld.b $a1, $a0, 0
-; LA64-NEXT: ld.bu $a1, $a0, 1
-; LA64-NEXT: ld.bu $a0, $a0, 2
-; LA64-NEXT: sub.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr i1, ptr %a, i64 1
- %2 = load i1, ptr %1
- %3 = sext i1 %2 to i16
- ;; zextload i1
- %4 = getelementptr i1, ptr %a, i64 2
- %5 = load i1, ptr %4
- %6 = zext i1 %5 to i16
- %7 = add i16 %3, %6
- ;; extload i1 (anyext). Produced as the load is unused.
- %8 = load volatile i1, ptr %a
- ret i16 %7
-}
-
-define i64 @ld_sd_constant(i64 %a) nounwind {
-; LA32-LABEL: ld_sd_constant:
-; LA32: # %bb.0:
-; LA32-NEXT: lu12i.w $a3, -136485
-; LA32-NEXT: ori $a4, $a3, 3823
-; LA32-NEXT: ld.w $a2, $a4, 0
-; LA32-NEXT: st.w $a0, $a4, 0
-; LA32-NEXT: ori $a0, $a3, 3827
-; LA32-NEXT: ld.w $a3, $a0, 0
-; LA32-NEXT: st.w $a1, $a0, 0
-; LA32-NEXT: move $a0, $a2
-; LA32-NEXT: move $a1, $a3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: ld_sd_constant:
-; LA64: # %bb.0:
-; LA64-NEXT: lu12i.w $a1, -136485
-; LA64-NEXT: ori $a1, $a1, 3823
-; LA64-NEXT: lu32i.d $a1, -147729
-; LA64-NEXT: lu52i.d $a2, $a1, -534
-; LA64-NEXT: ld.d $a1, $a2, 0
-; LA64-NEXT: st.d $a0, $a2, 0
-; LA64-NEXT: move $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = inttoptr i64 16045690984833335023 to ptr
- %2 = load volatile i64, ptr %1
- store i64 %a, ptr %1
- ret i64 %2
-}
-
-;; Check load from and store to a float location.
-define float @load_store_float(ptr %a, float %b) nounwind {
-; ALL-LABEL: load_store_float:
-; ALL: # %bb.0:
-; ALL-NEXT: fld.s $fa1, $a0, 4
-; ALL-NEXT: fst.s $fa0, $a0, 4
-; ALL-NEXT: fmov.s $fa0, $fa1
-; ALL-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr float, ptr %a, i64 1
- %2 = load float, ptr %1
- store float %b, ptr %1
- ret float %2
-}
-
-;; Check load from and store to a double location.
-define double @load_store_double(ptr %a, double %b) nounwind {
-; ALL-LABEL: load_store_double:
-; ALL: # %bb.0:
-; ALL-NEXT: fld.d $fa1, $a0, 8
-; ALL-NEXT: fst.d $fa0, $a0, 8
-; ALL-NEXT: fmov.d $fa0, $fa1
-; ALL-NEXT: jirl $zero, $ra, 0
- %1 = getelementptr double, ptr %a, i64 1
- %2 = load double, ptr %1
- store double %b, ptr %1
- ret double %2
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll
deleted file mode 100644
index 2f63c64de818..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/lshr.ll
+++ /dev/null
@@ -1,160 +0,0 @@
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
-
-;; Exercise the 'lshr' LLVM IR: https://llvm.org/docs/LangRef.html#lshr-instruction
-
-define i1 @lshr_i1(i1 %x, i1 %y) {
-; LA32-LABEL: lshr_i1:
-; LA32: # %bb.0:
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i1:
-; LA64: # %bb.0:
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i1 %x, %y
- ret i1 %lshr
-}
-
-define i8 @lshr_i8(i8 %x, i8 %y) {
-; LA32-LABEL: lshr_i8:
-; LA32: # %bb.0:
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: srl.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i8:
-; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: srl.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i8 %x, %y
- ret i8 %lshr
-}
-
-define i16 @lshr_i16(i16 %x, i16 %y) {
-; LA32-LABEL: lshr_i16:
-; LA32: # %bb.0:
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: srl.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i16:
-; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: srl.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i16 %x, %y
- ret i16 %lshr
-}
-
-define i32 @lshr_i32(i32 %x, i32 %y) {
-; LA32-LABEL: lshr_i32:
-; LA32: # %bb.0:
-; LA32-NEXT: srl.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i32:
-; LA64: # %bb.0:
-; LA64-NEXT: srl.w $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i32 %x, %y
- ret i32 %lshr
-}
-
-define i64 @lshr_i64(i64 %x, i64 %y) {
-; LA32-LABEL: lshr_i64:
-; LA32: # %bb.0:
-; LA32-NEXT: xori $a3, $a2, 31
-; LA32-NEXT: slli.w $a4, $a1, 1
-; LA32-NEXT: sll.w $a3, $a4, $a3
-; LA32-NEXT: srl.w $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a3
-; LA32-NEXT: addi.w $a3, $a2, -32
-; LA32-NEXT: slti $a4, $a3, 0
-; LA32-NEXT: maskeqz $a0, $a0, $a4
-; LA32-NEXT: srl.w $a5, $a1, $a3
-; LA32-NEXT: masknez $a4, $a5, $a4
-; LA32-NEXT: or $a0, $a0, $a4
-; LA32-NEXT: srl.w $a1, $a1, $a2
-; LA32-NEXT: srai.w $a2, $a3, 31
-; LA32-NEXT: and $a1, $a2, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i64:
-; LA64: # %bb.0:
-; LA64-NEXT: srl.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i64 %x, %y
- ret i64 %lshr
-}
-
-define i1 @lshr_i1_3(i1 %x) {
-; LA32-LABEL: lshr_i1_3:
-; LA32: # %bb.0:
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i1_3:
-; LA64: # %bb.0:
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i1 %x, 3
- ret i1 %lshr
-}
-
-define i8 @lshr_i8_3(i8 %x) {
-; LA32-LABEL: lshr_i8_3:
-; LA32: # %bb.0:
-; LA32-NEXT: bstrpick.w $a0, $a0, 7, 3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i8_3:
-; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a0, $a0, 7, 3
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i8 %x, 3
- ret i8 %lshr
-}
-
-define i16 @lshr_i16_3(i16 %x) {
-; LA32-LABEL: lshr_i16_3:
-; LA32: # %bb.0:
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i16_3:
-; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 3
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i16 %x, 3
- ret i16 %lshr
-}
-
-define i32 @lshr_i32_3(i32 %x) {
-; LA32-LABEL: lshr_i32_3:
-; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a0, $a0, 3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i32_3:
-; LA64: # %bb.0:
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 3
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i32 %x, 3
- ret i32 %lshr
-}
-
-define i64 @lshr_i64_3(i64 %x) {
-; LA32-LABEL: lshr_i64_3:
-; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a0, $a0, 3
-; LA32-NEXT: slli.w $a2, $a1, 29
-; LA32-NEXT: or $a0, $a0, $a2
-; LA32-NEXT: srli.w $a1, $a1, 3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: lshr_i64_3:
-; LA64: # %bb.0:
-; LA64-NEXT: srli.d $a0, $a0, 3
-; LA64-NEXT: jirl $zero, $ra, 0
- %lshr = lshr i64 %x, 3
- ret i64 %lshr
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll
deleted file mode 100644
index 0d31e790cf72..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll
+++ /dev/null
@@ -1,287 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
-
-;; Exercise the 'mul' LLVM IR: https://llvm.org/docs/LangRef.html#mul-instruction
-
-define i1 @mul_i1(i1 %a, i1 %b) {
-; LA32-LABEL: mul_i1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: mul.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mul_i1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: mul.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = mul i1 %a, %b
- ret i1 %r
-}
-
-define i8 @mul_i8(i8 %a, i8 %b) {
-; LA32-LABEL: mul_i8:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: mul.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mul_i8:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: mul.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = mul i8 %a, %b
- ret i8 %r
-}
-
-define i16 @mul_i16(i16 %a, i16 %b) {
-; LA32-LABEL: mul_i16:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: mul.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mul_i16:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: mul.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = mul i16 %a, %b
- ret i16 %r
-}
-
-define i32 @mul_i32(i32 %a, i32 %b) {
-; LA32-LABEL: mul_i32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: mul.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mul_i32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: mul.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = mul i32 %a, %b
- ret i32 %r
-}
-
-define i64 @mul_i64(i64 %a, i64 %b) {
-; LA32-LABEL: mul_i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: mul.w $a3, $a0, $a3
-; LA32-NEXT: mulh.wu $a4, $a0, $a2
-; LA32-NEXT: add.w $a3, $a4, $a3
-; LA32-NEXT: mul.w $a1, $a1, $a2
-; LA32-NEXT: add.w $a1, $a3, $a1
-; LA32-NEXT: mul.w $a0, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mul_i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: mul.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = mul i64 %a, %b
- ret i64 %r
-}
-
-define i64 @mul_pow2(i64 %a) {
-; LA32-LABEL: mul_pow2:
-; LA32: # %bb.0:
-; LA32-NEXT: slli.w $a1, $a1, 3
-; LA32-NEXT: srli.w $a2, $a0, 29
-; LA32-NEXT: or $a1, $a1, $a2
-; LA32-NEXT: slli.w $a0, $a0, 3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mul_pow2:
-; LA64: # %bb.0:
-; LA64-NEXT: slli.d $a0, $a0, 3
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = mul i64 %a, 8
- ret i64 %1
-}
-
-define i64 @mul_p5(i64 %a) {
-; LA32-LABEL: mul_p5:
-; LA32: # %bb.0:
-; LA32-NEXT: ori $a2, $zero, 5
-; LA32-NEXT: mul.w $a1, $a1, $a2
-; LA32-NEXT: mulh.wu $a3, $a0, $a2
-; LA32-NEXT: add.w $a1, $a3, $a1
-; LA32-NEXT: mul.w $a0, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mul_p5:
-; LA64: # %bb.0:
-; LA64-NEXT: ori $a1, $zero, 5
-; LA64-NEXT: mul.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = mul i64 %a, 5
- ret i64 %1
-}
-
-define i32 @mulh_w(i32 %a, i32 %b) {
-; LA32-LABEL: mulh_w:
-; LA32: # %bb.0:
-; LA32-NEXT: mulh.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mulh_w:
-; LA64: # %bb.0:
-; LA64-NEXT: mulw.d.w $a0, $a0, $a1
-; LA64-NEXT: srli.d $a0, $a0, 32
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = sext i32 %a to i64
- %2 = sext i32 %b to i64
- %3 = mul i64 %1, %2
- %4 = lshr i64 %3, 32
- %5 = trunc i64 %4 to i32
- ret i32 %5
-}
-
-define i32 @mulh_wu(i32 %a, i32 %b) {
-; LA32-LABEL: mulh_wu:
-; LA32: # %bb.0:
-; LA32-NEXT: mulh.wu $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mulh_wu:
-; LA64: # %bb.0:
-; LA64-NEXT: mulw.d.wu $a0, $a0, $a1
-; LA64-NEXT: srli.d $a0, $a0, 32
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = zext i32 %a to i64
- %2 = zext i32 %b to i64
- %3 = mul i64 %1, %2
- %4 = lshr i64 %3, 32
- %5 = trunc i64 %4 to i32
- ret i32 %5
-}
-
-define i64 @mulh_d(i64 %a, i64 %b) {
-; LA32-LABEL: mulh_d:
-; LA32: # %bb.0:
-; LA32-NEXT: mulh.wu $a4, $a0, $a2
-; LA32-NEXT: mul.w $a5, $a1, $a2
-; LA32-NEXT: add.w $a4, $a5, $a4
-; LA32-NEXT: sltu $a5, $a4, $a5
-; LA32-NEXT: mulh.wu $a6, $a1, $a2
-; LA32-NEXT: add.w $a5, $a6, $a5
-; LA32-NEXT: mul.w $a6, $a0, $a3
-; LA32-NEXT: add.w $a4, $a6, $a4
-; LA32-NEXT: sltu $a4, $a4, $a6
-; LA32-NEXT: mulh.wu $a6, $a0, $a3
-; LA32-NEXT: add.w $a4, $a6, $a4
-; LA32-NEXT: add.w $a4, $a5, $a4
-; LA32-NEXT: sltu $a5, $a4, $a5
-; LA32-NEXT: mulh.wu $a6, $a1, $a3
-; LA32-NEXT: add.w $a5, $a6, $a5
-; LA32-NEXT: mul.w $a6, $a1, $a3
-; LA32-NEXT: add.w $a4, $a6, $a4
-; LA32-NEXT: sltu $a6, $a4, $a6
-; LA32-NEXT: add.w $a5, $a5, $a6
-; LA32-NEXT: srai.w $a6, $a1, 31
-; LA32-NEXT: mul.w $a7, $a2, $a6
-; LA32-NEXT: mulh.wu $a2, $a2, $a6
-; LA32-NEXT: add.w $a2, $a2, $a7
-; LA32-NEXT: mul.w $a6, $a3, $a6
-; LA32-NEXT: add.w $a2, $a2, $a6
-; LA32-NEXT: srai.w $a3, $a3, 31
-; LA32-NEXT: mul.w $a1, $a3, $a1
-; LA32-NEXT: mulh.wu $a6, $a3, $a0
-; LA32-NEXT: add.w $a1, $a6, $a1
-; LA32-NEXT: mul.w $a0, $a3, $a0
-; LA32-NEXT: add.w $a1, $a1, $a0
-; LA32-NEXT: add.w $a1, $a1, $a2
-; LA32-NEXT: add.w $a2, $a0, $a7
-; LA32-NEXT: sltu $a0, $a2, $a0
-; LA32-NEXT: add.w $a0, $a1, $a0
-; LA32-NEXT: add.w $a1, $a5, $a0
-; LA32-NEXT: add.w $a0, $a4, $a2
-; LA32-NEXT: sltu $a2, $a0, $a4
-; LA32-NEXT: add.w $a1, $a1, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mulh_d:
-; LA64: # %bb.0:
-; LA64-NEXT: mulh.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = sext i64 %a to i128
- %2 = sext i64 %b to i128
- %3 = mul i128 %1, %2
- %4 = lshr i128 %3, 64
- %5 = trunc i128 %4 to i64
- ret i64 %5
-}
-
-define i64 @mulh_du(i64 %a, i64 %b) {
-; LA32-LABEL: mulh_du:
-; LA32: # %bb.0:
-; LA32-NEXT: mulh.wu $a4, $a0, $a2
-; LA32-NEXT: mul.w $a5, $a1, $a2
-; LA32-NEXT: add.w $a4, $a5, $a4
-; LA32-NEXT: sltu $a5, $a4, $a5
-; LA32-NEXT: mulh.wu $a2, $a1, $a2
-; LA32-NEXT: add.w $a2, $a2, $a5
-; LA32-NEXT: mul.w $a5, $a0, $a3
-; LA32-NEXT: add.w $a4, $a5, $a4
-; LA32-NEXT: sltu $a4, $a4, $a5
-; LA32-NEXT: mulh.wu $a0, $a0, $a3
-; LA32-NEXT: add.w $a0, $a0, $a4
-; LA32-NEXT: mul.w $a4, $a1, $a3
-; LA32-NEXT: mulh.wu $a1, $a1, $a3
-; LA32-NEXT: add.w $a0, $a2, $a0
-; LA32-NEXT: sltu $a2, $a0, $a2
-; LA32-NEXT: add.w $a1, $a1, $a2
-; LA32-NEXT: add.w $a0, $a4, $a0
-; LA32-NEXT: sltu $a2, $a0, $a4
-; LA32-NEXT: add.w $a1, $a1, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mulh_du:
-; LA64: # %bb.0:
-; LA64-NEXT: mulh.du $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = zext i64 %a to i128
- %2 = zext i64 %b to i128
- %3 = mul i128 %1, %2
- %4 = lshr i128 %3, 64
- %5 = trunc i128 %4 to i64
- ret i64 %5
-}
-
-define i64 @mulw_d_w(i32 %a, i32 %b) {
-; LA32-LABEL: mulw_d_w:
-; LA32: # %bb.0:
-; LA32-NEXT: mul.w $a2, $a0, $a1
-; LA32-NEXT: mulh.w $a1, $a0, $a1
-; LA32-NEXT: move $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mulw_d_w:
-; LA64: # %bb.0:
-; LA64-NEXT: mulw.d.w $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = sext i32 %a to i64
- %2 = sext i32 %b to i64
- %3 = mul i64 %1, %2
- ret i64 %3
-}
-
-define i64 @mulw_d_wu(i32 %a, i32 %b) {
-; LA32-LABEL: mulw_d_wu:
-; LA32: # %bb.0:
-; LA32-NEXT: mul.w $a2, $a0, $a1
-; LA32-NEXT: mulh.wu $a1, $a0, $a1
-; LA32-NEXT: move $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: mulw_d_wu:
-; LA64: # %bb.0:
-; LA64-NEXT: mulw.d.wu $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
- %1 = zext i32 %a to i64
- %2 = zext i32 %b to i64
- %3 = mul i64 %1, %2
- ret i64 %3
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll
deleted file mode 100644
index 37006573244b..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/or.ll
+++ /dev/null
@@ -1,264 +0,0 @@
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
-
-;; Exercise the 'or' LLVM IR: https://llvm.org/docs/LangRef.html#or-instruction
-
-define i1 @or_i1(i1 %a, i1 %b) {
-; LA32-LABEL: or_i1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i1 %a, %b
- ret i1 %r
-}
-
-define i8 @or_i8(i8 %a, i8 %b) {
-; LA32-LABEL: or_i8:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i8:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i8 %a, %b
- ret i8 %r
-}
-
-define i16 @or_i16(i16 %a, i16 %b) {
-; LA32-LABEL: or_i16:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i16:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i16 %a, %b
- ret i16 %r
-}
-
-define i32 @or_i32(i32 %a, i32 %b) {
-; LA32-LABEL: or_i32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i32 %a, %b
- ret i32 %r
-}
-
-define i64 @or_i64(i64 %a, i64 %b) {
-; LA32-LABEL: or_i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: or $a0, $a0, $a2
-; LA32-NEXT: or $a1, $a1, $a3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i64 %a, %b
- ret i64 %r
-}
-
-define i1 @or_i1_0(i1 %b) {
-; LA32-LABEL: or_i1_0:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i1_0:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i1 4, %b
- ret i1 %r
-}
-
-define i1 @or_i1_5(i1 %b) {
-; LA32-LABEL: or_i1_5:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ori $a0, $zero, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i1_5:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ori $a0, $zero, 1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i1 5, %b
- ret i1 %r
-}
-
-define i8 @or_i8_5(i8 %b) {
-; LA32-LABEL: or_i8_5:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ori $a0, $a0, 5
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i8_5:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ori $a0, $a0, 5
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i8 5, %b
- ret i8 %r
-}
-
-define i8 @or_i8_257(i8 %b) {
-; LA32-LABEL: or_i8_257:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ori $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i8_257:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ori $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i8 257, %b
- ret i8 %r
-}
-
-define i16 @or_i16_5(i16 %b) {
-; LA32-LABEL: or_i16_5:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ori $a0, $a0, 5
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i16_5:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ori $a0, $a0, 5
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i16 5, %b
- ret i16 %r
-}
-
-define i16 @or_i16_0x1000(i16 %b) {
-; LA32-LABEL: or_i16_0x1000:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a1, 1
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i16_0x1000:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a1, 1
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i16 4096, %b
- ret i16 %r
-}
-
-define i16 @or_i16_0x10001(i16 %b) {
-; LA32-LABEL: or_i16_0x10001:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ori $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i16_0x10001:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ori $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i16 65537, %b
- ret i16 %r
-}
-
-define i32 @or_i32_5(i32 %b) {
-; LA32-LABEL: or_i32_5:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ori $a0, $a0, 5
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i32_5:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ori $a0, $a0, 5
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i32 5, %b
- ret i32 %r
-}
-
-define i32 @or_i32_0x1000(i32 %b) {
-; LA32-LABEL: or_i32_0x1000:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a1, 1
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i32_0x1000:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a1, 1
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i32 4096, %b
- ret i32 %r
-}
-
-define i32 @or_i32_0x100000001(i32 %b) {
-; LA32-LABEL: or_i32_0x100000001:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ori $a0, $a0, 1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i32_0x100000001:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ori $a0, $a0, 1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i32 4294967297, %b
- ret i32 %r
-}
-
-define i64 @or_i64_5(i64 %b) {
-; LA32-LABEL: or_i64_5:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ori $a0, $a0, 5
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i64_5:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ori $a0, $a0, 5
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i64 5, %b
- ret i64 %r
-}
-
-define i64 @or_i64_0x1000(i64 %b) {
-; LA32-LABEL: or_i64_0x1000:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a2, 1
-; LA32-NEXT: or $a0, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: or_i64_0x1000:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a1, 1
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-entry:
- %r = or i64 4096, %b
- ret i64 %r
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
deleted file mode 100644
index 1f1a5c9b920c..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ /dev/null
@@ -1,685 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
-; RUN: llc --mtriple=loongarch32 -loongarch-check-zero-division < %s \
-; RUN: | FileCheck %s --check-prefix=LA32-TRAP
-; RUN: llc --mtriple=loongarch64 -loongarch-check-zero-division < %s \
-; RUN: | FileCheck %s --check-prefix=LA64-TRAP
-
-;; Test the sdiv/udiv/srem/urem LLVM IR.
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
deleted file mode 100644
index 1f1a5c9b920c..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ /dev/null
@@ -1,685 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
-; RUN: llc --mtriple=loongarch32 -loongarch-check-zero-division < %s \
-; RUN: | FileCheck %s --check-prefix=LA32-TRAP
-; RUN: llc --mtriple=loongarch64 -loongarch-check-zero-division < %s \
-; RUN: | FileCheck %s --check-prefix=LA64-TRAP
-
-;; Test the sdiv/udiv/srem/urem LLVM IR.
-
-define i1 @sdiv_i1(i1 %a, i1 %b) {
-; LA32-LABEL: sdiv_i1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: sdiv_i1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: sdiv_i1:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: sdiv_i1:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = sdiv i1 %a, %b
-  ret i1 %r
-}
-
-define i8 @sdiv_i8(i8 %a, i8 %b) {
-; LA32-LABEL: sdiv_i8:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ext.w.b $a1, $a1
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: div.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: sdiv_i8:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ext.w.b $a1, $a1
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: div.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: sdiv_i8:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: ext.w.b $a1, $a1
-; LA32-TRAP-NEXT: ext.w.b $a0, $a0
-; LA32-TRAP-NEXT: div.w $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: sdiv_i8:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: ext.w.b $a1, $a1
-; LA64-TRAP-NEXT: ext.w.b $a0, $a0
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = sdiv i8 %a, %b
-  ret i8 %r
-}
-
-define i16 @sdiv_i16(i16 %a, i16 %b) {
-; LA32-LABEL: sdiv_i16:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ext.w.h $a1, $a1
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: div.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: sdiv_i16:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ext.w.h $a1, $a1
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: div.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: sdiv_i16:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: ext.w.h $a1, $a1
-; LA32-TRAP-NEXT: ext.w.h $a0, $a0
-; LA32-TRAP-NEXT: div.w $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: sdiv_i16:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: ext.w.h $a1, $a1
-; LA64-TRAP-NEXT: ext.w.h $a0, $a0
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = sdiv i16 %a, %b
-  ret i16 %r
-}
-
-define i32 @sdiv_i32(i32 %a, i32 %b) {
-; LA32-LABEL: sdiv_i32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: div.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: sdiv_i32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.w $a1, $a1, 0
-; LA64-NEXT: addi.w $a0, $a0, 0
-; LA64-NEXT: div.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: sdiv_i32:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: div.w $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: sdiv_i32:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = sdiv i32 %a, %b
-  ret i32 %r
-}
-
-define i64 @sdiv_i64(i64 %a, i64 %b) {
-; LA32-LABEL: sdiv_i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: .cfi_def_cfa_offset 16
-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: .cfi_offset 1, -4
-; LA32-NEXT: bl __divdi3
-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: sdiv_i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: div.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: sdiv_i64:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: addi.w $sp, $sp, -16
-; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16
-; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-TRAP-NEXT: .cfi_offset 1, -4
-; LA32-TRAP-NEXT: bl __divdi3
-; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-TRAP-NEXT: addi.w $sp, $sp, 16
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: sdiv_i64:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: div.d $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = sdiv i64 %a, %b
-  ret i64 %r
-}
-
-define i1 @udiv_i1(i1 %a, i1 %b) {
-; LA32-LABEL: udiv_i1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: udiv_i1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: udiv_i1:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: udiv_i1:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = udiv i1 %a, %b
-  ret i1 %r
-}
-
-define i8 @udiv_i8(i8 %a, i8 %b) {
-; LA32-LABEL: udiv_i8:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: div.wu $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: udiv_i8:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: div.du $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: udiv_i8:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: andi $a1, $a1, 255
-; LA32-TRAP-NEXT: andi $a0, $a0, 255
-; LA32-TRAP-NEXT: div.wu $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: udiv_i8:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: andi $a1, $a1, 255
-; LA64-TRAP-NEXT: andi $a0, $a0, 255
-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = udiv i8 %a, %b
-  ret i8 %r
-}
-
-define i16 @udiv_i16(i16 %a, i16 %b) {
-; LA32-LABEL: udiv_i16:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: div.wu $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: udiv_i16:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: div.du $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: udiv_i16:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-TRAP-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-TRAP-NEXT: div.wu $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: udiv_i16:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = udiv i16 %a, %b
-  ret i16 %r
-}
-
-define i32 @udiv_i32(i32 %a, i32 %b) {
-; LA32-LABEL: udiv_i32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: div.wu $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: udiv_i32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: div.du $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: udiv_i32:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: div.wu $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: udiv_i32:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = udiv i32 %a, %b
-  ret i32 %r
-}
-
-define i64 @udiv_i64(i64 %a, i64 %b) {
-; LA32-LABEL: udiv_i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: .cfi_def_cfa_offset 16
-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: .cfi_offset 1, -4
-; LA32-NEXT: bl __udivdi3
-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: udiv_i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: div.du $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: udiv_i64:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: addi.w $sp, $sp, -16
-; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16
-; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-TRAP-NEXT: .cfi_offset 1, -4
-; LA32-TRAP-NEXT: bl __udivdi3
-; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-TRAP-NEXT: addi.w $sp, $sp, 16
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: udiv_i64:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: div.du $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = udiv i64 %a, %b
-  ret i64 %r
-}
-
-define i1 @srem_i1(i1 %a, i1 %b) {
-; LA32-LABEL: srem_i1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: move $a0, $zero
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: srem_i1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: move $a0, $zero
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: srem_i1:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: move $a0, $zero
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: srem_i1:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: move $a0, $zero
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = srem i1 %a, %b
-  ret i1 %r
-}
-
-define i8 @srem_i8(i8 %a, i8 %b) {
-; LA32-LABEL: srem_i8:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ext.w.b $a1, $a1
-; LA32-NEXT: ext.w.b $a0, $a0
-; LA32-NEXT: mod.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: srem_i8:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ext.w.b $a1, $a1
-; LA64-NEXT: ext.w.b $a0, $a0
-; LA64-NEXT: mod.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: srem_i8:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: ext.w.b $a1, $a1
-; LA32-TRAP-NEXT: ext.w.b $a0, $a0
-; LA32-TRAP-NEXT: mod.w $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: srem_i8:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: ext.w.b $a1, $a1
-; LA64-TRAP-NEXT: ext.w.b $a0, $a0
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = srem i8 %a, %b
-  ret i8 %r
-}
-
-define i16 @srem_i16(i16 %a, i16 %b) {
-; LA32-LABEL: srem_i16:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: ext.w.h $a1, $a1
-; LA32-NEXT: ext.w.h $a0, $a0
-; LA32-NEXT: mod.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: srem_i16:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: ext.w.h $a1, $a1
-; LA64-NEXT: ext.w.h $a0, $a0
-; LA64-NEXT: mod.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: srem_i16:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: ext.w.h $a1, $a1
-; LA32-TRAP-NEXT: ext.w.h $a0, $a0
-; LA32-TRAP-NEXT: mod.w $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: srem_i16:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: ext.w.h $a1, $a1
-; LA64-TRAP-NEXT: ext.w.h $a0, $a0
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = srem i16 %a, %b
-  ret i16 %r
-}
-
-define i32 @srem_i32(i32 %a, i32 %b) {
-; LA32-LABEL: srem_i32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: mod.w $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: srem_i32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: addi.w $a1, $a1, 0
-; LA64-NEXT: addi.w $a0, $a0, 0
-; LA64-NEXT: mod.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: srem_i32:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: mod.w $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: srem_i32:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: addi.w $a1, $a1, 0
-; LA64-TRAP-NEXT: addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = srem i32 %a, %b
-  ret i32 %r
-}
-
-define i64 @srem_i64(i64 %a, i64 %b) {
-; LA32-LABEL: srem_i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: .cfi_def_cfa_offset 16
-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: .cfi_offset 1, -4
-; LA32-NEXT: bl __moddi3
-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: srem_i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: mod.d $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: srem_i64:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: addi.w $sp, $sp, -16
-; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16
-; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-TRAP-NEXT: .cfi_offset 1, -4
-; LA32-TRAP-NEXT: bl __moddi3
-; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-TRAP-NEXT: addi.w $sp, $sp, 16
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: srem_i64:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: mod.d $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = srem i64 %a, %b
-  ret i64 %r
-}
-
-define i1 @urem_i1(i1 %a, i1 %b) {
-; LA32-LABEL: urem_i1:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: move $a0, $zero
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: urem_i1:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: move $a0, $zero
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: urem_i1:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: move $a0, $zero
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: urem_i1:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: move $a0, $zero
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = urem i1 %a, %b
-  ret i1 %r
-}
-
-define i8 @urem_i8(i8 %a, i8 %b) {
-; LA32-LABEL: urem_i8:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: andi $a1, $a1, 255
-; LA32-NEXT: andi $a0, $a0, 255
-; LA32-NEXT: mod.wu $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: urem_i8:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: andi $a1, $a1, 255
-; LA64-NEXT: andi $a0, $a0, 255
-; LA64-NEXT: mod.du $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: urem_i8:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: andi $a1, $a1, 255
-; LA32-TRAP-NEXT: andi $a0, $a0, 255
-; LA32-TRAP-NEXT: mod.wu $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: urem_i8:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: andi $a1, $a1, 255
-; LA64-TRAP-NEXT: andi $a0, $a0, 255
-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = urem i8 %a, %b
-  ret i8 %r
-}
-
-define i16 @urem_i16(i16 %a, i16 %b) {
-; LA32-LABEL: urem_i16:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-NEXT: mod.wu $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: urem_i16:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-NEXT: mod.du $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: urem_i16:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: bstrpick.w $a1, $a1, 15, 0
-; LA32-TRAP-NEXT: bstrpick.w $a0, $a0, 15, 0
-; LA32-TRAP-NEXT: mod.wu $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: urem_i16:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 15, 0
-; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 15, 0
-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = urem i16 %a, %b
-  ret i16 %r
-}
-
-define i32 @urem_i32(i32 %a, i32 %b) {
-; LA32-LABEL: urem_i32:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: mod.wu $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: urem_i32:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-NEXT: mod.du $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: urem_i32:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: mod.wu $a0, $a0, $a1
-; LA32-TRAP-NEXT: bnez $a1, 8
-; LA32-TRAP-NEXT: break 7
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: urem_i32:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: bstrpick.d $a1, $a1, 31, 0
-; LA64-TRAP-NEXT: bstrpick.d $a0, $a0, 31, 0
-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = urem i32 %a, %b
-  ret i32 %r
-}
-
-define i64 @urem_i64(i64 %a, i64 %b) {
-; LA32-LABEL: urem_i64:
-; LA32: # %bb.0: # %entry
-; LA32-NEXT: addi.w $sp, $sp, -16
-; LA32-NEXT: .cfi_def_cfa_offset 16
-; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT: .cfi_offset 1, -4
-; LA32-NEXT: bl __umoddi3
-; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 16
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: urem_i64:
-; LA64: # %bb.0: # %entry
-; LA64-NEXT: mod.du $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-;
-; LA32-TRAP-LABEL: urem_i64:
-; LA32-TRAP: # %bb.0: # %entry
-; LA32-TRAP-NEXT: addi.w $sp, $sp, -16
-; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16
-; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-TRAP-NEXT: .cfi_offset 1, -4
-; LA32-TRAP-NEXT: bl __umoddi3
-; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-TRAP-NEXT: addi.w $sp, $sp, 16
-; LA32-TRAP-NEXT: jirl $zero, $ra, 0
-;
-; LA64-TRAP-LABEL: urem_i64:
-; LA64-TRAP: # %bb.0: # %entry
-; LA64-TRAP-NEXT: mod.du $a0, $a0, $a1
-; LA64-TRAP-NEXT: bnez $a1, 8
-; LA64-TRAP-NEXT: break 7
-; LA64-TRAP-NEXT: jirl $zero, $ra, 0
-entry:
-  %r = urem i64 %a, %b
-  ret i64 %r
-}
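The *-TRAP prefixes above come from -loongarch-check-zero-division, which guards every hardware divide or modulo with a two-instruction sequence: bnez skips 8 bytes ahead when the divisor is non-zero, otherwise break 7 raises the divide-by-zero trap; the i1 cases and the LA32 i64 libcall paths get no guard. A minimal reproduction, assuming the same flag and FileCheck setup as the RUN lines above (the function name is illustrative):

define i32 @div_checked(i32 %a, i32 %b) {
entry:
  ; expected with -loongarch-check-zero-division on loongarch32:
  ;   div.w $a0, $a0, $a1
  ;   bnez  $a1, 8       # skip the trap when the divisor is non-zero
  ;   break 7            # divide-by-zero break code
  %q = sdiv i32 %a, %b
  ret i32 %q
}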
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll
deleted file mode 100644
index 4c6026aba5ac..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-dbl.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
-
-;; Test the bare double-precision floating-point values selection:
-;; https://llvm.org/docs/LangRef.html#select-instruction
-
-define double @test(i1 %a, double %b, double %c) {
-; LA32-LABEL: test:
-; LA32: # %bb.0:
-; LA32-NEXT: andi $a0, $a0, 1
-; LA32-NEXT: movgr2cf $fcc0, $a0
-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: test:
-; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 1
-; LA64-NEXT: movgr2cf $fcc0, $a0
-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %res = select i1 %a, double %b, double %c
-  ret double %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll
deleted file mode 100644
index af4789b52258..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-flt.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64
-
-;; Test the bare single-precision floating-point values selection:
-;; https://llvm.org/docs/LangRef.html#select-instruction
-
-define float @test(i1 %a, float %b, float %c) {
-; LA32-LABEL: test:
-; LA32: # %bb.0:
-; LA32-NEXT: andi $a0, $a0, 1
-; LA32-NEXT: movgr2cf $fcc0, $a0
-; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: test:
-; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 1
-; LA64-NEXT: movgr2cf $fcc0, $a0
-; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %res = select i1 %a, float %b, float %c
-  ret float %res
-}
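Both bare floating-point selects above share one lowering shape: andi masks the i1 condition down to bit 0, movgr2cf copies that bit into the $fcc0 flag, and fsel returns its third register operand when the flag is set and its second otherwise. A sketch restating the pattern (the function name and the expected-code comments are an assumption mirroring the checks above, not additional output):

define double @select_via_flag(i1 %c, double %t, double %f) {
  ;   andi     $a0, $a0, 1              # only bit 0 of the i1 is defined
  ;   movgr2cf $fcc0, $a0               # GPR bit -> FP condition flag
  ;   fsel     $fa0, $fa1, $fa0, $fcc0  # $fcc0 ? %t : %f
  %res = select i1 %c, double %t, double %f
  ret double %res
}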
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll
deleted file mode 100644
index 3481e79b248b..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-bare-int.ll
+++ /dev/null
@@ -1,107 +0,0 @@
-; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
-
-;; Exercise the bare integers 'select' LLVM IR: https://llvm.org/docs/LangRef.html#select-instruction
-
-define i1 @bare_select_i1(i1 %a, i1 %b, i1 %c) {
-; LA32-LABEL: bare_select_i1:
-; LA32: # %bb.0:
-; LA32-NEXT: andi $a0, $a0, 1
-; LA32-NEXT: masknez $a2, $a2, $a0
-; LA32-NEXT: maskeqz $a0, $a1, $a0
-; LA32-NEXT: or $a0, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: bare_select_i1:
-; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 1
-; LA64-NEXT: masknez $a2, $a2, $a0
-; LA64-NEXT: maskeqz $a0, $a1, $a0
-; LA64-NEXT: or $a0, $a0, $a2
-; LA64-NEXT: jirl $zero, $ra, 0
-  %res = select i1 %a, i1 %b, i1 %c
-  ret i1 %res
-}
-
-define i8 @bare_select_i8(i1 %a, i8 %b, i8 %c) {
-; LA32-LABEL: bare_select_i8:
-; LA32: # %bb.0:
-; LA32-NEXT: andi $a0, $a0, 1
-; LA32-NEXT: masknez $a2, $a2, $a0
-; LA32-NEXT: maskeqz $a0, $a1, $a0
-; LA32-NEXT: or $a0, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: bare_select_i8:
-; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 1
-; LA64-NEXT: masknez $a2, $a2, $a0
-; LA64-NEXT: maskeqz $a0, $a1, $a0
-; LA64-NEXT: or $a0, $a0, $a2
-; LA64-NEXT: jirl $zero, $ra, 0
-  %res = select i1 %a, i8 %b, i8 %c
-  ret i8 %res
-}
-
-define i16 @bare_select_i16(i1 %a, i16 %b, i16 %c) {
-; LA32-LABEL: bare_select_i16:
-; LA32: # %bb.0:
-; LA32-NEXT: andi $a0, $a0, 1
-; LA32-NEXT: masknez $a2, $a2, $a0
-; LA32-NEXT: maskeqz $a0, $a1, $a0
-; LA32-NEXT: or $a0, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: bare_select_i16:
-; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 1
-; LA64-NEXT: masknez $a2, $a2, $a0
-; LA64-NEXT: maskeqz $a0, $a1, $a0
-; LA64-NEXT: or $a0, $a0, $a2
-; LA64-NEXT: jirl $zero, $ra, 0
-  %res = select i1 %a, i16 %b, i16 %c
-  ret i16 %res
-}
-
-define i32 @bare_select_i32(i1 %a, i32 %b, i32 %c) {
-; LA32-LABEL: bare_select_i32:
-; LA32: # %bb.0:
-; LA32-NEXT: andi $a0, $a0, 1
-; LA32-NEXT: masknez $a2, $a2, $a0
-; LA32-NEXT: maskeqz $a0, $a1, $a0
-; LA32-NEXT: or $a0, $a0, $a2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: bare_select_i32:
-; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 1
-; LA64-NEXT: masknez $a2, $a2, $a0
-; LA64-NEXT: maskeqz $a0, $a1, $a0
-; LA64-NEXT: or $a0, $a0, $a2
-; LA64-NEXT: jirl $zero, $ra, 0
-  %res = select i1 %a, i32 %b, i32 %c
-  ret i32 %res
-}
-
-define i64 @bare_select_i64(i1 %a, i64 %b, i64 %c) {
-; LA32-LABEL: bare_select_i64:
-; LA32: # %bb.0:
-; LA32-NEXT: andi $a5, $a0, 1
-; LA32-NEXT: masknez $a0, $a3, $a5
-; LA32-NEXT: maskeqz $a1, $a1, $a5
-; LA32-NEXT: or $a0, $a1, $a0
-; LA32-NEXT: masknez $a1, $a4, $a5
-; LA32-NEXT: maskeqz $a2, $a2, $a5
-; LA32-NEXT: or $a1, $a2, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: bare_select_i64:
-; LA64: # %bb.0:
-; LA64-NEXT: andi $a0, $a0, 1
-; LA64-NEXT: masknez $a2, $a2, $a0
-; LA64-NEXT: maskeqz $a0, $a1, $a0
-; LA64-NEXT: or $a0, $a0, $a2
-; LA64-NEXT: jirl $zero, $ra, 0
-  %res = select i1 %a, i64 %b, i64 %c
-  ret i64 %res
-}
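Integer select is fully branchless here: maskeqz passes its source through only when the condition register is non-zero, masknez only when it is zero, and or merges the two; bare_select_i64 above shows LA32 simply running the pattern once per 32-bit half. A sketch of the i32 case, with the expected instructions as comments (an assumption, matching the checks above):

define i32 @sel_i32(i1 %c, i32 %t, i32 %f) {
  ;   andi    $a0, $a0, 1     # normalize the condition to 0/1
  ;   masknez $a2, $a2, $a0   # $a2 = cond ? 0 : %f
  ;   maskeqz $a0, $a1, $a0   # $a0 = cond ? %t : 0
  ;   or      $a0, $a0, $a2   # combine the two halves
  %res = select i1 %c, i32 %t, i32 %f
  ret i32 %res
}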
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll
deleted file mode 100644
index 4397b64d927b..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-dbl.ll
+++ /dev/null
@@ -1,272 +0,0 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
-
-;; Test double-precision floating-point values selection after comparison
-
-define double @fcmp_false(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_false:
-; LA32: # %bb.0:
-; LA32-NEXT: fmov.d $fa0, $fa3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_false:
-; LA64: # %bb.0:
-; LA64-NEXT: fmov.d $fa0, $fa3
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp false double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_oeq(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_oeq:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_oeq:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp oeq double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_ogt(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_ogt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ogt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ogt double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_oge(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_oge:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_oge:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp oge double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_olt(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_olt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_olt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp olt double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_ole(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_ole:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ole:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ole double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_one(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_one:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_one:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp one double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_ord(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_ord:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ord:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ord double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_ueq(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_ueq:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ueq:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ueq double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_ugt(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_ugt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ugt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ugt double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_uge(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_uge:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_uge:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp uge double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_ult(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_ult:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ult:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ult double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_ule(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_ule:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ule:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ule double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_une(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_une:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_une:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp une double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_uno(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_uno:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_uno:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp uno double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
-
-define double @fcmp_true(double %a, double %b, double %x, double %y) {
-; LA32-LABEL: fcmp_true:
-; LA32: # %bb.0:
-; LA32-NEXT: fmov.d $fa0, $fa2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_true:
-; LA64: # %bb.0:
-; LA64-NEXT: fmov.d $fa0, $fa2
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp true double %a, %b
-  %res = select i1 %cmp, double %x, double %y
-  ret double %res
-}
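The sixteen cases above spell out the fcmp predicate mapping: each predicate becomes one c-prefixed compare writing $fcc0 (ceq/clt/cle and their cueq/cult/cule, cne/cune, cor/cun variants), ogt/oge/ugt/uge swap the operands to reuse the less-than forms, and false/true need no compare at all, degenerating to a plain fmov. One representative sketch (the expected code is an assumption mirroring fcmp_ogt above):

define double @sel_ogt(double %a, double %b, double %x, double %y) {
  ;   fcmp.clt.d $fcc0, $fa1, $fa0       # ogt via swapped clt
  ;   fsel       $fa0, $fa3, $fa2, $fcc0
  %cmp = fcmp ogt double %a, %b
  %res = select i1 %cmp, double %x, double %y
  ret double %res
}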
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll
deleted file mode 100644
index 23d71493cb4b..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-flt.ll
+++ /dev/null
@@ -1,272 +0,0 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64
-
-;; Test single-precision floating-point values selection after comparison
-
-define float @fcmp_false(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_false:
-; LA32: # %bb.0:
-; LA32-NEXT: fmov.s $fa0, $fa3
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_false:
-; LA64: # %bb.0:
-; LA64-NEXT: fmov.s $fa0, $fa3
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp false float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_oeq(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_oeq:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_oeq:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp oeq float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_ogt(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_ogt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ogt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ogt float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_oge(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_oge:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_oge:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp oge float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_olt(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_olt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_olt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp olt float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_ole(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_ole:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ole:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ole float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_one(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_one:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_one:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp one float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_ord(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_ord:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ord:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ord float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_ueq(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_ueq:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ueq:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ueq float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_ugt(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_ugt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ugt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ugt float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_uge(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_uge:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_uge:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp uge float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_ult(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_ult:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ult:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ult float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_ule(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_ule:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_ule:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ule float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_une(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_une:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_une:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp une float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_uno(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_uno:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1
-; LA32-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_uno:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1
-; LA64-NEXT: fsel $fa0, $fa3, $fa2, $fcc0
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp uno float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
-
-define float @fcmp_true(float %a, float %b, float %x, float %y) {
-; LA32-LABEL: fcmp_true:
-; LA32: # %bb.0:
-; LA32-NEXT: fmov.s $fa0, $fa2
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: fcmp_true:
-; LA64: # %bb.0:
-; LA64-NEXT: fmov.s $fa0, $fa2
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp true float %a, %b
-  %res = select i1 %cmp, float %x, float %y
-  ret float %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll
deleted file mode 100644
index 9e742ee576cb..000000000000
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-fpcc-int.ll
+++ /dev/null
@@ -1,704 +0,0 @@
-; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
-; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
-
-;; Test integers selection after `fcmp`
-
-define i32 @f32_fcmp_false(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_false:
-; LA32: # %bb.0:
-; LA32-NEXT: move $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_false:
-; LA64: # %bb.0:
-; LA64-NEXT: move $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp false float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_oeq(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_oeq:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_oeq:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.ceq.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp oeq float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_ogt(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_ogt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_ogt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.clt.s $fcc0, $fa1, $fa0
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ogt float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_oge(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_oge:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_oge:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cle.s $fcc0, $fa1, $fa0
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp oge float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_olt(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_olt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_olt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.clt.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp olt float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_ole(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_ole:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_ole:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cle.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ole float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_one(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_one:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_one:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cne.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp one float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_ord(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_ord:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_ord:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cor.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ord float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_ueq(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_ueq:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_ueq:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cueq.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ueq float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_ugt(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_ugt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_ugt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cult.s $fcc0, $fa1, $fa0
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ugt float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_uge(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_uge:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_uge:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cule.s $fcc0, $fa1, $fa0
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp uge float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_ult(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_ult:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_ult:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cult.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ult float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_ule(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_ule:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_ule:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cule.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ule float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_une(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_une:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_une:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cune.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp une float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_uno(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_uno:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_uno:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cun.s $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp uno float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f32_fcmp_true(float %a, float %b, i32 %x, i32 %y) {
-; LA32-LABEL: f32_fcmp_true:
-; LA32: # %bb.0:
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f32_fcmp_true:
-; LA64: # %bb.0:
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp true float %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_false(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_false:
-; LA32: # %bb.0:
-; LA32-NEXT: move $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_false:
-; LA64: # %bb.0:
-; LA64-NEXT: move $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp false double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_oeq(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_oeq:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_oeq:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp oeq double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_ogt(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_ogt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_ogt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.clt.d $fcc0, $fa1, $fa0
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ogt double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_oge(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_oge:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_oge:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cle.d $fcc0, $fa1, $fa0
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp oge double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_olt(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_olt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_olt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp olt double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_ole(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_ole:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_ole:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ole double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_one(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_one:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_one:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cne.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp one double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_ord(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_ord:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_ord:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cor.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ord double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_ueq(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_ueq:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_ueq:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cueq.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ueq double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_ugt(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_ugt:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_ugt:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cult.d $fcc0, $fa1, $fa0
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ugt double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_uge(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_uge:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_uge:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cule.d $fcc0, $fa1, $fa0
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp uge double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_ult(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_ult:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_ult:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cult.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ult double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_ule(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_ule:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_ule:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cule.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp ule double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_une(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_une:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_une:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cune.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp une double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_uno(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_uno:
-; LA32: # %bb.0:
-; LA32-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1
-; LA32-NEXT: movcf2gr $a2, $fcc0
-; LA32-NEXT: masknez $a1, $a1, $a2
-; LA32-NEXT: maskeqz $a0, $a0, $a2
-; LA32-NEXT: or $a0, $a0, $a1
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_uno:
-; LA64: # %bb.0:
-; LA64-NEXT: fcmp.cun.d $fcc0, $fa0, $fa1
-; LA64-NEXT: movcf2gr $a2, $fcc0
-; LA64-NEXT: masknez $a1, $a1, $a2
-; LA64-NEXT: maskeqz $a0, $a0, $a2
-; LA64-NEXT: or $a0, $a0, $a1
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp uno double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
-
-define i32 @f64_fcmp_true(double %a, double %b, i32 %x, i32 %y) {
-; LA32-LABEL: f64_fcmp_true:
-; LA32: # %bb.0:
-; LA32-NEXT: jirl $zero, $ra, 0
-;
-; LA64-LABEL: f64_fcmp_true:
-; LA64: # %bb.0:
-; LA64-NEXT: jirl $zero, $ra, 0
-  %cmp = fcmp true double %a, %b
-  %res = select i1 %cmp, i32 %x, i32 %y
-  ret i32 %res
-}
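When the selected values are integers, the two idioms compose: the compare still writes $fcc0, movcf2gr brings the flag back into a GPR, and the masknez/maskeqz/or triple finishes the select. A sketch of the composed shape (the function name and comments are an assumption, matching the f32_fcmp_oeq checks above):

define i32 @sel_int_after_fcmp(float %a, float %b, i32 %x, i32 %y) {
  ;   fcmp.ceq.s $fcc0, $fa0, $fa1
  ;   movcf2gr   $a2, $fcc0      # FP flag -> GPR
  ;   masknez    $a1, $a1, $a2   # keep %y only when the flag is 0
  ;   maskeqz    $a0, $a0, $a2   # keep %x only when the flag is 1
  ;   or         $a0, $a0, $a1
  %cmp = fcmp oeq float %a, %b
  %res = select i1 %cmp, i32 %x, i32 %y
  ret i32 %res
}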
%res -} - -define double @select_uge(i32 signext %a, i32 signext %b, double %x, double %y) { -; LA32-LABEL: select_uge: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a0, $a1 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_uge: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a0, $a1 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp uge i32 %a, %b - %res = select i1 %cond, double %x, double %y - ret double %res -} - -define double @select_ult(i32 signext %a, i32 signext %b, double %x, double %y) { -; LA32-LABEL: select_ult: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a0, $a1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ult: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a0, $a1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp ult i32 %a, %b - %res = select i1 %cond, double %x, double %y - ret double %res -} - -define double @select_ule(i32 signext %a, i32 signext %b, double %x, double %y) { -; LA32-LABEL: select_ule: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a1, $a0 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ule: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a1, $a0 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp ule i32 %a, %b - %res = select i1 %cond, double %x, double %y - ret double %res -} - -define double @select_sgt(i32 signext %a, i32 signext %b, double %x, double %y) { -; LA32-LABEL: select_sgt: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a1, $a0 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_sgt: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a1, $a0 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp sgt i32 %a, %b - %res = select i1 %cond, double %x, double %y - ret double %res -} - -define double @select_sge(i32 signext %a, i32 signext %b, double %x, double %y) { -; LA32-LABEL: select_sge: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a0, $a1 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_sge: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a0, $a1 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp sge i32 %a, %b - %res = select i1 %cond, double %x, double %y - ret double %res -} - -define double @select_slt(i32 signext %a, i32 signext %b, double %x, double %y) { -; LA32-LABEL: select_slt: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a0, $a1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_slt: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a0, $a1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp slt i32 %a, %b - %res = select i1 %cond, double %x, double %y - ret double 
%res -} - -define double @select_sle(i32 signext %a, i32 signext %b, double %x, double %y) { -; LA32-LABEL: select_sle: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a1, $a0 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_sle: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a1, $a0 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp sle i32 %a, %b - %res = select i1 %cond, double %x, double %y - ret double %res -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll deleted file mode 100644 index 98b999776e3f..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-flt.ll +++ /dev/null @@ -1,206 +0,0 @@ -; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64 - -;; Test single-precision floating-point values selection after integers comparison - -define float @select_eq(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_eq: -; LA32: # %bb.0: -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: sltui $a0, $a0, 1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_eq: -; LA64: # %bb.0: -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: sltui $a0, $a0, 1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp eq i32 %a, %b - %res = select i1 %cond, float %x, float %y - ret float %res -} - -define float @select_ne(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_ne: -; LA32: # %bb.0: -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: sltu $a0, $zero, $a0 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ne: -; LA64: # %bb.0: -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: sltu $a0, $zero, $a0 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp ne i32 %a, %b - %res = select i1 %cond, float %x, float %y - ret float %res -} - -define float @select_ugt(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_ugt: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a1, $a0 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ugt: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a1, $a0 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp ugt i32 %a, %b - %res = select i1 %cond, float %x, float %y - ret float %res -} - -define float @select_uge(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_uge: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a0, $a1 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_uge: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a0, $a1 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp uge i32 %a, %b - %res = select i1 %cond, 
float %x, float %y - ret float %res -} - -define float @select_ult(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_ult: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a0, $a1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ult: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a0, $a1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp ult i32 %a, %b - %res = select i1 %cond, float %x, float %y - ret float %res -} - -define float @select_ule(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_ule: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a1, $a0 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ule: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a1, $a0 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp ule i32 %a, %b - %res = select i1 %cond, float %x, float %y - ret float %res -} - -define float @select_sgt(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_sgt: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a1, $a0 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_sgt: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a1, $a0 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp sgt i32 %a, %b - %res = select i1 %cond, float %x, float %y - ret float %res -} - -define float @select_sge(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_sge: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a0, $a1 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_sge: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a0, $a1 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp sge i32 %a, %b - %res = select i1 %cond, float %x, float %y - ret float %res -} - -define float @select_slt(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_slt: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a0, $a1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_slt: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a0, $a1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp slt i32 %a, %b - %res = select i1 %cond, float %x, float %y - ret float %res -} - -define float @select_sle(i32 signext %a, i32 signext %b, float %x, float %y) { -; LA32-LABEL: select_sle: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a1, $a0 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: movgr2cf $fcc0, $a0 -; LA32-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_sle: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a1, $a0 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: movgr2cf $fcc0, $a0 -; LA64-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp sle i32 %a, %b - %res = select i1 %cond, float %x, float %y - ret float %res -} 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll deleted file mode 100644 index 3b7c2adfb868..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/select-icc-int.ll +++ /dev/null @@ -1,226 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 - -;; Test integers selection after integers comparison - -define i32 @select_eq(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_eq: -; LA32: # %bb.0: -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: sltui $a0, $a0, 1 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_eq: -; LA64: # %bb.0: -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: sltui $a0, $a0, 1 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp eq i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} - -define i32 @select_ne(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_ne: -; LA32: # %bb.0: -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: sltu $a0, $zero, $a0 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ne: -; LA64: # %bb.0: -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: sltu $a0, $zero, $a0 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp ne i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} - -define i32 @select_ugt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_ugt: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a1, $a0 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ugt: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a1, $a0 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp ugt i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} - -define i32 @select_uge(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_uge: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a0, $a1 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_uge: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a0, $a1 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp uge i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} - -define i32 @select_ult(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_ult: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a0, $a1 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ult: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a0, $a1 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: 
jirl $zero, $ra, 0 - %cond = icmp ult i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} - -define i32 @select_ule(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_ule: -; LA32: # %bb.0: -; LA32-NEXT: sltu $a0, $a1, $a0 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_ule: -; LA64: # %bb.0: -; LA64-NEXT: sltu $a0, $a1, $a0 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp ule i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} - -define i32 @select_sgt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_sgt: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a1, $a0 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_sgt: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a1, $a0 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp sgt i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} - -define i32 @select_sge(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_sge: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a0, $a1 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_sge: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a0, $a1 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp sge i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} - -define i32 @select_slt(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_slt: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a0, $a1 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_slt: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a0, $a1 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp slt i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} - -define i32 @select_sle(i32 signext %a, i32 signext %b, i32 %x, i32 %y) { -; LA32-LABEL: select_sle: -; LA32: # %bb.0: -; LA32-NEXT: slt $a0, $a1, $a0 -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: masknez $a1, $a3, $a0 -; LA32-NEXT: maskeqz $a0, $a2, $a0 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: select_sle: -; LA64: # %bb.0: -; LA64-NEXT: slt $a0, $a1, $a0 -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: masknez $a1, $a3, $a0 -; LA64-NEXT: maskeqz $a0, $a2, $a0 -; LA64-NEXT: or $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %cond = icmp sle i32 %a, %b - %res = select i1 %cond, i32 %x, i32 %y - ret i32 %res -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll deleted file mode 100644 index 911751bc6552..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sext-zext-trunc.ll +++ /dev/null @@ -1,418 
+0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 - -;; Test sext/zext/trunc - -define i8 @sext_i1_to_i8(i1 %a) { -; LA32-LABEL: sext_i1_to_i8: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: sub.w $a0, $zero, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i1_to_i8: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: sub.d $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i1 %a to i8 - ret i8 %1 -} - -define i16 @sext_i1_to_i16(i1 %a) { -; LA32-LABEL: sext_i1_to_i16: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: sub.w $a0, $zero, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i1_to_i16: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: sub.d $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i1 %a to i16 - ret i16 %1 -} - -define i32 @sext_i1_to_i32(i1 %a) { -; LA32-LABEL: sext_i1_to_i32: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: sub.w $a0, $zero, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i1_to_i32: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: sub.d $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i1 %a to i32 - ret i32 %1 -} - -define i64 @sext_i1_to_i64(i1 %a) { -; LA32-LABEL: sext_i1_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: sub.w $a0, $zero, $a0 -; LA32-NEXT: move $a1, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i1_to_i64: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: sub.d $a0, $zero, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i1 %a to i64 - ret i64 %1 -} - -define i16 @sext_i8_to_i16(i8 %a) { -; LA32-LABEL: sext_i8_to_i16: -; LA32: # %bb.0: -; LA32-NEXT: ext.w.b $a0, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i8_to_i16: -; LA64: # %bb.0: -; LA64-NEXT: ext.w.b $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i8 %a to i16 - ret i16 %1 -} - -define i32 @sext_i8_to_i32(i8 %a) { -; LA32-LABEL: sext_i8_to_i32: -; LA32: # %bb.0: -; LA32-NEXT: ext.w.b $a0, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i8_to_i32: -; LA64: # %bb.0: -; LA64-NEXT: ext.w.b $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i8 %a to i32 - ret i32 %1 -} - -define i64 @sext_i8_to_i64(i8 %a) { -; LA32-LABEL: sext_i8_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: ext.w.b $a0, $a0 -; LA32-NEXT: srai.w $a1, $a0, 31 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i8_to_i64: -; LA64: # %bb.0: -; LA64-NEXT: ext.w.b $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i8 %a to i64 - ret i64 %1 -} - -define i32 @sext_i16_to_i32(i16 %a) { -; LA32-LABEL: sext_i16_to_i32: -; LA32: # %bb.0: -; LA32-NEXT: ext.w.h $a0, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i16_to_i32: -; LA64: # %bb.0: -; LA64-NEXT: ext.w.h $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i16 %a to i32 - ret i32 %1 -} - -define i64 @sext_i16_to_i64(i16 %a) { -; LA32-LABEL: sext_i16_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: ext.w.h $a0, $a0 -; LA32-NEXT: srai.w $a1, $a0, 31 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i16_to_i64: -; LA64: # %bb.0: -; LA64-NEXT: ext.w.h $a0, $a0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i16 %a to i64 - ret i64 %1 -} - -define i64 @sext_i32_to_i64(i32 %a) { -; LA32-LABEL: sext_i32_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: srai.w $a1, $a0, 31 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sext_i32_to_i64: -; LA64: 
# %bb.0: -; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = sext i32 %a to i64 - ret i64 %1 -} - -define i8 @zext_i1_to_i8(i1 %a) { -; LA32-LABEL: zext_i1_to_i8: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i1_to_i8: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i1 %a to i8 - ret i8 %1 -} - -define i16 @zext_i1_to_i16(i1 %a) { -; LA32-LABEL: zext_i1_to_i16: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i1_to_i16: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i1 %a to i16 - ret i16 %1 -} - -define i32 @zext_i1_to_i32(i1 %a) { -; LA32-LABEL: zext_i1_to_i32: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i1_to_i32: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i1 %a to i32 - ret i32 %1 -} - -define i64 @zext_i1_to_i64(i1 %a) { -; LA32-LABEL: zext_i1_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 1 -; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i1_to_i64: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i1 %a to i64 - ret i64 %1 -} - -define i16 @zext_i8_to_i16(i8 %a) { -; LA32-LABEL: zext_i8_to_i16: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i8_to_i16: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i8 %a to i16 - ret i16 %1 -} - -define i32 @zext_i8_to_i32(i8 %a) { -; LA32-LABEL: zext_i8_to_i32: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i8_to_i32: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i8 %a to i32 - ret i32 %1 -} - -define i64 @zext_i8_to_i64(i8 %a) { -; LA32-LABEL: zext_i8_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: andi $a0, $a0, 255 -; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i8_to_i64: -; LA64: # %bb.0: -; LA64-NEXT: andi $a0, $a0, 255 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i8 %a to i64 - ret i64 %1 -} - -define i32 @zext_i16_to_i32(i16 %a) { -; LA32-LABEL: zext_i16_to_i32: -; LA32: # %bb.0: -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i16_to_i32: -; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i16 %a to i32 - ret i32 %1 -} - -define i64 @zext_i16_to_i64(i16 %a) { -; LA32-LABEL: zext_i16_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 -; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i16_to_i64: -; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i16 %a to i64 - ret i64 %1 -} - -define i64 @zext_i32_to_i64(i32 %a) { -; LA32-LABEL: zext_i32_to_i64: -; LA32: # %bb.0: -; LA32-NEXT: move $a1, $zero -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: zext_i32_to_i64: -; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = zext i32 %a to i64 - ret i64 %1 -} - -define i1 @trunc_i8_to_i1(i8 %a) { -; LA32-LABEL: trunc_i8_to_i1: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i8_to_i1: -; LA64: # %bb.0: -; LA64-NEXT: 
jirl $zero, $ra, 0 - %1 = trunc i8 %a to i1 - ret i1 %1 -} - -define i1 @trunc_i16_to_i1(i16 %a) { -; LA32-LABEL: trunc_i16_to_i1: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i16_to_i1: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = trunc i16 %a to i1 - ret i1 %1 -} - -define i1 @trunc_i32_to_i1(i32 %a) { -; LA32-LABEL: trunc_i32_to_i1: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i32_to_i1: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = trunc i32 %a to i1 - ret i1 %1 -} - -define i1 @trunc_i64_to_i1(i64 %a) { -; LA32-LABEL: trunc_i64_to_i1: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i64_to_i1: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = trunc i64 %a to i1 - ret i1 %1 -} - -define i8 @trunc_i16_to_i8(i16 %a) { -; LA32-LABEL: trunc_i16_to_i8: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i16_to_i8: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = trunc i16 %a to i8 - ret i8 %1 -} - -define i8 @trunc_i32_to_i8(i32 %a) { -; LA32-LABEL: trunc_i32_to_i8: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i32_to_i8: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = trunc i32 %a to i8 - ret i8 %1 -} - -define i8 @trunc_i64_to_i8(i64 %a) { -; LA32-LABEL: trunc_i64_to_i8: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i64_to_i8: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = trunc i64 %a to i8 - ret i8 %1 -} - -define i16 @trunc_i32_to_i16(i32 %a) { -; LA32-LABEL: trunc_i32_to_i16: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i32_to_i16: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = trunc i32 %a to i16 - ret i16 %1 -} - -define i16 @trunc_i64_to_i16(i64 %a) { -; LA32-LABEL: trunc_i64_to_i16: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i64_to_i16: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = trunc i64 %a to i16 - ret i16 %1 -} - -define i32 @trunc_i64_to_i32(i64 %a) { -; LA32-LABEL: trunc_i64_to_i32: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: trunc_i64_to_i32: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = trunc i64 %a to i32 - ret i32 %1 -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll deleted file mode 100644 index de25040452b1..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/shl.ll +++ /dev/null @@ -1,156 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 - -;; Exercise the 'shl' LLVM IR: https://llvm.org/docs/LangRef.html#shl-instruction - -define i1 @shl_i1(i1 %x, i1 %y) { -; LA32-LABEL: shl_i1: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i1: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i1 %x, %y - ret i1 %shl -} - -define i8 @shl_i8(i8 %x, i8 %y) { -; LA32-LABEL: shl_i8: -; LA32: # %bb.0: -; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i8: -; LA64: # %bb.0: -; LA64-NEXT: sll.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i8 %x, %y - ret i8 %shl -} - -define i16 @shl_i16(i16 %x, i16 %y) { -; LA32-LABEL: shl_i16: -; LA32: # %bb.0: -; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i16: -; LA64: # %bb.0: -; LA64-NEXT: 
sll.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i16 %x, %y - ret i16 %shl -} - -define i32 @shl_i32(i32 %x, i32 %y) { -; LA32-LABEL: shl_i32: -; LA32: # %bb.0: -; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i32: -; LA64: # %bb.0: -; LA64-NEXT: sll.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i32 %x, %y - ret i32 %shl -} - -define i64 @shl_i64(i64 %x, i64 %y) { -; LA32-LABEL: shl_i64: -; LA32: # %bb.0: -; LA32-NEXT: xori $a3, $a2, 31 -; LA32-NEXT: srli.w $a4, $a0, 1 -; LA32-NEXT: srl.w $a3, $a4, $a3 -; LA32-NEXT: sll.w $a1, $a1, $a2 -; LA32-NEXT: or $a1, $a1, $a3 -; LA32-NEXT: addi.w $a3, $a2, -32 -; LA32-NEXT: slti $a4, $a3, 0 -; LA32-NEXT: maskeqz $a1, $a1, $a4 -; LA32-NEXT: sll.w $a5, $a0, $a3 -; LA32-NEXT: masknez $a4, $a5, $a4 -; LA32-NEXT: or $a1, $a1, $a4 -; LA32-NEXT: sll.w $a0, $a0, $a2 -; LA32-NEXT: srai.w $a2, $a3, 31 -; LA32-NEXT: and $a0, $a2, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i64: -; LA64: # %bb.0: -; LA64-NEXT: sll.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i64 %x, %y - ret i64 %shl -} - -define i1 @shl_i1_3(i1 %x) { -; LA32-LABEL: shl_i1_3: -; LA32: # %bb.0: -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i1_3: -; LA64: # %bb.0: -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i1 %x, 3 - ret i1 %shl -} - -define i8 @shl_i8_3(i8 %x) { -; LA32-LABEL: shl_i8_3: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i8_3: -; LA64: # %bb.0: -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i8 %x, 3 - ret i8 %shl -} - -define i16 @shl_i16_3(i16 %x) { -; LA32-LABEL: shl_i16_3: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i16_3: -; LA64: # %bb.0: -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i16 %x, 3 - ret i16 %shl -} - -define i32 @shl_i32_3(i32 %x) { -; LA32-LABEL: shl_i32_3: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i32_3: -; LA64: # %bb.0: -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i32 %x, 3 - ret i32 %shl -} - -define i64 @shl_i64_3(i64 %x) { -; LA32-LABEL: shl_i64_3: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a1, $a1, 3 -; LA32-NEXT: srli.w $a2, $a0, 29 -; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: slli.w $a0, $a0, 3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: shl_i64_3: -; LA64: # %bb.0: -; LA64-NEXT: slli.d $a0, $a0, 3 -; LA64-NEXT: jirl $zero, $ra, 0 - %shl = shl i64 %x, 3 - ret i64 %shl -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll deleted file mode 100644 index dfa55c29ebae..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sub.ll +++ /dev/null @@ -1,93 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 - -;; Exercise the 'sub' LLVM IR: https://llvm.org/docs/LangRef.html#sub-instruction - -define i1 @sub_i1(i1 %x, i1 %y) { -; LA32-LABEL: sub_i1: -; LA32: # %bb.0: -; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sub_i1: -; LA64: # %bb.0: -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %sub = sub i1 %x, %y - ret i1 %sub -} - -define i8 @sub_i8(i8 %x, i8 %y) { -; LA32-LABEL: sub_i8: -; LA32: # %bb.0: -; LA32-NEXT: sub.w $a0, $a0, $a1 -; 
LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sub_i8: -; LA64: # %bb.0: -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %sub = sub i8 %x, %y - ret i8 %sub -} - -define i16 @sub_i16(i16 %x, i16 %y) { -; LA32-LABEL: sub_i16: -; LA32: # %bb.0: -; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sub_i16: -; LA64: # %bb.0: -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %sub = sub i16 %x, %y - ret i16 %sub -} - -define i32 @sub_i32(i32 %x, i32 %y) { -; LA32-LABEL: sub_i32: -; LA32: # %bb.0: -; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sub_i32: -; LA64: # %bb.0: -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %sub = sub i32 %x, %y - ret i32 %sub -} - -;; Match the pattern: -;; def : PatGprGpr_32; -define signext i32 @sub_i32_sext(i32 %x, i32 %y) { -; LA32-LABEL: sub_i32_sext: -; LA32: # %bb.0: -; LA32-NEXT: sub.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sub_i32_sext: -; LA64: # %bb.0: -; LA64-NEXT: sub.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %sub = sub i32 %x, %y - ret i32 %sub -} - -define i64 @sub_i64(i64 %x, i64 %y) { -; LA32-LABEL: sub_i64: -; LA32: # %bb.0: -; LA32-NEXT: sub.w $a1, $a1, $a3 -; LA32-NEXT: sltu $a3, $a0, $a2 -; LA32-NEXT: sub.w $a1, $a1, $a3 -; LA32-NEXT: sub.w $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sub_i64: -; LA64: # %bb.0: -; LA64-NEXT: sub.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %sub = sub i64 %x, %y - ret i64 %sub -} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll deleted file mode 100644 index 2f85e645c04f..000000000000 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/xor.ll +++ /dev/null @@ -1,264 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 - -;; Exercise the 'xor' LLVM IR: https://llvm.org/docs/LangRef.html#xor-instruction - -define i1 @xor_i1(i1 %a, i1 %b) { -; LA32-LABEL: xor_i1: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i1: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i1 %a, %b - ret i1 %r -} - -define i8 @xor_i8(i8 %a, i8 %b) { -; LA32-LABEL: xor_i8: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i8: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i8 %a, %b - ret i8 %r -} - -define i16 @xor_i16(i16 %a, i16 %b) { -; LA32-LABEL: xor_i16: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i16: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i16 %a, %b - ret i16 %r -} - -define i32 @xor_i32(i32 %a, i32 %b) { -; LA32-LABEL: xor_i32: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i32 %a, %b - ret i32 %r -} - -define i64 @xor_i64(i64 %a, i64 %b) { -; LA32-LABEL: xor_i64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xor $a0, $a0, $a2 -; LA32-NEXT: xor $a1, $a1, $a3 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i64: -; 
LA64: # %bb.0: # %entry -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i64 %a, %b - ret i64 %r -} - -define i1 @xor_i1_0(i1 %b) { -; LA32-LABEL: xor_i1_0: -; LA32: # %bb.0: # %entry -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i1_0: -; LA64: # %bb.0: # %entry -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i1 4, %b - ret i1 %r -} - -define i1 @xor_i1_5(i1 %b) { -; LA32-LABEL: xor_i1_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i1_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i1 5, %b - ret i1 %r -} - -define i8 @xor_i8_5(i8 %b) { -; LA32-LABEL: xor_i8_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i8_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i8 5, %b - ret i8 %r -} - -define i8 @xor_i8_257(i8 %b) { -; LA32-LABEL: xor_i8_257: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i8_257: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i8 257, %b - ret i8 %r -} - -define i16 @xor_i16_5(i16 %b) { -; LA32-LABEL: xor_i16_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i16_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i16 5, %b - ret i16 %r -} - -define i16 @xor_i16_0x1000(i16 %b) { -; LA32-LABEL: xor_i16_0x1000: -; LA32: # %bb.0: # %entry -; LA32-NEXT: lu12i.w $a1, 1 -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i16_0x1000: -; LA64: # %bb.0: # %entry -; LA64-NEXT: lu12i.w $a1, 1 -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i16 4096, %b - ret i16 %r -} - -define i16 @xor_i16_0x10001(i16 %b) { -; LA32-LABEL: xor_i16_0x10001: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i16_0x10001: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i16 65537, %b - ret i16 %r -} - -define i32 @xor_i32_5(i32 %b) { -; LA32-LABEL: xor_i32_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xori $a0, $a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i32_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i32 5, %b - ret i32 %r -} - -define i32 @xor_i32_0x1000(i32 %b) { -; LA32-LABEL: xor_i32_0x1000: -; LA32: # %bb.0: # %entry -; LA32-NEXT: lu12i.w $a1, 1 -; LA32-NEXT: xor $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i32_0x1000: -; LA64: # %bb.0: # %entry -; LA64-NEXT: lu12i.w $a1, 1 -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i32 4096, %b - ret i32 %r -} - -define i32 @xor_i32_0x100000001(i32 %b) { -; LA32-LABEL: xor_i32_0x100000001: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xori $a0, $a0, 1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i32_0x100000001: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xori $a0, $a0, 1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i32 4294967297, %b - ret i32 %r -} - -define i64 @xor_i64_5(i64 %b) { -; LA32-LABEL: xor_i64_5: -; LA32: # %bb.0: # %entry -; LA32-NEXT: xori $a0, 
$a0, 5 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i64_5: -; LA64: # %bb.0: # %entry -; LA64-NEXT: xori $a0, $a0, 5 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i64 5, %b - ret i64 %r -} - -define i64 @xor_i64_0x1000(i64 %b) { -; LA32-LABEL: xor_i64_0x1000: -; LA32: # %bb.0: # %entry -; LA32-NEXT: lu12i.w $a2, 1 -; LA32-NEXT: xor $a0, $a0, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: xor_i64_0x1000: -; LA64: # %bb.0: # %entry -; LA64-NEXT: lu12i.w $a1, 1 -; LA64-NEXT: xor $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 -entry: - %r = xor i64 4096, %b - ret i64 %r -} diff --git a/llvm/test/CodeGen/LoongArch/jirl-verify.ll b/llvm/test/CodeGen/LoongArch/jirl-verify.ll new file mode 100644 index 000000000000..70b588bea5c4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/jirl-verify.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -verify-machineinstrs -o - %s \ +; RUN: | FileCheck %s --check-prefix=STATIC +; RUN: llc -march=loongarch64 -verify-machineinstrs -relocation-model=pic --code-model=large -o - %s \ +; RUN: | FileCheck %s --check-prefix=LARGE + +define void @test() nounwind { +; STATIC-LABEL: test: +; STATIC: # %bb.0: +; STATIC-NEXT: addi.d $sp, $sp, -16 +; STATIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; STATIC-NEXT: lu12i.w $ra, foo +; STATIC-NEXT: ori $ra, $ra, foo +; STATIC-NEXT: lu32i.d $ra, foo +; STATIC-NEXT: lu52i.d $ra, $ra, foo +; STATIC-NEXT: jirl $ra, $ra, 0 +; STATIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; STATIC-NEXT: addi.d $sp, $sp, 16 +; STATIC-NEXT: jr $ra +; +; LARGE-LABEL: test: +; LARGE: # %bb.0: +; LARGE-NEXT: addi.d $sp, $sp, -16 +; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LARGE-NEXT: pcaddu18i $ra, foo +; LARGE-NEXT: jirl $ra, $ra, foo +; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LARGE-NEXT: addi.d $sp, $sp, 16 +; LARGE-NEXT: jr $ra + call void @foo() nounwind + ret void +} + +declare void @foo() diff --git a/llvm/test/CodeGen/LoongArch/lasx/VExtend.ll b/llvm/test/CodeGen/LoongArch/lasx/VExtend.ll new file mode 100644 index 000000000000..1b4b52c7a8b5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/VExtend.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + +define <4 x i64> @uvadd(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: uvadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %1 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %2 = add <4 x i32> %0, %1 + %3 = zext <4 x i32> %2 to <4 x i64> + ret <4 x i64> %3 +} + +define <4 x i64> @svadd(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: svadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %1 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %2 = add nsw <4 x i32> %0, %1 + %3 = sext <4 x i32> %2 to <4 x i64> + ret <4 x i64> %3 +} + +define <4 x i64> @uvsub(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: uvsub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %1 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %2 = sub <4 x i32> %0, %1 + %3 = zext <4 x i32> %2 to <4 x i64> +
ret <4 x i64> %3 +} + +define <4 x i64> @svsub(<8 x i32> %b, <8 x i32> %c) { +; CHECK-LABEL: svsub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra +entry: + %0 = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %1 = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %2 = sub nsw <4 x i32> %0, %1 + %3 = sext <4 x i32> %2 to <4 x i64> + ret <4 x i64> %3 +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll new file mode 100644 index 000000000000..07b80895bead --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/imm_vector_lasx.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + +define <4 x i64> @build_lasx0(<4 x i64> %a) { +; CHECK-LABEL: build_lasx0: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -1 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx1(<4 x i64> %a) { +; CHECK-LABEL: build_lasx1: +; CHECK: # %bb.0: +; CHECK-NEXT: lu52i.d $r4, $zero, 2047 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 9218868437227405312, i64 9218868437227405312, i64 9218868437227405312, i64 9218868437227405312> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx2(<4 x i64> %a) { +; CHECK-LABEL: build_lasx2: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 2048 +; CHECK-NEXT: lu32i.d $r4, 524287 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2251795518720000, i64 2251795518720000, i64 2251795518720000, i64 2251795518720000> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx3(<4 x i64> %a) { +; CHECK-LABEL: build_lasx3: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu52i.d $r4, $r4, -1 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 -4503599627366401, i64 -4503599627366401, i64 -4503599627366401, i64 -4503599627366401> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx4(<4 x i64> %a) { +; CHECK-LABEL: build_lasx4: +; CHECK: # %bb.0: +; CHECK-NEXT: ori $r4, $zero, 4095 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2251799813689343, i64 2251799813689343, i64 2251799813689343, i64 2251799813689343> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx5(<4 x i64> %a) { +; CHECK-LABEL: build_lasx5: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 4294965248, i64 4294965248, i64 4294965248, i64 4294965248> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx6(<4 x i64> %a) { +; CHECK-LABEL: build_lasx6: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 4503599627368448, i64 4503599627368448, i64 4503599627368448, i64 4503599627368448> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx7(<4 x i64> %a) { +; CHECK-LABEL: build_lasx7: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $r4, $zero, -2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2256094778650496, i64 2256094778650496, i64 2256094778650496, i64 2256094778650496> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx8(<4 x i64> %a) { +; CHECK-LABEL: build_lasx8: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d
$r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2147483648, i64 2147483648, i64 2147483648, i64 2147483648> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx9(<4 x i64> %a) { +; CHECK-LABEL: build_lasx9: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -1 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 4503599627366400, i64 4503599627366400, i64 4503599627366400, i64 4503599627366400> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx10(<4 x i64> %a) { +; CHECK-LABEL: build_lasx10: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: lu32i.d $r4, 0 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 9218868439374888960, i64 9218868439374888960, i64 9218868439374888960, i64 9218868439374888960> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx11(<4 x i64> %a) { +; CHECK-LABEL: build_lasx11: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, 524287 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -1 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 -2147485696, i64 -2147485696, i64 -2147485696, i64 -2147485696> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx12(<4 x i64> %a) { +; CHECK-LABEL: build_lasx12: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2047 +; CHECK-NEXT: lu52i.d $r4, $r4, 2047 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 9223372034707294207, i64 9223372034707294207, i64 9223372034707294207, i64 9223372034707294207> + ret <4 x i64> %b +} + +define <4 x i64> @build_lasx13(<4 x i64> %a) { +; CHECK-LABEL: build_lasx13: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $r4, -524288 +; CHECK-NEXT: ori $r4, $r4, 2048 +; CHECK-NEXT: lu32i.d $r4, -524288 +; CHECK-NEXT: lu52i.d $r4, $r4, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr1, $r4 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %b = add <4 x i64> %a, <i64 2251801961170944, i64 2251801961170944, i64 2251801961170944, i64 2251801961170944> + ret <4 x i64> %b +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/inline-asm.ll b/llvm/test/CodeGen/LoongArch/lasx/inline-asm.ll new file mode 100644 index 000000000000..337632491e46 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/inline-asm.ll @@ -0,0 +1,55 @@ +; A basic inline assembly test + +; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + +@v4i64_r = global <4 x i64> zeroinitializer, align 32 +@v8i32_r = global <8 x i32> zeroinitializer, align 32 + +define void @test1() nounwind { +entry: + ; CHECK-LABEL: test1: + %0 = call <4 x i64> asm "xvldi ${0:u}, 1", "=f"() + ; CHECK: xvldi $xr{{[1-3]?[0-9]}}, 1 + store <4 x i64> %0, <4 x i64>* @v4i64_r + ret void +} + +define void @test2() nounwind { +entry: + ; CHECK-LABEL: test2: + %0 = load <8 x i32>, <8 x i32>* @v8i32_r + %1 = call <8 x i32> asm "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f"(<8 x i32> %0) + ; CHECK: xvaddi.wu $xr{{[1-3]?[0-9]}}, $xr{{[1-3]?[0-9]}}, 1 + store <8 x i32> %1, <8 x i32>* @v8i32_r + ret void +} + +define void @test2_d() nounwind { +entry: + ; CHECK-LABEL: test2_d: + %0 = load < 4 x i64>, < 4 x i64>* @v4i64_r + %1 = call < 4 x i64> asm "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f"(< 4 x i64> %0) + ; CHECK: xvaddi.wu $xr{{[1-3]?[0-9]}}, $xr{{[1-3]?[0-9]}}, 1 + store < 4 x i64> %1, < 4 x i64>* @v4i64_r + ret void +} + +define void @test3() nounwind { +entry: + ; CHECK-LABEL: test3: + %0 = load <8 x i32>, <8 x i32>* @v8i32_r + %1 = call <8 x i32> asm sideeffect "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f,~{$xr0}"(<8 x i32> %0) + ; CHECK: xvaddi.wu $xr{{([1-9]|[1-3][0-9])}}, $xr{{([1-9]|[1-3][0-9])}}, 1 + store <8 x i32> %1, <8 x i32>* @v8i32_r + ret void +} + +define
void @test3_d() nounwind { +entry: + ; CHECK-LABEL: test3_d: + %0 = load <4 x i64>, <4 x i64>* @v4i64_r + %1 = call <4 x i64> asm sideeffect "xvaddi.wu ${0:u}, ${1:u}, 1", "=f,f,~{$xr0}"(<4 x i64> %0) + ; CHECK: xvaddi.wu $xr{{([1-9]|[1-3][0-9])}}, $xr{{([1-9]|[1-3][0-9])}}, 1 + store <4 x i64> %1, <4 x i64>* @v4i64_r + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll new file mode 100644 index 000000000000..6dbaa49b1394 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + +define <32 x i8> @lasxB(<32 x i8> %d, <16 x i8> %s1) { +; CHECK-LABEL: lasxB: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: jr $ra +entry: + %r1 = shufflevector <16 x i8> %s1, <16 x i8> poison, <32 x i32> + %r2 = shufflevector <32 x i8> %r1, <32 x i8> %d, <32 x i32> + ret <32 x i8> %r2 +} + +define <16 x i16> @lasxH(<16 x i16> %d, <8 x i16> %s1) { +; CHECK-LABEL: lasxH: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: jr $ra +entry: + %r1 = shufflevector <8 x i16> %s1, <8 x i16> poison, <16 x i32> + %r2 = shufflevector <16 x i16> %r1, <16 x i16> %d, <16 x i32> + ret <16 x i16> %r2 +} + +define <8 x i32> @lasxW(<8 x i32> %d, <4 x i32> %s1) { +; CHECK-LABEL: lasxW: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: jr $ra +entry: + %r1 = shufflevector <4 x i32> %s1, <4 x i32> poison, <8 x i32> + %r2 = shufflevector <8 x i32> %r1, <8 x i32> %d, <8 x i32> + ret <8 x i32> %r2 +} + +define <4 x i64> @lasxD(<4 x i64> %d, <2 x i64> %s1) { +; CHECK-LABEL: lasxD: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 +; CHECK-NEXT: jr $ra +entry: + %r1 = shufflevector <2 x i64> %s1, <2 x i64> poison, <4 x i32> + %r2 = shufflevector <4 x i64> %r1, <4 x i64> %d, <4 x i32> + ret <4 x i64> %r2 +} + +define <32 x i8> @lasxB_Hi(<32 x i8> %d, <16 x i8> %s1) { +; CHECK-LABEL: lasxB_Hi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 +; CHECK-NEXT: jr $ra +entry: + %r1 = shufflevector <16 x i8> %s1, <16 x i8> poison, <32 x i32> + %r2 = shufflevector <32 x i8> %r1, <32 x i8> %d, <32 x i32> + ret <32 x i8> %r2 +} + +define <16 x i16> @lasxH_Hi(<16 x i16> %d, <8 x i16> %s1) { +; CHECK-LABEL: lasxH_Hi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 +; CHECK-NEXT: jr $ra +entry: + %r1 = shufflevector <8 x i16> %s1, <8 x i16> poison, <16 x i32> + %r2 = shufflevector <16 x i16> %r1, <16 x i16> %d, <16 x i32> + ret <16 x i16> %r2 +} + +define <8 x i32> @lasxW_Hi(<8 x i32> %d, <4 x i32> %s1) { +; CHECK-LABEL: lasxW_Hi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 +; CHECK-NEXT: jr $ra +entry: + %r1 = shufflevector <4 x i32> %s1, <4 x i32> poison, <8 x i32> + %r2 = shufflevector <8 x i32> %r1, <8 x i32> %d, <8 x i32> + ret <8 x i32> %r2 +} + +define <4 x i64> @lasxD_Hi(<4 x i64> %d, <2 x i64> %s1) { +; CHECK-LABEL: lasxD_Hi: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 +; CHECK-NEXT: jr $ra +entry: + %r1 = shufflevector <2 x i64> %s1, <2 x i64> poison, <4 x i32> + %r2 = shufflevector <4 x i64> %r1, <4 x i64> %d, <4 x i32> + ret <4 x i64> %r2 +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-lasx.ll new file mode 100644 index 000000000000..05b720077a40 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-lasx.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) +declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) +declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) +declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) + +declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) +declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) + +define <32 x i8> @lasx_xvrepli_b() { +; CHECK-LABEL: lasx_xvrepli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldi $xr0, 2 +; CHECK-NEXT: jr $ra +entry: + %0 = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 2) + ret <32 x i8> %0 +} + +define <16 x i16> @lasx_xvrepli_h() { +; CHECK-LABEL: lasx_xvrepli_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldi $xr0, 2 +; CHECK-NEXT: jr $ra +entry: + %0 = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 2) + ret <16 x i16> %0 +} + +define <8 x i32> @lasx_xvrepli_w() { +; CHECK-LABEL: lasx_xvrepli_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldi $xr0, 2 +; CHECK-NEXT: jr $ra +entry: + %0 = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 2) + ret <8 x i32> %0 +} + +define <4 x i64> @lasx_xvrepli_d() { +; CHECK-LABEL: lasx_xvrepli_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldi $xr0, 2 +; CHECK-NEXT: jr $ra +entry: + %0 = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 2) + ret <4 x i64> %0 +} + +define <4 x double> @lasx_xvpickve_d_f(<4 x double> %a) { +; CHECK-LABEL: lasx_xvpickve_d_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.d $xr0, $xr0, 2 +; CHECK-NEXT: jr $ra +entry: + %0 = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %a, i32 2) + ret <4 x double> %0 +} + +define <8 x float> @lasx_xvpickve_w_f(<8 x float> %a) { +; CHECK-LABEL: lasx_xvpickve_w_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.w $xr0, $xr0, 2 +; CHECK-NEXT: jr $ra +entry: + %0 = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %a, i32 2) + ret <8 x float> %0 +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll b/llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll new file mode 100644 index 000000000000..a0f3e6ebe0b1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + +define <32 x i8> @lsxavgr_v32i8(<32 x i8> noundef %0, <32 x i8> noundef %1) local_unnamed_addr #0 { +; CHECK-LABEL: lsxavgr_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 +; CHECK-NEXT: xvldi $xr1, 1 +; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = add <32 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %4 = add <32 x i8> %3, %1 + %5 = sdiv <32 x i8> %4, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2> + ret <32 x i8> %5 +} + +define <16 x i16> @lsxavgr_v16i16(<16 x i16> noundef %0, <16 x i16> noundef %1) local_unnamed_addr #0 { +; CHECK-LABEL: lsxavgr_v16i16: +;
CHECK: # %bb.0: +; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvldi $xr1, 1 +; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = add <16 x i16> %0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %4 = add <16 x i16> %3, %1 + %5 = sdiv <16 x i16> %4, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> + ret <16 x i16> %5 +} + +define <8 x i32> @lsxavgr_v8i32(<8 x i32> noundef %0, <8 x i32> noundef %1) local_unnamed_addr #0 { +; CHECK-LABEL: lsxavgr_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvldi $xr1, 1 +; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = add <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %4 = add <8 x i32> %3, %1 + %5 = sdiv <8 x i32> %4, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> + ret <8 x i32> %5 +} + +define <4 x i64> @lsxavgr_v4i64(<4 x i64> noundef %0, <4 x i64> noundef %1) local_unnamed_addr #0 { +; CHECK-LABEL: lsxavgr_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: xvldi $xr1, 1 +; CHECK-NEXT: xvavg.d $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = add <4 x i64> %0, <i64 1, i64 1, i64 1, i64 1> + %4 = add <4 x i64> %3, %1 + %5 = sdiv <4 x i64> %4, <i64 2, i64 2, i64 2, i64 2> + ret <4 x i64> %5 +} + +define <32 x i8> @lsxavgr_v32u8(<32 x i8> noundef %0, <32 x i8> noundef %1) local_unnamed_addr #0 { +; CHECK-LABEL: lsxavgr_v32u8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 +; CHECK-NEXT: xvldi $xr1, 1 +; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = add <32 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %4 = add <32 x i8> %3, %1 + %5 = lshr <32 x i8> %4, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <32 x i8> %5 +} + +define <16 x i16> @lsxavgr_v16u16(<16 x i16> noundef %0, <16 x i16> noundef %1) local_unnamed_addr #0 { +; CHECK-LABEL: lsxavgr_v16u16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvldi $xr1, 1 +; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = add <16 x i16> %0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + %4 = add <16 x i16> %3, %1 + %5 = lshr <16 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <16 x i16> %5 +} + +define <8 x i32> @lsxavgr_v8u32(<8 x i32> noundef %0, <8 x i32> noundef %1) local_unnamed_addr #0 { +; CHECK-LABEL: lsxavgr_v8u32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvldi $xr1, 1 +; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = add <8 x i32> %0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %4 = add <8 x i32> %3, %1 + %5 = lshr <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <8 x i32> %5 +} + +define <4 x i64> @lsxavgr_v4u64(<4 x i64> noundef %0, <4 x i64> noundef %1) local_unnamed_addr #0 { +; CHECK-LABEL: lsxavgr_v4u64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: xvldi $xr1, 1 +; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = add <4 x i64> %0, <i64 1, i64 1, i64 1, i64 1> + %4 = add <4 x i64> %3, %1 + %5 = lshr <4 x i64> %4, <i64 1, i64 1, i64 1, i64 1> + ret <4 x i64> %5 +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/lasxvclr.ll b/llvm/test/CodeGen/LoongArch/lasx/lasxvclr.ll new file mode 100644 index 000000000000..b406981045b2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/lasxvclr.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + +define <32 x i8> @clri8(<32 x i8> %0, <32 x i8> %1) { +; CHECK-LABEL: clri8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitclr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = shl <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %1 + %4 = xor <32 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %5 = and <32 x i8> %4, %0 + ret <32 x i8> %5 +} + +define <16 x i16> @clri16(<16 x i16> %0, <16 x i16> %1) { +; CHECK-LABEL: clri16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvbitclr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = shl <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %1 + %4 = xor <16 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %5 = and <16 x i16> %4, %0 + ret <16 x i16> %5 +} + +define <8 x i32> @clri32(<8 x i32> %0, <8 x i32> %1)
+; CHECK-LABEL: clri32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvbitclr.w $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+  %3 = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %1
+  %4 = xor <8 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %5 = and <8 x i32> %4, %0
+  ret <8 x i32> %5
+}
+
+define <4 x i64> @clri64(<4 x i64> %0, <4 x i64> %1) {
+; CHECK-LABEL: clri64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+  %3 = shl <4 x i64> <i64 1, i64 1, i64 1, i64 1>, %1
+  %4 = xor <4 x i64> %3, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %5 = and <4 x i64> %4, %0
+  ret <4 x i64> %5
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/logic-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/logic-lasx.ll
new file mode 100644
index 000000000000..ff28569a11e1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/logic-lasx.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+define <4 x i64> @not_v4i64(<4 x i64> %a) {
+; CHECK-LABEL: not_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
+  ret <4 x i64> %not
+}
+
+define <8 x i32> @not_v8i32(<8 x i32> %a) {
+; CHECK-LABEL: not_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <8 x i32> %not
+}
+
+define <16 x i16> @not_v16i16(<16 x i16> %a) {
+; CHECK-LABEL: not_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <16 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  ret <16 x i16> %not
+}
+
+define <32 x i8> @not_v32i8(<32 x i8> %a) {
+; CHECK-LABEL: not_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <32 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <32 x i8> %not
+}
+
+define <4 x i64> @andn_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: andn_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <4 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %and = and <4 x i64> %not, %a
+  ret <4 x i64> %and
+}
+
+define <8 x i32> @andn_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: andn_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <8 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %and = and <8 x i32> %not, %a
+  ret <8 x i32> %and
+}
+
+define <16 x i16> @andn_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: andn_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <16 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %and = and <16 x i16> %not, %a
+  ret <16 x i16> %and
+}
+
+define <32 x i8> @andn_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: andn_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvandn.v $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <32 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %and = and <32 x i8> %not, %a
+  ret <32 x i8> %and
+}
+
+define <4 x i64> @orn_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: orn_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <4 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %or = or <4 x i64> %not, %a
+  ret <4 x i64> %or
+}
+
+define <8 x i32> @orn_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: orn_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <8 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %or = or <8 x i32> %not, %a
+  ret <8 x i32> %or
+}
+
+define <16 x i16> @orn_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: orn_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <16 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %or = or <16 x i16> %not, %a
+  ret <16 x i16> %or
+}
+
+define <32 x i8> @orn_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: orn_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <32 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %or = or <32 x i8> %not, %a
+  ret <32 x i8> %or
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/set-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/set-lasx.ll
new file mode 100644
index 000000000000..443262eac072
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/set-lasx.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+define <32 x i8> @seti8(<32 x i8>) {
+; CHECK-LABEL: seti8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvbitseti.b $xr0, $xr0, 6
+; CHECK-NEXT: jr $ra
+  %2 = or <32 x i8> %0, <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>
+  ret <32 x i8> %2
+}
+
+define <16 x i16> @seti16(<16 x i16>) {
+; CHECK-LABEL: seti16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 6
+; CHECK-NEXT: jr $ra
+  %2 = or <16 x i16> %0, <i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64>
+  ret <16 x i16> %2
+}
+
+define <8 x i32> @seti32(<8 x i32>) {
+; CHECK-LABEL: seti32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 6
+; CHECK-NEXT: jr $ra
+  %2 = or <8 x i32> %0, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
+  ret <8 x i32> %2
+}
+
+define <4 x i64> @seti64(<4 x i64>) {
+; CHECK-LABEL: seti64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvbitseti.d $xr0, $xr0, 6
+; CHECK-NEXT: jr $ra
+  %2 = or <4 x i64> %0, <i64 64, i64 64, i64 64, i64 64>
+  ret <4 x i64> %2
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle_v4i64_1032.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle_v4i64_1032.ll
new file mode 100644
index 000000000000..965cfe94c850
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle_v4i64_1032.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+define <4 x i64> @shuffle_v4i64_1032(<4 x i64> %vj, <4 x i64> %vk) {
+; CHECK-LABEL: shuffle_v4i64_1032:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvpickve2gr.d $r4, $xr0, 2
+; CHECK-NEXT: xvpickve2gr.d $r5, $xr0, 3
+; CHECK-NEXT: xvpickve2gr.d $r6, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.d $r7, $xr0, 1
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $r7, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $r6, 1
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $r5, 2
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $r4, 3
+; CHECK-NEXT: jr $ra
+entry:
+  %vd = shufflevector <4 x i64> %vj, <4 x i64> %vk, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i64> %vd
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll
new file mode 100644
index 000000000000..1453dabaa40b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @vshf_v32i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: vshf_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $r5, 0
+; CHECK-NEXT: xvpickve2gr.d $r5, $xr0, 3
+; CHECK-NEXT: xvpickve2gr.d $r6, $xr0, 2
+; CHECK-NEXT: xvpickve2gr.d $r7, $xr0, 0
+; CHECK-NEXT: xvpickve2gr.d $r8, $xr0, 1
+; CHECK-NEXT: revb.d $r8, $r8
+; CHECK-NEXT: revb.d $r7, $r7
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $r7, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $r8, 1
+; CHECK-NEXT: revb.d $r6, $r6
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $r6, 2
+; CHECK-NEXT: revb.d $r5, $r5
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $r5, 3
+; CHECK-NEXT: xvst $xr0, $r4, 0
+; CHECK-NEXT: jr $ra
+  %v1 = load <32 x i8>, ptr %a0
+  %v2 = shufflevector <32 x i8> %v1, <32 x i8> undef, <32 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
+  store <32 x i8> %v2, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vext2xv.ll b/llvm/test/CodeGen/LoongArch/lasx/vext2xv.ll
new file mode 100644
index 000000000000..7bd3dca73f7e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/vext2xv.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+define <4 x i64> @s_v4i32_v4i64(<4 x i32> %a0) {
+; CHECK-LABEL: s_v4i32_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: jr $ra
+  %1 = sext <4 x i32> %a0 to <4 x i64>
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @z_v4i32_v4i64(<4 x i32> %a0) {
+; CHECK-LABEL: z_v4i32_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0
+; CHECK-NEXT: jr $ra
+  %1 = zext <4 x i32> %a0 to <4 x i64>
+  ret <4 x i64> %1
+}
+
+define <16 x i16> @s_v16i8_v16i16(<16 x i8> %A) {
+; CHECK-LABEL: s_v16i8_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: vext2xv.h.b $xr0, $xr0
+; CHECK-NEXT: jr $ra
+ entry:
+  %B = sext <16 x i8> %A to <16 x i16>
+  ret <16 x i16> %B
+}
+
+define <16 x i16> @z_v16i8_v16i16(<16 x i8> %A) {
+; CHECK-LABEL: z_v16i8_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0
+; CHECK-NEXT: jr $ra
+ entry:
+  %B = zext <16 x i8> %A to <16 x i16>
+  ret <16 x i16> %B
+}
+
+define <8 x i32> @s_v8i16_v8i32(<8 x i16> %x) {
+; CHECK-LABEL: s_v8i16_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: vext2xv.w.h $xr0, $xr0
+; CHECK-NEXT: jr $ra
+  %1 = sext <8 x i16> %x to <8 x i32>
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @z_v8i16_v8i32(<8 x i16> %x) {
+; CHECK-LABEL: z_v8i16_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0
+; CHECK-NEXT: jr $ra
+  %1 = zext <8 x i16> %x to <8 x i32>
+  ret <8 x i32> %1
+}
+
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvabsd.ll b/llvm/test/CodeGen/LoongArch/lasx/xvabsd.ll
new file mode 100644
index 000000000000..4c4e31f0fed2
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvabsd.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+define <32 x i8> @xvabsd_b(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: xvabsd_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvabsd.b $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %icmp = icmp sgt <32 x i8> %b, %a
+  %subba = sub <32 x i8> %b, %a
+  %subab = sub <32 x i8> %a, %b
+  %select = select <32 x i1> %icmp, <32 x i8> %subba, <32 x i8> %subab
+  ret <32 x i8> %select
+}
+
+define <16 x i16> @xvabsd_h(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: xvabsd_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvabsd.h $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %icmp = icmp sgt <16 x i16> %b, %a
+  %subba = sub <16 x i16> %b, %a
+  %subab = sub <16 x i16> %a, %b
+  %select = select <16 x i1> %icmp, <16 x i16> %subba, <16 x i16> %subab
+  ret <16 x i16> %select
+}
+
+define <8 x i32> @xvabsd_w(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: xvabsd_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvabsd.w $xr0,
$xr1, $xr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp sgt <8 x i32> %b, %a + %subba = sub <8 x i32> %b, %a + %subab = sub <8 x i32> %a, %b + %select = select <8 x i1> %icmp, <8 x i32> %subba, <8 x i32> %subab + ret <8 x i32> %select +} + +define <4 x i64> @xvabsd_d(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: xvabsd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp sgt <4 x i64> %b, %a + %subba = sub <4 x i64> %b, %a + %subab = sub <4 x i64> %a, %b + %select = select <4 x i1> %icmp, <4 x i64> %subba, <4 x i64> %subab + ret <4 x i64> %select +} + +define <32 x i8> @xvabsd_bu(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: xvabsd_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.bu $xr0, $xr1, $xr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <32 x i8> %b, %a + %subba = sub <32 x i8> %b, %a + %subab = sub <32 x i8> %a, %b + %select = select <32 x i1> %icmp, <32 x i8> %subba, <32 x i8> %subab + ret <32 x i8> %select +} + +define <16 x i16> @xvabsd_hu(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: xvabsd_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.hu $xr0, $xr1, $xr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <16 x i16> %b, %a + %subba = sub <16 x i16> %b, %a + %subab = sub <16 x i16> %a, %b + %select = select <16 x i1> %icmp, <16 x i16> %subba, <16 x i16> %subab + ret <16 x i16> %select +} + +define <8 x i32> @xvabsd_wu(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: xvabsd_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.wu $xr0, $xr1, $xr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <8 x i32> %b, %a + %subba = sub <8 x i32> %b, %a + %subab = sub <8 x i32> %a, %b + %select = select <8 x i1> %icmp, <8 x i32> %subba, <8 x i32> %subab + ret <8 x i32> %select +} + +define <4 x i64> @xvabsd_du(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: xvabsd_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.du $xr0, $xr1, $xr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <4 x i64> %b, %a + %subba = sub <4 x i64> %b, %a + %subab = sub <4 x i64> %a, %b + %select = select <4 x i1> %icmp, <4 x i64> %subba, <4 x i64> %subab + ret <4 x i64> %select +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvadda.ll b/llvm/test/CodeGen/LoongArch/lasx/xvadda.ll new file mode 100644 index 000000000000..a849fef3e20b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/xvadda.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + +define <32 x i8> @xvaddab(<32 x i8>, <32 x i8>) { +; CHECK-LABEL: xvaddab: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadda.b $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <32 x i8> %0, zeroinitializer + %4 = sub <32 x i8> zeroinitializer, %0 + %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> %0 + %6 = icmp slt <32 x i8> %1, zeroinitializer + %7 = sub <32 x i8> zeroinitializer, %1 + %8 = select <32 x i1> %6, <32 x i8> %7, <32 x i8> %1 + %9 = add <32 x i8> %5, %8 + ret <32 x i8> %9 +} + +define <16 x i16> @xvaddah(<16 x i16>, <16 x i16>) { +; CHECK-LABEL: xvaddah: +; CHECK: # %bb.0: +; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <16 x i16> %0, zeroinitializer + %4 = sub <16 x i16> zeroinitializer, %0 + %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> %0 + %6 = icmp slt <16 x i16> %1, zeroinitializer + %7 = sub <16 x i16> zeroinitializer, %1 + %8 = select <16 x i1> %6, <16 x i16> %7, <16 x i16> %1 + %9 = add <16 x i16> %5, %8 + ret <16 x i16> %9 +} + 
+define <8 x i32> @xvaddaw(<8 x i32>, <8 x i32>) {
+; CHECK-LABEL: xvaddaw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+  %3 = icmp slt <8 x i32> %0, zeroinitializer
+  %4 = sub nsw <8 x i32> zeroinitializer, %0
+  %5 = select <8 x i1> %3, <8 x i32> %4, <8 x i32> %0
+  %6 = icmp slt <8 x i32> %1, zeroinitializer
+  %7 = sub nsw <8 x i32> zeroinitializer, %1
+  %8 = select <8 x i1> %6, <8 x i32> %7, <8 x i32> %1
+  %9 = add nuw nsw <8 x i32> %5, %8
+  ret <8 x i32> %9
+}
+
+define <4 x i64> @xvaddad(<4 x i64>, <4 x i64>) {
+; CHECK-LABEL: xvaddad:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+  %3 = icmp slt <4 x i64> %0, zeroinitializer
+  %4 = sub nsw <4 x i64> zeroinitializer, %0
+  %5 = select <4 x i1> %3, <4 x i64> %4, <4 x i64> %0
+  %6 = icmp slt <4 x i64> %1, zeroinitializer
+  %7 = sub nsw <4 x i64> zeroinitializer, %1
+  %8 = select <4 x i1> %6, <4 x i64> %7, <4 x i64> %1
+  %9 = add nuw nsw <4 x i64> %5, %8
+  ret <4 x i64> %9
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvaddsub.ll b/llvm/test/CodeGen/LoongArch/lasx/xvaddsub.ll
new file mode 100644
index 000000000000..eb2c493d2165
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvaddsub.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+define <4 x i64> @svaddev(<8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: svaddev:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvaddwev.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = add nsw <8 x i32> %c, %b
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %2 = sext <4 x i32> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @uvaddev(<8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: uvaddev:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvaddwev.d.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = add <8 x i32> %c, %b
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %2 = zext <4 x i32> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @uvsubev(<8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: uvsubev:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.d.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = sub <8 x i32> %b, %c
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %2 = zext <4 x i32> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @svsubev(<8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: svsubev:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwev.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = sub nsw <8 x i32> %b, %c
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %2 = sext <4 x i32> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @uvaddod(<8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: uvaddod:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvaddwod.d.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = add <8 x i32> %c, %b
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %2 = zext <4 x i32> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @svaddod(<8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: svaddod:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvaddwod.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = add nsw <8 x i32> %c, %b
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %2 = sext <4 x i32> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @uvsubod(<8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: uvsubod:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwod.d.wu $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = sub <8 x i32> %b, %c
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %2 = zext <4 x i32> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @svsubod(<8 x i32> %b, <8 x i32> %c) {
+; CHECK-LABEL: svsubod:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvsubwod.d.w $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = sub nsw <8 x i32> %b, %c
+  %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %2 = sext <4 x i32> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvfcvt.ll b/llvm/test/CodeGen/LoongArch/lasx/xvfcvt.ll
new file mode 100644
index 000000000000..dc0be96d01c3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvfcvt.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double>, <4 x double>)
+
+define <8 x float> @lasx_xvfcvt_s_d(<4 x double> %va, <4 x double> %vb) nounwind {
+; CHECK-LABEL: lasx_xvfcvt_s_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr0, $xr1
+; CHECK-NEXT: jr $ra
+entry:
+  %res = call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> %va, <4 x double> %vb)
+  ret <8 x float> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvhadd.ll b/llvm/test/CodeGen/LoongArch/lasx/xvhadd.ll
new file mode 100644
index 000000000000..5b452c5eb1a8
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvhadd.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+define <4 x i64> @mul(<4 x i64> %a, <8 x i32> %m, <8 x i32> %n) {
+; CHECK-LABEL: mul:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvhaddw.d.w $xr0, $xr1, $xr2
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = shufflevector <8 x i32> %n, <8 x i32> undef, <2 x i32> <i32 0, i32 2>
+  %1 = shufflevector <8 x i32> %m, <8 x i32> undef, <2 x i32> <i32 1, i32 3>
+  %2 = add nsw <2 x i32> %0, %1
+  %3 = sext <2 x i32> %2 to <2 x i64>
+  %4 = shufflevector <8 x i32> %n, <8 x i32> undef, <2 x i32> <i32 4, i32 6>
+  %5 = shufflevector <8 x i32> %m, <8 x i32> undef, <2 x i32> <i32 5, i32 7>
+  %6 = add nsw <2 x i32> %4, %5
+  %7 = sext <2 x i32> %6 to <2 x i64>
+  %vecins16 = shufflevector <2 x i64> %3, <2 x i64> %7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i64> %vecins16
+}
+
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvilvh.ll b/llvm/test/CodeGen/LoongArch/lasx/xvilvh.ll
new file mode 100644
index 000000000000..11f96f435691
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvilvh.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+define <32 x i8> @xvilvhb(<32 x i8> %vj, <32 x i8> %vk) {
+; CHECK-LABEL: xvilvhb:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvh.b $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %vd = shufflevector <32 x i8> %vj, <32 x i8> %vk, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %vd
+}
+
+define <16 x i16> @xvilvhh(<16 x i16> %vj, <16 x i16> %vk) {
+; CHECK-LABEL: xvilvhh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvh.h $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %vd = shufflevector <16 x i16> %vj, <16 x i16> %vk, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %vd
+}
+
+define <8 x i32> @xvilvhw(<8 x i32> %vj, <8 x i32> %vk) {
+; CHECK-LABEL: xvilvhw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvh.w $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %vd = shufflevector <8 x i32> %vj, <8 x i32> %vk, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %vd
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvilvl.ll b/llvm/test/CodeGen/LoongArch/lasx/xvilvl.ll
new file mode 100644
index 000000000000..7249bc76cf03
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvilvl.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s
+
+define <32 x i8> @xvilvlb(<32 x i8> %vj, <32 x i8> %vk) {
+; CHECK-LABEL: xvilvlb:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvl.b $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %vd = shufflevector <32 x i8> %vj, <32 x i8> %vk, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %vd
+}
+
+define <16 x i16> @xvilvlh(<16 x i16> %vj, <16 x i16> %vk) {
+; CHECK-LABEL: xvilvlh:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvl.h $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %vd = shufflevector <16 x i16> %vj, <16 x i16> %vk, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %vd
+}
+
+define <8 x i32> @xvilvlw(<8 x i32> %vj, <8 x i32> %vk) {
+; CHECK-LABEL: xvilvlw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvilvl.w $xr0, $xr1, $xr0
+; CHECK-NEXT: jr $ra
+entry:
+  %vd = shufflevector <8 x i32> %vj, <8 x i32> %vk, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x i32> %vd
+}
diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll b/llvm/test/CodeGen/LoongArch/ldptr.ll
new file mode 100644
index 000000000000..8395b264fc46
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ldptr.ll
@@ -0,0 +1,70 @@
+; Check whether ld.w/ld.d/ldptr.w/ldptr.d/ldx.w/ldx.d instructions are properly generated
+; RUN: llc -march=loongarch64 -o - %s | FileCheck %s
+
+define signext i32 @ld_w(i32* %p) {
+; CHECK-LABEL: ld_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.w $r4, $r4, 2044
+; CHECK-NEXT: jr $ra
+entry:
+  %addr = getelementptr inbounds i32, i32* %p, i64 511
+  %val = load i32, i32* %addr, align 4
+  ret i32 %val
+}
+
+define signext i32 @ldptr_w(i32* %p) {
+; CHECK-LABEL: ldptr_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ldptr.w $r4, $r4, 2048
+; CHECK-NEXT: jr $ra
+entry:
+  %addr = getelementptr inbounds i32, i32* %p, i64 512
+  %val = load i32, i32* %addr, align 4
+  ret i32 %val
+}
+
+define signext i32 @ldx_w(i32* %p) {
+; CHECK-LABEL: ldx_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $r[[REG:[0-9]+]], 8
+; CHECK-NEXT: ldx.w $r4, $r4, $r[[REG:[0-9]+]]
+; CHECK-NEXT: jr $ra
+entry:
+  %addr = getelementptr inbounds i32, i32* %p, i64 8192
+  %val = load i32, i32* %addr, align 4
+  ret i32 %val
+}
+
+define i64 @ld_d(i64* %p) {
+; CHECK-LABEL: ld_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ld.d $r4, $r4, 2040
+; CHECK-NEXT: jr $ra
+entry:
+  %addr = getelementptr inbounds i64, i64* %p, i64 255
+  %val = load i64, i64* %addr, align 8
+  ret i64 %val
+}
+
+define i64 @ldptr_d(i64* %p) {
+; CHECK-LABEL: ldptr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: ldptr.d $r4, $r4, 2048
+; CHECK-NEXT: jr $ra
+entry:
+  %addr = getelementptr inbounds i64, i64* %p, i64 256
+  %val = load i64, i64* %addr, align 8
+  ret i64 %val
+}
+
+define i64 @ldx_d(i64* %p) {
+; CHECK-LABEL: ldx_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $r[[REG:[0-9]+]], 8
+; CHECK-NEXT: ldx.d $r4, $r4, $r[[REG:[0-9]+]]
+; CHECK-NEXT: jr $ra
+entry:
+  %addr = getelementptr inbounds i64, i64* %p, i64 4096
+  %val = load i64, i64* %addr, align 8
+  ret i64 %val
+}
diff --git a/llvm/test/CodeGen/LoongArch/lit.local.cfg b/llvm/test/CodeGen/LoongArch/lit.local.cfg
index a54f5aeca4ab..6223fc691edc 100644
--- a/llvm/test/CodeGen/LoongArch/lit.local.cfg
+++
b/llvm/test/CodeGen/LoongArch/lit.local.cfg @@ -1,13 +1,3 @@ -import os - -config.suffixes = ['.ll', '.mir', '.test', '.txt'] - -extract_section_path = os.path.join(config.llvm_src_root, - 'utils', 'extract-section.py') - -config.substitutions.append(('extract-section', - "'%s' %s %s" % (config.python_executable, - extract_section_path, '--bits-endian little'))) - if not 'LoongArch' in config.root.targets: config.unsupported = True + diff --git a/llvm/test/CodeGen/LoongArch/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/load-store-atomic.ll new file mode 100644 index 000000000000..414d4078bd07 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/load-store-atomic.ll @@ -0,0 +1,310 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +define i8 @load_acquire_i8(ptr %ptr) { +; CHECK-LABEL: load_acquire_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.b $r4, $r4, 0 +; CHECK-NEXT: dbar 20 +; CHECK-NEXT: jr $ra + %val = load atomic i8, ptr %ptr acquire, align 1 + ret i8 %val +} + +define i16 @load_acquire_i16(ptr %ptr) { +; CHECK-LABEL: load_acquire_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.h $r4, $r4, 0 +; CHECK-NEXT: dbar 20 +; CHECK-NEXT: jr $ra + %val = load atomic i16, ptr %ptr acquire, align 2 + ret i16 %val +} + +define i32 @load_acquire_i32(ptr %ptr) { +; CHECK-LABEL: load_acquire_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $r4, $r4, 0 +; CHECK-NEXT: dbar 20 +; CHECK-NEXT: jr $ra + %val = load atomic i32, ptr %ptr acquire, align 4 + ret i32 %val +} + +define i64 @load_acquire_i64(ptr %ptr) { +; CHECK-LABEL: load_acquire_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $r4, $r4, 0 +; CHECK-NEXT: dbar 20 +; CHECK-NEXT: jr $ra + %val = load atomic i64, ptr %ptr acquire, align 8 + ret i64 %val +} + +define i8 @load_unordered_i8(ptr %ptr) { +; CHECK-LABEL: load_unordered_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.b $r4, $r4, 0 +; CHECK-NEXT: jr $ra + %val = load atomic i8, ptr %ptr unordered, align 1 + ret i8 %val +} + +define i16 @load_unordered_i16(ptr %ptr) { +; CHECK-LABEL: load_unordered_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.h $r4, $r4, 0 +; CHECK-NEXT: jr $ra + %val = load atomic i16, ptr %ptr unordered, align 2 + ret i16 %val +} + +define i32 @load_unordered_i32(ptr %ptr) { +; CHECK-LABEL: load_unordered_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $r4, $r4, 0 +; CHECK-NEXT: jr $ra + %val = load atomic i32, ptr %ptr unordered, align 4 + ret i32 %val +} + +define i64 @load_unordered_i64(ptr %ptr) { +; CHECK-LABEL: load_unordered_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + %val = load atomic i64, ptr %ptr unordered, align 8 + ret i64 %val +} + +define i8 @load_monotonic_i8(ptr %ptr) { +; CHECK-LABEL: load_monotonic_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.b $r4, $r4, 0 +; CHECK-NEXT: jr $ra + %val = load atomic i8, ptr %ptr monotonic, align 1 + ret i8 %val +} + +define i16 @load_monotonic_i16(ptr %ptr) { +; CHECK-LABEL: load_monotonic_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.h $r4, $r4, 0 +; CHECK-NEXT: jr $ra + %val = load atomic i16, ptr %ptr monotonic, align 2 + ret i16 %val +} + +define i32 @load_monotonic_i32(ptr %ptr) { +; CHECK-LABEL: load_monotonic_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $r4, $r4, 0 +; CHECK-NEXT: jr $ra + %val = load atomic i32, ptr %ptr monotonic, align 4 + ret i32 %val +} + +define i64 @load_monotonic_i64(ptr %ptr) { +; CHECK-LABEL: load_monotonic_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $r4, $r4, 0 +; CHECK-NEXT: jr $ra + %val = load atomic 
i64, ptr %ptr monotonic, align 8 + ret i64 %val +} + +define i8 @load_seq_cst_i8(ptr %ptr) { +; CHECK-LABEL: load_seq_cst_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.b $r4, $r4, 0 +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: jr $ra + %val = load atomic i8, ptr %ptr seq_cst, align 1 + ret i8 %val +} + +define i16 @load_seq_cst_i16(ptr %ptr) { +; CHECK-LABEL: load_seq_cst_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.h $r4, $r4, 0 +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: jr $ra + %val = load atomic i16, ptr %ptr seq_cst, align 2 + ret i16 %val +} + +define i32 @load_seq_cst_i32(ptr %ptr) { +; CHECK-LABEL: load_seq_cst_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $r4, $r4, 0 +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: jr $ra + %val = load atomic i32, ptr %ptr seq_cst, align 4 + ret i32 %val +} + +define i64 @load_seq_cst_i64(ptr %ptr) { +; CHECK-LABEL: load_seq_cst_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $r4, $r4, 0 +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: jr $ra + %val = load atomic i64, ptr %ptr seq_cst, align 8 + ret i64 %val +} + +define void @store_release_i8(ptr %ptr, i8 signext %v) { +; CHECK-LABEL: store_release_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: dbar 18 +; CHECK-NEXT: st.b $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i8 %v, ptr %ptr release, align 1 + ret void +} + +define void @store_release_i16(ptr %ptr, i16 signext %v) { +; CHECK-LABEL: store_release_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: dbar 18 +; CHECK-NEXT: st.h $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i16 %v, ptr %ptr release, align 2 + ret void +} + +define void @store_release_i32(ptr %ptr, i32 signext %v) { +; CHECK-LABEL: store_release_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: dbar 18 +; CHECK-NEXT: st.w $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i32 %v, ptr %ptr release, align 4 + ret void +} + +define void @store_release_i64(ptr %ptr, i64 %v) { +; CHECK-LABEL: store_release_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: dbar 18 +; CHECK-NEXT: st.d $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i64 %v, ptr %ptr release, align 8 + ret void +} + +define void @store_unordered_i8(ptr %ptr, i8 signext %v) { +; CHECK-LABEL: store_unordered_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: st.b $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i8 %v, ptr %ptr unordered, align 1 + ret void +} + +define void @store_unordered_i16(ptr %ptr, i16 signext %v) { +; CHECK-LABEL: store_unordered_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: st.h $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i16 %v, ptr %ptr unordered, align 2 + ret void +} + +define void @store_unordered_i32(ptr %ptr, i32 signext %v) { +; CHECK-LABEL: store_unordered_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: st.w $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i32 %v, ptr %ptr unordered, align 4 + ret void +} + +define void @store_unordered_i64(ptr %ptr, i64 %v) { +; CHECK-LABEL: store_unordered_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: st.d $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i64 %v, ptr %ptr unordered, align 8 + ret void +} + +define void @store_monotonic_i8(ptr %ptr, i8 signext %v) { +; CHECK-LABEL: store_monotonic_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: st.b $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i8 %v, ptr %ptr monotonic, align 1 + ret void +} + +define void @store_monotonic_i16(ptr %ptr, i16 signext %v) { +; CHECK-LABEL: store_monotonic_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: st.h $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i16 %v, ptr %ptr monotonic, align 2 + ret void +} + +define void @store_monotonic_i32(ptr %ptr, i32 signext %v) { +; CHECK-LABEL: store_monotonic_i32: 
+; CHECK: # %bb.0: +; CHECK-NEXT: st.w $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i32 %v, ptr %ptr monotonic, align 4 + ret void +} + +define void @store_monotonic_i64(ptr %ptr, i64 %v) { +; CHECK-LABEL: store_monotonic_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: st.d $r5, $r4, 0 +; CHECK-NEXT: jr $ra + store atomic i64 %v, ptr %ptr monotonic, align 8 + ret void +} + +define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { +; CHECK-LABEL: store_seq_cst_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: st.b $r5, $r4, 0 +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: jr $ra + store atomic i8 %v, ptr %ptr seq_cst, align 1 + ret void +} + +define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { +; CHECK-LABEL: store_seq_cst_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: st.h $r5, $r4, 0 +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: jr $ra + store atomic i16 %v, ptr %ptr seq_cst, align 2 + ret void +} + +define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) { +; CHECK-LABEL: store_seq_cst_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: st.w $r5, $r4, 0 +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: jr $ra + store atomic i32 %v, ptr %ptr seq_cst, align 4 + ret void +} + +define void @store_seq_cst_i64(ptr %ptr, i64 %v) { +; CHECK-LABEL: store_seq_cst_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: st.d $r5, $r4, 0 +; CHECK-NEXT: dbar 16 +; CHECK-NEXT: jr $ra + store atomic i64 %v, ptr %ptr seq_cst, align 8 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/logic-op.ll b/llvm/test/CodeGen/LoongArch/logic-op.ll new file mode 100644 index 000000000000..c1029c1ff246 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/logic-op.ll @@ -0,0 +1,171 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define signext i32 @foo32(i32 signext %a) { +; CHECK-LABEL: foo32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sltui $r4, $r4, 1 +; CHECK-NEXT: jr $ra +entry: + %tobool = icmp eq i32 %a, 0 + %conv = zext i1 %tobool to i32 + ret i32 %conv +} + +define i64 @foo(i64 %a) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sltui $r4, $r4, 1 +; CHECK-NEXT: jr $ra +entry: + %tobool = icmp eq i64 %a, 0 + %conv = zext i1 %tobool to i64 + ret i64 %conv +} + +define i64 @not(i64 %a) { +; CHECK-LABEL: not: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra +entry: + %not = xor i64 %a, -1 + ret i64 %not +} + +define i64 @and(i64 %a, i64 %b) { +; CHECK-LABEL: and: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: and $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %and = and i64 %b, %a + ret i64 %and +} + +define i64 @or(i64 %a, i64 %b) { +; CHECK-LABEL: or: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: or $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %or = or i64 %b, %a + ret i64 %or +} + +define i64 @xor(i64 %a, i64 %b) { +; CHECK-LABEL: xor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xor $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %xor = xor i64 %b, %a + ret i64 %xor +} + +define i64 @nor(i64 %a, i64 %b) { +; CHECK-LABEL: nor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: nor $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %or = or i64 %b, %a + %not = xor i64 %or, -1 + ret i64 %not +} + +define i64 @andn(i64 %a, i64 %b) { +; CHECK-LABEL: andn: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %not = xor i64 %b, -1 + %and = and i64 %not, %a + ret i64 %and +} + +define signext i32 @andn32(i32 signext %a, i32 signext %b) { +; 
CHECK-LABEL: andn32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andn $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor i32 %b, -1
+  %and = and i32 %not, %a
+  ret i32 %and
+}
+
+define i64 @orn(i64 %a, i64 %b) {
+; CHECK-LABEL: orn:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: orn $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor i64 %b, -1
+  %or = or i64 %not, %a
+  ret i64 %or
+}
+
+define signext i32 @orn32(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: orn32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: orn $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor i32 %b, -1
+  %or = or i32 %not, %a
+  ret i32 %or
+}
+
+define signext i32 @and32(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: and32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: and $r4, $r5, $r4
+; CHECK-NEXT: jr $ra
+entry:
+  %and = and i32 %b, %a
+  ret i32 %and
+}
+
+define signext i32 @or32(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: or32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: or $r4, $r5, $r4
+; CHECK-NEXT: jr $ra
+entry:
+  %or = or i32 %b, %a
+  ret i32 %or
+}
+
+define signext i32 @xor32(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: xor32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xor $r4, $r5, $r4
+; CHECK-NEXT: jr $ra
+entry:
+  %xor = xor i32 %b, %a
+  ret i32 %xor
+}
+
+define signext i32 @nor32(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: nor32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: nor $r4, $r4, $r5
+; CHECK-NEXT: jr $ra
+entry:
+  %or = or i32 %b, %a
+  %not = xor i32 %or, -1
+  ret i32 %not
+}
+
+define signext i32 @not32(i32 signext %a) {
+; CHECK-LABEL: not32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: nor $r4, $zero, $r4
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor i32 %a, -1
+  ret i32 %not
+}
+
diff --git a/llvm/test/CodeGen/LoongArch/lshr.ll b/llvm/test/CodeGen/LoongArch/lshr.ll
new file mode 100644
index 000000000000..54e4a5f2d159
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lshr.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=loongarch64 < %s | FileCheck %s
+
+define signext i32 @foo(i32 %a) {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli.w $r4, $r4, 0
+; CHECK-NEXT: bstrpick.d $r4, $r4, 31, 1
+; CHECK-NEXT: jr $ra
+entry:
+  %b = lshr i32 %a, 1
+  ret i32 %b
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll b/llvm/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll
new file mode 100644
index 000000000000..97b23be80fbd
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/imm_vector_lsx.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s
+
+define <2 x i64> @build_lsx0(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -1
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 4294967295, i64 4294967295>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx1(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu52i.d $r4, $zero, 2047
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 9218868437227405312, i64 9218868437227405312>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx2(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 2048
+; CHECK-NEXT: lu32i.d $r4, 524287
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 2251795518720000, i64 2251795518720000>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx3(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 4095
+; CHECK-NEXT: lu52i.d $r4, $r4, -1
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 -4503599627366401, i64 -4503599627366401>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx4(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $r4, $zero, 4095
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 2251799813689343, i64 2251799813689343>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx5(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -2048
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 4294965248, i64 4294965248>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx6(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -2048
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 4503599627368448, i64 4503599627368448>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx7(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $r4, $zero, -2048
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 2251804108650496, i64 2251804108650496>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx8(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 2147483648, i64 2147483648>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx9(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -1
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 4503599627366400, i64 4503599627366400>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx10(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: lu32i.d $r4, 0
+; CHECK-NEXT: lu52i.d $r4, $r4, 2047
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 9218868439374888960, i64 9218868439374888960>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx11(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, 524287
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: lu32i.d $r4, -1
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 -2147485696, i64 -2147485696>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx12(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2047
+; CHECK-NEXT: lu52i.d $r4, $r4, 2047
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 9223372034707294207, i64 9223372034707294207>
+  ret <2 x i64> %b
+}
+
+define <2 x i64> @build_lsx13(<2 x i64> %a) {
+; CHECK-LABEL: build_lsx13:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $r4, -524288
+; CHECK-NEXT: ori $r4, $r4, 2048
+; CHECK-NEXT: lu32i.d $r4, -524288
+; CHECK-NEXT: lu52i.d $r4, $r4, 0
+; CHECK-NEXT: vreplgr2vr.d $vr1, $r4
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %b = add <2 x i64> %a, <i64 2251801961170944, i64 2251801961170944>
+  ret <2 x i64> %b
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/inline-asm.ll b/llvm/test/CodeGen/LoongArch/lsx/inline-asm.ll
new file mode 100644
index 000000000000..37cb6dfc999c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/inline-asm.ll
@@ -0,0 +1,34 @@
+; A basic inline assembly test
+
+; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s
+
+@v2i64_r = global <2 x i64> zeroinitializer, align 16
+
+define void @test1() nounwind {
+entry:
+  ; CHECK-LABEL: test1:
+  %0 = call <2 x i64> asm "vldi ${0:w}, 1", "=f"()
+  ; CHECK: vldi $vr{{[1-3]?[0-9]}}, 1
+  store <2 x i64> %0, <2 x i64>* @v2i64_r
+  ret void
+}
+
+define void @test2() nounwind {
+entry:
+  ; CHECK-LABEL: test2:
+  %0 = load <2 x i64>, <2 x i64>* @v2i64_r
+  %1 = call <2 x i64> asm "vaddi.wu ${0:w}, ${1:w}, 1", "=f,f"(<2 x i64> %0)
+  ; CHECK: vaddi.wu $vr{{[1-3]?[0-9]}}, $vr{{[1-3]?[0-9]}}, 1
+  store <2 x i64> %1, <2 x i64>* @v2i64_r
+  ret void
+}
+
+define void @test3() nounwind {
+entry:
+  ; CHECK-LABEL: test3:
+  %0 = load <2 x i64>, <2 x i64>* @v2i64_r
+  %1 = call <2 x i64> asm sideeffect "vaddi.wu ${0:w}, ${1:w}, 1", "=f,f,~{$vr0}"(<2 x i64> %0)
+  ; CHECK: vaddi.wu $vr{{([1-9]|[1-3][0-9])}}, $vr{{([1-9]|[1-3][0-9])}}, 1
+  store <2 x i64> %1, <2 x i64>* @v2i64_r
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-lsx.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-lsx.ll
new file mode 100644
index 000000000000..60ff93095580
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-lsx.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32)
+declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32)
+
+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32)
+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32)
+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32)
+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32)
+
+define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: lsx_vsrlrni_b_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsrlrni.b.h $vr0, $vr1, 2
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %a, <16 x i8> %b, i32 2)
+  ret <16 x i8> %0
+}
+
+define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: lsx_vsrlrni_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsrlrni.h.w $vr0, $vr1, 2
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %a, <8 x i16> %b, i32 2)
+  ret <8 x i16> %0
+}
+
+define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: lsx_vsrlrni_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsrlrni.w.d $vr0, $vr1, 2
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %a, <4 x i32> %b, i32 2)
+  ret <4 x i32> %0
+}
+
+define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: lsx_vsrlrni_d_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsrlrni.d.q $vr0, $vr1, 2
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %a, <2 x i64> %b, i32 2)
+  ret <2 x i64> %0
+}
+
+define <16 x i8> @lsx_vrepli_b() {
+; CHECK-LABEL: lsx_vrepli_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldi $vr0, 2
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 2)
+  ret <16 x i8> %0
+}
+
+define <8 x i16> @lsx_vrepli_h() {
+; CHECK-LABEL: lsx_vrepli_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldi $vr0, 2
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 2)
+  ret <8 x i16> %0
+}
+
+define <4 x i32> @lsx_vrepli_w() {
+; CHECK-LABEL: lsx_vrepli_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldi $vr0, 2
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 2)
+  ret <4 x i32> %0
+}
+
+define <2 x i64> @lsx_vrepli_d() {
+; CHECK-LABEL: lsx_vrepli_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vldi $vr0, 2
+; CHECK-NEXT: jr $ra
+entry:
+  %0 = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 2)
+  ret <2 x i64> %0
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/logic-lsx.ll b/llvm/test/CodeGen/LoongArch/lsx/logic-lsx.ll
new file mode 100644
index 000000000000..0dd29b27eb30
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/logic-lsx.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s
+
+define <2 x i64> @not_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: not_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <2 x i64> %a, <i64 -1, i64 -1>
+  ret <2 x i64> %not
+}
+
+define <4 x i32> @not_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: not_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %not
+}
+
+define <8 x i16> @not_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: not_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnor.v $vr0, $vr0, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  ret <8 x i16> %not
+}
+
+define <16 x i8> @not_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: not_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  ret <16 x i8> %not
+}
+
+
+define <2 x i64> @andn_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: andn_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <2 x i64> %b, <i64 -1, i64 -1>
+  %and = and <2 x i64> %not, %a
+  ret <2 x i64> %and
+}
+
+define <4 x i32> @andn_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: andn_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %and = and <4 x i32> %not, %a
+  ret <4 x i32> %and
+}
+
+define <8 x i16> @andn_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: andn_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %and = and <8 x i16> %not, %a
+  ret <8 x i16> %and
+}
+
+define <16 x i8> @andn_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: andn_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vandn.v $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %and = and <16 x i8> %not, %a
+  ret <16 x i8> %and
+}
+
+
+define <2 x i64> @orn_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: orn_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <2 x i64> %b, <i64 -1, i64 -1>
+  %or = or <2 x i64> %not, %a
+  ret <2 x i64> %or
+}
+
+define <4 x i32> @orn_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: orn_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %or = or <4 x i32> %not, %a
+  ret <4 x i32> %or
+}
+
+define <8 x i16> @orn_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: orn_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %or = or <8 x i16> %not, %a
+  ret <8 x i16> %or
+}
+
+define <16 x i8> @orn_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: orn_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+entry:
+  %not = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %or = or <16 x i8> %not, %a
+  ret <16 x i8> %or
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll b/llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll
new file mode 100644
index 000000000000..8441ed1b0a1a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s
+
+define <16 x i8> @lsxvavg_v16i8(<16 x i8> noundef %0, <16 x i8> noundef %1) local_unnamed_addr #0 {
+; CHECK-LABEL: lsxvavg_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vldi $vr1, 1
+; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %3 = add <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %4 = add <16 x i8> %3, %1
+  %5 = sdiv <16 x i8> %4, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <16 x i8> %5
+}
+
+define <8 x i16> @lsxvavg_v8i16(<8 x i16> noundef %0, <8 x i16> noundef %1) local_unnamed_addr #0 {
+; CHECK-LABEL: lsxvavg_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vldi $vr1, 1
+; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %3 = add <8 x i16> %0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %4 = add <8 x i16> %3, %1
+  %5 = sdiv <8 x i16> %4, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+  ret <8 x i16> %5
+}
+
+define <4 x i32> @lsxvavg_v4i32(<4 x i32> noundef %0, <4 x i32> noundef %1) local_unnamed_addr #0 {
+; CHECK-LABEL: lsxvavg_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vldi $vr1, 1
+; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %3 = add <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
+  %4 = add <4 x i32> %3, %1
+  %5 = sdiv <4 x i32> %4, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %5
+}
+
+define <2 x i64> @lsxvavg_v2i64(<2 x i64> noundef %0, <2 x i64> noundef %1) local_unnamed_addr #0 {
+; CHECK-LABEL: lsxvavg_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vldi $vr1, 1
+; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %3 = add <2 x i64> %0, <i64 1, i64 1>
+  %4 = add <2 x i64> %3, %1
+  %5 = sdiv <2 x i64> %4, <i64 2, i64 2>
+  ret <2 x i64> %5
+}
+
+define <16 x i8> @lsxvavg_v16u8(<16 x i8> noundef %0, <16 x i8> noundef %1) local_unnamed_addr #0 {
+; CHECK-LABEL: lsxvavg_v16u8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vldi $vr1, 1
+; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %3 = add <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %4 = add <16 x i8> %3, %1
+  %5 = lshr <16 x i8> %4, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %5
+}
+
+define <8 x i16> @lsxvavg_v8u16(<8 x i16> noundef %0, <8 x i16> noundef %1) local_unnamed_addr #0 {
+; CHECK-LABEL: lsxvavg_v8u16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vldi $vr1, 1
+; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %3 = add <8 x i16> %0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %4 = add <8 x i16> %3, %1
+  %5 = lshr <8 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %5
+}
+
+define <4 x i32> @lsxvavg_v4u32(<4 x i32> noundef %0, <4 x i32> noundef %1) local_unnamed_addr #0 {
+; CHECK-LABEL: lsxvavg_v4u32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vldi $vr1, 1
+; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %3 = add <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
+  %4 = add <4 x i32> %3, %1
+  %5 = lshr <4 x i32> %4, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %5
+}
+
+define <2 x i64> @lsxvavg_v2u64(<2 x i64> noundef %0, <2 x i64> noundef %1) local_unnamed_addr #0 {
+; CHECK-LABEL: lsxvavg_v2u64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vldi $vr1, 1
+; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+  %3 = add <2 x i64> %0, <i64 1, i64 1>
+  %4 = add <2 x i64> %3, %1
+  %5 = lshr <2 x i64> %4, <i64 1, i64 1>
+  ret <2 x i64> %5
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/lsxvclr.ll b/llvm/test/CodeGen/LoongArch/lsx/lsxvclr.ll
new file mode 100644
index 000000000000..951254baade5
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/lsxvclr.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s
+
+define <16 x i8> @clri8(<16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: clri8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vbitclr.b $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+entry:
+  %shl = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %c
+  %xor = xor <16 x i8> %shl, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %and = and <16 x i8> %xor, %b
+  ret <16 x i8> %and
+}
+
+define <8 x i16> @clri16(<8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: clri16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vbitclr.h $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+entry:
+  %shl = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %c
+  %xor = xor <8 x i16> %shl, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %and = and <8 x i16> %xor, %b
+  ret <8 x i16> %and
+}
+
+define <4 x i32> @clri32(<4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: clri32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vbitclr.w $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+entry:
+  %shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %c
+  %xor = xor <4 x i32> %shl, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %and = and <4 x i32> %xor, %b
+  ret <4 x i32> %and
+}
+
+define <2 x i64> @clri64(<2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: clri64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1
+; CHECK-NEXT: jr $ra
+entry:
+  %shl = shl <2 x i64> <i64 1, i64 1>, %c
+  %xor = xor <2 x i64> %shl, <i64 -1, i64 -1>
+  %and = and <2 x i64> %xor, %b
+  ret <2 x i64> %and
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/set-lsx.ll b/llvm/test/CodeGen/LoongArch/lsx/set-lsx.ll
new file mode 100644
index 000000000000..69f19297d37a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/set-lsx.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s
+
+define <16 x i8> @seti8(<16 x i8>) {
+; CHECK-LABEL: seti8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbitseti.b $vr0, $vr0, 6
+; CHECK-NEXT: jr $ra
+  %2 = or <16 x i8> %0, <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>
+  ret <16 x i8> %2
+}
+
+define <8 x i16> @seti16(<8 x i16>) {
+; CHECK-LABEL: seti16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbitseti.h $vr0, $vr0, 6
+; CHECK-NEXT: jr $ra
+  %2 = or <8 x i16> %0, <i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64, i16 64>
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @seti32(<4 x i32>) {
+; CHECK-LABEL: seti32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbitseti.w $vr0, $vr0, 6
+; CHECK-NEXT: jr $ra
+  %2 = or <4 x i32> %0, <i32 64, i32 64, i32 64, i32 64>
+  ret <4 x i32> %2
+}
+
+define <2 x i64> @seti64(<2 x i64>) {
+; CHECK-LABEL: seti64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbitseti.d $vr0, $vr0, 6
+; CHECK-NEXT: jr $ra
+  %2 = or <2 x i64> %0, <i64 64, i64 64>
+  ret <2 x i64> %2
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll
new file mode 100644
index 000000000000..25e4eb0722c7
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @vshf_v16i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: vshf_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $r5, 0
+; CHECK-NEXT: vpickve2gr.d $r5, $vr0, 0
+; CHECK-NEXT: vpickve2gr.d $r6, $vr0, 1
+; CHECK-NEXT: revb.d $r6, $r6
+; CHECK-NEXT: revb.d $r5, $r5
+; CHECK-NEXT: vinsgr2vr.d $vr0, $r5, 0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $r6, 1
+; CHECK-NEXT: vst $vr0, $r4, 0
+; CHECK-NEXT: jr $ra
+  %v1 = load <16 x i8>, ptr %a0
+  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+  store <16 x i8> %v2, ptr %res
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vabsd.ll b/llvm/test/CodeGen/LoongArch/lsx/vabsd.ll
new file mode 100644
index 000000000000..86201ae0f592
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vabsd.ll
@@ -0,0 +1,262 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s
+
+define <16 x i8> @vabsd_b(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vabsd_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vabsd.b $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %icmp = icmp sgt <16 x i8> %b, %a
+  %subba = sub <16 x i8> %b, %a
+  %subab = sub <16 x i8> %a, %b
+  %select = select <16 x i1> %icmp, <16 x i8> %subba, <16 x i8> %subab
+  ret <16 x i8> %select
+}
+
+define <8 x i16> @vabsd_h(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vabsd_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vabsd.h $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %icmp = icmp sgt <8 x i16> %b, %a
+  %subba = sub <8 x i16> %b, %a
+  %subab = sub <8 x i16> %a, %b
+  %select = select <8 x i1> %icmp, <8 x i16> %subba, <8 x i16> %subab
+  ret <8 x i16> %select
+}
+
+define <8 x i8> @vabsd_h_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: vabsd_h_v8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vabsd.h $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %icmp = icmp sgt <8 x i8> %b, %a
+  %subba = sub <8 x i8> %b, %a
+  %subab = sub <8 x i8> %a, %b
+  %select = select <8 x i1> %icmp, <8 x i8> %subba, <8 x i8> %subab
+  ret <8 x i8> %select
+}
+
+define <4 x i32> @vabsd_w(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vabsd_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vabsd.w $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %icmp = icmp sgt <4 x i32> %b, %a
+  %subba = sub <4 x i32> %b, %a
+  %subab = sub <4 x i32> %a, %b
+  %select = select <4 x i1> %icmp, <4 x i32> %subba, <4 x i32> %subab
+  ret <4 x i32> %select
+}
+
+define <4 x i16> @vabsd_w_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: vabsd_w_v4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vabsd.w $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %icmp = icmp sgt <4 x i16> %b, %a
+  %subba = sub <4 x i16> %b, %a
+  %subab = sub <4 x i16> %a, %b
+  %select = select <4 x i1> %icmp, <4 x i16> %subba, <4 x i16> %subab
+  ret <4 x i16> %select
+}
+
+define <4 x i8> @vabsd_w_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: vabsd_w_v4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vabsd.w $vr0, $vr1, $vr0
+; CHECK-NEXT: jr $ra
+entry:
+  %icmp = icmp sgt <4 x i8> %b, %a
+  %subba = sub <4 x i8> %b, %a
+  %subab = sub <4 x i8> %a, %b
+  %select = select <4 x i1> %icmp, <4 x i8> %subba, <4 x i8> %subab
+  ret <4 x i8> %select
+}
+
+define <2 x i64> @vabsd_d(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vabsd_d:
+; CHECK: # %bb.0: #
%entry +; CHECK-NEXT: vabsd.d $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp sgt <2 x i64> %b, %a + %subba = sub <2 x i64> %b, %a + %subab = sub <2 x i64> %a, %b + %select = select <2 x i1> %icmp, <2 x i64> %subba, <2 x i64> %subab + ret <2 x i64> %select +} + +define <2 x i32> @vabsd_d_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: vabsd_d_v2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.d $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp sgt <2 x i32> %b, %a + %subba = sub <2 x i32> %b, %a + %subab = sub <2 x i32> %a, %b + %select = select <2 x i1> %icmp, <2 x i32> %subba, <2 x i32> %subab + ret <2 x i32> %select +} + +define <2 x i16> @vabsd_d_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: vabsd_d_v2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.d $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp sgt <2 x i16> %b, %a + %subba = sub <2 x i16> %b, %a + %subab = sub <2 x i16> %a, %b + %select = select <2 x i1> %icmp, <2 x i16> %subba, <2 x i16> %subab + ret <2 x i16> %select +} + +define <2 x i8> @vabsd_d_v2i8(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: vabsd_d_v2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.d $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp sgt <2 x i8> %b, %a + %subba = sub <2 x i8> %b, %a + %subab = sub <2 x i8> %a, %b + %select = select <2 x i1> %icmp, <2 x i8> %subba, <2 x i8> %subab + ret <2 x i8> %select +} + +define <16 x i8> @vabsd_bu(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vabsd_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <16 x i8> %b, %a + %subba = sub <16 x i8> %b, %a + %subab = sub <16 x i8> %a, %b + %select = select <16 x i1> %icmp, <16 x i8> %subba, <16 x i8> %subab + ret <16 x i8> %select +} + +define <8 x i16> @vabsd_hu(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vabsd_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <8 x i16> %b, %a + %subba = sub <8 x i16> %b, %a + %subab = sub <8 x i16> %a, %b + %select = select <8 x i1> %icmp, <8 x i16> %subba, <8 x i16> %subab + ret <8 x i16> %select +} + +define <8 x i8> @vabsd_hu_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: vabsd_hu_v8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <8 x i8> %b, %a + %subba = sub <8 x i8> %b, %a + %subab = sub <8 x i8> %a, %b + %select = select <8 x i1> %icmp, <8 x i8> %subba, <8 x i8> %subab + ret <8 x i8> %select +} + +define <4 x i32> @vabsd_wu(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vabsd_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <4 x i32> %b, %a + %subba = sub <4 x i32> %b, %a + %subab = sub <4 x i32> %a, %b + %select = select <4 x i1> %icmp, <4 x i32> %subba, <4 x i32> %subab + ret <4 x i32> %select +} + +define <4 x i16> @vabsd_wu_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: vabsd_wu_v4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <4 x i16> %b, %a + %subba = sub <4 x i16> %b, %a + %subab = sub <4 x i16> %a, %b + %select = select <4 x i1> %icmp, <4 x i16> %subba, <4 x i16> %subab + ret <4 x i16> %select +} + +define <4 x i8> @vabsd_wu_v4i8(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: vabsd_wu_v4i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + 
%icmp = icmp ugt <4 x i8> %b, %a + %subba = sub <4 x i8> %b, %a + %subab = sub <4 x i8> %a, %b + %select = select <4 x i1> %icmp, <4 x i8> %subba, <4 x i8> %subab + ret <4 x i8> %select +} + +define <2 x i64> @vabsd_du(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: vabsd_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.du $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <2 x i64> %b, %a + %subba = sub <2 x i64> %b, %a + %subab = sub <2 x i64> %a, %b + %select = select <2 x i1> %icmp, <2 x i64> %subba, <2 x i64> %subab + ret <2 x i64> %select +} + +define <2 x i32> @vabsd_du_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: vabsd_du_v2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.du $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <2 x i32> %b, %a + %subba = sub <2 x i32> %b, %a + %subab = sub <2 x i32> %a, %b + %select = select <2 x i1> %icmp, <2 x i32> %subba, <2 x i32> %subab + ret <2 x i32> %select +} + +define <2 x i16> @vabsd_du_v2i16(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: vabsd_du_v2i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.du $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <2 x i16> %b, %a + %subba = sub <2 x i16> %b, %a + %subab = sub <2 x i16> %a, %b + %select = select <2 x i1> %icmp, <2 x i16> %subba, <2 x i16> %subab + ret <2 x i16> %select +} + +define <2 x i8> @vabsd_du_v2i8(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: vabsd_du_v2i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.du $vr0, $vr1, $vr0 +; CHECK-NEXT: jr $ra +entry: + %icmp = icmp ugt <2 x i8> %b, %a + %subba = sub <2 x i8> %b, %a + %subab = sub <2 x i8> %a, %b + %select = select <2 x i1> %icmp, <2 x i8> %subba, <2 x i8> %subab + ret <2 x i8> %select +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vadda.ll b/llvm/test/CodeGen/LoongArch/lsx/vadda.ll new file mode 100644 index 000000000000..4c987fb1b76c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/vadda.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s + +define <16 x i8> @vaddab(<16 x i8>, <16 x i8>) { +; CHECK-LABEL: vaddab: +; CHECK: # %bb.0: +; CHECK-NEXT: vadda.b $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <16 x i8> %0, zeroinitializer + %4 = sub <16 x i8> zeroinitializer, %0 + %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %0 + %6 = icmp slt <16 x i8> %1, zeroinitializer + %7 = sub <16 x i8> zeroinitializer, %1 + %8 = select <16 x i1> %6, <16 x i8> %7, <16 x i8> %1 + %9 = add <16 x i8> %5, %8 + ret <16 x i8> %9 +} + +define <8 x i16> @vaddah(<8 x i16>, <8 x i16>) { +; CHECK-LABEL: vaddah: +; CHECK: # %bb.0: +; CHECK-NEXT: vadda.h $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <8 x i16> %0, zeroinitializer + %4 = sub <8 x i16> zeroinitializer, %0 + %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %0 + %6 = icmp slt <8 x i16> %1, zeroinitializer + %7 = sub <8 x i16> zeroinitializer, %1 + %8 = select <8 x i1> %6, <8 x i16> %7, <8 x i16> %1 + %9 = add <8 x i16> %5, %8 + ret <8 x i16> %9 +} + +define <4 x i32> @vaddaw(<4 x i32>, <4 x i32>) { +; CHECK-LABEL: vaddaw: +; CHECK: # %bb.0: +; CHECK-NEXT: vadda.w $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <4 x i32> %0, zeroinitializer + %4 = sub nsw <4 x i32> zeroinitializer, %0 + %5 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %0 + %6 = icmp slt <4 x i32> %1, zeroinitializer + %7 = sub nsw <4 x i32> zeroinitializer, %1 + %8 = select <4 x i1> %6, <4 x i32> %7, <4 x i32> %1 + %9 = add nuw 
nsw <4 x i32> %5, %8 + ret <4 x i32> %9 +} + +define <2 x i64> @vaddad(<2 x i64>, <2 x i64>) { +; CHECK-LABEL: vaddad: +; CHECK: # %bb.0: +; CHECK-NEXT: vadda.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra + %3 = icmp slt <2 x i64> %0, zeroinitializer + %4 = sub nsw <2 x i64> zeroinitializer, %0 + %5 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %0 + %6 = icmp slt <2 x i64> %1, zeroinitializer + %7 = sub nsw <2 x i64> zeroinitializer, %1 + %8 = select <2 x i1> %6, <2 x i64> %7, <2 x i64> %1 + %9 = add nuw nsw <2 x i64> %5, %8 + ret <2 x i64> %9 +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vfcvt.ll b/llvm/test/CodeGen/LoongArch/lsx/vfcvt.ll new file mode 100644 index 000000000000..53e262ccffca --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/vfcvt.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double>, <2 x double>) + +define <4 x float> @lsx_vfcvt_s_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcvt_s_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcvt.s.d $vr0, $vr0, $vr1 +; CHECK-NEXT: jr $ra +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> %va, <2 x double> %vb) + ret <4 x float> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lu12i.ll b/llvm/test/CodeGen/LoongArch/lu12i.ll new file mode 100644 index 000000000000..55fd40edd1d8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lu12i.ll @@ -0,0 +1,7 @@ +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define i32 @foo() { +; CHECK: lu12i.w $r4, -1 +entry: + ret i32 -4096 +} diff --git a/llvm/test/CodeGen/LoongArch/mcpu_load.ll b/llvm/test/CodeGen/LoongArch/mcpu_load.ll new file mode 100644 index 000000000000..c0c782fea6c1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/mcpu_load.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -mcpu=la264 -o - %s | FileCheck -check-prefix=ALIGNED %s +; RUN: llc -march=loongarch64 -mcpu=la364 -o - %s | FileCheck -check-prefix=ALIGNED %s +; RUN: llc -march=loongarch64 -mcpu=la464 -o - %s | FileCheck -check-prefix=UNALIGNED %s + +define i32 @i32_load(i32* %p) { +; ALIGNED-LABEL: i32_load: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.hu $r5, $r4, 0 +; ALIGNED-NEXT: ld.hu $r4, $r4, 2 +; ALIGNED-NEXT: slli.w $r4, $r4, 16 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra +; +; UNALIGNED-LABEL: i32_load: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.w $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra + %tmp = load i32, i32* %p, align 2 + ret i32 %tmp +} + +define signext i32 @i32_sextload(i32* %p) { +; ALIGNED-LABEL: i32_sextload: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.hu $r5, $r4, 0 +; ALIGNED-NEXT: ld.h $r4, $r4, 2 +; ALIGNED-NEXT: slli.d $r4, $r4, 16 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra +; +; UNALIGNED-LABEL: i32_sextload: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.w $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra + %tmp = load i32, i32* %p, align 2 + ret i32 %tmp +} + +define zeroext i32 @i32_zextload(i32* %p) { +; ALIGNED-LABEL: i32_zextload: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.hu $r5, $r4, 0 +; ALIGNED-NEXT: ld.hu $r4, $r4, 2 +; ALIGNED-NEXT: slli.d $r4, $r4, 16 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra +; +; UNALIGNED-LABEL: i32_zextload: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.wu $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra + %tmp = load i32, i32* %p, align 2 + ret i32 
%tmp +} + +define i64 @i64_load(i64* %p) { +; ALIGNED-LABEL: i64_load: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.wu $r5, $r4, 0 +; ALIGNED-NEXT: ld.wu $r4, $r4, 4 +; ALIGNED-NEXT: slli.d $r4, $r4, 32 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra +; +; UNALIGNED-LABEL: i64_load: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.d $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra + %tmp = load i64, i64* %p, align 4 + ret i64 %tmp +} diff --git a/llvm/test/CodeGen/LoongArch/misc.mir b/llvm/test/CodeGen/LoongArch/misc.mir deleted file mode 100644 index 56793c583904..000000000000 --- a/llvm/test/CodeGen/LoongArch/misc.mir +++ /dev/null @@ -1,200 +0,0 @@ -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=obj -o - \ -# RUN: | extract-section .text \ -# RUN: | FileCheck %s -check-prefix=CHECK-ENC -# RUN: llc %s -mtriple=loongarch64 -start-after=prologepilog -O0 -filetype=asm -o - \ -# RUN: | FileCheck %s -check-prefix=CHECK-ASM - -# ------------------------------------------------------------------------------------------------- -# Encoding format: I15 -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------------------------------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------------------------------------- -# opcode | imm15 -# ---------------------------------------------------+--------------------------------------------- - ---- -# CHECK-LABEL: test_DBAR: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: dbar 0 -name: test_DBAR -body: | - bb.0: - DBAR 0 -... ---- -# CHECK-LABEL: test_IBAR: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 1 1 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: ibar 0 -name: test_IBAR -body: | - bb.0: - IBAR 0 -... ---- -# CHECK-LABEL: test_SYSCALL: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 -# CHECK-ASM: syscall 100 -name: test_SYSCALL -body: | - bb.0: - SYSCALL 100 -... ---- -# CHECK-LABEL: test_BREAK: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 -# CHECK-ASM: break 199 -name: test_BREAK -body: | - bb.0: - BREAK 199 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: I26 -# ------------------------------------------------------------------------------------------------- -# ------------------+-----------------------------------------------+------------------------------ -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------+-----------------------------------------------+------------------------------ -# opcode | imm26{15-0} | imm26{25-16} -# ------------------+-----------------------------------------------+------------------------------ - ---- -# CHECK-LABEL: test_B: -# CHECK-ENC: 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: b 80 -name: test_B -body: | - bb.0: - B 80 -... ---- -# CHECK-LABEL: test_BL: -# CHECK-ENC: 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: bl 136 -name: test_BL -body: | - bb.0: - BL 136 -... 
- -# -------------------------------------------------------------------------------------------------------- -# Encoding format: BSTR_W -# -------------------------------------------------------------------------------------------------------- -# ---------------------------------+--------------+---------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------+--------------+---------+--------------+--------------+--------------- -# opcode{11-1} | msb |opcode{0}| lsb | rj | rd -# ---------------------------------+--------------+---------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_BSTRINS_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bstrins.w $a0, $a1, 7, 2 -name: test_BSTRINS_W -body: | - bb.0: - $r4 = BSTRINS_W $r4, $r5, 7, 2 -... ---- -# CHECK-LABEL: test_BSTRPICK_W: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 1 0 0 1 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bstrpick.w $a0, $a1, 10, 4 -name: test_BSTRPICK_W -body: | - bb.0: - $r4 = BSTRPICK_W $r5, 10, 4 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: BSTR_D -# ------------------------------------------------------------------------------------------------- -# ------------------------------+-----------------+-----------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------+-----------------+-----------------+--------------+--------------- -# opcode | msb | lsb | rj | rd -# ------------------------------+-----------------+-----------------+--------------+--------------- - ---- -# CHECK-LABEL: test_BSTRINS_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bstrins.d $a0, $a1, 7, 2 -name: test_BSTRINS_D -body: | - bb.0: - $r4 = BSTRINS_D $r4, $r5, 7, 2 -... ---- -# CHECK-LABEL: test_BSTRPICK_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: bstrpick.d $a0, $a1, 39, 22 -name: test_BSTRPICK_D -body: | - bb.0: - $r4 = BSTRPICK_D $r5, 39, 22 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: ASRT -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------+--------------+--------------- -# opcode | rk | rj | 0x0 -# ---------------------------------------------------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_ASRTLE_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: asrtle.d $a0, $a1 -name: test_ASRTLE_D -body: | - bb.0: - ASRTLE_D $r4, $r5 -... ---- -# CHECK-LABEL: test_ASRTGT_D: -# CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: asrtgt.d $a0, $a1 -name: test_ASRTGT_D -body: | - bb.0: - ASRTGT_D $r4, $r5 -... 
- -# ------------------------------------------------------------------------------------------------- -# Encoding format: PRELD -# ------------------------------------------------------------------------------------------------- -# ------------------------------+-----------------------------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ------------------------------+-----------------------------------+--------------+--------------- -# opcode | imm12 | rj | imm5 -# ------------------------------+-----------------------------------+--------------+--------------- - ---- -# CHECK-LABEL: test_PRELD: -# CHECK-ENC: 0 0 1 0 1 0 1 0 1 1 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 1 1 1 -# CHECK-ASM: preld 15, $a0, 21 -name: test_PRELD -body: | - bb.0: - PRELD 15, $r4, 21 -... - -# ------------------------------------------------------------------------------------------------- -# Encoding format: PRELDX -# ------------------------------------------------------------------------------------------------- -# ---------------------------------------------------+--------------+--------------+--------------- -# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 -# ---------------------------------------------------+--------------+--------------+--------------- -# opcode | rk | rj | imm5 -# ---------------------------------------------------+--------------+--------------+--------------- - ---- -# CHECK-LABEL: test_PRELDX: -# CHECK-ENC: 0 0 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0 1 0 1 1 -# CHECK-ASM: preldx 11, $a0, $a1 -name: test_PRELDX -body: | - bb.0: - PRELDX 11, $r4, $r5 diff --git a/llvm/test/CodeGen/LoongArch/named-register.ll b/llvm/test/CodeGen/LoongArch/named-register.ll new file mode 100644 index 000000000000..0b0660fcab65 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/named-register.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define i64 @get_r2() { +; CHECK-LABEL: get_r2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $r4, $tp +; CHECK-NEXT: jr $ra +entry: + %0 = call i64 @llvm.read_register.i64(metadata !0) + ret i64 %0 +} + +define i64 @get_r21() { +; CHECK-LABEL: get_r21: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: move $r4, $r21 +; CHECK-NEXT: jr $ra +entry: + %0 = call i64 @llvm.read_register.i64(metadata !1) + ret i64 %0 +} + +declare i64 @llvm.read_register.i64(metadata) + +!llvm.named.register.$r2 = !{!0} +!llvm.named.register.$r21 = !{!1} + +!0 = !{!"$r2"} +!1 = !{!"$r21"} diff --git a/llvm/test/CodeGen/LoongArch/nomerge.ll b/llvm/test/CodeGen/LoongArch/nomerge.ll new file mode 100644 index 000000000000..a8ce632259b5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/nomerge.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -mtriple=loongarch64 -relocation-model=pic -o - | FileCheck %s + +define void @foo(i32 %i) { +entry: + switch i32 %i, label %if.end3 [ + i32 5, label %if.then + i32 7, label %if.then2 + ] + +if.then: + tail call void @bar() #0 + br label %if.end3 + +if.then2: + tail call void @bar() #0 + br label %if.end3 + +if.end3: + tail call void @bar() #0 + ret void +} + +declare void @bar() + +attributes #0 = { nomerge } + +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK: .LBB0_1: # %entry +; CHECK: .LBB0_2: # %if.then +; CHECK-NEXT: bl bar +; CHECK: .LBB0_3: # %if.then2 +; CHECK-NEXT: bl bar +; CHECK: .LBB0_4: # %if.end3 +; CHECK: b bar diff --git a/llvm/test/CodeGen/LoongArch/noti32.ll 
b/llvm/test/CodeGen/LoongArch/noti32.ll new file mode 100644 index 000000000000..9aa8c4391256 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/noti32.ll @@ -0,0 +1,143 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define i8 @nor_i8(i8 %a, i8 %b) nounwind { +; CHECK-LABEL: nor_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: or $r4, $r4, $r5 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra + %or = or i8 %a, %b + %neg = xor i8 %or, -1 + ret i8 %neg +} + +define i16 @nor_i16(i16 %a, i16 %b) nounwind { +; CHECK-LABEL: nor_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: or $r4, $r4, $r5 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra + %or = or i16 %a, %b + %neg = xor i16 %or, -1 + ret i16 %neg +} + +define i32 @nor_i32(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: nor_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: or $r4, $r4, $r5 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra + %or = or i32 %a, %b + %neg = xor i32 %or, -1 + ret i32 %neg +} + +define i8 @nor_zero_i8(i8 %a) nounwind { +; CHECK-LABEL: nor_zero_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra + %neg = xor i8 %a, -1 + ret i8 %neg +} + +define i16 @nor_zero_i16(i16 %a) nounwind { +; CHECK-LABEL: nor_zero_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra + %neg = xor i16 %a, -1 + ret i16 %neg +} + +define i32 @nor_zero_i32(i32 %a) nounwind { +; CHECK-LABEL: nor_zero_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: nor $r4, $zero, $r4 +; CHECK-NEXT: jr $ra + %neg = xor i32 %a, -1 + ret i32 %neg +} + +define i8 @orn_i8(i8 %a, i8 %b) nounwind { +; CHECK-LABEL: orn_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: nor $r5, $zero, $r5 +; CHECK-NEXT: or $r4, $r5, $r4 +; CHECK-NEXT: jr $ra + %neg = xor i8 %b, -1 + %or = or i8 %neg, %a + ret i8 %or +} + +define i16 @orn_i16(i16 %a, i16 %b) nounwind { +; CHECK-LABEL: orn_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: nor $r5, $zero, $r5 +; CHECK-NEXT: or $r4, $r5, $r4 +; CHECK-NEXT: jr $ra + %neg = xor i16 %b, -1 + %or = or i16 %neg, %a + ret i16 %or +} + +define i32 @orn_i32(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: orn_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: nor $r5, $zero, $r5 +; CHECK-NEXT: or $r4, $r5, $r4 +; CHECK-NEXT: jr $ra + %neg = xor i32 %b, -1 + %or = or i32 %neg, %a + ret i32 %or +} + +define i8 @andn_i8(i8 %a, i8 %b) nounwind { +; CHECK-LABEL: andn_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: andn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %neg = xor i8 %b, -1 + %and = and i8 %neg, %a + ret i8 %and +} + +define i16 @andn_i16(i16 %a, i16 %b) nounwind { +; CHECK-LABEL: andn_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: andn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %neg = xor i16 %b, -1 + %and = and i16 %neg, %a + ret i16 %and +} + +define i32 @andn_i32(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: andn_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: andn $r4, $r4, $r5 +; CHECK-NEXT: jr $ra + %neg 
= xor i32 %b, -1 + %and = and i32 %neg, %a + ret i32 %and +} diff --git a/llvm/test/CodeGen/LoongArch/peephole-load-store-addi.ll b/llvm/test/CodeGen/LoongArch/peephole-load-store-addi.ll new file mode 100644 index 000000000000..541ea4256e92 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/peephole-load-store-addi.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +define i8 @load_i8() nounwind { +; CHECK-LABEL: load_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.bu $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i8, i8* inttoptr (i64 40 to i8*), align 8 + ret i8 %a +} +define signext i8 @load_i8_sext() nounwind { +; CHECK-LABEL: load_i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.b $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i8, i8* inttoptr (i64 40 to i8*), align 8 + ret i8 %a +} + +define i16 @load_i16() nounwind { +; CHECK-LABEL: load_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.hu $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i16, i16* inttoptr (i64 40 to i16*), align 8 + ret i16 %a +} + +define signext i16 @load_i16_sext() nounwind { +; CHECK-LABEL: load_i16_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.h $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i16, i16* inttoptr (i64 40 to i16*), align 8 + ret i16 %a +} + +define i32 @load_i32() nounwind { +; CHECK-LABEL: load_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i32, i32* inttoptr (i64 40 to i32*), align 8 + ret i32 %a +} + +define signext i32 @load_i32_sext() nounwind { +; CHECK-LABEL: load_i32_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i32, i32* inttoptr (i64 40 to i32*), align 8 + ret i32 %a +} + +define i64 @load_i64() nounwind { +; CHECK-LABEL: load_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $r4, $zero, 40 +; CHECK-NEXT: jr $ra + %a = load i64, i64* inttoptr (i64 40 to i64*), align 8 + ret i64 %a +} + +define void @store_i8(i8 %v) nounwind { +; CHECK-LABEL: store_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: st.b $r4, $zero, 40 +; CHECK-NEXT: jr $ra + store i8 %v, i8* inttoptr (i64 40 to i8*), align 8 + ret void +} + +define void @store_i16(i16 %v) nounwind { +; CHECK-LABEL: store_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: st.h $r4, $zero, 40 +; CHECK-NEXT: jr $ra + store i16 %v, i16* inttoptr (i64 40 to i16*), align 8 + ret void +} + +define void @store_i32(i32 %v) nounwind { +; CHECK-LABEL: store_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: st.w $r4, $zero, 40 +; CHECK-NEXT: jr $ra + store i32 %v, i32* inttoptr (i64 40 to i32*), align 8 + ret void +} + +define void @store_i64(i64 %v) nounwind { +; CHECK-LABEL: store_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: st.d $r4, $zero, 40 +; CHECK-NEXT: jr $ra + store i64 %v, i64* inttoptr (i64 40 to i64*), align 8 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll b/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll deleted file mode 100644 index 1878e0ed2424..000000000000 --- a/llvm/test/CodeGen/LoongArch/shift-masked-shamt.ll +++ /dev/null @@ -1,255 +0,0 @@ -; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 - -;; This test checks that unnecessary masking of shift amount operands is -;; eliminated during instruction selection. The test needs to ensure that the -;; masking is not removed if it may affect the shift amount. 
- -define i32 @sll_redundant_mask(i32 %a, i32 %b) { -; LA32-LABEL: sll_redundant_mask: -; LA32: # %bb.0: -; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sll_redundant_mask: -; LA64: # %bb.0: -; LA64-NEXT: sll.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = and i32 %b, 31 - %2 = shl i32 %a, %1 - ret i32 %2 -} - -define i32 @sll_non_redundant_mask(i32 %a, i32 %b) { -; LA32-LABEL: sll_non_redundant_mask: -; LA32: # %bb.0: -; LA32-NEXT: andi $a1, $a1, 15 -; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sll_non_redundant_mask: -; LA64: # %bb.0: -; LA64-NEXT: andi $a1, $a1, 15 -; LA64-NEXT: sll.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = and i32 %b, 15 - %2 = shl i32 %a, %1 - ret i32 %2 -} - -define i32 @srl_redundant_mask(i32 %a, i32 %b) { -; LA32-LABEL: srl_redundant_mask: -; LA32: # %bb.0: -; LA32-NEXT: srl.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: srl_redundant_mask: -; LA64: # %bb.0: -; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = and i32 %b, 4095 - %2 = lshr i32 %a, %1 - ret i32 %2 -} - -define i32 @srl_non_redundant_mask(i32 %a, i32 %b) { -; LA32-LABEL: srl_non_redundant_mask: -; LA32: # %bb.0: -; LA32-NEXT: andi $a1, $a1, 7 -; LA32-NEXT: srl.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: srl_non_redundant_mask: -; LA64: # %bb.0: -; LA64-NEXT: andi $a1, $a1, 7 -; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = and i32 %b, 7 - %2 = lshr i32 %a, %1 - ret i32 %2 -} - -define i32 @sra_redundant_mask(i32 %a, i32 %b) { -; LA32-LABEL: sra_redundant_mask: -; LA32: # %bb.0: -; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sra_redundant_mask: -; LA64: # %bb.0: -; LA64-NEXT: sra.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = and i32 %b, 65535 - %2 = ashr i32 %a, %1 - ret i32 %2 -} - -define i32 @sra_non_redundant_mask(i32 %a, i32 %b) { -; LA32-LABEL: sra_non_redundant_mask: -; LA32: # %bb.0: -; LA32-NEXT: andi $a1, $a1, 32 -; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sra_non_redundant_mask: -; LA64: # %bb.0: -; LA64-NEXT: andi $a1, $a1, 32 -; LA64-NEXT: sra.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = and i32 %b, 32 - %2 = ashr i32 %a, %1 - ret i32 %2 -} - -define i32 @sll_redundant_mask_zeros(i32 %a, i32 %b) { -; LA32-LABEL: sll_redundant_mask_zeros: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a1, $a1, 1 -; LA32-NEXT: sll.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sll_redundant_mask_zeros: -; LA64: # %bb.0: -; LA64-NEXT: slli.d $a1, $a1, 1 -; LA64-NEXT: sll.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = shl i32 %b, 1 - %2 = and i32 %1, 30 - %3 = shl i32 %a, %2 - ret i32 %3 -} - -define i32 @srl_redundant_mask_zeros(i32 %a, i32 %b) { -; LA32-LABEL: srl_redundant_mask_zeros: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a1, $a1, 2 -; LA32-NEXT: srl.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: srl_redundant_mask_zeros: -; LA64: # %bb.0: -; LA64-NEXT: slli.d $a1, $a1, 2 -; LA64-NEXT: srl.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = shl i32 %b, 2 - %2 = and i32 %1, 28 - %3 = lshr i32 %a, %2 - ret i32 %3 -} - -define i32 @sra_redundant_mask_zeros(i32 %a, i32 %b) { -; LA32-LABEL: sra_redundant_mask_zeros: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a1, $a1, 3 -; LA32-NEXT: sra.w $a0, $a0, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sra_redundant_mask_zeros: -; LA64: 
# %bb.0: -; LA64-NEXT: slli.d $a1, $a1, 3 -; LA64-NEXT: sra.w $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = shl i32 %b, 3 - %2 = and i32 %1, 24 - %3 = ashr i32 %a, %2 - ret i32 %3 -} - -define i64 @sll_redundant_mask_zeros_i64(i64 %a, i64 %b) { -; LA32-LABEL: sll_redundant_mask_zeros_i64: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a2, $a2, 2 -; LA32-NEXT: srli.w $a3, $a0, 1 -; LA32-NEXT: andi $a4, $a2, 60 -; LA32-NEXT: xori $a5, $a4, 31 -; LA32-NEXT: srl.w $a3, $a3, $a5 -; LA32-NEXT: sll.w $a1, $a1, $a2 -; LA32-NEXT: or $a1, $a1, $a3 -; LA32-NEXT: addi.w $a3, $a4, -32 -; LA32-NEXT: slti $a4, $a3, 0 -; LA32-NEXT: maskeqz $a1, $a1, $a4 -; LA32-NEXT: sll.w $a5, $a0, $a3 -; LA32-NEXT: masknez $a4, $a5, $a4 -; LA32-NEXT: or $a1, $a1, $a4 -; LA32-NEXT: sll.w $a0, $a0, $a2 -; LA32-NEXT: srai.w $a2, $a3, 31 -; LA32-NEXT: and $a0, $a2, $a0 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sll_redundant_mask_zeros_i64: -; LA64: # %bb.0: -; LA64-NEXT: slli.d $a1, $a1, 2 -; LA64-NEXT: sll.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = shl i64 %b, 2 - %2 = and i64 %1, 60 - %3 = shl i64 %a, %2 - ret i64 %3 -} - -define i64 @srl_redundant_mask_zeros_i64(i64 %a, i64 %b) { -; LA32-LABEL: srl_redundant_mask_zeros_i64: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a2, $a2, 3 -; LA32-NEXT: slli.w $a3, $a1, 1 -; LA32-NEXT: andi $a4, $a2, 56 -; LA32-NEXT: xori $a5, $a4, 31 -; LA32-NEXT: sll.w $a3, $a3, $a5 -; LA32-NEXT: srl.w $a0, $a0, $a2 -; LA32-NEXT: or $a0, $a0, $a3 -; LA32-NEXT: addi.w $a3, $a4, -32 -; LA32-NEXT: slti $a4, $a3, 0 -; LA32-NEXT: maskeqz $a0, $a0, $a4 -; LA32-NEXT: srl.w $a5, $a1, $a3 -; LA32-NEXT: masknez $a4, $a5, $a4 -; LA32-NEXT: or $a0, $a0, $a4 -; LA32-NEXT: srl.w $a1, $a1, $a2 -; LA32-NEXT: srai.w $a2, $a3, 31 -; LA32-NEXT: and $a1, $a2, $a1 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: srl_redundant_mask_zeros_i64: -; LA64: # %bb.0: -; LA64-NEXT: slli.d $a1, $a1, 3 -; LA64-NEXT: srl.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = shl i64 %b, 3 - %2 = and i64 %1, 56 - %3 = lshr i64 %a, %2 - ret i64 %3 -} - -define i64 @sra_redundant_mask_zeros_i64(i64 %a, i64 %b) { -; LA32-LABEL: sra_redundant_mask_zeros_i64: -; LA32: # %bb.0: -; LA32-NEXT: slli.w $a3, $a2, 4 -; LA32-NEXT: srai.w $a2, $a1, 31 -; LA32-NEXT: andi $a4, $a3, 48 -; LA32-NEXT: addi.w $a5, $a4, -32 -; LA32-NEXT: slti $a6, $a5, 0 -; LA32-NEXT: masknez $a2, $a2, $a6 -; LA32-NEXT: sra.w $a7, $a1, $a3 -; LA32-NEXT: maskeqz $a7, $a7, $a6 -; LA32-NEXT: or $a2, $a7, $a2 -; LA32-NEXT: srl.w $a0, $a0, $a3 -; LA32-NEXT: xori $a3, $a4, 31 -; LA32-NEXT: slli.w $a4, $a1, 1 -; LA32-NEXT: sll.w $a3, $a4, $a3 -; LA32-NEXT: or $a0, $a0, $a3 -; LA32-NEXT: sra.w $a1, $a1, $a5 -; LA32-NEXT: maskeqz $a0, $a0, $a6 -; LA32-NEXT: masknez $a1, $a1, $a6 -; LA32-NEXT: or $a0, $a0, $a1 -; LA32-NEXT: move $a1, $a2 -; LA32-NEXT: jirl $zero, $ra, 0 -; -; LA64-LABEL: sra_redundant_mask_zeros_i64: -; LA64: # %bb.0: -; LA64-NEXT: slli.d $a1, $a1, 4 -; LA64-NEXT: sra.d $a0, $a0, $a1 -; LA64-NEXT: jirl $zero, $ra, 0 - %1 = shl i64 %b, 4 - %2 = and i64 %1, 48 - %3 = ashr i64 %a, %2 - ret i64 %3 -} diff --git a/llvm/test/CodeGen/LoongArch/signext.ll b/llvm/test/CodeGen/LoongArch/signext.ll new file mode 100644 index 000000000000..13c710d14168 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/signext.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define i32 @foo(i32 signext %a) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: jr $ra + ret i32 %a +} + +define signext i32 
@foo1() { +; CHECK-LABEL: foo1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ori $r4, $zero, 0 +; CHECK-NEXT: ori $r5, $zero, 896 +; CHECK-NEXT: move $r6, $r4 +; CHECK-NEXT: .LBB1_1: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add.w $r4, $r4, $r6 +; CHECK-NEXT: addi.w $r6, $r6, 1 +; CHECK-NEXT: bne $r6, $r5, .LBB1_1 +; CHECK-NEXT: # %bb.2: # %for.end +; CHECK-NEXT: jr $ra +entry: + br label %for.body + +for.body: + %sum.013 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %add = add i32 %sum.013, %i.010 + %inc = add nuw nsw i32 %i.010, 1 + %exitcond = icmp eq i32 %inc, 896 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %add +} diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll new file mode 100644 index 000000000000..0a54e0f8fb76 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/stptr.ll @@ -0,0 +1,52 @@ +; Check whether st.w/st.d/stptr.w/stptr.d/stx.w/stx.d instructions are properly generated +; RUN: llc -march=loongarch64 -o - %s | FileCheck %s + +define void @st_w(i32* %p, i32 signext %val) { +; CHECK: st.w $r5, $r4, 2044 +; CHECK: jr $ra + %addr = getelementptr inbounds i32, i32* %p, i64 511 + store i32 %val, i32* %addr, align 4 + ret void +} + +define void @stptr_w(i32* %p, i32 signext %val) { +; CHECK: stptr.w $r5, $r4, 2048 +; CHECK: jr $ra + %addr = getelementptr inbounds i32, i32* %p, i64 512 + store i32 %val, i32* %addr, align 4 + ret void +} + +define void @stx_w(i32* %p, i32 signext %val) { +; CHECK: lu12i.w $r[[REG:[0-9]+]], 8 +; CHECK: stx.w $r5, $r4, $r[[REG:[0-9]+]] +; CHECK: jr $ra + %addr = getelementptr inbounds i32, i32* %p, i64 8192 + store i32 %val, i32* %addr, align 4 + ret void +} + +define void @st_d(i64* %p, i64 %val) { +; CHECK: st.d $r5, $r4, 2040 +; CHECK: jr $ra + %addr = getelementptr inbounds i64, i64* %p, i64 255 + store i64 %val, i64* %addr, align 8 + ret void +} + +define void @stptr_d(i64* %p, i64 %val) { +; CHECK: stptr.d $r5, $r4, 2048 +; CHECK: jr $ra + %addr = getelementptr inbounds i64, i64* %p, i64 256 + store i64 %val, i64* %addr, align 8 + ret void +} + +define void @stx_d(i64* %p, i64 %val) { +; CHECK: lu12i.w $r[[REG:[0-9]+]], 8 +; CHECK: stx.d $r5, $r4, $r[[REG:[0-9]+]] +; CHECK: jr $ra + %addr = getelementptr inbounds i64, i64* %p, i64 4096 + store i64 %val, i64* %addr, align 8 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/tailcall-R.ll b/llvm/test/CodeGen/LoongArch/tailcall-R.ll new file mode 100644 index 000000000000..2445e32eaa97 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/tailcall-R.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s + +@errors = external local_unnamed_addr global i32, align 4 + +define signext i32 @compare(i8* %x, i8* %y) { +; CHECK-LABEL: compare: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 23, -16 +; CHECK-NEXT: ld.w $r23, $r5, 0 +; CHECK-NEXT: ld.d $r6, $r4, 8 +; CHECK-NEXT: beqz $r23, .LBB0_3 +; CHECK-NEXT: # %bb.1: # %land.lhs.true +; CHECK-NEXT: ld.w $r4, $r4, 0 +; CHECK-NEXT: st.d $r6, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ld.d $r5, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: jirl $ra, $r5, 0 +; CHECK-NEXT: 
ld.d $r6, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: beqz $r4, .LBB0_3 +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: la.got $r4, errors +; CHECK-NEXT: # la expanded slot +; CHECK-NEXT: ld.w $r5, $r4, 0 +; CHECK-NEXT: addi.w $r5, $r5, 1 +; CHECK-NEXT: st.w $r5, $r4, 0 +; CHECK-NEXT: .LBB0_3: # %if.end +; CHECK-NEXT: move $r4, $r23 +; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: jr $r6 +entry: + %compare = getelementptr inbounds i8, i8* %x, i64 8 + %0 = bitcast i8* %compare to i32 (i32)** + %1 = load i32 (i32)*, i32 (i32)** %0, align 8 + %elt = bitcast i8* %y to i32* + %2 = load i32, i32* %elt, align 8 + %cmp = icmp eq i32 %2, 0 + br i1 %cmp, label %if.end, label %land.lhs.true + +land.lhs.true: ; preds = %entry + %elt3 = bitcast i8* %x to i32* + %3 = load i32, i32* %elt3, align 8 + %call4 = tail call signext i32 %1(i32 signext %3) + %cmp5 = icmp eq i32 %call4, 0 + br i1 %cmp5, label %if.end, label %if.then + +if.then: ; preds = %land.lhs.true + %4 = load i32, i32* @errors, align 4 + %inc = add nsw i32 %4, 1 + store i32 %inc, i32* @errors, align 4 + br label %if.end + +if.end: ; preds = %if.then, %land.lhs.true, %entry + %call6 = tail call signext i32 %1(i32 signext %2) + ret i32 %call6 +} diff --git a/llvm/test/CodeGen/LoongArch/tailcall-check.ll b/llvm/test/CodeGen/LoongArch/tailcall-check.ll new file mode 100644 index 000000000000..2b5902d69bb6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/tailcall-check.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s + +; Perform tail call optimization for global address. +declare i32 @callee_tail(i32 %i) +define i32 @caller_tail(i32 %i) { +; CHECK-LABEL: caller_tail: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b callee_tail +entry: + %r = tail call i32 @callee_tail(i32 %i) + ret i32 %r +} + + +; Do not tail call optimize functions with varargs. +declare i32 @callee_varargs(i32, ...) +define void @caller_varargs(i32 %a, i32 %b) { +; CHECK-LABEL: caller_varargs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: move $r6, $r5 +; CHECK-NEXT: move $r7, $r4 +; CHECK-NEXT: bl callee_varargs +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra +entry: + %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a) + ret void +} + + +; Do not tail call optimize if stack is used to pass parameters. 
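+; (Under the LP64 convention the first eight integer arguments travel in
+; registers $a0-$a7, i.e. $r4-$r11; the fourteen i32 arguments below push six
+; onto the stack, which is what blocks the tail call.)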
+declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) +define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) { +; CHECK-LABEL: caller_args: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: ld.d $r12, $sp, 64 +; CHECK-NEXT: ld.d $r13, $sp, 72 +; CHECK-NEXT: ld.d $r14, $sp, 80 +; CHECK-NEXT: ld.d $r15, $sp, 88 +; CHECK-NEXT: ld.d $r16, $sp, 96 +; CHECK-NEXT: ld.d $r17, $sp, 104 +; CHECK-NEXT: st.d $r17, $sp, 40 +; CHECK-NEXT: st.d $r16, $sp, 32 +; CHECK-NEXT: st.d $r15, $sp, 24 +; CHECK-NEXT: st.d $r14, $sp, 16 +; CHECK-NEXT: st.d $r13, $sp, 8 +; CHECK-NEXT: st.d $r12, $sp, 0 +; CHECK-NEXT: bl callee_args +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: jr $ra +entry: + %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) + ret i32 %r +} + + +; Do not tail call optimize for exception-handling functions. +declare void @callee_interrupt() +define void @caller_interrupt() #0 { +; CHECK-LABEL: caller_interrupt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: bl callee_interrupt +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra +entry: + tail call void @callee_interrupt() + ret void +} +attributes #0 = { "interrupt"="machine" } + + +; Do not tail call optimize functions with byval parameters. +declare i32 @callee_byval(i32** byval(i32*) %a) +define i32 @caller_byval() { +; CHECK-LABEL: caller_byval: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: ld.d $r4, $sp, 16 +; CHECK-NEXT: st.d $r4, $sp, 0 +; CHECK-NEXT: bl callee_byval +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: jr $ra +entry: + %a = alloca i32* + %r = tail call i32 @callee_byval(i32** byval(i32*) %a) + ret i32 %r +} + + +; Do not tail call optimize if callee uses structret semantics. +%struct.A = type { i32 } +@a = global %struct.A zeroinitializer + +declare void @callee_struct(%struct.A* sret(%struct.A) %a) +define void @caller_nostruct() { +; CHECK-LABEL: caller_nostruct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: la.got $r4, a +; CHECK-NEXT: # la expanded slot +; CHECK-NEXT: bl callee_struct +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra +entry: + tail call void @callee_struct(%struct.A* sret(%struct.A) @a) + ret void +} + + +; Do not tail call optimize if caller uses structret semantics. 
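+; (The caller must hand its incoming sret pointer back in $a0/$r4, so it is
+; kept alive in $r23 across the call and restored before returning; a tail
+; call could not preserve that.)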
+declare void @callee_nostruct() +define void @caller_struct(%struct.A* sret(%struct.A) %a) { +; CHECK-LABEL: caller_struct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: st.d $r23, $sp, 0 # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset 1, -8 +; CHECK-NEXT: .cfi_offset 23, -16 +; CHECK-NEXT: move $r23, $r4 +; CHECK-NEXT: bl callee_nostruct +; CHECK-NEXT: move $r4, $r23 +; CHECK-NEXT: ld.d $r23, $sp, 0 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: jr $ra +entry: + tail call void @callee_nostruct() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/tailcall-mem.ll b/llvm/test/CodeGen/LoongArch/tailcall-mem.ll new file mode 100644 index 000000000000..68ddaa8997b0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/tailcall-mem.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s + + +define void @tail_memcpy(i8* %p, i8* %q, i32 %n) { +; CHECK-LABEL: tail_memcpy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b memcpy +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) + ret void +} + +define void @tail_memmove(i8* %p, i8* %q, i32 %n) { +; CHECK-LABEL: tail_memmove: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b memmove +entry: + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) + ret void +} + +define void @tail_memset(i8* %p, i8 %c, i32 %n) { +; CHECK-LABEL: tail_memset: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b memset +entry: + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) + diff --git a/llvm/test/CodeGen/LoongArch/tailcall.ll b/llvm/test/CodeGen/LoongArch/tailcall.ll new file mode 100644 index 000000000000..984df2cb63d1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/tailcall.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -relocation-model=pic < %s | FileCheck %s + +define void @f() { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: b foo +entry: + tail call void bitcast (void (...)* @foo to void ()*)() + ret void +} + +declare void @foo(...) 
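+; Note the lowering above: the tail call becomes a bare branch (`b foo`)
+; rather than `bl foo`, so no frame is built and foo returns directly to f's
+; caller.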
diff --git a/llvm/test/CodeGen/LoongArch/target_support.ll b/llvm/test/CodeGen/LoongArch/target_support.ll deleted file mode 100644 index b7796e63387e..000000000000 --- a/llvm/test/CodeGen/LoongArch/target_support.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: llc --version | FileCheck %s -; CHECK: loongarch32 - 32-bit LoongArch -; CHECK: loongarch64 - 64-bit LoongArch diff --git a/llvm/test/CodeGen/LoongArch/thread-pointer.ll b/llvm/test/CodeGen/LoongArch/thread-pointer.ll new file mode 100644 index 000000000000..06a5886c4cab --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/thread-pointer.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +declare i8* @llvm.thread.pointer() nounwind readnone + +define i8* @thread_pointer() { +; CHECK: move $r4, $tp + %1 = tail call i8* @llvm.thread.pointer() + ret i8* %1 +} diff --git a/llvm/test/CodeGen/LoongArch/trap.ll b/llvm/test/CodeGen/LoongArch/trap.ll new file mode 100644 index 000000000000..4a4b54438912 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/trap.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define void @test_trap() nounwind { +; CHECK-LABEL: test_trap: +; CHECK: # %bb.0: +; CHECK-NEXT: break 0 +; CHECK-NEXT: jr $ra + call void @llvm.trap() + ret void +} + +declare void @llvm.trap() diff --git a/llvm/test/CodeGen/LoongArch/trunc.ll b/llvm/test/CodeGen/LoongArch/trunc.ll new file mode 100644 index 000000000000..d1b5a3a14f49 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/trunc.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 < %s | FileCheck %s + +define signext i32 @foo1(i64 %a, i64 %b) { +; CHECK-LABEL: foo1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add.w $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %add = add nsw i32 %conv1, %conv + ret i32 %add +} + +define signext i32 @foo2(i64 %a, i64 %b) { +; CHECK-LABEL: foo2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sub.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %sub = sub nsw i32 %conv, %conv1 + ret i32 %sub +} + +define signext i32 @foo3(i64 %a, i64 %b) { +; CHECK-LABEL: foo3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sll.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %shl = shl i32 %conv, %conv1 + ret i32 %shl +} + +define signext i32 @foo4(i64 %a, i64 %b) { +; CHECK-LABEL: foo4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: srl.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %shr = lshr i32 %conv, %conv1 + ret i32 %shr +} + +define signext i32 @foo5(i64 %a, i64 %b) { +; CHECK-LABEL: foo5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mul.w $r4, $r5, $r4 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %mul = mul nsw i32 %conv1, %conv + ret i32 %mul +} + +define signext i32 @foo6(i64 %a, i64 %b) { +; CHECK-LABEL: foo6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sra.w $r4, $r4, $r5 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %shr = ashr i32 %conv, %conv1 + ret i32 %shr +} + +define signext i32 @sdiv(i64 %a, i64 %b) { +; CHECK-LABEL: sdiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: div.w $r4, 
$r4, $r5 +; CHECK-NEXT: bne $r5, $zero, 8 +; CHECK-NEXT: break 7 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %div = sdiv i32 %conv, %conv1 + ret i32 %div +} + +define signext i32 @udiv(i64 %a, i64 %b) { +; CHECK-LABEL: udiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli.w $r5, $r5, 0 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: div.wu $r4, $r4, $r5 +; CHECK-NEXT: bne $r5, $zero, 8 +; CHECK-NEXT: break 7 +; CHECK-NEXT: slli.w $r4, $r4, 0 +; CHECK-NEXT: jr $ra +entry: + %conv = trunc i64 %a to i32 + %conv1 = trunc i64 %b to i32 + %div = udiv i32 %conv, %conv1 + ret i32 %div +} diff --git a/llvm/test/CodeGen/LoongArch/unalignment.ll b/llvm/test/CodeGen/LoongArch/unalignment.ll new file mode 100644 index 000000000000..d468a361f96a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/unalignment.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=loongarch64 -o - %s | FileCheck -check-prefix=UNALIGNED %s +; RUN: llc -march=loongarch64 -mattr=+unaligned-access -o - %s | FileCheck -check-prefix=UNALIGNED %s +; RUN: llc -march=loongarch64 -mattr=-unaligned-access -o - %s | FileCheck -check-prefix=ALIGNED %s + +define i32 @i32_load(i32* %p) { +; UNALIGNED-LABEL: i32_load: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.w $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra +; +; ALIGNED-LABEL: i32_load: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.hu $r5, $r4, 0 +; ALIGNED-NEXT: ld.hu $r4, $r4, 2 +; ALIGNED-NEXT: slli.w $r4, $r4, 16 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra + %tmp = load i32, i32* %p, align 2 + ret i32 %tmp +} + +define signext i32 @i32_sextload(i32* %p) { +; UNALIGNED-LABEL: i32_sextload: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.w $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra +; +; ALIGNED-LABEL: i32_sextload: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.hu $r5, $r4, 0 +; ALIGNED-NEXT: ld.h $r4, $r4, 2 +; ALIGNED-NEXT: slli.d $r4, $r4, 16 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra + %tmp = load i32, i32* %p, align 2 + ret i32 %tmp +} + +define zeroext i32 @i32_zextload(i32* %p) { +; UNALIGNED-LABEL: i32_zextload: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.wu $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra +; +; ALIGNED-LABEL: i32_zextload: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.hu $r5, $r4, 0 +; ALIGNED-NEXT: ld.hu $r4, $r4, 2 +; ALIGNED-NEXT: slli.d $r4, $r4, 16 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra + %tmp = load i32, i32* %p, align 2 + ret i32 %tmp +} + +define i64 @i64_load(i64* %p) { +; UNALIGNED-LABEL: i64_load: +; UNALIGNED: # %bb.0: +; UNALIGNED-NEXT: ld.d $r4, $r4, 0 +; UNALIGNED-NEXT: jr $ra +; +; ALIGNED-LABEL: i64_load: +; ALIGNED: # %bb.0: +; ALIGNED-NEXT: ld.wu $r5, $r4, 0 +; ALIGNED-NEXT: ld.wu $r4, $r4, 4 +; ALIGNED-NEXT: slli.d $r4, $r4, 32 +; ALIGNED-NEXT: or $r4, $r4, $r5 +; ALIGNED-NEXT: jr $ra + %tmp = load i64, i64* %p, align 4 + ret i64 %tmp +} diff --git a/llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll b/llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll new file mode 100644 index 000000000000..a360bddb7265 --- /dev/null +++ b/llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll @@ -0,0 +1,7548 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=MIPS32 +; RUN: llc 
-mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MIPS32O0
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MIPS32R2
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MIPS32R6
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MIPS32R6O0
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MIPS4
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MIPS64
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MIPS64R2
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MIPS64R6
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MIPS64R6O0
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \
+; RUN: FileCheck %s -check-prefix=MM32
+
+; We want to verify that the produced code is well formed at all optimization
+; levels; the remaining tests ensure correctness.
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3
+
+; Keep one big-endian check so that we don't reduce testing, but don't add more
+; since endianness doesn't affect the body of the atomic operations.
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=MIPS32EB + +@x = common global i32 0, align 4 + +define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind { +; MIPS32-LABEL: AtomicLoadAdd32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: lw $1, %got(x)($1) +; MIPS32-NEXT: $BB0_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: addu $3, $2, $4 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB0_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: AtomicLoadAdd32: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: $BB0_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB0_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicLoadAdd32: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: lw $1, %got(x)($1) +; MIPS32R2-NEXT: $BB0_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: addu $3, $2, $4 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB0_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: AtomicLoadAdd32: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: lw $1, %got(x)($1) +; MIPS32R6-NEXT: $BB0_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: addu $3, $2, $4 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB0_1 +; MIPS32R6-NEXT: nop +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: AtomicLoadAdd32: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) +; MIPS32R6O0-NEXT: $BB0_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB0_1 +; MIPS32R6O0-NEXT: nop +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicLoadAdd32: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB0_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: addu $3, $2, $4 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB0_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra 
+; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadAdd32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB0_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: addu $3, $2, $4 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB0_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadAdd32: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB0_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: addu $3, $2, $4 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB0_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadAdd32: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB0_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: addu $3, $2, $4 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB0_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadAdd32: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB0_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadAdd32: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(x)($2) +; MM32-NEXT: $BB0_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: addu16 $3, $2, $4 +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB0_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadAdd32: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(x)($1) +; O1-NEXT: $BB0_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: addu $3, $2, $4 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB0_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadAdd32: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, 
%got(x)($1) +; O2-NEXT: $BB0_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: addu $3, $2, $4 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB0_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadAdd32: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: lw $1, %got(x)($1) +; O3-NEXT: $BB0_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: addu $3, $2, $4 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB0_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadAdd32: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(x)($1) +; MIPS32EB-NEXT: $BB0_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $2, 0($1) +; MIPS32EB-NEXT: addu $3, $2, $4 +; MIPS32EB-NEXT: sc $3, 0($1) +; MIPS32EB-NEXT: beqz $3, $BB0_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw add i32* @x, i32 %incr monotonic + ret i32 %0 + +} + +define i32 @AtomicLoadSub32(i32 signext %incr) nounwind { +; MIPS32-LABEL: AtomicLoadSub32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: lw $1, %got(x)($1) +; MIPS32-NEXT: $BB1_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: subu $3, $2, $4 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB1_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: AtomicLoadSub32: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: $BB1_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: subu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB1_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicLoadSub32: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: lw $1, %got(x)($1) +; MIPS32R2-NEXT: $BB1_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: subu $3, $2, $4 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB1_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: AtomicLoadSub32: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: lw $1, %got(x)($1) +; MIPS32R6-NEXT: $BB1_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: subu $3, $2, $4 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB1_1 +; MIPS32R6-NEXT: nop +; MIPS32R6-NEXT: # %bb.2: # %entry +; 
MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: AtomicLoadSub32: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) +; MIPS32R6O0-NEXT: $BB1_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: subu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB1_1 +; MIPS32R6O0-NEXT: nop +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicLoadSub32: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB1_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: subu $3, $2, $4 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB1_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadSub32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB1_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: subu $3, $2, $4 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB1_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadSub32: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB1_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: subu $3, $2, $4 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB1_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadSub32: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB1_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: subu $3, $2, $4 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB1_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadSub32: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB1_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: subu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry 
+; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadSub32: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(x)($2) +; MM32-NEXT: $BB1_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: subu16 $3, $2, $4 +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB1_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadSub32: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(x)($1) +; O1-NEXT: $BB1_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: subu $3, $2, $4 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB1_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadSub32: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, %got(x)($1) +; O2-NEXT: $BB1_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: subu $3, $2, $4 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB1_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadSub32: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: lw $1, %got(x)($1) +; O3-NEXT: $BB1_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: subu $3, $2, $4 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB1_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadSub32: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(x)($1) +; MIPS32EB-NEXT: $BB1_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $2, 0($1) +; MIPS32EB-NEXT: subu $3, $2, $4 +; MIPS32EB-NEXT: sc $3, 0($1) +; MIPS32EB-NEXT: beqz $3, $BB1_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw sub i32* @x, i32 %incr monotonic + ret i32 %0 + +} + +define i32 @AtomicLoadXor32(i32 signext %incr) nounwind { +; MIPS32-LABEL: AtomicLoadXor32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: lw $1, %got(x)($1) +; MIPS32-NEXT: $BB2_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: xor $3, $2, $4 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB2_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: AtomicLoadXor32: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: $BB2_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: xor $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB2_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # 
%entry +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicLoadXor32: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: lw $1, %got(x)($1) +; MIPS32R2-NEXT: $BB2_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: xor $3, $2, $4 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB2_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: AtomicLoadXor32: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: lw $1, %got(x)($1) +; MIPS32R6-NEXT: $BB2_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: xor $3, $2, $4 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB2_1 +; MIPS32R6-NEXT: nop +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: AtomicLoadXor32: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) +; MIPS32R6O0-NEXT: $BB2_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: xor $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB2_1 +; MIPS32R6O0-NEXT: nop +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicLoadXor32: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB2_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: xor $3, $2, $4 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB2_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadXor32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB2_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: xor $3, $2, $4 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB2_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadXor32: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB2_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: xor $3, $2, $4 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB2_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadXor32: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, 
%hi(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB2_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: xor $3, $2, $4 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB2_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadXor32: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB2_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: xor $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadXor32: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(x)($2) +; MM32-NEXT: $BB2_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: xor $3, $2, $4 +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB2_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadXor32: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(x)($1) +; O1-NEXT: $BB2_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: xor $3, $2, $4 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB2_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadXor32: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, %got(x)($1) +; O2-NEXT: $BB2_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: xor $3, $2, $4 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB2_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadXor32: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: lw $1, %got(x)($1) +; O3-NEXT: $BB2_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: xor $3, $2, $4 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB2_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadXor32: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(x)($1) +; MIPS32EB-NEXT: $BB2_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $2, 0($1) +; MIPS32EB-NEXT: xor $3, $2, $4 +; MIPS32EB-NEXT: sc $3, 0($1) +; MIPS32EB-NEXT: beqz $3, $BB2_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = 
atomicrmw xor i32* @x, i32 %incr monotonic + ret i32 %0 +} + +define i32 @AtomicLoadOr32(i32 signext %incr) nounwind { +; MIPS32-LABEL: AtomicLoadOr32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: lw $1, %got(x)($1) +; MIPS32-NEXT: $BB3_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: or $3, $2, $4 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB3_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: AtomicLoadOr32: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: $BB3_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: or $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB3_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicLoadOr32: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: lw $1, %got(x)($1) +; MIPS32R2-NEXT: $BB3_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: or $3, $2, $4 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB3_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: AtomicLoadOr32: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: lw $1, %got(x)($1) +; MIPS32R6-NEXT: $BB3_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: or $3, $2, $4 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB3_1 +; MIPS32R6-NEXT: nop +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: AtomicLoadOr32: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) +; MIPS32R6O0-NEXT: $BB3_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: or $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB3_1 +; MIPS32R6O0-NEXT: nop +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicLoadOr32: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB3_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: or $3, $2, $4 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB3_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadOr32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu 
$1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB3_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: or $3, $2, $4 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB3_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadOr32: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB3_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: or $3, $2, $4 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB3_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadOr32: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB3_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: or $3, $2, $4 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB3_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadOr32: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB3_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: or $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadOr32: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(x)($2) +; MM32-NEXT: $BB3_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: or $3, $2, $4 +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB3_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadOr32: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(x)($1) +; O1-NEXT: $BB3_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: or $3, $2, $4 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB3_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadOr32: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, %got(x)($1) +; O2-NEXT: $BB3_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: or $3, $2, $4 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB3_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # 
%entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadOr32: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: lw $1, %got(x)($1) +; O3-NEXT: $BB3_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: or $3, $2, $4 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB3_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadOr32: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(x)($1) +; MIPS32EB-NEXT: $BB3_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $2, 0($1) +; MIPS32EB-NEXT: or $3, $2, $4 +; MIPS32EB-NEXT: sc $3, 0($1) +; MIPS32EB-NEXT: beqz $3, $BB3_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw or i32* @x, i32 %incr monotonic + ret i32 %0 +} + +define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind { +; MIPS32-LABEL: AtomicLoadAnd32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: lw $1, %got(x)($1) +; MIPS32-NEXT: $BB4_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: and $3, $2, $4 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB4_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: AtomicLoadAnd32: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: $BB4_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: and $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB4_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicLoadAnd32: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: lw $1, %got(x)($1) +; MIPS32R2-NEXT: $BB4_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: and $3, $2, $4 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB4_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: AtomicLoadAnd32: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: lw $1, %got(x)($1) +; MIPS32R6-NEXT: $BB4_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: and $3, $2, $4 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB4_1 +; MIPS32R6-NEXT: nop +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: AtomicLoadAnd32: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: lw $3, 
%got(x)($1) +; MIPS32R6O0-NEXT: $BB4_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: and $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB4_1 +; MIPS32R6O0-NEXT: nop +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicLoadAnd32: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB4_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: and $3, $2, $4 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB4_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadAnd32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB4_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: and $3, $2, $4 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB4_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadAnd32: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB4_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: and $3, $2, $4 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB4_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadAnd32: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB4_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: and $3, $2, $4 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB4_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadAnd32: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB4_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadAnd32: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(x)($2) +; MM32-NEXT: $BB4_1: # 
%entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: and $3, $2, $4 +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB4_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadAnd32: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(x)($1) +; O1-NEXT: $BB4_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: and $3, $2, $4 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB4_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadAnd32: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, %got(x)($1) +; O2-NEXT: $BB4_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: and $3, $2, $4 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB4_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadAnd32: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: lw $1, %got(x)($1) +; O3-NEXT: $BB4_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: and $3, $2, $4 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB4_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadAnd32: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(x)($1) +; MIPS32EB-NEXT: $BB4_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $2, 0($1) +; MIPS32EB-NEXT: and $3, $2, $4 +; MIPS32EB-NEXT: sc $3, 0($1) +; MIPS32EB-NEXT: beqz $3, $BB4_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw and i32* @x, i32 %incr monotonic + ret i32 %0 +} + +define i32 @AtomicLoadNand32(i32 signext %incr) nounwind { +; MIPS32-LABEL: AtomicLoadNand32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: lw $1, %got(x)($1) +; MIPS32-NEXT: $BB5_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: and $3, $2, $4 +; MIPS32-NEXT: nor $3, $zero, $3 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB5_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: AtomicLoadNand32: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: $BB5_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: and $1, $2, $4 +; MIPS32O0-NEXT: nor $1, $zero, $1 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB5_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicLoadNand32: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, 
$2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: lw $1, %got(x)($1) +; MIPS32R2-NEXT: $BB5_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: and $3, $2, $4 +; MIPS32R2-NEXT: nor $3, $zero, $3 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB5_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: AtomicLoadNand32: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: lw $1, %got(x)($1) +; MIPS32R6-NEXT: $BB5_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: and $3, $2, $4 +; MIPS32R6-NEXT: nor $3, $zero, $3 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB5_1 +; MIPS32R6-NEXT: nop +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: AtomicLoadNand32: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) +; MIPS32R6O0-NEXT: $BB5_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: and $1, $2, $4 +; MIPS32R6O0-NEXT: nor $1, $zero, $1 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB5_1 +; MIPS32R6O0-NEXT: nop +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicLoadNand32: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB5_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: and $3, $2, $4 +; MIPS4-NEXT: nor $3, $zero, $3 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB5_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadNand32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB5_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: and $3, $2, $4 +; MIPS64-NEXT: nor $3, $zero, $3 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB5_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadNand32: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB5_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: and $3, $2, $4 +; MIPS64R2-NEXT: nor $3, $zero, $3 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB5_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadNand32: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui 
$1, %hi(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB5_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: and $3, $2, $4 +; MIPS64R6-NEXT: nor $3, $zero, $3 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB5_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadNand32: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB5_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: nor $1, $zero, $1 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadNand32: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(x)($2) +; MM32-NEXT: $BB5_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: and $3, $2, $4 +; MM32-NEXT: nor $3, $zero, $3 +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB5_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadNand32: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(x)($1) +; O1-NEXT: $BB5_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: and $3, $2, $4 +; O1-NEXT: nor $3, $zero, $3 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB5_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadNand32: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, %got(x)($1) +; O2-NEXT: $BB5_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: and $3, $2, $4 +; O2-NEXT: nor $3, $zero, $3 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB5_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadNand32: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: lw $1, %got(x)($1) +; O3-NEXT: $BB5_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: and $3, $2, $4 +; O3-NEXT: nor $3, $zero, $3 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB5_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadNand32: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(x)($1) +; MIPS32EB-NEXT: $BB5_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $2, 0($1) +; 
MIPS32EB-NEXT: and $3, $2, $4 +; MIPS32EB-NEXT: nor $3, $zero, $3 +; MIPS32EB-NEXT: sc $3, 0($1) +; MIPS32EB-NEXT: beqz $3, $BB5_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw nand i32* @x, i32 %incr monotonic + ret i32 %0 + +} + +define i32 @AtomicSwap32(i32 signext %newval) nounwind { +; MIPS32-LABEL: AtomicSwap32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: sw $4, 4($sp) +; MIPS32-NEXT: lw $1, %got(x)($1) +; MIPS32-NEXT: $BB6_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: move $3, $4 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB6_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 8 +; +; MIPS32O0-LABEL: AtomicSwap32: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addiu $sp, $sp, -8 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: sw $4, 4($sp) +; MIPS32O0-NEXT: lw $4, 4($sp) +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: $BB6_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: move $1, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB6_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: addiu $sp, $sp, 8 +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicSwap32: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addiu $sp, $sp, -8 +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: sw $4, 4($sp) +; MIPS32R2-NEXT: lw $1, %got(x)($1) +; MIPS32R2-NEXT: $BB6_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: move $3, $4 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB6_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: addiu $sp, $sp, 8 +; +; MIPS32R6-LABEL: AtomicSwap32: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addiu $sp, $sp, -8 +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: sw $4, 4($sp) +; MIPS32R6-NEXT: lw $1, %got(x)($1) +; MIPS32R6-NEXT: $BB6_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: move $3, $4 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB6_1 +; MIPS32R6-NEXT: nop +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: addiu $sp, $sp, 8 +; +; MIPS32R6O0-LABEL: AtomicSwap32: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: sw $4, 4($sp) +; MIPS32R6O0-NEXT: lw $4, 4($sp) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) +; MIPS32R6O0-NEXT: $BB6_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: move $1, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB6_1 +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 +; 
MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicSwap32: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: daddiu $sp, $sp, -16 +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) +; MIPS4-NEXT: sw $4, 12($sp) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB6_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: move $3, $4 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB6_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64-LABEL: AtomicSwap32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) +; MIPS64-NEXT: sw $4, 12($sp) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB6_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: move $3, $4 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB6_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R2-LABEL: AtomicSwap32: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: daddiu $sp, $sp, -16 +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) +; MIPS64R2-NEXT: sw $4, 12($sp) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB6_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: move $3, $4 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB6_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6-LABEL: AtomicSwap32: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) +; MIPS64R6-NEXT: sw $4, 12($sp) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB6_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: move $3, $4 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB6_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jr $ra +; MIPS64R6-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6O0-LABEL: AtomicSwap32: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) +; MIPS64R6O0-NEXT: move $2, $4 +; MIPS64R6O0-NEXT: sw $2, 12($sp) +; MIPS64R6O0-NEXT: lw $4, 12($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB6_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicSwap32: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) 
+; MM32-NEXT: addiu $sp, $sp, -8 +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: sw $4, 4($sp) +; MM32-NEXT: lw $1, %got(x)($2) +; MM32-NEXT: $BB6_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: or $3, $4, $zero +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB6_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: addiusp 8 +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicSwap32: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addiu $sp, $sp, -8 +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: sw $4, 4($sp) +; O1-NEXT: lw $1, %got(x)($1) +; O1-NEXT: $BB6_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: move $3, $4 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB6_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: addiu $sp, $sp, 8 +; +; O2-LABEL: AtomicSwap32: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addiu $sp, $sp, -8 +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: sw $4, 4($sp) +; O2-NEXT: lw $1, %got(x)($1) +; O2-NEXT: $BB6_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: move $3, $4 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB6_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: addiu $sp, $sp, 8 +; +; O3-LABEL: AtomicSwap32: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addiu $sp, $sp, -8 +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: sw $4, 4($sp) +; O3-NEXT: lw $1, %got(x)($1) +; O3-NEXT: $BB6_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: move $3, $4 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB6_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: addiu $sp, $sp, 8 +; +; MIPS32EB-LABEL: AtomicSwap32: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addiu $sp, $sp, -8 +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: sw $4, 4($sp) +; MIPS32EB-NEXT: lw $1, %got(x)($1) +; MIPS32EB-NEXT: $BB6_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $2, 0($1) +; MIPS32EB-NEXT: move $3, $4 +; MIPS32EB-NEXT: sc $3, 0($1) +; MIPS32EB-NEXT: beqz $3, $BB6_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: addiu $sp, $sp, 8 +entry: + %newval.addr = alloca i32, align 4 + store i32 %newval, i32* %newval.addr, align 4 + %tmp = load i32, i32* %newval.addr, align 4 + %0 = atomicrmw xchg i32* @x, i32 %tmp monotonic + ret i32 %0 + +} + +define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { +; MIPS32-LABEL: AtomicCmpSwap32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: sw $5, 4($sp) +; MIPS32-NEXT: lw $1, %got(x)($1) +; MIPS32-NEXT: $BB7_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: bne $2, $4, $BB7_3 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS32-NEXT: move $3, $5 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB7_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB7_3: # %entry +; 
MIPS32-NEXT: sync +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: addiu $sp, $sp, 8 +; +; MIPS32O0-LABEL: AtomicCmpSwap32: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addiu $sp, $sp, -8 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: sw $5, 4($sp) +; MIPS32O0-NEXT: lw $6, 4($sp) +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: move $5, $4 +; MIPS32O0-NEXT: $BB7_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: bne $2, $5, $BB7_3 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS32O0-NEXT: move $1, $6 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB7_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: $BB7_3: # %entry +; MIPS32O0-NEXT: sync +; MIPS32O0-NEXT: xor $1, $2, $4 +; MIPS32O0-NEXT: sltiu $1, $1, 1 +; MIPS32O0-NEXT: addiu $sp, $sp, 8 +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicCmpSwap32: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addiu $sp, $sp, -8 +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: sw $5, 4($sp) +; MIPS32R2-NEXT: lw $1, %got(x)($1) +; MIPS32R2-NEXT: $BB7_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: bne $2, $4, $BB7_3 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS32R2-NEXT: move $3, $5 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB7_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: $BB7_3: # %entry +; MIPS32R2-NEXT: sync +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: addiu $sp, $sp, 8 +; +; MIPS32R6-LABEL: AtomicCmpSwap32: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addiu $sp, $sp, -8 +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: sw $5, 4($sp) +; MIPS32R6-NEXT: lw $1, %got(x)($1) +; MIPS32R6-NEXT: $BB7_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: bnec $2, $4, $BB7_3 +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS32R6-NEXT: move $3, $5 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB7_1 +; MIPS32R6-NEXT: $BB7_3: # %entry +; MIPS32R6-NEXT: sync +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: addiu $sp, $sp, 8 +; +; MIPS32R6O0-LABEL: AtomicCmpSwap32: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: sw $5, 4($sp) +; MIPS32R6O0-NEXT: lw $5, 4($sp) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) +; MIPS32R6O0-NEXT: $BB7_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: bnec $2, $4, $BB7_3 +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS32R6O0-NEXT: move $1, $5 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB7_1 +; MIPS32R6O0-NEXT: $BB7_3: # %entry +; MIPS32R6O0-NEXT: sync +; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicCmpSwap32: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: daddiu $sp, $sp, -16 +; MIPS4-NEXT: lui $1, 
%hi(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS4-NEXT: sw $5, 12($sp) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB7_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: bne $2, $4, .LBB7_3 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS4-NEXT: move $3, $5 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB7_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: .LBB7_3: # %entry +; MIPS4-NEXT: sync +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64-LABEL: AtomicCmpSwap32: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64-NEXT: sw $5, 12($sp) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB7_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: bne $2, $4, .LBB7_3 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS64-NEXT: move $3, $5 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB7_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: .LBB7_3: # %entry +; MIPS64-NEXT: sync +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R2-LABEL: AtomicCmpSwap32: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: daddiu $sp, $sp, -16 +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64R2-NEXT: sw $5, 12($sp) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB7_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: bne $2, $4, .LBB7_3 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS64R2-NEXT: move $3, $5 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB7_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: .LBB7_3: # %entry +; MIPS64R2-NEXT: sync +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6-LABEL: AtomicCmpSwap32: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64R6-NEXT: sw $5, 12($sp) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB7_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: bnec $2, $4, .LBB7_3 +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS64R6-NEXT: move $3, $5 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB7_1 +; MIPS64R6-NEXT: .LBB7_3: # %entry +; MIPS64R6-NEXT: sync +; MIPS64R6-NEXT: jr $ra +; MIPS64R6-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6O0-LABEL: AtomicCmpSwap32: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: move $2, $5 +; MIPS64R6O0-NEXT: 
sw $2, 12($sp) +; MIPS64R6O0-NEXT: lw $5, 12($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB7_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 +; MIPS64R6O0-NEXT: .LBB7_3: # %entry +; MIPS64R6O0-NEXT: sync +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicCmpSwap32: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addiu $sp, $sp, -8 +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: sw $5, 4($sp) +; MM32-NEXT: lw $1, %got(x)($2) +; MM32-NEXT: $BB7_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: bne $2, $4, $BB7_3 +; MM32-NEXT: nop +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MM32-NEXT: move $3, $5 +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB7_1 +; MM32-NEXT: $BB7_3: # %entry +; MM32-NEXT: sync +; MM32-NEXT: addiusp 8 +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicCmpSwap32: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addiu $sp, $sp, -8 +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: sw $5, 4($sp) +; O1-NEXT: lw $1, %got(x)($1) +; O1-NEXT: $BB7_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: bne $2, $4, $BB7_3 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: # in Loop: Header=BB7_1 Depth=1 +; O1-NEXT: move $3, $5 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB7_1 +; O1-NEXT: nop +; O1-NEXT: $BB7_3: # %entry +; O1-NEXT: sync +; O1-NEXT: jr $ra +; O1-NEXT: addiu $sp, $sp, 8 +; +; O2-LABEL: AtomicCmpSwap32: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addiu $sp, $sp, -8 +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: sw $5, 4($sp) +; O2-NEXT: lw $1, %got(x)($1) +; O2-NEXT: $BB7_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: bne $2, $4, $BB7_3 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: # in Loop: Header=BB7_1 Depth=1 +; O2-NEXT: move $3, $5 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB7_1 +; O2-NEXT: nop +; O2-NEXT: $BB7_3: # %entry +; O2-NEXT: sync +; O2-NEXT: jr $ra +; O2-NEXT: addiu $sp, $sp, 8 +; +; O3-LABEL: AtomicCmpSwap32: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addiu $sp, $sp, -8 +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: sw $5, 4($sp) +; O3-NEXT: lw $1, %got(x)($1) +; O3-NEXT: $BB7_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: bne $2, $4, $BB7_3 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: # in Loop: Header=BB7_1 Depth=1 +; O3-NEXT: move $3, $5 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB7_1 +; O3-NEXT: nop +; O3-NEXT: $BB7_3: # %entry +; O3-NEXT: sync +; O3-NEXT: jr $ra +; O3-NEXT: addiu $sp, $sp, 8 +; +; MIPS32EB-LABEL: AtomicCmpSwap32: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addiu $sp, $sp, -8 +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: sw $5, 4($sp) +; MIPS32EB-NEXT: lw $1, %got(x)($1) +; 
MIPS32EB-NEXT: $BB7_1: # %entry
+; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32EB-NEXT: ll $2, 0($1)
+; MIPS32EB-NEXT: bne $2, $4, $BB7_3
+; MIPS32EB-NEXT: nop
+; MIPS32EB-NEXT: # %bb.2: # %entry
+; MIPS32EB-NEXT: # in Loop: Header=BB7_1 Depth=1
+; MIPS32EB-NEXT: move $3, $5
+; MIPS32EB-NEXT: sc $3, 0($1)
+; MIPS32EB-NEXT: beqz $3, $BB7_1
+; MIPS32EB-NEXT: nop
+; MIPS32EB-NEXT: $BB7_3: # %entry
+; MIPS32EB-NEXT: sync
+; MIPS32EB-NEXT: jr $ra
+; MIPS32EB-NEXT: addiu $sp, $sp, 8
+entry:
+ %newval.addr = alloca i32, align 4
+ store i32 %newval, i32* %newval.addr, align 4
+ %tmp = load i32, i32* %newval.addr, align 4
+ %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic monotonic
+ %1 = extractvalue { i32, i1 } %0, 0
+ ret i32 %1
+
+}
+
+@y = common global i8 0, align 1
+
+define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
+; MIPS32-LABEL: AtomicLoadAdd8:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32-NEXT: addu $1, $2, $25
+; MIPS32-NEXT: lw $1, %got(y)($1)
+; MIPS32-NEXT: addiu $2, $zero, -4
+; MIPS32-NEXT: and $3, $1, $2
+; MIPS32-NEXT: andi $1, $1, 3
+; MIPS32-NEXT: sll $1, $1, 3
+; MIPS32-NEXT: ori $2, $zero, 255
+; MIPS32-NEXT: sllv $5, $2, $1
+; MIPS32-NEXT: nor $6, $zero, $5
+; MIPS32-NEXT: sllv $4, $4, $1
+; MIPS32-NEXT: $BB8_1: # %entry
+; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32-NEXT: ll $7, 0($3)
+; MIPS32-NEXT: addu $8, $7, $4
+; MIPS32-NEXT: and $8, $8, $5
+; MIPS32-NEXT: and $9, $7, $6
+; MIPS32-NEXT: or $9, $9, $8
+; MIPS32-NEXT: sc $9, 0($3)
+; MIPS32-NEXT: beqz $9, $BB8_1
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: # %bb.2: # %entry
+; MIPS32-NEXT: and $2, $7, $5
+; MIPS32-NEXT: srlv $2, $2, $1
+; MIPS32-NEXT: sll $2, $2, 24
+; MIPS32-NEXT: sra $2, $2, 24
+; MIPS32-NEXT: # %bb.3: # %entry
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+;
+; MIPS32O0-LABEL: AtomicLoadAdd8:
+; MIPS32O0: # %bb.0: # %entry
+; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32O0-NEXT: addiu $sp, $sp, -8
+; MIPS32O0-NEXT: addu $1, $2, $25
+; MIPS32O0-NEXT: lw $1, %got(y)($1)
+; MIPS32O0-NEXT: addiu $2, $zero, -4
+; MIPS32O0-NEXT: and $5, $1, $2
+; MIPS32O0-NEXT: andi $1, $1, 3
+; MIPS32O0-NEXT: sll $9, $1, 3
+; MIPS32O0-NEXT: ori $1, $zero, 255
+; MIPS32O0-NEXT: sllv $7, $1, $9
+; MIPS32O0-NEXT: nor $8, $zero, $7
+; MIPS32O0-NEXT: sllv $6, $4, $9
+; MIPS32O0-NEXT: $BB8_1: # %entry
+; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32O0-NEXT: ll $2, 0($5)
+; MIPS32O0-NEXT: addu $3, $2, $6
+; MIPS32O0-NEXT: and $3, $3, $7
+; MIPS32O0-NEXT: and $4, $2, $8
+; MIPS32O0-NEXT: or $4, $4, $3
+; MIPS32O0-NEXT: sc $4, 0($5)
+; MIPS32O0-NEXT: beqz $4, $BB8_1
+; MIPS32O0-NEXT: nop
+; MIPS32O0-NEXT: # %bb.2: # %entry
+; MIPS32O0-NEXT: and $1, $2, $7
+; MIPS32O0-NEXT: srlv $1, $1, $9
+; MIPS32O0-NEXT: sll $1, $1, 24
+; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: # %bb.3: # %entry
+; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: # %bb.4: # %entry
+; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32O0-NEXT: sll $1, $1, 24
+; MIPS32O0-NEXT: sra $2, $1, 24
+; MIPS32O0-NEXT: addiu $sp, $sp, 8
+; MIPS32O0-NEXT: jr $ra
+; MIPS32O0-NEXT: nop
+;
+; MIPS32R2-LABEL: AtomicLoadAdd8:
+; MIPS32R2: # %bb.0: # %entry
+; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R2-NEXT: addu $1, $2, $25
+; MIPS32R2-NEXT: lw $1, %got(y)($1)
+; MIPS32R2-NEXT: addiu $2, $zero, -4
+; MIPS32R2-NEXT: and $3, $1, $2
+; MIPS32R2-NEXT: andi $1, $1, 3
+; MIPS32R2-NEXT: sll $1, $1, 3
+; MIPS32R2-NEXT: ori $2, $zero, 255
+; MIPS32R2-NEXT: sllv $5, $2, $1
+; MIPS32R2-NEXT: nor $6, $zero, $5
+; MIPS32R2-NEXT: sllv $4, $4, $1
+; MIPS32R2-NEXT: $BB8_1: # %entry
+; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R2-NEXT: ll $7, 0($3)
+; MIPS32R2-NEXT: addu $8, $7, $4
+; MIPS32R2-NEXT: and $8, $8, $5
+; MIPS32R2-NEXT: and $9, $7, $6
+; MIPS32R2-NEXT: or $9, $9, $8
+; MIPS32R2-NEXT: sc $9, 0($3)
+; MIPS32R2-NEXT: beqz $9, $BB8_1
+; MIPS32R2-NEXT: nop
+; MIPS32R2-NEXT: # %bb.2: # %entry
+; MIPS32R2-NEXT: and $2, $7, $5
+; MIPS32R2-NEXT: srlv $2, $2, $1
+; MIPS32R2-NEXT: seb $2, $2
+; MIPS32R2-NEXT: # %bb.3: # %entry
+; MIPS32R2-NEXT: jr $ra
+; MIPS32R2-NEXT: nop
+;
+; MIPS32R6-LABEL: AtomicLoadAdd8:
+; MIPS32R6: # %bb.0: # %entry
+; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R6-NEXT: addu $1, $2, $25
+; MIPS32R6-NEXT: lw $1, %got(y)($1)
+; MIPS32R6-NEXT: addiu $2, $zero, -4
+; MIPS32R6-NEXT: and $3, $1, $2
+; MIPS32R6-NEXT: andi $1, $1, 3
+; MIPS32R6-NEXT: sll $1, $1, 3
+; MIPS32R6-NEXT: ori $2, $zero, 255
+; MIPS32R6-NEXT: sllv $5, $2, $1
+; MIPS32R6-NEXT: nor $6, $zero, $5
+; MIPS32R6-NEXT: sllv $4, $4, $1
+; MIPS32R6-NEXT: $BB8_1: # %entry
+; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6-NEXT: ll $7, 0($3)
+; MIPS32R6-NEXT: addu $8, $7, $4
+; MIPS32R6-NEXT: and $8, $8, $5
+; MIPS32R6-NEXT: and $9, $7, $6
+; MIPS32R6-NEXT: or $9, $9, $8
+; MIPS32R6-NEXT: sc $9, 0($3)
+; MIPS32R6-NEXT: beqzc $9, $BB8_1
+; MIPS32R6-NEXT: # %bb.2: # %entry
+; MIPS32R6-NEXT: and $2, $7, $5
+; MIPS32R6-NEXT: srlv $2, $2, $1
+; MIPS32R6-NEXT: seb $2, $2
+; MIPS32R6-NEXT: # %bb.3: # %entry
+; MIPS32R6-NEXT: jrc $ra
+;
+; MIPS32R6O0-LABEL: AtomicLoadAdd8:
+; MIPS32R6O0: # %bb.0: # %entry
+; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
+; MIPS32R6O0-NEXT: addu $1, $2, $25
+; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
+; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
+; MIPS32R6O0-NEXT: addiu $2, $zero, -4
+; MIPS32R6O0-NEXT: and $5, $1, $2
+; MIPS32R6O0-NEXT: andi $1, $1, 3
+; MIPS32R6O0-NEXT: sll $9, $1, 3
+; MIPS32R6O0-NEXT: ori $1, $zero, 255
+; MIPS32R6O0-NEXT: sllv $7, $1, $9
+; MIPS32R6O0-NEXT: nor $8, $zero, $7
+; MIPS32R6O0-NEXT: sllv $6, $4, $9
+; MIPS32R6O0-NEXT: $BB8_1: # %entry
+; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6O0-NEXT: ll $2, 0($5)
+; MIPS32R6O0-NEXT: addu $3, $2, $6
+; MIPS32R6O0-NEXT: and $3, $3, $7
+; MIPS32R6O0-NEXT: and $4, $2, $8
+; MIPS32R6O0-NEXT: or $4, $4, $3
+; MIPS32R6O0-NEXT: sc $4, 0($5)
+; MIPS32R6O0-NEXT: beqzc $4, $BB8_1
+; MIPS32R6O0-NEXT: # %bb.2: # %entry
+; MIPS32R6O0-NEXT: and $1, $2, $7
+; MIPS32R6O0-NEXT: srlv $1, $1, $9
+; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: # %bb.3: # %entry
+; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: # %bb.4: # %entry
+; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
+; MIPS32R6O0-NEXT: jrc $ra
+;
+; MIPS4-LABEL: AtomicLoadAdd8:
+; MIPS4: # %bb.0: # %entry
+; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8)))
+; MIPS4-NEXT: daddu $1, $1, $25
+; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8)))
+; MIPS4-NEXT: ld $1, %got_disp(y)($1)
+; MIPS4-NEXT: daddiu $2, $zero, -4
+; MIPS4-NEXT: and $3, $1, $2
+; MIPS4-NEXT: andi $1, $1, 3
+; MIPS4-NEXT: sll $1, $1, 3
+; MIPS4-NEXT: ori $2, $zero, 255
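
(The i8 tests here all use the same subword widening visible in the checks above: clear the low two address bits to reach the containing word (`and` with -4), turn the byte offset into a bit shift (`andi` then `sll` by 3), build a 0xff lane mask (`ori 255` shifted with `sllv`, inverted with `nor`), then run the 32-bit ll/sc loop and splice the updated byte back into the word before `sc`. A hedged C sketch of one loop iteration follows; the helper name is invented for illustration and is not part of this patch.

#include <stdint.h>

/* One iteration of the masked fetch-add loop used by AtomicLoadAdd8.
 * `word` is the value returned by ll; the caller would retry via sc. */
uint32_t add_byte_in_word(uint32_t word, uint8_t incr, unsigned addr) {
  unsigned shift = (addr & 3u) << 3;            /* andi $1,$1,3 ; sll $1,$1,3 */
  uint32_t mask = (uint32_t)0xff << shift;      /* ori $2,$zero,255 ; sllv    */
  uint32_t sum = (word + ((uint32_t)incr << shift)) & mask;  /* addu ; and   */
  return (word & ~mask) | sum;                  /* nor ; and ; or (before sc) */
}
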
+; MIPS4-NEXT: sllv $5, $2, $1 +; MIPS4-NEXT: nor $6, $zero, $5 +; MIPS4-NEXT: sllv $4, $4, $1 +; MIPS4-NEXT: .LBB8_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $7, 0($3) +; MIPS4-NEXT: addu $8, $7, $4 +; MIPS4-NEXT: and $8, $8, $5 +; MIPS4-NEXT: and $9, $7, $6 +; MIPS4-NEXT: or $9, $9, $8 +; MIPS4-NEXT: sc $9, 0($3) +; MIPS4-NEXT: beqz $9, .LBB8_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: and $2, $7, $5 +; MIPS4-NEXT: srlv $2, $2, $1 +; MIPS4-NEXT: sll $2, $2, 24 +; MIPS4-NEXT: sra $2, $2, 24 +; MIPS4-NEXT: # %bb.3: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadAdd8: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64-NEXT: ld $1, %got_disp(y)($1) +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $3, $1, $2 +; MIPS64-NEXT: andi $1, $1, 3 +; MIPS64-NEXT: sll $1, $1, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $5, $2, $1 +; MIPS64-NEXT: nor $6, $zero, $5 +; MIPS64-NEXT: sllv $4, $4, $1 +; MIPS64-NEXT: .LBB8_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $7, 0($3) +; MIPS64-NEXT: addu $8, $7, $4 +; MIPS64-NEXT: and $8, $8, $5 +; MIPS64-NEXT: and $9, $7, $6 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($3) +; MIPS64-NEXT: beqz $9, .LBB8_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: and $2, $7, $5 +; MIPS64-NEXT: srlv $2, $2, $1 +; MIPS64-NEXT: sll $2, $2, 24 +; MIPS64-NEXT: sra $2, $2, 24 +; MIPS64-NEXT: # %bb.3: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadAdd8: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64R2-NEXT: ld $1, %got_disp(y)($1) +; MIPS64R2-NEXT: daddiu $2, $zero, -4 +; MIPS64R2-NEXT: and $3, $1, $2 +; MIPS64R2-NEXT: andi $1, $1, 3 +; MIPS64R2-NEXT: sll $1, $1, 3 +; MIPS64R2-NEXT: ori $2, $zero, 255 +; MIPS64R2-NEXT: sllv $5, $2, $1 +; MIPS64R2-NEXT: nor $6, $zero, $5 +; MIPS64R2-NEXT: sllv $4, $4, $1 +; MIPS64R2-NEXT: .LBB8_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $7, 0($3) +; MIPS64R2-NEXT: addu $8, $7, $4 +; MIPS64R2-NEXT: and $8, $8, $5 +; MIPS64R2-NEXT: and $9, $7, $6 +; MIPS64R2-NEXT: or $9, $9, $8 +; MIPS64R2-NEXT: sc $9, 0($3) +; MIPS64R2-NEXT: beqz $9, .LBB8_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: and $2, $7, $5 +; MIPS64R2-NEXT: srlv $2, $2, $1 +; MIPS64R2-NEXT: seb $2, $2 +; MIPS64R2-NEXT: # %bb.3: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadAdd8: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64R6-NEXT: ld $1, %got_disp(y)($1) +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $3, $1, $2 +; MIPS64R6-NEXT: andi $1, $1, 3 +; MIPS64R6-NEXT: sll $1, $1, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $5, $2, $1 +; MIPS64R6-NEXT: nor $6, $zero, $5 +; MIPS64R6-NEXT: sllv $4, $4, $1 +; MIPS64R6-NEXT: .LBB8_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $7, 0($3) +; MIPS64R6-NEXT: addu $8, $7, $4 +; MIPS64R6-NEXT: 
and $8, $8, $5 +; MIPS64R6-NEXT: and $9, $7, $6 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($3) +; MIPS64R6-NEXT: beqzc $9, .LBB8_1 +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: and $2, $7, $5 +; MIPS64R6-NEXT: srlv $2, $2, $1 +; MIPS64R6-NEXT: seb $2, $2 +; MIPS64R6-NEXT: # %bb.3: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadAdd8: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 +; MIPS64R6O0-NEXT: .LBB8_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: addu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB8_1 +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 +; MIPS64R6O0-NEXT: # %bb.3: # %entry +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: # %bb.4: # %entry +; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadAdd8: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(y)($2) +; MM32-NEXT: addiu $2, $zero, -4 +; MM32-NEXT: and $3, $1, $2 +; MM32-NEXT: andi $1, $1, 3 +; MM32-NEXT: sll $1, $1, 3 +; MM32-NEXT: ori $2, $zero, 255 +; MM32-NEXT: sllv $5, $2, $1 +; MM32-NEXT: nor $6, $zero, $5 +; MM32-NEXT: sllv $4, $4, $1 +; MM32-NEXT: $BB8_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $7, 0($3) +; MM32-NEXT: addu $8, $7, $4 +; MM32-NEXT: and $8, $8, $5 +; MM32-NEXT: and $9, $7, $6 +; MM32-NEXT: or $9, $9, $8 +; MM32-NEXT: sc $9, 0($3) +; MM32-NEXT: beqzc $9, $BB8_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: and $2, $7, $5 +; MM32-NEXT: srlv $2, $2, $1 +; MM32-NEXT: seb $2, $2 +; MM32-NEXT: # %bb.3: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadAdd8: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(y)($1) +; O1-NEXT: addiu $2, $zero, -4 +; O1-NEXT: and $3, $1, $2 +; O1-NEXT: andi $1, $1, 3 +; O1-NEXT: sll $1, $1, 3 +; O1-NEXT: ori $2, $zero, 255 +; O1-NEXT: sllv $5, $2, $1 +; O1-NEXT: nor $6, $zero, $5 +; O1-NEXT: sllv $4, $4, $1 +; O1-NEXT: $BB8_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $7, 0($3) +; O1-NEXT: addu $8, $7, $4 +; O1-NEXT: and $8, $8, $5 +; O1-NEXT: and $9, $7, $6 +; O1-NEXT: or $9, $9, $8 +; O1-NEXT: sc $9, 0($3) +; O1-NEXT: beqz $9, $BB8_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: and $2, $7, $5 +; O1-NEXT: srlv $2, $2, $1 +; O1-NEXT: sll $2, $2, 24 +; O1-NEXT: sra $2, $2, 24 +; O1-NEXT: # %bb.3: # %entry +; O1-NEXT: jr $ra 
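
(One more detail in these subword loops: the lane shift is endian-dependent. Runs that use the low address bits directly are selecting the little-endian byte lane, while the big-endian MIPS32EB checks below, and the MIPS64R6O0 blocks here, first flip the byte index with an `xori` of 3. An illustrative helper, mirroring the andi/xori/sll sequences; the name is invented, not from the patch.

/* Byte-lane shift for the masked ll/sc loops; illustrative only. */
unsigned lane_shift(unsigned addr, int big_endian) {
  unsigned byte = addr & 3u;   /* andi $1, $1, 3 */
  if (big_endian)
    byte ^= 3u;                /* xori $1, $1, 3 */
  return byte << 3;            /* sll $1, $1, 3  */
}

For example, a byte at word offset 1 sits at bits [15:8] of the containing word on little-endian but at bits [23:16] on big-endian, which is exactly the flip the xori produces.
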
+; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadAdd8: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, %got(y)($1) +; O2-NEXT: addiu $2, $zero, -4 +; O2-NEXT: and $3, $1, $2 +; O2-NEXT: andi $1, $1, 3 +; O2-NEXT: sll $1, $1, 3 +; O2-NEXT: ori $2, $zero, 255 +; O2-NEXT: sllv $5, $2, $1 +; O2-NEXT: nor $6, $zero, $5 +; O2-NEXT: sllv $4, $4, $1 +; O2-NEXT: $BB8_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $7, 0($3) +; O2-NEXT: addu $8, $7, $4 +; O2-NEXT: and $8, $8, $5 +; O2-NEXT: and $9, $7, $6 +; O2-NEXT: or $9, $9, $8 +; O2-NEXT: sc $9, 0($3) +; O2-NEXT: beqz $9, $BB8_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: and $2, $7, $5 +; O2-NEXT: srlv $2, $2, $1 +; O2-NEXT: sll $2, $2, 24 +; O2-NEXT: sra $2, $2, 24 +; O2-NEXT: # %bb.3: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadAdd8: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: addiu $2, $zero, -4 +; O3-NEXT: lw $1, %got(y)($1) +; O3-NEXT: and $3, $1, $2 +; O3-NEXT: andi $1, $1, 3 +; O3-NEXT: ori $2, $zero, 255 +; O3-NEXT: sll $1, $1, 3 +; O3-NEXT: sllv $5, $2, $1 +; O3-NEXT: sllv $4, $4, $1 +; O3-NEXT: nor $6, $zero, $5 +; O3-NEXT: $BB8_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $7, 0($3) +; O3-NEXT: addu $8, $7, $4 +; O3-NEXT: and $8, $8, $5 +; O3-NEXT: and $9, $7, $6 +; O3-NEXT: or $9, $9, $8 +; O3-NEXT: sc $9, 0($3) +; O3-NEXT: beqz $9, $BB8_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: and $2, $7, $5 +; O3-NEXT: srlv $2, $2, $1 +; O3-NEXT: sll $2, $2, 24 +; O3-NEXT: sra $2, $2, 24 +; O3-NEXT: # %bb.3: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadAdd8: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(y)($1) +; MIPS32EB-NEXT: addiu $2, $zero, -4 +; MIPS32EB-NEXT: and $3, $1, $2 +; MIPS32EB-NEXT: andi $1, $1, 3 +; MIPS32EB-NEXT: xori $1, $1, 3 +; MIPS32EB-NEXT: sll $1, $1, 3 +; MIPS32EB-NEXT: ori $2, $zero, 255 +; MIPS32EB-NEXT: sllv $5, $2, $1 +; MIPS32EB-NEXT: nor $6, $zero, $5 +; MIPS32EB-NEXT: sllv $4, $4, $1 +; MIPS32EB-NEXT: $BB8_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $7, 0($3) +; MIPS32EB-NEXT: addu $8, $7, $4 +; MIPS32EB-NEXT: and $8, $8, $5 +; MIPS32EB-NEXT: and $9, $7, $6 +; MIPS32EB-NEXT: or $9, $9, $8 +; MIPS32EB-NEXT: sc $9, 0($3) +; MIPS32EB-NEXT: beqz $9, $BB8_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: and $2, $7, $5 +; MIPS32EB-NEXT: srlv $2, $2, $1 +; MIPS32EB-NEXT: sll $2, $2, 24 +; MIPS32EB-NEXT: sra $2, $2, 24 +; MIPS32EB-NEXT: # %bb.3: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw add i8* @y, i8 %incr monotonic + ret i8 %0 +} + +define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { +; MIPS32-LABEL: AtomicLoadSub8: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: lw $1, %got(y)($1) +; MIPS32-NEXT: addiu $2, $zero, -4 +; MIPS32-NEXT: and $3, $1, $2 +; MIPS32-NEXT: andi $1, $1, 3 +; MIPS32-NEXT: sll $1, $1, 3 +; MIPS32-NEXT: ori $2, $zero, 255 +; MIPS32-NEXT: sllv $5, $2, $1 +; MIPS32-NEXT: nor $6, $zero, $5 +; MIPS32-NEXT: sllv $4, 
$4, $1 +; MIPS32-NEXT: $BB9_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $7, 0($3) +; MIPS32-NEXT: subu $8, $7, $4 +; MIPS32-NEXT: and $8, $8, $5 +; MIPS32-NEXT: and $9, $7, $6 +; MIPS32-NEXT: or $9, $9, $8 +; MIPS32-NEXT: sc $9, 0($3) +; MIPS32-NEXT: beqz $9, $BB9_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: and $2, $7, $5 +; MIPS32-NEXT: srlv $2, $2, $1 +; MIPS32-NEXT: sll $2, $2, 24 +; MIPS32-NEXT: sra $2, $2, 24 +; MIPS32-NEXT: # %bb.3: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: AtomicLoadSub8: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addiu $sp, $sp, -8 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $1, %got(y)($1) +; MIPS32O0-NEXT: addiu $2, $zero, -4 +; MIPS32O0-NEXT: and $5, $1, $2 +; MIPS32O0-NEXT: andi $1, $1, 3 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 +; MIPS32O0-NEXT: $BB9_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: subu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB9_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 +; MIPS32O0-NEXT: # %bb.3: # %entry +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: # %bb.4: # %entry +; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 +; MIPS32O0-NEXT: addiu $sp, $sp, 8 +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicLoadSub8: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: lw $1, %got(y)($1) +; MIPS32R2-NEXT: addiu $2, $zero, -4 +; MIPS32R2-NEXT: and $3, $1, $2 +; MIPS32R2-NEXT: andi $1, $1, 3 +; MIPS32R2-NEXT: sll $1, $1, 3 +; MIPS32R2-NEXT: ori $2, $zero, 255 +; MIPS32R2-NEXT: sllv $5, $2, $1 +; MIPS32R2-NEXT: nor $6, $zero, $5 +; MIPS32R2-NEXT: sllv $4, $4, $1 +; MIPS32R2-NEXT: $BB9_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $7, 0($3) +; MIPS32R2-NEXT: subu $8, $7, $4 +; MIPS32R2-NEXT: and $8, $8, $5 +; MIPS32R2-NEXT: and $9, $7, $6 +; MIPS32R2-NEXT: or $9, $9, $8 +; MIPS32R2-NEXT: sc $9, 0($3) +; MIPS32R2-NEXT: beqz $9, $BB9_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: and $2, $7, $5 +; MIPS32R2-NEXT: srlv $2, $2, $1 +; MIPS32R2-NEXT: seb $2, $2 +; MIPS32R2-NEXT: # %bb.3: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: AtomicLoadSub8: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: lw $1, %got(y)($1) +; MIPS32R6-NEXT: addiu $2, $zero, -4 +; MIPS32R6-NEXT: and $3, $1, $2 +; MIPS32R6-NEXT: andi $1, $1, 3 +; MIPS32R6-NEXT: sll $1, $1, 3 +; MIPS32R6-NEXT: ori $2, $zero, 255 +; MIPS32R6-NEXT: sllv $5, $2, $1 +; MIPS32R6-NEXT: nor $6, $zero, $5 +; MIPS32R6-NEXT: sllv $4, $4, $1 +; MIPS32R6-NEXT: $BB9_1: # %entry +; 
MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $7, 0($3) +; MIPS32R6-NEXT: subu $8, $7, $4 +; MIPS32R6-NEXT: and $8, $8, $5 +; MIPS32R6-NEXT: and $9, $7, $6 +; MIPS32R6-NEXT: or $9, $9, $8 +; MIPS32R6-NEXT: sc $9, 0($3) +; MIPS32R6-NEXT: beqzc $9, $BB9_1 +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: and $2, $7, $5 +; MIPS32R6-NEXT: srlv $2, $2, $1 +; MIPS32R6-NEXT: seb $2, $2 +; MIPS32R6-NEXT: # %bb.3: # %entry +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: AtomicLoadSub8: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $1, %got(y)($1) +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 +; MIPS32R6O0-NEXT: andi $1, $1, 3 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 +; MIPS32R6O0-NEXT: $BB9_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: subu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB9_1 +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 +; MIPS32R6O0-NEXT: # %bb.3: # %entry +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: # %bb.4: # %entry +; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicLoadSub8: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS4-NEXT: ld $1, %got_disp(y)($1) +; MIPS4-NEXT: daddiu $2, $zero, -4 +; MIPS4-NEXT: and $3, $1, $2 +; MIPS4-NEXT: andi $1, $1, 3 +; MIPS4-NEXT: sll $1, $1, 3 +; MIPS4-NEXT: ori $2, $zero, 255 +; MIPS4-NEXT: sllv $5, $2, $1 +; MIPS4-NEXT: nor $6, $zero, $5 +; MIPS4-NEXT: sllv $4, $4, $1 +; MIPS4-NEXT: .LBB9_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $7, 0($3) +; MIPS4-NEXT: subu $8, $7, $4 +; MIPS4-NEXT: and $8, $8, $5 +; MIPS4-NEXT: and $9, $7, $6 +; MIPS4-NEXT: or $9, $9, $8 +; MIPS4-NEXT: sc $9, 0($3) +; MIPS4-NEXT: beqz $9, .LBB9_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: and $2, $7, $5 +; MIPS4-NEXT: srlv $2, $2, $1 +; MIPS4-NEXT: sll $2, $2, 24 +; MIPS4-NEXT: sra $2, $2, 24 +; MIPS4-NEXT: # %bb.3: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadSub8: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64-NEXT: ld $1, %got_disp(y)($1) +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $3, $1, $2 +; MIPS64-NEXT: andi $1, $1, 3 +; MIPS64-NEXT: sll $1, $1, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $5, $2, $1 +; MIPS64-NEXT: nor $6, $zero, $5 +; MIPS64-NEXT: sllv $4, $4, $1 +; MIPS64-NEXT: .LBB9_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $7, 0($3) +; MIPS64-NEXT: subu $8, $7, $4 +; 
MIPS64-NEXT: and $8, $8, $5 +; MIPS64-NEXT: and $9, $7, $6 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($3) +; MIPS64-NEXT: beqz $9, .LBB9_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: and $2, $7, $5 +; MIPS64-NEXT: srlv $2, $2, $1 +; MIPS64-NEXT: sll $2, $2, 24 +; MIPS64-NEXT: sra $2, $2, 24 +; MIPS64-NEXT: # %bb.3: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadSub8: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64R2-NEXT: ld $1, %got_disp(y)($1) +; MIPS64R2-NEXT: daddiu $2, $zero, -4 +; MIPS64R2-NEXT: and $3, $1, $2 +; MIPS64R2-NEXT: andi $1, $1, 3 +; MIPS64R2-NEXT: sll $1, $1, 3 +; MIPS64R2-NEXT: ori $2, $zero, 255 +; MIPS64R2-NEXT: sllv $5, $2, $1 +; MIPS64R2-NEXT: nor $6, $zero, $5 +; MIPS64R2-NEXT: sllv $4, $4, $1 +; MIPS64R2-NEXT: .LBB9_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $7, 0($3) +; MIPS64R2-NEXT: subu $8, $7, $4 +; MIPS64R2-NEXT: and $8, $8, $5 +; MIPS64R2-NEXT: and $9, $7, $6 +; MIPS64R2-NEXT: or $9, $9, $8 +; MIPS64R2-NEXT: sc $9, 0($3) +; MIPS64R2-NEXT: beqz $9, .LBB9_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: and $2, $7, $5 +; MIPS64R2-NEXT: srlv $2, $2, $1 +; MIPS64R2-NEXT: seb $2, $2 +; MIPS64R2-NEXT: # %bb.3: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadSub8: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64R6-NEXT: ld $1, %got_disp(y)($1) +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $3, $1, $2 +; MIPS64R6-NEXT: andi $1, $1, 3 +; MIPS64R6-NEXT: sll $1, $1, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $5, $2, $1 +; MIPS64R6-NEXT: nor $6, $zero, $5 +; MIPS64R6-NEXT: sllv $4, $4, $1 +; MIPS64R6-NEXT: .LBB9_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $7, 0($3) +; MIPS64R6-NEXT: subu $8, $7, $4 +; MIPS64R6-NEXT: and $8, $8, $5 +; MIPS64R6-NEXT: and $9, $7, $6 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($3) +; MIPS64R6-NEXT: beqzc $9, .LBB9_1 +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: and $2, $7, $5 +; MIPS64R6-NEXT: srlv $2, $2, $1 +; MIPS64R6-NEXT: seb $2, $2 +; MIPS64R6-NEXT: # %bb.3: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadSub8: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 +; MIPS64R6O0-NEXT: .LBB9_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: subu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; 
MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB9_1 +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 +; MIPS64R6O0-NEXT: # %bb.3: # %entry +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: # %bb.4: # %entry +; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadSub8: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(y)($2) +; MM32-NEXT: addiu $2, $zero, -4 +; MM32-NEXT: and $3, $1, $2 +; MM32-NEXT: andi $1, $1, 3 +; MM32-NEXT: sll $1, $1, 3 +; MM32-NEXT: ori $2, $zero, 255 +; MM32-NEXT: sllv $5, $2, $1 +; MM32-NEXT: nor $6, $zero, $5 +; MM32-NEXT: sllv $4, $4, $1 +; MM32-NEXT: $BB9_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $7, 0($3) +; MM32-NEXT: subu $8, $7, $4 +; MM32-NEXT: and $8, $8, $5 +; MM32-NEXT: and $9, $7, $6 +; MM32-NEXT: or $9, $9, $8 +; MM32-NEXT: sc $9, 0($3) +; MM32-NEXT: beqzc $9, $BB9_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: and $2, $7, $5 +; MM32-NEXT: srlv $2, $2, $1 +; MM32-NEXT: seb $2, $2 +; MM32-NEXT: # %bb.3: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadSub8: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(y)($1) +; O1-NEXT: addiu $2, $zero, -4 +; O1-NEXT: and $3, $1, $2 +; O1-NEXT: andi $1, $1, 3 +; O1-NEXT: sll $1, $1, 3 +; O1-NEXT: ori $2, $zero, 255 +; O1-NEXT: sllv $5, $2, $1 +; O1-NEXT: nor $6, $zero, $5 +; O1-NEXT: sllv $4, $4, $1 +; O1-NEXT: $BB9_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $7, 0($3) +; O1-NEXT: subu $8, $7, $4 +; O1-NEXT: and $8, $8, $5 +; O1-NEXT: and $9, $7, $6 +; O1-NEXT: or $9, $9, $8 +; O1-NEXT: sc $9, 0($3) +; O1-NEXT: beqz $9, $BB9_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: and $2, $7, $5 +; O1-NEXT: srlv $2, $2, $1 +; O1-NEXT: sll $2, $2, 24 +; O1-NEXT: sra $2, $2, 24 +; O1-NEXT: # %bb.3: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadSub8: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, %got(y)($1) +; O2-NEXT: addiu $2, $zero, -4 +; O2-NEXT: and $3, $1, $2 +; O2-NEXT: andi $1, $1, 3 +; O2-NEXT: sll $1, $1, 3 +; O2-NEXT: ori $2, $zero, 255 +; O2-NEXT: sllv $5, $2, $1 +; O2-NEXT: nor $6, $zero, $5 +; O2-NEXT: sllv $4, $4, $1 +; O2-NEXT: $BB9_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $7, 0($3) +; O2-NEXT: subu $8, $7, $4 +; O2-NEXT: and $8, $8, $5 +; O2-NEXT: and $9, $7, $6 +; O2-NEXT: or $9, $9, $8 +; O2-NEXT: sc $9, 0($3) +; O2-NEXT: beqz $9, $BB9_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: and $2, $7, $5 +; O2-NEXT: srlv $2, $2, $1 +; O2-NEXT: sll $2, $2, 24 +; O2-NEXT: sra $2, $2, 24 +; O2-NEXT: # %bb.3: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadSub8: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: addiu $2, $zero, -4 +; O3-NEXT: lw $1, %got(y)($1) +; O3-NEXT: and $3, $1, $2 +; O3-NEXT: andi $1, $1, 3 +; O3-NEXT: ori $2, $zero, 255 +; O3-NEXT: sll $1, $1, 3 +; O3-NEXT: sllv $5, $2, $1 +; O3-NEXT: sllv 
$4, $4, $1 +; O3-NEXT: nor $6, $zero, $5 +; O3-NEXT: $BB9_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $7, 0($3) +; O3-NEXT: subu $8, $7, $4 +; O3-NEXT: and $8, $8, $5 +; O3-NEXT: and $9, $7, $6 +; O3-NEXT: or $9, $9, $8 +; O3-NEXT: sc $9, 0($3) +; O3-NEXT: beqz $9, $BB9_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: and $2, $7, $5 +; O3-NEXT: srlv $2, $2, $1 +; O3-NEXT: sll $2, $2, 24 +; O3-NEXT: sra $2, $2, 24 +; O3-NEXT: # %bb.3: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadSub8: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(y)($1) +; MIPS32EB-NEXT: addiu $2, $zero, -4 +; MIPS32EB-NEXT: and $3, $1, $2 +; MIPS32EB-NEXT: andi $1, $1, 3 +; MIPS32EB-NEXT: xori $1, $1, 3 +; MIPS32EB-NEXT: sll $1, $1, 3 +; MIPS32EB-NEXT: ori $2, $zero, 255 +; MIPS32EB-NEXT: sllv $5, $2, $1 +; MIPS32EB-NEXT: nor $6, $zero, $5 +; MIPS32EB-NEXT: sllv $4, $4, $1 +; MIPS32EB-NEXT: $BB9_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $7, 0($3) +; MIPS32EB-NEXT: subu $8, $7, $4 +; MIPS32EB-NEXT: and $8, $8, $5 +; MIPS32EB-NEXT: and $9, $7, $6 +; MIPS32EB-NEXT: or $9, $9, $8 +; MIPS32EB-NEXT: sc $9, 0($3) +; MIPS32EB-NEXT: beqz $9, $BB9_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: and $2, $7, $5 +; MIPS32EB-NEXT: srlv $2, $2, $1 +; MIPS32EB-NEXT: sll $2, $2, 24 +; MIPS32EB-NEXT: sra $2, $2, 24 +; MIPS32EB-NEXT: # %bb.3: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw sub i8* @y, i8 %incr monotonic + ret i8 %0 + +} + +define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind { +; MIPS32-LABEL: AtomicLoadNand8: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: lw $1, %got(y)($1) +; MIPS32-NEXT: addiu $2, $zero, -4 +; MIPS32-NEXT: and $3, $1, $2 +; MIPS32-NEXT: andi $1, $1, 3 +; MIPS32-NEXT: sll $1, $1, 3 +; MIPS32-NEXT: ori $2, $zero, 255 +; MIPS32-NEXT: sllv $5, $2, $1 +; MIPS32-NEXT: nor $6, $zero, $5 +; MIPS32-NEXT: sllv $4, $4, $1 +; MIPS32-NEXT: $BB10_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $7, 0($3) +; MIPS32-NEXT: and $8, $7, $4 +; MIPS32-NEXT: nor $8, $zero, $8 +; MIPS32-NEXT: and $8, $8, $5 +; MIPS32-NEXT: and $9, $7, $6 +; MIPS32-NEXT: or $9, $9, $8 +; MIPS32-NEXT: sc $9, 0($3) +; MIPS32-NEXT: beqz $9, $BB10_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: and $2, $7, $5 +; MIPS32-NEXT: srlv $2, $2, $1 +; MIPS32-NEXT: sll $2, $2, 24 +; MIPS32-NEXT: sra $2, $2, 24 +; MIPS32-NEXT: # %bb.3: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: AtomicLoadNand8: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addiu $sp, $sp, -8 +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $1, %got(y)($1) +; MIPS32O0-NEXT: addiu $2, $zero, -4 +; MIPS32O0-NEXT: and $5, $1, $2 +; MIPS32O0-NEXT: andi $1, $1, 3 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 +; MIPS32O0-NEXT: $BB10_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: and $3, $2, $6 +; 
MIPS32O0-NEXT: nor $3, $zero, $3 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB10_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 +; MIPS32O0-NEXT: # %bb.3: # %entry +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: # %bb.4: # %entry +; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 +; MIPS32O0-NEXT: addiu $sp, $sp, 8 +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicLoadNand8: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: lw $1, %got(y)($1) +; MIPS32R2-NEXT: addiu $2, $zero, -4 +; MIPS32R2-NEXT: and $3, $1, $2 +; MIPS32R2-NEXT: andi $1, $1, 3 +; MIPS32R2-NEXT: sll $1, $1, 3 +; MIPS32R2-NEXT: ori $2, $zero, 255 +; MIPS32R2-NEXT: sllv $5, $2, $1 +; MIPS32R2-NEXT: nor $6, $zero, $5 +; MIPS32R2-NEXT: sllv $4, $4, $1 +; MIPS32R2-NEXT: $BB10_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $7, 0($3) +; MIPS32R2-NEXT: and $8, $7, $4 +; MIPS32R2-NEXT: nor $8, $zero, $8 +; MIPS32R2-NEXT: and $8, $8, $5 +; MIPS32R2-NEXT: and $9, $7, $6 +; MIPS32R2-NEXT: or $9, $9, $8 +; MIPS32R2-NEXT: sc $9, 0($3) +; MIPS32R2-NEXT: beqz $9, $BB10_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: and $2, $7, $5 +; MIPS32R2-NEXT: srlv $2, $2, $1 +; MIPS32R2-NEXT: seb $2, $2 +; MIPS32R2-NEXT: # %bb.3: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: AtomicLoadNand8: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: lw $1, %got(y)($1) +; MIPS32R6-NEXT: addiu $2, $zero, -4 +; MIPS32R6-NEXT: and $3, $1, $2 +; MIPS32R6-NEXT: andi $1, $1, 3 +; MIPS32R6-NEXT: sll $1, $1, 3 +; MIPS32R6-NEXT: ori $2, $zero, 255 +; MIPS32R6-NEXT: sllv $5, $2, $1 +; MIPS32R6-NEXT: nor $6, $zero, $5 +; MIPS32R6-NEXT: sllv $4, $4, $1 +; MIPS32R6-NEXT: $BB10_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $7, 0($3) +; MIPS32R6-NEXT: and $8, $7, $4 +; MIPS32R6-NEXT: nor $8, $zero, $8 +; MIPS32R6-NEXT: and $8, $8, $5 +; MIPS32R6-NEXT: and $9, $7, $6 +; MIPS32R6-NEXT: or $9, $9, $8 +; MIPS32R6-NEXT: sc $9, 0($3) +; MIPS32R6-NEXT: beqzc $9, $BB10_1 +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: and $2, $7, $5 +; MIPS32R6-NEXT: srlv $2, $2, $1 +; MIPS32R6-NEXT: seb $2, $2 +; MIPS32R6-NEXT: # %bb.3: # %entry +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: AtomicLoadNand8: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 +; MIPS32R6O0-NEXT: lw $1, %got(y)($1) +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 +; MIPS32R6O0-NEXT: andi $1, $1, 3 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 +; MIPS32R6O0-NEXT: $BB10_1: # %entry +; MIPS32R6O0-NEXT: # =>This 
Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: and $3, $2, $6 +; MIPS32R6O0-NEXT: nor $3, $zero, $3 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB10_1 +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 +; MIPS32R6O0-NEXT: # %bb.3: # %entry +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: # %bb.4: # %entry +; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicLoadNand8: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) +; MIPS4-NEXT: ld $1, %got_disp(y)($1) +; MIPS4-NEXT: daddiu $2, $zero, -4 +; MIPS4-NEXT: and $3, $1, $2 +; MIPS4-NEXT: andi $1, $1, 3 +; MIPS4-NEXT: sll $1, $1, 3 +; MIPS4-NEXT: ori $2, $zero, 255 +; MIPS4-NEXT: sllv $5, $2, $1 +; MIPS4-NEXT: nor $6, $zero, $5 +; MIPS4-NEXT: sllv $4, $4, $1 +; MIPS4-NEXT: .LBB10_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $7, 0($3) +; MIPS4-NEXT: and $8, $7, $4 +; MIPS4-NEXT: nor $8, $zero, $8 +; MIPS4-NEXT: and $8, $8, $5 +; MIPS4-NEXT: and $9, $7, $6 +; MIPS4-NEXT: or $9, $9, $8 +; MIPS4-NEXT: sc $9, 0($3) +; MIPS4-NEXT: beqz $9, .LBB10_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: and $2, $7, $5 +; MIPS4-NEXT: srlv $2, $2, $1 +; MIPS4-NEXT: sll $2, $2, 24 +; MIPS4-NEXT: sra $2, $2, 24 +; MIPS4-NEXT: # %bb.3: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadNand8: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) +; MIPS64-NEXT: ld $1, %got_disp(y)($1) +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $3, $1, $2 +; MIPS64-NEXT: andi $1, $1, 3 +; MIPS64-NEXT: sll $1, $1, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $5, $2, $1 +; MIPS64-NEXT: nor $6, $zero, $5 +; MIPS64-NEXT: sllv $4, $4, $1 +; MIPS64-NEXT: .LBB10_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $7, 0($3) +; MIPS64-NEXT: and $8, $7, $4 +; MIPS64-NEXT: nor $8, $zero, $8 +; MIPS64-NEXT: and $8, $8, $5 +; MIPS64-NEXT: and $9, $7, $6 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($3) +; MIPS64-NEXT: beqz $9, .LBB10_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: and $2, $7, $5 +; MIPS64-NEXT: srlv $2, $2, $1 +; MIPS64-NEXT: sll $2, $2, 24 +; MIPS64-NEXT: sra $2, $2, 24 +; MIPS64-NEXT: # %bb.3: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadNand8: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) +; MIPS64R2-NEXT: ld $1, %got_disp(y)($1) +; MIPS64R2-NEXT: daddiu $2, $zero, -4 +; MIPS64R2-NEXT: and $3, $1, $2 +; MIPS64R2-NEXT: andi $1, $1, 3 +; MIPS64R2-NEXT: sll $1, $1, 3 +; MIPS64R2-NEXT: ori $2, $zero, 255 +; MIPS64R2-NEXT: sllv $5, $2, $1 +; MIPS64R2-NEXT: nor $6, $zero, $5 +; MIPS64R2-NEXT: sllv $4, $4, $1 +; MIPS64R2-NEXT: .LBB10_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: 
Depth=1
+; MIPS64R2-NEXT: ll $7, 0($3)
+; MIPS64R2-NEXT: and $8, $7, $4
+; MIPS64R2-NEXT: nor $8, $zero, $8
+; MIPS64R2-NEXT: and $8, $8, $5
+; MIPS64R2-NEXT: and $9, $7, $6
+; MIPS64R2-NEXT: or $9, $9, $8
+; MIPS64R2-NEXT: sc $9, 0($3)
+; MIPS64R2-NEXT: beqz $9, .LBB10_1
+; MIPS64R2-NEXT: nop
+; MIPS64R2-NEXT: # %bb.2: # %entry
+; MIPS64R2-NEXT: and $2, $7, $5
+; MIPS64R2-NEXT: srlv $2, $2, $1
+; MIPS64R2-NEXT: seb $2, $2
+; MIPS64R2-NEXT: # %bb.3: # %entry
+; MIPS64R2-NEXT: jr $ra
+; MIPS64R2-NEXT: nop
+;
+; MIPS64R6-LABEL: AtomicLoadNand8:
+; MIPS64R6: # %bb.0: # %entry
+; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
+; MIPS64R6-NEXT: daddu $1, $1, $25
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
+; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R6-NEXT: daddiu $2, $zero, -4
+; MIPS64R6-NEXT: and $3, $1, $2
+; MIPS64R6-NEXT: andi $1, $1, 3
+; MIPS64R6-NEXT: sll $1, $1, 3
+; MIPS64R6-NEXT: ori $2, $zero, 255
+; MIPS64R6-NEXT: sllv $5, $2, $1
+; MIPS64R6-NEXT: nor $6, $zero, $5
+; MIPS64R6-NEXT: sllv $4, $4, $1
+; MIPS64R6-NEXT: .LBB10_1: # %entry
+; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R6-NEXT: ll $7, 0($3)
+; MIPS64R6-NEXT: and $8, $7, $4
+; MIPS64R6-NEXT: nor $8, $zero, $8
+; MIPS64R6-NEXT: and $8, $8, $5
+; MIPS64R6-NEXT: and $9, $7, $6
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($3)
+; MIPS64R6-NEXT: beqzc $9, .LBB10_1
+; MIPS64R6-NEXT: # %bb.2: # %entry
+; MIPS64R6-NEXT: and $2, $7, $5
+; MIPS64R6-NEXT: srlv $2, $2, $1
+; MIPS64R6-NEXT: seb $2, $2
+; MIPS64R6-NEXT: # %bb.3: # %entry
+; MIPS64R6-NEXT: jrc $ra
+;
+; MIPS64R6O0-LABEL: AtomicLoadNand8:
+; MIPS64R6O0: # %bb.0: # %entry
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
+; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
+; MIPS64R6O0-NEXT: daddu $1, $1, $25
+; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
+; MIPS64R6O0-NEXT: move $1, $4
+; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2)
+; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
+; MIPS64R6O0-NEXT: and $5, $2, $3
+; MIPS64R6O0-NEXT: andi $2, $2, 3
+; MIPS64R6O0-NEXT: xori $2, $2, 3
+; MIPS64R6O0-NEXT: sll $9, $2, 3
+; MIPS64R6O0-NEXT: ori $2, $zero, 255
+; MIPS64R6O0-NEXT: sllv $7, $2, $9
+; MIPS64R6O0-NEXT: nor $8, $zero, $7
+; MIPS64R6O0-NEXT: sllv $6, $1, $9
+; MIPS64R6O0-NEXT: .LBB10_1: # %entry
+; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R6O0-NEXT: ll $2, 0($5)
+; MIPS64R6O0-NEXT: and $3, $2, $6
+; MIPS64R6O0-NEXT: nor $3, $zero, $3
+; MIPS64R6O0-NEXT: and $3, $3, $7
+; MIPS64R6O0-NEXT: and $4, $2, $8
+; MIPS64R6O0-NEXT: or $4, $4, $3
+; MIPS64R6O0-NEXT: sc $4, 0($5)
+; MIPS64R6O0-NEXT: beqzc $4, .LBB10_1
+; MIPS64R6O0-NEXT: # %bb.2: # %entry
+; MIPS64R6O0-NEXT: and $1, $2, $7
+; MIPS64R6O0-NEXT: srlv $1, $1, $9
+; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: # %bb.3: # %entry
+; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: # %bb.4: # %entry
+; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
+; MIPS64R6O0-NEXT: jrc $ra
+;
+; MM32-LABEL: AtomicLoadNand8:
+; MM32: # %bb.0: # %entry
+; MM32-NEXT: lui $2, %hi(_gp_disp)
+; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MM32-NEXT: addu $2, $2, $25
+; MM32-NEXT: lw $1, %got(y)($2)
+; MM32-NEXT: addiu $2, $zero, -4
+; MM32-NEXT: and $3, $1, $2
+; MM32-NEXT: andi $1, $1, 3
+; MM32-NEXT: sll $1, $1, 3
+; MM32-NEXT: ori $2, $zero, 255
+; MM32-NEXT: sllv $5, $2, $1
+; MM32-NEXT: nor $6, $zero, $5
+; MM32-NEXT: sllv $4, $4, $1
+; MM32-NEXT: $BB10_1: # %entry
+; MM32-NEXT: # =>This Inner Loop Header: Depth=1
+; MM32-NEXT: ll $7, 0($3)
+; MM32-NEXT: and $8, $7, $4
+; MM32-NEXT: nor $8, $zero, $8
+; MM32-NEXT: and $8, $8, $5
+; MM32-NEXT: and $9, $7, $6
+; MM32-NEXT: or $9, $9, $8
+; MM32-NEXT: sc $9, 0($3)
+; MM32-NEXT: beqzc $9, $BB10_1
+; MM32-NEXT: # %bb.2: # %entry
+; MM32-NEXT: and $2, $7, $5
+; MM32-NEXT: srlv $2, $2, $1
+; MM32-NEXT: seb $2, $2
+; MM32-NEXT: # %bb.3: # %entry
+; MM32-NEXT: jrc $ra
+;
+; O1-LABEL: AtomicLoadNand8:
+; O1: # %bb.0: # %entry
+; O1-NEXT: lui $2, %hi(_gp_disp)
+; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O1-NEXT: addu $1, $2, $25
+; O1-NEXT: lw $1, %got(y)($1)
+; O1-NEXT: addiu $2, $zero, -4
+; O1-NEXT: and $3, $1, $2
+; O1-NEXT: andi $1, $1, 3
+; O1-NEXT: sll $1, $1, 3
+; O1-NEXT: ori $2, $zero, 255
+; O1-NEXT: sllv $5, $2, $1
+; O1-NEXT: nor $6, $zero, $5
+; O1-NEXT: sllv $4, $4, $1
+; O1-NEXT: $BB10_1: # %entry
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: ll $7, 0($3)
+; O1-NEXT: and $8, $7, $4
+; O1-NEXT: nor $8, $zero, $8
+; O1-NEXT: and $8, $8, $5
+; O1-NEXT: and $9, $7, $6
+; O1-NEXT: or $9, $9, $8
+; O1-NEXT: sc $9, 0($3)
+; O1-NEXT: beqz $9, $BB10_1
+; O1-NEXT: nop
+; O1-NEXT: # %bb.2: # %entry
+; O1-NEXT: and $2, $7, $5
+; O1-NEXT: srlv $2, $2, $1
+; O1-NEXT: sll $2, $2, 24
+; O1-NEXT: sra $2, $2, 24
+; O1-NEXT: # %bb.3: # %entry
+; O1-NEXT: jr $ra
+; O1-NEXT: nop
+;
+; O2-LABEL: AtomicLoadNand8:
+; O2: # %bb.0: # %entry
+; O2-NEXT: lui $2, %hi(_gp_disp)
+; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O2-NEXT: addu $1, $2, $25
+; O2-NEXT: lw $1, %got(y)($1)
+; O2-NEXT: addiu $2, $zero, -4
+; O2-NEXT: and $3, $1, $2
+; O2-NEXT: andi $1, $1, 3
+; O2-NEXT: sll $1, $1, 3
+; O2-NEXT: ori $2, $zero, 255
+; O2-NEXT: sllv $5, $2, $1
+; O2-NEXT: nor $6, $zero, $5
+; O2-NEXT: sllv $4, $4, $1
+; O2-NEXT: $BB10_1: # %entry
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: ll $7, 0($3)
+; O2-NEXT: and $8, $7, $4
+; O2-NEXT: nor $8, $zero, $8
+; O2-NEXT: and $8, $8, $5
+; O2-NEXT: and $9, $7, $6
+; O2-NEXT: or $9, $9, $8
+; O2-NEXT: sc $9, 0($3)
+; O2-NEXT: beqz $9, $BB10_1
+; O2-NEXT: nop
+; O2-NEXT: # %bb.2: # %entry
+; O2-NEXT: and $2, $7, $5
+; O2-NEXT: srlv $2, $2, $1
+; O2-NEXT: sll $2, $2, 24
+; O2-NEXT: sra $2, $2, 24
+; O2-NEXT: # %bb.3: # %entry
+; O2-NEXT: jr $ra
+; O2-NEXT: nop
+;
+; O3-LABEL: AtomicLoadNand8:
+; O3: # %bb.0: # %entry
+; O3-NEXT: lui $2, %hi(_gp_disp)
+; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O3-NEXT: addu $1, $2, $25
+; O3-NEXT: addiu $2, $zero, -4
+; O3-NEXT: lw $1, %got(y)($1)
+; O3-NEXT: and $3, $1, $2
+; O3-NEXT: andi $1, $1, 3
+; O3-NEXT: ori $2, $zero, 255
+; O3-NEXT: sll $1, $1, 3
+; O3-NEXT: sllv $5, $2, $1
+; O3-NEXT: sllv $4, $4, $1
+; O3-NEXT: nor $6, $zero, $5
+; O3-NEXT: $BB10_1: # %entry
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: ll $7, 0($3)
+; O3-NEXT: and $8, $7, $4
+; O3-NEXT: nor $8, $zero, $8
+; O3-NEXT: and $8, $8, $5
+; O3-NEXT: and $9, $7, $6
+; O3-NEXT: or $9, $9, $8
+; O3-NEXT: sc $9, 0($3)
+; O3-NEXT: beqz $9, $BB10_1
+; O3-NEXT: nop
+; O3-NEXT: # %bb.2: # %entry
+; O3-NEXT: and $2, $7, $5
+; O3-NEXT: srlv $2, $2, $1
+; O3-NEXT: sll $2, $2, 24
+; O3-NEXT: sra $2, $2, 24
+; O3-NEXT: # %bb.3: # %entry
+; O3-NEXT: jr $ra
+; O3-NEXT: nop
+;
+; MIPS32EB-LABEL: AtomicLoadNand8:
+; MIPS32EB: # %bb.0: # %entry
+; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32EB-NEXT: addu $1, $2, $25
+; MIPS32EB-NEXT: lw $1, %got(y)($1)
+; MIPS32EB-NEXT: addiu $2, $zero, -4
+; MIPS32EB-NEXT: and $3, $1, $2
+; MIPS32EB-NEXT: andi $1, $1, 3
+; MIPS32EB-NEXT: xori $1, $1, 3
+; MIPS32EB-NEXT: sll $1, $1, 3
+; MIPS32EB-NEXT: ori $2, $zero, 255
+; MIPS32EB-NEXT: sllv $5, $2, $1
+; MIPS32EB-NEXT: nor $6, $zero, $5
+; MIPS32EB-NEXT: sllv $4, $4, $1
+; MIPS32EB-NEXT: $BB10_1: # %entry
+; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32EB-NEXT: ll $7, 0($3)
+; MIPS32EB-NEXT: and $8, $7, $4
+; MIPS32EB-NEXT: nor $8, $zero, $8
+; MIPS32EB-NEXT: and $8, $8, $5
+; MIPS32EB-NEXT: and $9, $7, $6
+; MIPS32EB-NEXT: or $9, $9, $8
+; MIPS32EB-NEXT: sc $9, 0($3)
+; MIPS32EB-NEXT: beqz $9, $BB10_1
+; MIPS32EB-NEXT: nop
+; MIPS32EB-NEXT: # %bb.2: # %entry
+; MIPS32EB-NEXT: and $2, $7, $5
+; MIPS32EB-NEXT: srlv $2, $2, $1
+; MIPS32EB-NEXT: sll $2, $2, 24
+; MIPS32EB-NEXT: sra $2, $2, 24
+; MIPS32EB-NEXT: # %bb.3: # %entry
+; MIPS32EB-NEXT: jr $ra
+; MIPS32EB-NEXT: nop
+entry:
+ %0 = atomicrmw nand i8* @y, i8 %incr monotonic
+ ret i8 %0
+
+}
+
+define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
+; MIPS32-LABEL: AtomicSwap8:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32-NEXT: addu $1, $2, $25
+; MIPS32-NEXT: lw $1, %got(y)($1)
+; MIPS32-NEXT: addiu $2, $zero, -4
+; MIPS32-NEXT: and $3, $1, $2
+; MIPS32-NEXT: andi $1, $1, 3
+; MIPS32-NEXT: sll $1, $1, 3
+; MIPS32-NEXT: ori $2, $zero, 255
+; MIPS32-NEXT: sllv $5, $2, $1
+; MIPS32-NEXT: nor $6, $zero, $5
+; MIPS32-NEXT: sllv $4, $4, $1
+; MIPS32-NEXT: $BB11_1: # %entry
+; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32-NEXT: ll $7, 0($3)
+; MIPS32-NEXT: and $8, $4, $5
+; MIPS32-NEXT: and $9, $7, $6
+; MIPS32-NEXT: or $9, $9, $8
+; MIPS32-NEXT: sc $9, 0($3)
+; MIPS32-NEXT: beqz $9, $BB11_1
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: # %bb.2: # %entry
+; MIPS32-NEXT: and $2, $7, $5
+; MIPS32-NEXT: srlv $2, $2, $1
+; MIPS32-NEXT: sll $2, $2, 24
+; MIPS32-NEXT: sra $2, $2, 24
+; MIPS32-NEXT: # %bb.3: # %entry
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+;
+; MIPS32O0-LABEL: AtomicSwap8:
+; MIPS32O0: # %bb.0: # %entry
+; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32O0-NEXT: addiu $sp, $sp, -8
+; MIPS32O0-NEXT: addu $1, $2, $25
+; MIPS32O0-NEXT: lw $1, %got(y)($1)
+; MIPS32O0-NEXT: addiu $2, $zero, -4
+; MIPS32O0-NEXT: and $5, $1, $2
+; MIPS32O0-NEXT: andi $1, $1, 3
+; MIPS32O0-NEXT: sll $9, $1, 3
+; MIPS32O0-NEXT: ori $1, $zero, 255
+; MIPS32O0-NEXT: sllv $7, $1, $9
+; MIPS32O0-NEXT: nor $8, $zero, $7
+; MIPS32O0-NEXT: sllv $6, $4, $9
+; MIPS32O0-NEXT: $BB11_1: # %entry
+; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32O0-NEXT: ll $2, 0($5)
+; MIPS32O0-NEXT: and $3, $6, $7
+; MIPS32O0-NEXT: and $4, $2, $8
+; MIPS32O0-NEXT: or $4, $4, $3
+; MIPS32O0-NEXT: sc $4, 0($5)
+; MIPS32O0-NEXT: beqz $4, $BB11_1
+; MIPS32O0-NEXT: nop
+; MIPS32O0-NEXT: # %bb.2: # %entry
+; MIPS32O0-NEXT: and $1, $2, $7
+; MIPS32O0-NEXT: srlv $1, $1, $9
+; MIPS32O0-NEXT: sll $1, $1, 24
+; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: # %bb.3: # %entry
+; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: # %bb.4: # %entry
+; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32O0-NEXT: sll $1, $1, 24
+; MIPS32O0-NEXT: sra $2, $1, 24
+; MIPS32O0-NEXT: addiu $sp, $sp, 8
+; MIPS32O0-NEXT: jr $ra
+; MIPS32O0-NEXT: nop
+;
+; MIPS32R2-LABEL: AtomicSwap8:
+; MIPS32R2: # %bb.0: # %entry
+; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R2-NEXT: addu $1, $2, $25
+; MIPS32R2-NEXT: lw $1, %got(y)($1)
+; MIPS32R2-NEXT: addiu $2, $zero, -4
+; MIPS32R2-NEXT: and $3, $1, $2
+; MIPS32R2-NEXT: andi $1, $1, 3
+; MIPS32R2-NEXT: sll $1, $1, 3
+; MIPS32R2-NEXT: ori $2, $zero, 255
+; MIPS32R2-NEXT: sllv $5, $2, $1
+; MIPS32R2-NEXT: nor $6, $zero, $5
+; MIPS32R2-NEXT: sllv $4, $4, $1
+; MIPS32R2-NEXT: $BB11_1: # %entry
+; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R2-NEXT: ll $7, 0($3)
+; MIPS32R2-NEXT: and $8, $4, $5
+; MIPS32R2-NEXT: and $9, $7, $6
+; MIPS32R2-NEXT: or $9, $9, $8
+; MIPS32R2-NEXT: sc $9, 0($3)
+; MIPS32R2-NEXT: beqz $9, $BB11_1
+; MIPS32R2-NEXT: nop
+; MIPS32R2-NEXT: # %bb.2: # %entry
+; MIPS32R2-NEXT: and $2, $7, $5
+; MIPS32R2-NEXT: srlv $2, $2, $1
+; MIPS32R2-NEXT: seb $2, $2
+; MIPS32R2-NEXT: # %bb.3: # %entry
+; MIPS32R2-NEXT: jr $ra
+; MIPS32R2-NEXT: nop
+;
+; MIPS32R6-LABEL: AtomicSwap8:
+; MIPS32R6: # %bb.0: # %entry
+; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R6-NEXT: addu $1, $2, $25
+; MIPS32R6-NEXT: lw $1, %got(y)($1)
+; MIPS32R6-NEXT: addiu $2, $zero, -4
+; MIPS32R6-NEXT: and $3, $1, $2
+; MIPS32R6-NEXT: andi $1, $1, 3
+; MIPS32R6-NEXT: sll $1, $1, 3
+; MIPS32R6-NEXT: ori $2, $zero, 255
+; MIPS32R6-NEXT: sllv $5, $2, $1
+; MIPS32R6-NEXT: nor $6, $zero, $5
+; MIPS32R6-NEXT: sllv $4, $4, $1
+; MIPS32R6-NEXT: $BB11_1: # %entry
+; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6-NEXT: ll $7, 0($3)
+; MIPS32R6-NEXT: and $8, $4, $5
+; MIPS32R6-NEXT: and $9, $7, $6
+; MIPS32R6-NEXT: or $9, $9, $8
+; MIPS32R6-NEXT: sc $9, 0($3)
+; MIPS32R6-NEXT: beqzc $9, $BB11_1
+; MIPS32R6-NEXT: # %bb.2: # %entry
+; MIPS32R6-NEXT: and $2, $7, $5
+; MIPS32R6-NEXT: srlv $2, $2, $1
+; MIPS32R6-NEXT: seb $2, $2
+; MIPS32R6-NEXT: # %bb.3: # %entry
+; MIPS32R6-NEXT: jrc $ra
+;
+; MIPS32R6O0-LABEL: AtomicSwap8:
+; MIPS32R6O0: # %bb.0: # %entry
+; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
+; MIPS32R6O0-NEXT: addu $1, $2, $25
+; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
+; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
+; MIPS32R6O0-NEXT: addiu $2, $zero, -4
+; MIPS32R6O0-NEXT: and $5, $1, $2
+; MIPS32R6O0-NEXT: andi $1, $1, 3
+; MIPS32R6O0-NEXT: sll $9, $1, 3
+; MIPS32R6O0-NEXT: ori $1, $zero, 255
+; MIPS32R6O0-NEXT: sllv $7, $1, $9
+; MIPS32R6O0-NEXT: nor $8, $zero, $7
+; MIPS32R6O0-NEXT: sllv $6, $4, $9
+; MIPS32R6O0-NEXT: $BB11_1: # %entry
+; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6O0-NEXT: ll $2, 0($5)
+; MIPS32R6O0-NEXT: and $3, $6, $7
+; MIPS32R6O0-NEXT: and $4, $2, $8
+; MIPS32R6O0-NEXT: or $4, $4, $3
+; MIPS32R6O0-NEXT: sc $4, 0($5)
+; MIPS32R6O0-NEXT: beqzc $4, $BB11_1
+; MIPS32R6O0-NEXT: # %bb.2: # %entry
+; MIPS32R6O0-NEXT: and $1, $2, $7
+; MIPS32R6O0-NEXT: srlv $1, $1, $9
+; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: # %bb.3: # %entry
+; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: # %bb.4: # %entry
+; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
+; MIPS32R6O0-NEXT: jrc $ra
+;
+; MIPS4-LABEL: AtomicSwap8:
+; MIPS4: # %bb.0: # %entry
+; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
+; MIPS4-NEXT: daddu $1, $1, $25
+; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
+; MIPS4-NEXT: ld $1, %got_disp(y)($1)
+; MIPS4-NEXT: daddiu $2, $zero, -4
+; MIPS4-NEXT: and $3, $1, $2
+; MIPS4-NEXT: andi $1, $1, 3
+; MIPS4-NEXT: sll $1, $1, 3
+; MIPS4-NEXT: ori $2, $zero, 255
+; MIPS4-NEXT: sllv $5, $2, $1
+; MIPS4-NEXT: nor $6, $zero, $5
+; MIPS4-NEXT: sllv $4, $4, $1
+; MIPS4-NEXT: .LBB11_1: # %entry
+; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS4-NEXT: ll $7, 0($3)
+; MIPS4-NEXT: and $8, $4, $5
+; MIPS4-NEXT: and $9, $7, $6
+; MIPS4-NEXT: or $9, $9, $8
+; MIPS4-NEXT: sc $9, 0($3)
+; MIPS4-NEXT: beqz $9, .LBB11_1
+; MIPS4-NEXT: nop
+; MIPS4-NEXT: # %bb.2: # %entry
+; MIPS4-NEXT: and $2, $7, $5
+; MIPS4-NEXT: srlv $2, $2, $1
+; MIPS4-NEXT: sll $2, $2, 24
+; MIPS4-NEXT: sra $2, $2, 24
+; MIPS4-NEXT: # %bb.3: # %entry
+; MIPS4-NEXT: jr $ra
+; MIPS4-NEXT: nop
+;
+; MIPS64-LABEL: AtomicSwap8:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
+; MIPS64-NEXT: daddu $1, $1, $25
+; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
+; MIPS64-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64-NEXT: daddiu $2, $zero, -4
+; MIPS64-NEXT: and $3, $1, $2
+; MIPS64-NEXT: andi $1, $1, 3
+; MIPS64-NEXT: sll $1, $1, 3
+; MIPS64-NEXT: ori $2, $zero, 255
+; MIPS64-NEXT: sllv $5, $2, $1
+; MIPS64-NEXT: nor $6, $zero, $5
+; MIPS64-NEXT: sllv $4, $4, $1
+; MIPS64-NEXT: .LBB11_1: # %entry
+; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64-NEXT: ll $7, 0($3)
+; MIPS64-NEXT: and $8, $4, $5
+; MIPS64-NEXT: and $9, $7, $6
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($3)
+; MIPS64-NEXT: beqz $9, .LBB11_1
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: # %bb.2: # %entry
+; MIPS64-NEXT: and $2, $7, $5
+; MIPS64-NEXT: srlv $2, $2, $1
+; MIPS64-NEXT: sll $2, $2, 24
+; MIPS64-NEXT: sra $2, $2, 24
+; MIPS64-NEXT: # %bb.3: # %entry
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: nop
+;
+; MIPS64R2-LABEL: AtomicSwap8:
+; MIPS64R2: # %bb.0: # %entry
+; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
+; MIPS64R2-NEXT: daddu $1, $1, $25
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
+; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R2-NEXT: daddiu $2, $zero, -4
+; MIPS64R2-NEXT: and $3, $1, $2
+; MIPS64R2-NEXT: andi $1, $1, 3
+; MIPS64R2-NEXT: sll $1, $1, 3
+; MIPS64R2-NEXT: ori $2, $zero, 255
+; MIPS64R2-NEXT: sllv $5, $2, $1
+; MIPS64R2-NEXT: nor $6, $zero, $5
+; MIPS64R2-NEXT: sllv $4, $4, $1
+; MIPS64R2-NEXT: .LBB11_1: # %entry
+; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R2-NEXT: ll $7, 0($3)
+; MIPS64R2-NEXT: and $8, $4, $5
+; MIPS64R2-NEXT: and $9, $7, $6
+; MIPS64R2-NEXT: or $9, $9, $8
+; MIPS64R2-NEXT: sc $9, 0($3)
+; MIPS64R2-NEXT: beqz $9, .LBB11_1
+; MIPS64R2-NEXT: nop
+; MIPS64R2-NEXT: # %bb.2: # %entry
+; MIPS64R2-NEXT: and $2, $7, $5
+; MIPS64R2-NEXT: srlv $2, $2, $1
+; MIPS64R2-NEXT: seb $2, $2
+; MIPS64R2-NEXT: # %bb.3: # %entry
+; MIPS64R2-NEXT: jr $ra
+; MIPS64R2-NEXT: nop
+;
+; MIPS64R6-LABEL: AtomicSwap8:
+; MIPS64R6: # %bb.0: # %entry
+; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
+; MIPS64R6-NEXT: daddu $1, $1, $25
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
+; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R6-NEXT: daddiu $2, $zero, -4
+; MIPS64R6-NEXT: and $3, $1, $2
+; MIPS64R6-NEXT: andi $1, $1, 3
+; MIPS64R6-NEXT: sll $1, $1, 3
+; MIPS64R6-NEXT: ori $2, $zero, 255
+; MIPS64R6-NEXT: sllv $5, $2, $1
+; MIPS64R6-NEXT: nor $6, $zero, $5
+; MIPS64R6-NEXT: sllv $4, $4, $1
+; MIPS64R6-NEXT: .LBB11_1: # %entry
+; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R6-NEXT: ll $7, 0($3)
+; MIPS64R6-NEXT: and $8, $4, $5
+; MIPS64R6-NEXT: and $9, $7, $6
+; MIPS64R6-NEXT: or $9, $9, $8
+; MIPS64R6-NEXT: sc $9, 0($3)
+; MIPS64R6-NEXT: beqzc $9, .LBB11_1
+; MIPS64R6-NEXT: # %bb.2: # %entry
+; MIPS64R6-NEXT: and $2, $7, $5
+; MIPS64R6-NEXT: srlv $2, $2, $1
+; MIPS64R6-NEXT: seb $2, $2
+; MIPS64R6-NEXT: # %bb.3: # %entry
+; MIPS64R6-NEXT: jrc $ra
+;
+; MIPS64R6O0-LABEL: AtomicSwap8:
+; MIPS64R6O0: # %bb.0: # %entry
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
+; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
+; MIPS64R6O0-NEXT: daddu $1, $1, $25
+; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
+; MIPS64R6O0-NEXT: move $1, $4
+; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2)
+; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
+; MIPS64R6O0-NEXT: and $5, $2, $3
+; MIPS64R6O0-NEXT: andi $2, $2, 3
+; MIPS64R6O0-NEXT: xori $2, $2, 3
+; MIPS64R6O0-NEXT: sll $9, $2, 3
+; MIPS64R6O0-NEXT: ori $2, $zero, 255
+; MIPS64R6O0-NEXT: sllv $7, $2, $9
+; MIPS64R6O0-NEXT: nor $8, $zero, $7
+; MIPS64R6O0-NEXT: sllv $6, $1, $9
+; MIPS64R6O0-NEXT: .LBB11_1: # %entry
+; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R6O0-NEXT: ll $2, 0($5)
+; MIPS64R6O0-NEXT: and $3, $6, $7
+; MIPS64R6O0-NEXT: and $4, $2, $8
+; MIPS64R6O0-NEXT: or $4, $4, $3
+; MIPS64R6O0-NEXT: sc $4, 0($5)
+; MIPS64R6O0-NEXT: beqzc $4, .LBB11_1
+; MIPS64R6O0-NEXT: # %bb.2: # %entry
+; MIPS64R6O0-NEXT: and $1, $2, $7
+; MIPS64R6O0-NEXT: srlv $1, $1, $9
+; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: # %bb.3: # %entry
+; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: # %bb.4: # %entry
+; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
+; MIPS64R6O0-NEXT: jrc $ra
+;
+; MM32-LABEL: AtomicSwap8:
+; MM32: # %bb.0: # %entry
+; MM32-NEXT: lui $2, %hi(_gp_disp)
+; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MM32-NEXT: addu $2, $2, $25
+; MM32-NEXT: lw $1, %got(y)($2)
+; MM32-NEXT: addiu $2, $zero, -4
+; MM32-NEXT: and $3, $1, $2
+; MM32-NEXT: andi $1, $1, 3
+; MM32-NEXT: sll $1, $1, 3
+; MM32-NEXT: ori $2, $zero, 255
+; MM32-NEXT: sllv $5, $2, $1
+; MM32-NEXT: nor $6, $zero, $5
+; MM32-NEXT: sllv $4, $4, $1
+; MM32-NEXT: $BB11_1: # %entry
+; MM32-NEXT: # =>This Inner Loop Header: Depth=1
+; MM32-NEXT: ll $7, 0($3)
+; MM32-NEXT: and $8, $4, $5
+; MM32-NEXT: and $9, $7, $6
+; MM32-NEXT: or $9, $9, $8
+; MM32-NEXT: sc $9, 0($3)
+; MM32-NEXT: beqzc $9, $BB11_1
+; MM32-NEXT: # %bb.2: # %entry
+; MM32-NEXT: and $2, $7, $5
+; MM32-NEXT: srlv $2, $2, $1
+; MM32-NEXT: seb $2, $2
+; MM32-NEXT: # %bb.3: # %entry
+; MM32-NEXT: jrc $ra
+;
+; O1-LABEL: AtomicSwap8:
+; O1: # %bb.0: # %entry
+; O1-NEXT: lui $2, %hi(_gp_disp)
+; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O1-NEXT: addu $1, $2, $25
+; O1-NEXT: lw $1, %got(y)($1)
+; O1-NEXT: addiu $2, $zero, -4
+; O1-NEXT: and $3, $1, $2
+; O1-NEXT: andi $1, $1, 3
+; O1-NEXT: sll $1, $1, 3
+; O1-NEXT: ori $2, $zero, 255
+; O1-NEXT: sllv $5, $2, $1
+; O1-NEXT: nor $6, $zero, $5
+; O1-NEXT: sllv $4, $4, $1
+; O1-NEXT: $BB11_1: # %entry
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: ll $7, 0($3)
+; O1-NEXT: and $8, $4, $5
+; O1-NEXT: and $9, $7, $6
+; O1-NEXT: or $9, $9, $8
+; O1-NEXT: sc $9, 0($3)
+; O1-NEXT: beqz $9, $BB11_1
+; O1-NEXT: nop
+; O1-NEXT: # %bb.2: # %entry
+; O1-NEXT: and $2, $7, $5
+; O1-NEXT: srlv $2, $2, $1
+; O1-NEXT: sll $2, $2, 24
+; O1-NEXT: sra $2, $2, 24
+; O1-NEXT: # %bb.3: # %entry
+; O1-NEXT: jr $ra
+; O1-NEXT: nop
+;
+; O2-LABEL: AtomicSwap8:
+; O2: # %bb.0: # %entry
+; O2-NEXT: lui $2, %hi(_gp_disp)
+; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O2-NEXT: addu $1, $2, $25
+; O2-NEXT: lw $1, %got(y)($1)
+; O2-NEXT: addiu $2, $zero, -4
+; O2-NEXT: and $3, $1, $2
+; O2-NEXT: andi $1, $1, 3
+; O2-NEXT: sll $1, $1, 3
+; O2-NEXT: ori $2, $zero, 255
+; O2-NEXT: sllv $5, $2, $1
+; O2-NEXT: nor $6, $zero, $5
+; O2-NEXT: sllv $4, $4, $1
+; O2-NEXT: $BB11_1: # %entry
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: ll $7, 0($3)
+; O2-NEXT: and $8, $4, $5
+; O2-NEXT: and $9, $7, $6
+; O2-NEXT: or $9, $9, $8
+; O2-NEXT: sc $9, 0($3)
+; O2-NEXT: beqz $9, $BB11_1
+; O2-NEXT: nop
+; O2-NEXT: # %bb.2: # %entry
+; O2-NEXT: and $2, $7, $5
+; O2-NEXT: srlv $2, $2, $1
+; O2-NEXT: sll $2, $2, 24
+; O2-NEXT: sra $2, $2, 24
+; O2-NEXT: # %bb.3: # %entry
+; O2-NEXT: jr $ra
+; O2-NEXT: nop
+;
+; O3-LABEL: AtomicSwap8:
+; O3: # %bb.0: # %entry
+; O3-NEXT: lui $2, %hi(_gp_disp)
+; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O3-NEXT: addu $1, $2, $25
+; O3-NEXT: addiu $2, $zero, -4
+; O3-NEXT: lw $1, %got(y)($1)
+; O3-NEXT: and $3, $1, $2
+; O3-NEXT: andi $1, $1, 3
+; O3-NEXT: ori $2, $zero, 255
+; O3-NEXT: sll $1, $1, 3
+; O3-NEXT: sllv $5, $2, $1
+; O3-NEXT: sllv $4, $4, $1
+; O3-NEXT: nor $6, $zero, $5
+; O3-NEXT: $BB11_1: # %entry
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: ll $7, 0($3)
+; O3-NEXT: and $8, $4, $5
+; O3-NEXT: and $9, $7, $6
+; O3-NEXT: or $9, $9, $8
+; O3-NEXT: sc $9, 0($3)
+; O3-NEXT: beqz $9, $BB11_1
+; O3-NEXT: nop
+; O3-NEXT: # %bb.2: # %entry
+; O3-NEXT: and $2, $7, $5
+; O3-NEXT: srlv $2, $2, $1
+; O3-NEXT: sll $2, $2, 24
+; O3-NEXT: sra $2, $2, 24
+; O3-NEXT: # %bb.3: # %entry
+; O3-NEXT: jr $ra
+; O3-NEXT: nop
+;
+; MIPS32EB-LABEL: AtomicSwap8:
+; MIPS32EB: # %bb.0: # %entry
+; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32EB-NEXT: addu $1, $2, $25
+; MIPS32EB-NEXT: lw $1, %got(y)($1)
+; MIPS32EB-NEXT: addiu $2, $zero, -4
+; MIPS32EB-NEXT: and $3, $1, $2
+; MIPS32EB-NEXT: andi $1, $1, 3
+; MIPS32EB-NEXT: xori $1, $1, 3
+; MIPS32EB-NEXT: sll $1, $1, 3
+; MIPS32EB-NEXT: ori $2, $zero, 255
+; MIPS32EB-NEXT: sllv $5, $2, $1
+; MIPS32EB-NEXT: nor $6, $zero, $5
+; MIPS32EB-NEXT: sllv $4, $4, $1
+; MIPS32EB-NEXT: $BB11_1: # %entry
+; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32EB-NEXT: ll $7, 0($3)
+; MIPS32EB-NEXT: and $8, $4, $5
+; MIPS32EB-NEXT: and $9, $7, $6
+; MIPS32EB-NEXT: or $9, $9, $8
+; MIPS32EB-NEXT: sc $9, 0($3)
+; MIPS32EB-NEXT: beqz $9, $BB11_1
+; MIPS32EB-NEXT: nop
+; MIPS32EB-NEXT: # %bb.2: # %entry
+; MIPS32EB-NEXT: and $2, $7, $5
+; MIPS32EB-NEXT: srlv $2, $2, $1
+; MIPS32EB-NEXT: sll $2, $2, 24
+; MIPS32EB-NEXT: sra $2, $2, 24
+; MIPS32EB-NEXT: # %bb.3: # %entry
+; MIPS32EB-NEXT: jr $ra
+; MIPS32EB-NEXT: nop
+entry:
+ %0 = atomicrmw xchg i8* @y, i8 %newval monotonic
+ ret i8 %0
+}
+
+define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
+; MIPS32-LABEL: AtomicCmpSwap8:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32-NEXT: addu $1, $2, $25
+; MIPS32-NEXT: lw $1, %got(y)($1)
+; MIPS32-NEXT: addiu $2, $zero, -4
+; MIPS32-NEXT: and $3, $1, $2
+; MIPS32-NEXT: andi $1, $1, 3
+; MIPS32-NEXT: sll $1, $1, 3
+; MIPS32-NEXT: ori $2, $zero, 255
+; MIPS32-NEXT: sllv $6, $2, $1
+; MIPS32-NEXT: nor $7, $zero, $6
+; MIPS32-NEXT: andi $2, $4, 255
+; MIPS32-NEXT: sllv $4, $2, $1
+; MIPS32-NEXT: andi $2, $5, 255
+; MIPS32-NEXT: sllv $5, $2, $1
+; MIPS32-NEXT: $BB12_1: # %entry
+; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32-NEXT: ll $8, 0($3)
+; MIPS32-NEXT: and $9, $8, $6
+; MIPS32-NEXT: bne $9, $4, $BB12_3
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: # %bb.2: # %entry
+; MIPS32-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS32-NEXT: and $8, $8, $7
+; MIPS32-NEXT: or $8, $8, $5
+; MIPS32-NEXT: sc $8, 0($3)
+; MIPS32-NEXT: beqz $8, $BB12_1
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: $BB12_3: # %entry
+; MIPS32-NEXT: sync
+; MIPS32-NEXT: srlv $2, $9, $1
+; MIPS32-NEXT: sll $2, $2, 24
+; MIPS32-NEXT: sra $2, $2, 24
+; MIPS32-NEXT: # %bb.4: # %entry
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+;
+; MIPS32O0-LABEL: AtomicCmpSwap8:
+; MIPS32O0: # %bb.0: # %entry
+; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32O0-NEXT: addiu $sp, $sp, -8
+; MIPS32O0-NEXT: addu $3, $2, $25
+; MIPS32O0-NEXT: move $1, $5
+; MIPS32O0-NEXT: move $2, $4
+; MIPS32O0-NEXT: lw $3, %got(y)($3)
+; MIPS32O0-NEXT: addiu $4, $zero, -4
+; MIPS32O0-NEXT: and $4, $3, $4
+; MIPS32O0-NEXT: andi $3, $3, 3
+; MIPS32O0-NEXT: sll $9, $3, 3
+; MIPS32O0-NEXT: ori $3, $zero, 255
+; MIPS32O0-NEXT: sllv $5, $3, $9
+; MIPS32O0-NEXT: nor $7, $zero, $5
+; MIPS32O0-NEXT: andi $2, $2, 255
+; MIPS32O0-NEXT: sllv $6, $2, $9
+; MIPS32O0-NEXT: andi $1, $1, 255
+; MIPS32O0-NEXT: sllv $8, $1, $9
+; MIPS32O0-NEXT: $BB12_1: # %entry
+; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32O0-NEXT: ll $2, 0($4)
+; MIPS32O0-NEXT: and $3, $2, $5
+; MIPS32O0-NEXT: bne $3, $6, $BB12_3
+; MIPS32O0-NEXT: nop
+; MIPS32O0-NEXT: # %bb.2: # %entry
+; MIPS32O0-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS32O0-NEXT: and $2, $2, $7
+; MIPS32O0-NEXT: or $2, $2, $8
+; MIPS32O0-NEXT: sc $2, 0($4)
+; MIPS32O0-NEXT: beqz $2, $BB12_1
+; MIPS32O0-NEXT: nop
+; MIPS32O0-NEXT: $BB12_3: # %entry
+; MIPS32O0-NEXT: sync
+; MIPS32O0-NEXT: srlv $1, $3, $9
+; MIPS32O0-NEXT: sll $1, $1, 24
+; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: # %bb.4: # %entry
+; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: # %bb.5: # %entry
+; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32O0-NEXT: sll $1, $1, 24
+; MIPS32O0-NEXT: sra $2, $1, 24
+; MIPS32O0-NEXT: addiu $sp, $sp, 8
+; MIPS32O0-NEXT: jr $ra
+; MIPS32O0-NEXT: nop
+;
+; MIPS32R2-LABEL: AtomicCmpSwap8:
+; MIPS32R2: # %bb.0: # %entry
+; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R2-NEXT: addu $1, $2, $25
+; MIPS32R2-NEXT: lw $1, %got(y)($1)
+; MIPS32R2-NEXT: addiu $2, $zero, -4
+; MIPS32R2-NEXT: and $3, $1, $2
+; MIPS32R2-NEXT: andi $1, $1, 3
+; MIPS32R2-NEXT: sll $1, $1, 3
+; MIPS32R2-NEXT: ori $2, $zero, 255
+; MIPS32R2-NEXT: sllv $6, $2, $1
+; MIPS32R2-NEXT: nor $7, $zero, $6
+; MIPS32R2-NEXT: andi $2, $4, 255
+; MIPS32R2-NEXT: sllv $4, $2, $1
+; MIPS32R2-NEXT: andi $2, $5, 255
+; MIPS32R2-NEXT: sllv $5, $2, $1
+; MIPS32R2-NEXT: $BB12_1: # %entry
+; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R2-NEXT: ll $8, 0($3)
+; MIPS32R2-NEXT: and $9, $8, $6
+; MIPS32R2-NEXT: bne $9, $4, $BB12_3
+; MIPS32R2-NEXT: nop
+; MIPS32R2-NEXT: # %bb.2: # %entry
+; MIPS32R2-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS32R2-NEXT: and $8, $8, $7
+; MIPS32R2-NEXT: or $8, $8, $5
+; MIPS32R2-NEXT: sc $8, 0($3)
+; MIPS32R2-NEXT: beqz $8, $BB12_1
+; MIPS32R2-NEXT: nop
+; MIPS32R2-NEXT: $BB12_3: # %entry
+; MIPS32R2-NEXT: sync
+; MIPS32R2-NEXT: srlv $2, $9, $1
+; MIPS32R2-NEXT: seb $2, $2
+; MIPS32R2-NEXT: # %bb.4: # %entry
+; MIPS32R2-NEXT: jr $ra
+; MIPS32R2-NEXT: nop
+;
+; MIPS32R6-LABEL: AtomicCmpSwap8:
+; MIPS32R6: # %bb.0: # %entry
+; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R6-NEXT: addu $1, $2, $25
+; MIPS32R6-NEXT: lw $1, %got(y)($1)
+; MIPS32R6-NEXT: addiu $2, $zero, -4
+; MIPS32R6-NEXT: and $3, $1, $2
+; MIPS32R6-NEXT: andi $1, $1, 3
+; MIPS32R6-NEXT: sll $1, $1, 3
+; MIPS32R6-NEXT: ori $2, $zero, 255
+; MIPS32R6-NEXT: sllv $6, $2, $1
+; MIPS32R6-NEXT: nor $7, $zero, $6
+; MIPS32R6-NEXT: andi $2, $4, 255
+; MIPS32R6-NEXT: sllv $4, $2, $1
+; MIPS32R6-NEXT: andi $2, $5, 255
+; MIPS32R6-NEXT: sllv $5, $2, $1
+; MIPS32R6-NEXT: $BB12_1: # %entry
+; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6-NEXT: ll $8, 0($3)
+; MIPS32R6-NEXT: and $9, $8, $6
+; MIPS32R6-NEXT: bnec $9, $4, $BB12_3
+; MIPS32R6-NEXT: # %bb.2: # %entry
+; MIPS32R6-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS32R6-NEXT: and $8, $8, $7
+; MIPS32R6-NEXT: or $8, $8, $5
+; MIPS32R6-NEXT: sc $8, 0($3)
+; MIPS32R6-NEXT: beqzc $8, $BB12_1
+; MIPS32R6-NEXT: $BB12_3: # %entry
+; MIPS32R6-NEXT: sync
+; MIPS32R6-NEXT: srlv $2, $9, $1
+; MIPS32R6-NEXT: seb $2, $2
+; MIPS32R6-NEXT: # %bb.4: # %entry
+; MIPS32R6-NEXT: jrc $ra
+;
+; MIPS32R6O0-LABEL: AtomicCmpSwap8:
+; MIPS32R6O0: # %bb.0: # %entry
+; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
+; MIPS32R6O0-NEXT: addu $3, $2, $25
+; MIPS32R6O0-NEXT: move $1, $5
+; MIPS32R6O0-NEXT: move $2, $4
+; MIPS32R6O0-NEXT: # kill: def $a1 killed $at
+; MIPS32R6O0-NEXT: # kill: def $a0 killed $v0
+; MIPS32R6O0-NEXT: lw $3, %got(y)($3)
+; MIPS32R6O0-NEXT: addiu $4, $zero, -4
+; MIPS32R6O0-NEXT: and $4, $3, $4
+; MIPS32R6O0-NEXT: andi $3, $3, 3
+; MIPS32R6O0-NEXT: sll $9, $3, 3
+; MIPS32R6O0-NEXT: ori $3, $zero, 255
+; MIPS32R6O0-NEXT: sllv $5, $3, $9
+; MIPS32R6O0-NEXT: nor $7, $zero, $5
+; MIPS32R6O0-NEXT: andi $2, $2, 255
+; MIPS32R6O0-NEXT: sllv $6, $2, $9
+; MIPS32R6O0-NEXT: andi $1, $1, 255
+; MIPS32R6O0-NEXT: sllv $8, $1, $9
+; MIPS32R6O0-NEXT: $BB12_1: # %entry
+; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6O0-NEXT: ll $2, 0($4)
+; MIPS32R6O0-NEXT: and $3, $2, $5
+; MIPS32R6O0-NEXT: bnec $3, $6, $BB12_3
+; MIPS32R6O0-NEXT: # %bb.2: # %entry
+; MIPS32R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS32R6O0-NEXT: and $2, $2, $7
+; MIPS32R6O0-NEXT: or $2, $2, $8
+; MIPS32R6O0-NEXT: sc $2, 0($4)
+; MIPS32R6O0-NEXT: beqzc $2, $BB12_1
+; MIPS32R6O0-NEXT: $BB12_3: # %entry
+; MIPS32R6O0-NEXT: sync
+; MIPS32R6O0-NEXT: srlv $1, $3, $9
+; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: # %bb.4: # %entry
+; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: # %bb.5: # %entry
+; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
+; MIPS32R6O0-NEXT: jrc $ra
+;
+; MIPS4-LABEL: AtomicCmpSwap8:
+; MIPS4: # %bb.0: # %entry
+; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS4-NEXT: daddu $1, $1, $25
+; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS4-NEXT: ld $1, %got_disp(y)($1)
+; MIPS4-NEXT: daddiu $2, $zero, -4
+; MIPS4-NEXT: and $3, $1, $2
+; MIPS4-NEXT: andi $1, $1, 3
+; MIPS4-NEXT: sll $1, $1, 3
+; MIPS4-NEXT: ori $2, $zero, 255
+; MIPS4-NEXT: sllv $6, $2, $1
+; MIPS4-NEXT: nor $7, $zero, $6
+; MIPS4-NEXT: andi $2, $4, 255
+; MIPS4-NEXT: sllv $4, $2, $1
+; MIPS4-NEXT: andi $2, $5, 255
+; MIPS4-NEXT: sllv $5, $2, $1
+; MIPS4-NEXT: .LBB12_1: # %entry
+; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS4-NEXT: ll $8, 0($3)
+; MIPS4-NEXT: and $9, $8, $6
+; MIPS4-NEXT: bne $9, $4, .LBB12_3
+; MIPS4-NEXT: nop
+; MIPS4-NEXT: # %bb.2: # %entry
+; MIPS4-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS4-NEXT: and $8, $8, $7
+; MIPS4-NEXT: or $8, $8, $5
+; MIPS4-NEXT: sc $8, 0($3)
+; MIPS4-NEXT: beqz $8, .LBB12_1
+; MIPS4-NEXT: nop
+; MIPS4-NEXT: .LBB12_3: # %entry
+; MIPS4-NEXT: sync
+; MIPS4-NEXT: srlv $2, $9, $1
+; MIPS4-NEXT: sll $2, $2, 24
+; MIPS4-NEXT: sra $2, $2, 24
+; MIPS4-NEXT: # %bb.4: # %entry
+; MIPS4-NEXT: jr $ra
+; MIPS4-NEXT: nop
+;
+; MIPS64-LABEL: AtomicCmpSwap8:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS64-NEXT: daddu $1, $1, $25
+; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS64-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64-NEXT: daddiu $2, $zero, -4
+; MIPS64-NEXT: and $3, $1, $2
+; MIPS64-NEXT: andi $1, $1, 3
+; MIPS64-NEXT: sll $1, $1, 3
+; MIPS64-NEXT: ori $2, $zero, 255
+; MIPS64-NEXT: sllv $6, $2, $1
+; MIPS64-NEXT: nor $7, $zero, $6
+; MIPS64-NEXT: andi $2, $4, 255
+; MIPS64-NEXT: sllv $4, $2, $1
+; MIPS64-NEXT: andi $2, $5, 255
+; MIPS64-NEXT: sllv $5, $2, $1
+; MIPS64-NEXT: .LBB12_1: # %entry
+; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64-NEXT: ll $8, 0($3)
+; MIPS64-NEXT: and $9, $8, $6
+; MIPS64-NEXT: bne $9, $4, .LBB12_3
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: # %bb.2: # %entry
+; MIPS64-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS64-NEXT: and $8, $8, $7
+; MIPS64-NEXT: or $8, $8, $5
+; MIPS64-NEXT: sc $8, 0($3)
+; MIPS64-NEXT: beqz $8, .LBB12_1
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: .LBB12_3: # %entry
+; MIPS64-NEXT: sync
+; MIPS64-NEXT: srlv $2, $9, $1
+; MIPS64-NEXT: sll $2, $2, 24
+; MIPS64-NEXT: sra $2, $2, 24
+; MIPS64-NEXT: # %bb.4: # %entry
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: nop
+;
+; MIPS64R2-LABEL: AtomicCmpSwap8:
+; MIPS64R2: # %bb.0: # %entry
+; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS64R2-NEXT: daddu $1, $1, $25
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R2-NEXT: daddiu $2, $zero, -4
+; MIPS64R2-NEXT: and $3, $1, $2
+; MIPS64R2-NEXT: andi $1, $1, 3
+; MIPS64R2-NEXT: sll $1, $1, 3
+; MIPS64R2-NEXT: ori $2, $zero, 255
+; MIPS64R2-NEXT: sllv $6, $2, $1
+; MIPS64R2-NEXT: nor $7, $zero, $6
+; MIPS64R2-NEXT: andi $2, $4, 255
+; MIPS64R2-NEXT: sllv $4, $2, $1
+; MIPS64R2-NEXT: andi $2, $5, 255
+; MIPS64R2-NEXT: sllv $5, $2, $1
+; MIPS64R2-NEXT: .LBB12_1: # %entry
+; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R2-NEXT: ll $8, 0($3)
+; MIPS64R2-NEXT: and $9, $8, $6
+; MIPS64R2-NEXT: bne $9, $4, .LBB12_3
+; MIPS64R2-NEXT: nop
+; MIPS64R2-NEXT: # %bb.2: # %entry
+; MIPS64R2-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS64R2-NEXT: and $8, $8, $7
+; MIPS64R2-NEXT: or $8, $8, $5
+; MIPS64R2-NEXT: sc $8, 0($3)
+; MIPS64R2-NEXT: beqz $8, .LBB12_1
+; MIPS64R2-NEXT: nop
+; MIPS64R2-NEXT: .LBB12_3: # %entry
+; MIPS64R2-NEXT: sync
+; MIPS64R2-NEXT: srlv $2, $9, $1
+; MIPS64R2-NEXT: seb $2, $2
+; MIPS64R2-NEXT: # %bb.4: # %entry
+; MIPS64R2-NEXT: jr $ra
+; MIPS64R2-NEXT: nop
+;
+; MIPS64R6-LABEL: AtomicCmpSwap8:
+; MIPS64R6: # %bb.0: # %entry
+; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS64R6-NEXT: daddu $1, $1, $25
+; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
+; MIPS64R6-NEXT: daddiu $2, $zero, -4
+; MIPS64R6-NEXT: and $3, $1, $2
+; MIPS64R6-NEXT: andi $1, $1, 3
+; MIPS64R6-NEXT: sll $1, $1, 3
+; MIPS64R6-NEXT: ori $2, $zero, 255
+; MIPS64R6-NEXT: sllv $6, $2, $1
+; MIPS64R6-NEXT: nor $7, $zero, $6
+; MIPS64R6-NEXT: andi $2, $4, 255
+; MIPS64R6-NEXT: sllv $4, $2, $1
+; MIPS64R6-NEXT: andi $2, $5, 255
+; MIPS64R6-NEXT: sllv $5, $2, $1
+; MIPS64R6-NEXT: .LBB12_1: # %entry
+; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R6-NEXT: ll $8, 0($3)
+; MIPS64R6-NEXT: and $9, $8, $6
+; MIPS64R6-NEXT: bnec $9, $4, .LBB12_3
+; MIPS64R6-NEXT: # %bb.2: # %entry
+; MIPS64R6-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS64R6-NEXT: and $8, $8, $7
+; MIPS64R6-NEXT: or $8, $8, $5
+; MIPS64R6-NEXT: sc $8, 0($3)
+; MIPS64R6-NEXT: beqzc $8, .LBB12_1
+; MIPS64R6-NEXT: .LBB12_3: # %entry
+; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: srlv $2, $9, $1
+; MIPS64R6-NEXT: seb $2, $2
+; MIPS64R6-NEXT: # %bb.4: # %entry
+; MIPS64R6-NEXT: jrc $ra
+;
+; MIPS64R6O0-LABEL: AtomicCmpSwap8:
+; MIPS64R6O0: # %bb.0: # %entry
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
+; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS64R6O0-NEXT: daddu $1, $1, $25
+; MIPS64R6O0-NEXT: daddiu $3, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
+; MIPS64R6O0-NEXT: move $1, $5
+; MIPS64R6O0-NEXT: move $2, $4
+; MIPS64R6O0-NEXT: ld $3, %got_disp(y)($3)
+; MIPS64R6O0-NEXT: daddiu $4, $zero, -4
+; MIPS64R6O0-NEXT: and $4, $3, $4
+; MIPS64R6O0-NEXT: andi $3, $3, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $9, $3, 3
+; MIPS64R6O0-NEXT: ori $3, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $3, $9
+; MIPS64R6O0-NEXT: nor $7, $zero, $5
+; MIPS64R6O0-NEXT: andi $2, $2, 255
+; MIPS64R6O0-NEXT: sllv $6, $2, $9
+; MIPS64R6O0-NEXT: andi $1, $1, 255
+; MIPS64R6O0-NEXT: sllv $8, $1, $9
+; MIPS64R6O0-NEXT: .LBB12_1: # %entry
+; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R6O0-NEXT: ll $2, 0($4)
+; MIPS64R6O0-NEXT: and $3, $2, $5
+; MIPS64R6O0-NEXT: bnec $3, $6, .LBB12_3
+; MIPS64R6O0-NEXT: # %bb.2: # %entry
+; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS64R6O0-NEXT: and $2, $2, $7
+; MIPS64R6O0-NEXT: or $2, $2, $8
+; MIPS64R6O0-NEXT: sc $2, 0($4)
+; MIPS64R6O0-NEXT: beqzc $2, .LBB12_1
+; MIPS64R6O0-NEXT: .LBB12_3: # %entry
+; MIPS64R6O0-NEXT: sync
+; MIPS64R6O0-NEXT: srlv $1, $3, $9
+; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: # %bb.4: # %entry
+; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: # %bb.5: # %entry
+; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
+; MIPS64R6O0-NEXT: jrc $ra
+;
+; MM32-LABEL: AtomicCmpSwap8:
+; MM32: # %bb.0: # %entry
+; MM32-NEXT: lui $2, %hi(_gp_disp)
+; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MM32-NEXT: addu $2, $2, $25
+; MM32-NEXT: lw $1, %got(y)($2)
+; MM32-NEXT: addiu $2, $zero, -4
+; MM32-NEXT: and $3, $1, $2
+; MM32-NEXT: andi $1, $1, 3
+; MM32-NEXT: sll $1, $1, 3
+; MM32-NEXT: ori $2, $zero, 255
+; MM32-NEXT: sllv $6, $2, $1
+; MM32-NEXT: nor $7, $zero, $6
+; MM32-NEXT: andi $2, $4, 255
+; MM32-NEXT: sllv $4, $2, $1
+; MM32-NEXT: andi $2, $5, 255
+; MM32-NEXT: sllv $5, $2, $1
+; MM32-NEXT: $BB12_1: # %entry
+; MM32-NEXT: # =>This Inner Loop Header: Depth=1
+; MM32-NEXT: ll $8, 0($3)
+; MM32-NEXT: and $9, $8, $6
+; MM32-NEXT: bne $9, $4, $BB12_3
+; MM32-NEXT: nop
+; MM32-NEXT: # %bb.2: # %entry
+; MM32-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MM32-NEXT: and $8, $8, $7
+; MM32-NEXT: or $8, $8, $5
+; MM32-NEXT: sc $8, 0($3)
+; MM32-NEXT: beqzc $8, $BB12_1
+; MM32-NEXT: $BB12_3: # %entry
+; MM32-NEXT: sync
+; MM32-NEXT: srlv $2, $9, $1
+; MM32-NEXT: seb $2, $2
+; MM32-NEXT: # %bb.4: # %entry
+; MM32-NEXT: jrc $ra
+;
+; O1-LABEL: AtomicCmpSwap8:
+; O1: # %bb.0: # %entry
+; O1-NEXT: lui $2, %hi(_gp_disp)
+; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O1-NEXT: addu $1, $2, $25
+; O1-NEXT: lw $1, %got(y)($1)
+; O1-NEXT: addiu $2, $zero, -4
+; O1-NEXT: and $3, $1, $2
+; O1-NEXT: andi $1, $1, 3
+; O1-NEXT: sll $1, $1, 3
+; O1-NEXT: ori $2, $zero, 255
+; O1-NEXT: sllv $6, $2, $1
+; O1-NEXT: nor $7, $zero, $6
+; O1-NEXT: andi $2, $4, 255
+; O1-NEXT: sllv $4, $2, $1
+; O1-NEXT: andi $2, $5, 255
+; O1-NEXT: sllv $5, $2, $1
+; O1-NEXT: $BB12_1: # %entry
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: ll $8, 0($3)
+; O1-NEXT: and $9, $8, $6
+; O1-NEXT: bne $9, $4, $BB12_3
+; O1-NEXT: nop
+; O1-NEXT: # %bb.2: # %entry
+; O1-NEXT: # in Loop: Header=BB12_1 Depth=1
+; O1-NEXT: and $8, $8, $7
+; O1-NEXT: or $8, $8, $5
+; O1-NEXT: sc $8, 0($3)
+; O1-NEXT: beqz $8, $BB12_1
+; O1-NEXT: nop
+; O1-NEXT: $BB12_3: # %entry
+; O1-NEXT: sync
+; O1-NEXT: srlv $2, $9, $1
+; O1-NEXT: sll $2, $2, 24
+; O1-NEXT: sra $2, $2, 24
+; O1-NEXT: # %bb.4: # %entry
+; O1-NEXT: jr $ra
+; O1-NEXT: nop
+;
+; O2-LABEL: AtomicCmpSwap8:
+; O2: # %bb.0: # %entry
+; O2-NEXT: lui $2, %hi(_gp_disp)
+; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O2-NEXT: addu $1, $2, $25
+; O2-NEXT: lw $1, %got(y)($1)
+; O2-NEXT: addiu $2, $zero, -4
+; O2-NEXT: and $3, $1, $2
+; O2-NEXT: andi $1, $1, 3
+; O2-NEXT: sll $1, $1, 3
+; O2-NEXT: ori $2, $zero, 255
+; O2-NEXT: sllv $6, $2, $1
+; O2-NEXT: nor $7, $zero, $6
+; O2-NEXT: andi $2, $4, 255
+; O2-NEXT: sllv $4, $2, $1
+; O2-NEXT: andi $2, $5, 255
+; O2-NEXT: sllv $5, $2, $1
+; O2-NEXT: $BB12_1: # %entry
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: ll $8, 0($3)
+; O2-NEXT: and $9, $8, $6
+; O2-NEXT: bne $9, $4, $BB12_3
+; O2-NEXT: nop
+; O2-NEXT: # %bb.2: # %entry
+; O2-NEXT: # in Loop: Header=BB12_1 Depth=1
+; O2-NEXT: and $8, $8, $7
+; O2-NEXT: or $8, $8, $5
+; O2-NEXT: sc $8, 0($3)
+; O2-NEXT: beqz $8, $BB12_1
+; O2-NEXT: nop
+; O2-NEXT: $BB12_3: # %entry
+; O2-NEXT: sync
+; O2-NEXT: srlv $2, $9, $1
+; O2-NEXT: sll $2, $2, 24
+; O2-NEXT: sra $2, $2, 24
+; O2-NEXT: # %bb.4: # %entry
+; O2-NEXT: jr $ra
+; O2-NEXT: nop
+;
+; O3-LABEL: AtomicCmpSwap8:
+; O3: # %bb.0: # %entry
+; O3-NEXT: lui $2, %hi(_gp_disp)
+; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
+; O3-NEXT: addu $1, $2, $25
+; O3-NEXT: addiu $2, $zero, -4
+; O3-NEXT: lw $1, %got(y)($1)
+; O3-NEXT: and $3, $1, $2
+; O3-NEXT: andi $1, $1, 3
+; O3-NEXT: ori $2, $zero, 255
+; O3-NEXT: sll $1, $1, 3
+; O3-NEXT: sllv $6, $2, $1
+; O3-NEXT: andi $2, $4, 255
+; O3-NEXT: sllv $4, $2, $1
+; O3-NEXT: andi $2, $5, 255
+; O3-NEXT: nor $7, $zero, $6
+; O3-NEXT: sllv $5, $2, $1
+; O3-NEXT: $BB12_1: # %entry
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: ll $8, 0($3)
+; O3-NEXT: and $9, $8, $6
+; O3-NEXT: bne $9, $4, $BB12_3
+; O3-NEXT: nop
+; O3-NEXT: # %bb.2: # %entry
+; O3-NEXT: # in Loop: Header=BB12_1 Depth=1
+; O3-NEXT: and $8, $8, $7
+; O3-NEXT: or $8, $8, $5
+; O3-NEXT: sc $8, 0($3)
+; O3-NEXT: beqz $8, $BB12_1
+; O3-NEXT: nop
+; O3-NEXT: $BB12_3: # %entry
+; O3-NEXT: sync
+; O3-NEXT: srlv $2, $9, $1
+; O3-NEXT: sll $2, $2, 24
+; O3-NEXT: sra $2, $2, 24
+; O3-NEXT: # %bb.4: # %entry
+; O3-NEXT: jr $ra
+; O3-NEXT: nop
+;
+; MIPS32EB-LABEL: AtomicCmpSwap8:
+; MIPS32EB: # %bb.0: # %entry
+; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32EB-NEXT: addu $1, $2, $25
+; MIPS32EB-NEXT: lw $1, %got(y)($1)
+; MIPS32EB-NEXT: addiu $2, $zero, -4
+; MIPS32EB-NEXT: and $3, $1, $2
+; MIPS32EB-NEXT: andi $1, $1, 3
+; MIPS32EB-NEXT: xori $1, $1, 3
+; MIPS32EB-NEXT: sll $1, $1, 3
+; MIPS32EB-NEXT: ori $2, $zero, 255
+; MIPS32EB-NEXT: sllv $6, $2, $1
+; MIPS32EB-NEXT: nor $7, $zero, $6
+; MIPS32EB-NEXT: andi $2, $4, 255
+; MIPS32EB-NEXT: sllv $4, $2, $1
+; MIPS32EB-NEXT: andi $2, $5, 255
+; MIPS32EB-NEXT: sllv $5, $2, $1
+; MIPS32EB-NEXT: $BB12_1: # %entry
+; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32EB-NEXT: ll $8, 0($3)
+; MIPS32EB-NEXT: and $9, $8, $6
+; MIPS32EB-NEXT: bne $9, $4, $BB12_3
+; MIPS32EB-NEXT: nop
+; MIPS32EB-NEXT: # %bb.2: # %entry
+; MIPS32EB-NEXT: # in Loop: Header=BB12_1 Depth=1
+; MIPS32EB-NEXT: and $8, $8, $7
+; MIPS32EB-NEXT: or $8, $8, $5
+; MIPS32EB-NEXT: sc $8, 0($3)
+; MIPS32EB-NEXT: beqz $8, $BB12_1
+; MIPS32EB-NEXT: nop
+; MIPS32EB-NEXT: $BB12_3: # %entry
+; MIPS32EB-NEXT: sync
+; MIPS32EB-NEXT: srlv $2, $9, $1
+; MIPS32EB-NEXT: sll $2, $2, 24
+; MIPS32EB-NEXT: sra $2, $2, 24
+; MIPS32EB-NEXT: # %bb.4: # %entry
+; MIPS32EB-NEXT: jr $ra
+; MIPS32EB-NEXT: nop
+entry:
+ %pair0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic monotonic
+ %0 = extractvalue { i8, i1 } %pair0, 0
+ ret i8 %0
+}
+
+define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) nounwind {
+; MIPS32-LABEL: AtomicCmpSwapRes8:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: addiu $1, $zero, -4
+; MIPS32-NEXT: and $2, $4, $1
+; MIPS32-NEXT: andi $1, $4, 3
+; MIPS32-NEXT: sll $3, $1, 3
+; MIPS32-NEXT: ori $1, $zero, 255
+; MIPS32-NEXT: sllv $4, $1, $3
+; MIPS32-NEXT: nor $7, $zero, $4
+; MIPS32-NEXT: andi $1, $5, 255
+; MIPS32-NEXT: sllv $8, $1, $3
+; MIPS32-NEXT: andi $1, $6, 255
+; MIPS32-NEXT: sllv $6, $1, $3
+; MIPS32-NEXT: $BB13_1: # %entry
+; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32-NEXT: ll $9, 0($2)
+; MIPS32-NEXT: and $10, $9, $4
+; MIPS32-NEXT: bne $10, $8, $BB13_3
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: # %bb.2: # %entry
+; MIPS32-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS32-NEXT: and $9, $9, $7
+; MIPS32-NEXT: or $9, $9, $6
+; MIPS32-NEXT: sc $9, 0($2)
+; MIPS32-NEXT: beqz $9, $BB13_1
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: $BB13_3: # %entry
+; MIPS32-NEXT: sync
+; MIPS32-NEXT: srlv $1, $10, $3
+; MIPS32-NEXT: sll $1, $1, 24
+; MIPS32-NEXT: sra $1, $1, 24
+; MIPS32-NEXT: # %bb.4: # %entry
+; MIPS32-NEXT: xor $1, $1, $5
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: sltiu $2, $1, 1
+;
+; MIPS32O0-LABEL: AtomicCmpSwapRes8:
+; MIPS32O0: # %bb.0: # %entry
+; MIPS32O0-NEXT: addiu $sp, $sp, -8
+; MIPS32O0-NEXT: move $1, $6
+; MIPS32O0-NEXT: move $2, $5
+; MIPS32O0-NEXT: move $3, $4
+; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: addiu $4, $zero, -4
+; MIPS32O0-NEXT: and $4, $3, $4
+; MIPS32O0-NEXT: andi $3, $3, 3
+; MIPS32O0-NEXT: sll $9, $3, 3
+; MIPS32O0-NEXT: ori $3, $zero, 255
+; MIPS32O0-NEXT: sllv $5, $3, $9
+; MIPS32O0-NEXT: nor $7, $zero, $5
+; MIPS32O0-NEXT: andi $2, $2, 255
+; MIPS32O0-NEXT: sllv $6, $2, $9
+; MIPS32O0-NEXT: andi $1, $1, 255
+; MIPS32O0-NEXT: sllv $8, $1, $9
+; MIPS32O0-NEXT: $BB13_1: # %entry
+; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32O0-NEXT: ll $2, 0($4)
+; MIPS32O0-NEXT: and $3, $2, $5
+; MIPS32O0-NEXT: bne $3, $6, $BB13_3
+; MIPS32O0-NEXT: nop
+; MIPS32O0-NEXT: # %bb.2: # %entry
+; MIPS32O0-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS32O0-NEXT: and $2, $2, $7
+; MIPS32O0-NEXT: or $2, $2, $8
+; MIPS32O0-NEXT: sc $2, 0($4)
+; MIPS32O0-NEXT: beqz $2, $BB13_1
+; MIPS32O0-NEXT: nop
+; MIPS32O0-NEXT: $BB13_3: # %entry
+; MIPS32O0-NEXT: sync
+; MIPS32O0-NEXT: srlv $1, $3, $9
+; MIPS32O0-NEXT: sll $1, $1, 24
+; MIPS32O0-NEXT: sra $1, $1, 24
+; MIPS32O0-NEXT: # %bb.4: # %entry
+; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: # %bb.5: # %entry
+; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS32O0-NEXT: sll $2, $2, 24
+; MIPS32O0-NEXT: sra $2, $2, 24
+; MIPS32O0-NEXT: xor $1, $1, $2
+; MIPS32O0-NEXT: sltiu $2, $1, 1
+; MIPS32O0-NEXT: addiu $sp, $sp, 8
+; MIPS32O0-NEXT: jr $ra
+; MIPS32O0-NEXT: nop
+;
+; MIPS32R2-LABEL: AtomicCmpSwapRes8:
+; MIPS32R2: # %bb.0: # %entry
+; MIPS32R2-NEXT: addiu $1, $zero, -4
+; MIPS32R2-NEXT: and $2, $4, $1
+; MIPS32R2-NEXT: andi $1, $4, 3
+; MIPS32R2-NEXT: sll $3, $1, 3
+; MIPS32R2-NEXT: ori $1, $zero, 255
+; MIPS32R2-NEXT: sllv $4, $1, $3
+; MIPS32R2-NEXT: nor $7, $zero, $4
+; MIPS32R2-NEXT: andi $1, $5, 255
+; MIPS32R2-NEXT: sllv $8, $1, $3
+; MIPS32R2-NEXT: andi $1, $6, 255
+; MIPS32R2-NEXT: sllv $6, $1, $3
+; MIPS32R2-NEXT: $BB13_1: # %entry
+; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R2-NEXT: ll $9, 0($2)
+; MIPS32R2-NEXT: and $10, $9, $4
+; MIPS32R2-NEXT: bne $10, $8, $BB13_3
+; MIPS32R2-NEXT: nop
+; MIPS32R2-NEXT: # %bb.2: # %entry
+; MIPS32R2-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS32R2-NEXT: and $9, $9, $7
+; MIPS32R2-NEXT: or $9, $9, $6
+; MIPS32R2-NEXT: sc $9, 0($2)
+; MIPS32R2-NEXT: beqz $9, $BB13_1
+; MIPS32R2-NEXT: nop
+; MIPS32R2-NEXT: $BB13_3: # %entry
+; MIPS32R2-NEXT: sync
+; MIPS32R2-NEXT: srlv $1, $10, $3
+; MIPS32R2-NEXT: seb $1, $1
+; MIPS32R2-NEXT: # %bb.4: # %entry
+; MIPS32R2-NEXT: xor $1, $1, $5
+; MIPS32R2-NEXT: jr $ra
+; MIPS32R2-NEXT: sltiu $2, $1, 1
+;
+; MIPS32R6-LABEL: AtomicCmpSwapRes8:
+; MIPS32R6: # %bb.0: # %entry
+; MIPS32R6-NEXT: addiu $1, $zero, -4
+; MIPS32R6-NEXT: and $2, $4, $1
+; MIPS32R6-NEXT: andi $1, $4, 3
+; MIPS32R6-NEXT: sll $3, $1, 3
+; MIPS32R6-NEXT: ori $1, $zero, 255
+; MIPS32R6-NEXT: sllv $4, $1, $3
+; MIPS32R6-NEXT: nor $7, $zero, $4
+; MIPS32R6-NEXT: andi $1, $5, 255
+; MIPS32R6-NEXT: sllv $8, $1, $3
+; MIPS32R6-NEXT: andi $1, $6, 255
+; MIPS32R6-NEXT: sllv $6, $1, $3
+; MIPS32R6-NEXT: $BB13_1: # %entry
+; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6-NEXT: ll $9, 0($2)
+; MIPS32R6-NEXT: and $10, $9, $4
+; MIPS32R6-NEXT: bnec $10, $8, $BB13_3
+; MIPS32R6-NEXT: # %bb.2: # %entry
+; MIPS32R6-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS32R6-NEXT: and $9, $9, $7
+; MIPS32R6-NEXT: or $9, $9, $6
+; MIPS32R6-NEXT: sc $9, 0($2)
+; MIPS32R6-NEXT: beqzc $9, $BB13_1
+; MIPS32R6-NEXT: $BB13_3: # %entry
+; MIPS32R6-NEXT: sync
+; MIPS32R6-NEXT: srlv $1, $10, $3
+; MIPS32R6-NEXT: seb $1, $1
+; MIPS32R6-NEXT: # %bb.4: # %entry
+; MIPS32R6-NEXT: xor $1, $1, $5
+; MIPS32R6-NEXT: jr $ra
+; MIPS32R6-NEXT: sltiu $2, $1, 1
+;
+; MIPS32R6O0-LABEL: AtomicCmpSwapRes8:
+; MIPS32R6O0: # %bb.0: # %entry
+; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
+; MIPS32R6O0-NEXT: move $1, $6
+; MIPS32R6O0-NEXT: move $2, $5
+; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: move $3, $4
+; MIPS32R6O0-NEXT: # kill: def $a2 killed $at
+; MIPS32R6O0-NEXT: # kill: def $a1 killed $v0
+; MIPS32R6O0-NEXT: addiu $4, $zero, -4
+; MIPS32R6O0-NEXT: and $4, $3, $4
+; MIPS32R6O0-NEXT: andi $3, $3, 3
+; MIPS32R6O0-NEXT: sll $9, $3, 3
+; MIPS32R6O0-NEXT: ori $3, $zero, 255
+; MIPS32R6O0-NEXT: sllv $5, $3, $9
+; MIPS32R6O0-NEXT: nor $7, $zero, $5
+; MIPS32R6O0-NEXT: andi $2, $2, 255
+; MIPS32R6O0-NEXT: sllv $6, $2, $9
+; MIPS32R6O0-NEXT: andi $1, $1, 255
+; MIPS32R6O0-NEXT: sllv $8, $1, $9
+; MIPS32R6O0-NEXT: $BB13_1: # %entry
+; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6O0-NEXT: ll $2, 0($4)
+; MIPS32R6O0-NEXT: and $3, $2, $5
+; MIPS32R6O0-NEXT: bnec $3, $6, $BB13_3
+; MIPS32R6O0-NEXT: # %bb.2: # %entry
+; MIPS32R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS32R6O0-NEXT: and $2, $2, $7
+; MIPS32R6O0-NEXT: or $2, $2, $8
+; MIPS32R6O0-NEXT: sc $2, 0($4)
+; MIPS32R6O0-NEXT: beqzc $2, $BB13_1
+; MIPS32R6O0-NEXT: $BB13_3: # %entry
+; MIPS32R6O0-NEXT: sync
+; MIPS32R6O0-NEXT: srlv $1, $3, $9
+; MIPS32R6O0-NEXT: seb $1, $1
+; MIPS32R6O0-NEXT: # %bb.4: # %entry
+; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: # %bb.5: # %entry
+; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: xor $1, $1, $2
+; MIPS32R6O0-NEXT: sltiu $2, $1, 1
+; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
+; MIPS32R6O0-NEXT: jrc $ra
+;
+; MIPS4-LABEL: AtomicCmpSwapRes8:
+; MIPS4: # %bb.0: # %entry
+; MIPS4-NEXT: daddiu $1, $zero, -4
+; MIPS4-NEXT: and $2, $4, $1
+; MIPS4-NEXT: andi $1, $4, 3
+; MIPS4-NEXT: sll $3, $1, 3
+; MIPS4-NEXT: ori $1, $zero, 255
+; MIPS4-NEXT: sllv $4, $1, $3
+; MIPS4-NEXT: nor $7, $zero, $4
+; MIPS4-NEXT: andi $1, $5, 255
+; MIPS4-NEXT: sllv $8, $1, $3
+; MIPS4-NEXT: andi $1, $6, 255
+; MIPS4-NEXT: sllv $6, $1, $3
+; MIPS4-NEXT: .LBB13_1: # %entry
+; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS4-NEXT: ll $9, 0($2)
+; MIPS4-NEXT: and $10, $9, $4
+; MIPS4-NEXT: bne $10, $8, .LBB13_3
+; MIPS4-NEXT: nop
+; MIPS4-NEXT: # %bb.2: # %entry
+; MIPS4-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS4-NEXT: and $9, $9, $7
+; MIPS4-NEXT: or $9, $9, $6
+; MIPS4-NEXT: sc $9, 0($2)
+; MIPS4-NEXT: beqz $9, .LBB13_1
+; MIPS4-NEXT: nop
+; MIPS4-NEXT: .LBB13_3: # %entry
+; MIPS4-NEXT: sync
+; MIPS4-NEXT: srlv $1, $10, $3
+; MIPS4-NEXT: sll $1, $1, 24
+; MIPS4-NEXT: sra $1, $1, 24
+; MIPS4-NEXT: # %bb.4: # %entry
+; MIPS4-NEXT: xor $1, $1, $5
+; MIPS4-NEXT: jr $ra
+; MIPS4-NEXT: sltiu $2, $1, 1
+;
+; MIPS64-LABEL: AtomicCmpSwapRes8:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: daddiu $1, $zero, -4
+; MIPS64-NEXT: and $2, $4, $1
+; MIPS64-NEXT: andi $1, $4, 3
+; MIPS64-NEXT: sll $3, $1, 3
+; MIPS64-NEXT: ori $1, $zero, 255
+; MIPS64-NEXT: sllv $4, $1, $3
+; MIPS64-NEXT: nor $7, $zero, $4
+; MIPS64-NEXT: andi $1, $5, 255
+; MIPS64-NEXT: sllv $8, $1, $3
+; MIPS64-NEXT: andi $1, $6, 255
+; MIPS64-NEXT: sllv $6, $1, $3
+; MIPS64-NEXT: .LBB13_1: # %entry
+; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64-NEXT: ll $9, 0($2)
+; MIPS64-NEXT: and $10, $9, $4
+; MIPS64-NEXT: bne $10, $8, .LBB13_3
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: # %bb.2: # %entry
+; MIPS64-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS64-NEXT: and $9, $9, $7
+; MIPS64-NEXT: or $9, $9, $6
+; MIPS64-NEXT: sc $9, 0($2)
+; MIPS64-NEXT: beqz $9, .LBB13_1
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: .LBB13_3: # %entry
+; MIPS64-NEXT: sync
+; MIPS64-NEXT: srlv $1, $10, $3
+; MIPS64-NEXT: sll $1, $1, 24
+; MIPS64-NEXT: sra $1, $1, 24
+; MIPS64-NEXT: # %bb.4: # %entry
+; MIPS64-NEXT: xor $1, $1, $5
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: sltiu $2, $1, 1
+;
+; MIPS64R2-LABEL: AtomicCmpSwapRes8:
+; MIPS64R2: # %bb.0: # %entry
+; MIPS64R2-NEXT: daddiu $1, $zero, -4
+; MIPS64R2-NEXT: and $2, $4, $1
+; MIPS64R2-NEXT: andi $1, $4, 3
+; MIPS64R2-NEXT: sll $3, $1, 3
+; MIPS64R2-NEXT: ori $1, $zero, 255
+; MIPS64R2-NEXT: sllv $4, $1, $3
+; MIPS64R2-NEXT: nor $7, $zero, $4
+; MIPS64R2-NEXT: andi $1, $5, 255
+; MIPS64R2-NEXT: sllv $8, $1, $3
+; MIPS64R2-NEXT: andi $1, $6, 255
+; MIPS64R2-NEXT: sllv $6, $1, $3
+; MIPS64R2-NEXT: .LBB13_1: # %entry
+; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R2-NEXT: ll $9, 0($2)
+; MIPS64R2-NEXT: and $10, $9, $4
+; MIPS64R2-NEXT: bne $10, $8, .LBB13_3
+; MIPS64R2-NEXT: nop
+; MIPS64R2-NEXT: # %bb.2: # %entry
+; MIPS64R2-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS64R2-NEXT: and $9, $9, $7
+; MIPS64R2-NEXT: or $9, $9, $6
+; MIPS64R2-NEXT: sc $9, 0($2)
+; MIPS64R2-NEXT: beqz $9, .LBB13_1
+; MIPS64R2-NEXT: nop
+; MIPS64R2-NEXT: .LBB13_3: # %entry
+; MIPS64R2-NEXT: sync
+; MIPS64R2-NEXT: srlv $1, $10, $3
+; MIPS64R2-NEXT: seb $1, $1
+; MIPS64R2-NEXT: # %bb.4: # %entry
+; MIPS64R2-NEXT: xor $1, $1, $5
+; MIPS64R2-NEXT: jr $ra
+; MIPS64R2-NEXT: sltiu $2, $1, 1
+;
+; MIPS64R6-LABEL: AtomicCmpSwapRes8:
+; MIPS64R6: # %bb.0: # %entry
+; MIPS64R6-NEXT: daddiu $1, $zero, -4
+; MIPS64R6-NEXT: and $2, $4, $1
+; MIPS64R6-NEXT: andi $1, $4, 3
+; MIPS64R6-NEXT: sll $3, $1, 3
+; MIPS64R6-NEXT: ori $1, $zero, 255
+; MIPS64R6-NEXT: sllv $4, $1, $3
+; MIPS64R6-NEXT: nor $7, $zero, $4
+; MIPS64R6-NEXT: andi $1, $5, 255
+; MIPS64R6-NEXT: sllv $8, $1, $3
+; MIPS64R6-NEXT: andi $1, $6, 255
+; MIPS64R6-NEXT: sllv $6, $1, $3
+; MIPS64R6-NEXT: .LBB13_1: # %entry
+; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R6-NEXT: ll $9, 0($2)
+; MIPS64R6-NEXT: and $10, $9, $4
+; MIPS64R6-NEXT: bnec $10, $8, .LBB13_3
+; MIPS64R6-NEXT: # %bb.2: # %entry
+; MIPS64R6-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS64R6-NEXT: and $9, $9, $7
+; MIPS64R6-NEXT: or $9, $9, $6
+; MIPS64R6-NEXT: sc $9, 0($2)
+; MIPS64R6-NEXT: beqzc $9, .LBB13_1
+; MIPS64R6-NEXT: .LBB13_3: # %entry
+; MIPS64R6-NEXT: sync
+; MIPS64R6-NEXT: srlv $1, $10, $3
+; MIPS64R6-NEXT: seb $1, $1
+; MIPS64R6-NEXT: # %bb.4: # %entry
+; MIPS64R6-NEXT: xor $1, $1, $5
+; MIPS64R6-NEXT: jr $ra
+; MIPS64R6-NEXT: sltiu $2, $1, 1
+;
+; MIPS64R6O0-LABEL: AtomicCmpSwapRes8:
+; MIPS64R6O0: # %bb.0: # %entry
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
+; MIPS64R6O0-NEXT: move $3, $4
+; MIPS64R6O0-NEXT: move $1, $6
+; MIPS64R6O0-NEXT: move $2, $5
+; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: daddiu $4, $zero, -4
+; MIPS64R6O0-NEXT: and $4, $3, $4
+; MIPS64R6O0-NEXT: andi $3, $3, 3
+; MIPS64R6O0-NEXT: xori $3, $3, 3
+; MIPS64R6O0-NEXT: sll $9, $3, 3
+; MIPS64R6O0-NEXT: ori $3, $zero, 255
+; MIPS64R6O0-NEXT: sllv $5, $3, $9
+; MIPS64R6O0-NEXT: nor $7, $zero, $5
+; MIPS64R6O0-NEXT: andi $2, $2, 255
+; MIPS64R6O0-NEXT: sllv $6, $2, $9
+; MIPS64R6O0-NEXT: andi $1, $1, 255
+; MIPS64R6O0-NEXT: sllv $8, $1, $9
+; MIPS64R6O0-NEXT: .LBB13_1: # %entry
+; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R6O0-NEXT: ll $2, 0($4)
+; MIPS64R6O0-NEXT: and $3, $2, $5
+; MIPS64R6O0-NEXT: bnec $3, $6, .LBB13_3
+; MIPS64R6O0-NEXT: # %bb.2: # %entry
+; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS64R6O0-NEXT: and $2, $2, $7
+; MIPS64R6O0-NEXT: or $2, $2, $8
+; MIPS64R6O0-NEXT: sc $2, 0($4)
+; MIPS64R6O0-NEXT: beqzc $2, .LBB13_1
+; MIPS64R6O0-NEXT: .LBB13_3: # %entry
+; MIPS64R6O0-NEXT: sync
+; MIPS64R6O0-NEXT: srlv $1, $3, $9
+; MIPS64R6O0-NEXT: seb $1, $1
+; MIPS64R6O0-NEXT: # %bb.4: # %entry
+; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: # %bb.5: # %entry
+; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: xor $1, $1, $2
+; MIPS64R6O0-NEXT: sltiu $2, $1, 1
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
+; MIPS64R6O0-NEXT: jrc $ra
+;
+; MM32-LABEL: AtomicCmpSwapRes8:
+; MM32: # %bb.0: # %entry
+; MM32-NEXT: addiu $1, $zero, -4
+; MM32-NEXT: and $2, $4, $1
+; MM32-NEXT: andi $1, $4, 3
+; MM32-NEXT: sll $3, $1, 3
+; MM32-NEXT: ori $1, $zero, 255
+; MM32-NEXT: sllv $4, $1, $3
+; MM32-NEXT: nor $7, $zero, $4
+; MM32-NEXT: andi $1, $5, 255
+; MM32-NEXT: sllv $8, $1, $3
+; MM32-NEXT: andi $1, $6, 255
+; MM32-NEXT: sllv $6, $1, $3
+; MM32-NEXT: $BB13_1: # %entry
+; MM32-NEXT: # =>This Inner Loop Header: Depth=1
+; MM32-NEXT: ll $9, 0($2)
+; MM32-NEXT: and $10, $9, $4
+; MM32-NEXT: bne $10, $8, $BB13_3
+; MM32-NEXT: nop
+; MM32-NEXT: # %bb.2: # %entry
+; MM32-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MM32-NEXT: and $9, $9, $7
+; MM32-NEXT: or $9, $9, $6
+; MM32-NEXT: sc $9, 0($2)
+; MM32-NEXT: beqzc $9, $BB13_1
+; MM32-NEXT: $BB13_3: # %entry
+; MM32-NEXT: sync
+; MM32-NEXT: srlv $1, $10, $3
+; MM32-NEXT: seb $1, $1
+; MM32-NEXT: # %bb.4: # %entry
+; MM32-NEXT: xor $1, $1, $5
+; MM32-NEXT: jr $ra
+; MM32-NEXT: sltiu $2, $1, 1
+;
+; O1-LABEL: AtomicCmpSwapRes8:
+; O1: # %bb.0: # %entry
+; O1-NEXT: addiu $1, $zero, -4
+; O1-NEXT: and $2, $4, $1
+; O1-NEXT: andi $1, $4, 3
+; O1-NEXT: sll $3, $1, 3
+; O1-NEXT: ori $1, $zero, 255
+; O1-NEXT: sllv $4, $1, $3
+; O1-NEXT: nor $7, $zero, $4
+; O1-NEXT: andi $1, $5, 255
+; O1-NEXT: sllv $8, $1, $3
+; O1-NEXT: andi $1, $6, 255
+; O1-NEXT: sllv $6, $1, $3
+; O1-NEXT: $BB13_1: # %entry
+; O1-NEXT: # =>This Inner Loop Header: Depth=1
+; O1-NEXT: ll $9, 0($2)
+; O1-NEXT: and $10, $9, $4
+; O1-NEXT: bne $10, $8, $BB13_3
+; O1-NEXT: nop
+; O1-NEXT: # %bb.2: # %entry
+; O1-NEXT: # in Loop: Header=BB13_1 Depth=1
+; O1-NEXT: and $9, $9, $7
+; O1-NEXT: or $9, $9, $6
+; O1-NEXT: sc $9, 0($2)
+; O1-NEXT: beqz $9, $BB13_1
+; O1-NEXT: nop
+; O1-NEXT: $BB13_3: # %entry
+; O1-NEXT: sync
+; O1-NEXT: srlv $1, $10, $3
+; O1-NEXT: sll $1, $1, 24
+; O1-NEXT: sra $1, $1, 24
+; O1-NEXT: # %bb.4: # %entry
+; O1-NEXT: xor $1, $1, $5
+; O1-NEXT: jr $ra
+; O1-NEXT: sltiu $2, $1, 1
+;
+; O2-LABEL: AtomicCmpSwapRes8:
+; O2: # %bb.0: # %entry
+; O2-NEXT: addiu $1, $zero, -4
+; O2-NEXT: and $2, $4, $1
+; O2-NEXT: andi $1, $4, 3
+; O2-NEXT: sll $3, $1, 3
+; O2-NEXT: ori $1, $zero, 255
+; O2-NEXT: sllv $4, $1, $3
+; O2-NEXT: nor $7, $zero, $4
+; O2-NEXT: andi $1, $5, 255
+; O2-NEXT: sllv $8, $1, $3
+; O2-NEXT: andi $1, $6, 255
+; O2-NEXT: sllv $6, $1, $3
+; O2-NEXT: $BB13_1: # %entry
+; O2-NEXT: # =>This Inner Loop Header: Depth=1
+; O2-NEXT: ll $9, 0($2)
+; O2-NEXT: and $10, $9, $4
+; O2-NEXT: bne $10, $8, $BB13_3
+; O2-NEXT: nop
+; O2-NEXT: # %bb.2: # %entry
+; O2-NEXT: # in Loop: Header=BB13_1 Depth=1
+; O2-NEXT: and $9, $9, $7
+; O2-NEXT: or $9, $9, $6
+; O2-NEXT: sc $9, 0($2)
+; O2-NEXT: beqz $9, $BB13_1
+; O2-NEXT: nop
+; O2-NEXT: $BB13_3: # %entry
+; O2-NEXT: sync
+; O2-NEXT: srlv $1, $10, $3
+; O2-NEXT: sll $1, $1, 24
+; O2-NEXT: sra $1, $1, 24
+; O2-NEXT: # %bb.4: # %entry
+; O2-NEXT: xor $1, $1, $5
+; O2-NEXT: jr $ra
+; O2-NEXT: sltiu $2, $1, 1
+;
+; O3-LABEL: AtomicCmpSwapRes8:
+; O3: # %bb.0: # %entry
+; O3-NEXT: addiu $1, $zero, -4
+; O3-NEXT: and $2, $4, $1
+; O3-NEXT: andi $1, $4, 3
+; O3-NEXT: sll $3, $1, 3
+; O3-NEXT: ori $1, $zero, 255
+; O3-NEXT: sllv $4, $1, $3
+; O3-NEXT: andi $1, $5, 255
+; O3-NEXT: sllv $8, $1, $3
+; O3-NEXT: andi $1, $6, 255
+; O3-NEXT: nor $7, $zero, $4
+; O3-NEXT: sllv $6, $1, $3
+; O3-NEXT: $BB13_1: # %entry
+; O3-NEXT: # =>This Inner Loop Header: Depth=1
+; O3-NEXT: ll $9, 0($2)
+; O3-NEXT: and $10, $9, $4
+; O3-NEXT: bne $10, $8, $BB13_3
+; O3-NEXT: nop
+; O3-NEXT: # %bb.2: # %entry
+; O3-NEXT: # in Loop: Header=BB13_1 Depth=1
+; O3-NEXT: and $9, $9, $7
+; O3-NEXT: or $9, $9, $6
+; O3-NEXT: sc $9, 0($2)
+; O3-NEXT: beqz $9, $BB13_1
+; O3-NEXT: nop
+; O3-NEXT: $BB13_3: # %entry
+; O3-NEXT: sync
+; O3-NEXT: srlv $1, $10, $3
+; O3-NEXT: sll $1, $1, 24
+; O3-NEXT: sra $1, $1, 24
+; O3-NEXT: # %bb.4: # %entry
+; O3-NEXT: xor $1, $1, $5
+; O3-NEXT: jr $ra
+; O3-NEXT: sltiu $2, $1, 1
+;
+; MIPS32EB-LABEL: AtomicCmpSwapRes8:
+; MIPS32EB: # %bb.0: # %entry
+; MIPS32EB-NEXT: addiu $1, $zero, -4
+; MIPS32EB-NEXT: and $2, $4, $1
+; MIPS32EB-NEXT: andi $1, $4, 3
+; MIPS32EB-NEXT: xori $1, $1, 3
+; MIPS32EB-NEXT: sll $3, $1, 3
+; MIPS32EB-NEXT: ori $1, $zero, 255
+; MIPS32EB-NEXT: sllv $4, $1, $3
+; MIPS32EB-NEXT: nor $7, $zero, $4
+; MIPS32EB-NEXT: andi $1, $5, 255
+; MIPS32EB-NEXT: sllv $8, $1, $3
+; MIPS32EB-NEXT: andi $1, $6, 255
+; MIPS32EB-NEXT: sllv $6, $1, $3
+; MIPS32EB-NEXT: $BB13_1: # %entry
+; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32EB-NEXT: ll $9, 0($2)
+; MIPS32EB-NEXT: and $10, $9, $4
+; MIPS32EB-NEXT: bne $10, $8, $BB13_3
+; MIPS32EB-NEXT: nop
+; MIPS32EB-NEXT: # %bb.2: # %entry
+; MIPS32EB-NEXT: # in Loop: Header=BB13_1 Depth=1
+; MIPS32EB-NEXT: and $9, $9, $7
+; MIPS32EB-NEXT: or $9, $9, $6
+; MIPS32EB-NEXT: sc $9, 0($2)
+; MIPS32EB-NEXT: beqz $9, $BB13_1
+; MIPS32EB-NEXT: nop
+; MIPS32EB-NEXT: $BB13_3: # %entry
+; MIPS32EB-NEXT: sync
+; MIPS32EB-NEXT: srlv $1, $10, $3
+; MIPS32EB-NEXT: sll $1, $1, 24
+; MIPS32EB-NEXT: sra $1, $1, 24
+; MIPS32EB-NEXT: # %bb.4: # %entry
+; MIPS32EB-NEXT: xor $1, $1, $5
+; MIPS32EB-NEXT: jr $ra
+; MIPS32EB-NEXT: sltiu $2, $1, 1
+entry:
+ %0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic
+ %1 = extractvalue { i8, i1 } %0, 1
+ ret i1 %1
+; FIXME: -march=mips produces a redundant sign extension here...
+; FIXME: ...Leading to this split check.
+
+}
+
+; Check one i16 so that we cover the seh sign extend
+@z = common global i16 0, align 1
+
+define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind {
+; MIPS32-LABEL: AtomicLoadAdd16:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32-NEXT: addu $1, $2, $25
+; MIPS32-NEXT: lw $1, %got(z)($1)
+; MIPS32-NEXT: addiu $2, $zero, -4
+; MIPS32-NEXT: and $3, $1, $2
+; MIPS32-NEXT: andi $1, $1, 3
+; MIPS32-NEXT: sll $1, $1, 3
+; MIPS32-NEXT: ori $2, $zero, 65535
+; MIPS32-NEXT: sllv $5, $2, $1
+; MIPS32-NEXT: nor $6, $zero, $5
+; MIPS32-NEXT: sllv $4, $4, $1
+; MIPS32-NEXT: $BB14_1: # %entry
+; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32-NEXT: ll $7, 0($3)
+; MIPS32-NEXT: addu $8, $7, $4
+; MIPS32-NEXT: and $8, $8, $5
+; MIPS32-NEXT: and $9, $7, $6
+; MIPS32-NEXT: or $9, $9, $8
+; MIPS32-NEXT: sc $9, 0($3)
+; MIPS32-NEXT: beqz $9, $BB14_1
+; MIPS32-NEXT: nop
+; MIPS32-NEXT: # %bb.2: # %entry
+; MIPS32-NEXT: and $2, $7, $5
+; MIPS32-NEXT: srlv $2, $2, $1
+; MIPS32-NEXT: sll $2, $2, 16
+; MIPS32-NEXT: sra $2, $2, 16
+; MIPS32-NEXT: # %bb.3: # %entry
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: nop
+;
+; MIPS32O0-LABEL: AtomicLoadAdd16:
+; MIPS32O0: # %bb.0: # %entry
+; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32O0-NEXT: addiu $sp, $sp, -8
+; MIPS32O0-NEXT: addu $1, $2, $25
+; MIPS32O0-NEXT: lw $1, %got(z)($1)
+; MIPS32O0-NEXT: addiu $2, $zero, -4
+; MIPS32O0-NEXT: and $5, $1, $2
+; MIPS32O0-NEXT: andi $1, $1, 3
+; MIPS32O0-NEXT: sll $9, $1, 3
+; MIPS32O0-NEXT: ori $1, $zero, 65535
+; MIPS32O0-NEXT: sllv $7, $1, $9
+; MIPS32O0-NEXT: nor $8, $zero, $7
+; MIPS32O0-NEXT: sllv $6, $4, $9
+; MIPS32O0-NEXT: $BB14_1: # %entry
+; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32O0-NEXT: ll $2, 0($5)
+; MIPS32O0-NEXT: addu $3, $2, $6
+; MIPS32O0-NEXT: and $3, $3, $7
+; MIPS32O0-NEXT: and $4, $2, $8
+; MIPS32O0-NEXT: or $4, $4, $3
+; MIPS32O0-NEXT: sc $4, 0($5)
+; MIPS32O0-NEXT: beqz $4, $BB14_1
+; MIPS32O0-NEXT: nop
+; MIPS32O0-NEXT: # %bb.2: # %entry
+; MIPS32O0-NEXT: and $1, $2, $7
+; MIPS32O0-NEXT: srlv $1, $1, $9
+; MIPS32O0-NEXT: sll $1, $1, 16
+; MIPS32O0-NEXT: sra $1, $1, 16
+; MIPS32O0-NEXT: # %bb.3: # %entry
+; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32O0-NEXT: # %bb.4: # %entry
+; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS32O0-NEXT: sll $1, $1, 16
+; MIPS32O0-NEXT: sra $2, $1, 16
+; MIPS32O0-NEXT: addiu $sp, $sp, 8
+; MIPS32O0-NEXT: jr $ra
+; MIPS32O0-NEXT: nop
+;
+; MIPS32R2-LABEL: AtomicLoadAdd16:
+; MIPS32R2: # %bb.0: # %entry
+; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R2-NEXT: addu $1, $2, $25
+; MIPS32R2-NEXT: lw $1, %got(z)($1)
+; MIPS32R2-NEXT: addiu $2, $zero, -4
+; MIPS32R2-NEXT: and $3, $1, $2
+; MIPS32R2-NEXT: andi $1, $1, 3
+; MIPS32R2-NEXT: sll $1, $1, 3
+; MIPS32R2-NEXT: ori $2, $zero, 65535
+; MIPS32R2-NEXT: sllv $5, $2, $1
+; MIPS32R2-NEXT: nor $6, $zero, $5
+; MIPS32R2-NEXT: sllv $4, $4, $1
+; MIPS32R2-NEXT: $BB14_1: # %entry
+; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R2-NEXT: ll $7, 0($3)
+; MIPS32R2-NEXT: addu $8, $7, $4
+; MIPS32R2-NEXT: and $8, $8, $5
+; MIPS32R2-NEXT: and $9, $7, $6
+; MIPS32R2-NEXT: or $9, $9, $8
+; MIPS32R2-NEXT: sc $9, 0($3)
+; MIPS32R2-NEXT: beqz $9, $BB14_1
+; MIPS32R2-NEXT: nop
+; MIPS32R2-NEXT: # %bb.2: # %entry
+; MIPS32R2-NEXT: and $2, $7, $5
+; MIPS32R2-NEXT: srlv $2, $2, $1
+; MIPS32R2-NEXT: seh $2, $2
+; MIPS32R2-NEXT: # %bb.3: # %entry
+; MIPS32R2-NEXT: jr $ra
+; MIPS32R2-NEXT: nop
+;
+; MIPS32R6-LABEL: AtomicLoadAdd16:
+; MIPS32R6: # %bb.0: # %entry
+; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R6-NEXT: addu $1, $2, $25
+; MIPS32R6-NEXT: lw $1, %got(z)($1)
+; MIPS32R6-NEXT: addiu $2, $zero, -4
+; MIPS32R6-NEXT: and $3, $1, $2
+; MIPS32R6-NEXT: andi $1, $1, 3
+; MIPS32R6-NEXT: sll $1, $1, 3
+; MIPS32R6-NEXT: ori $2, $zero, 65535
+; MIPS32R6-NEXT: sllv $5, $2, $1
+; MIPS32R6-NEXT: nor $6, $zero, $5
+; MIPS32R6-NEXT: sllv $4, $4, $1
+; MIPS32R6-NEXT: $BB14_1: # %entry
+; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6-NEXT: ll $7, 0($3)
+; MIPS32R6-NEXT: addu $8, $7, $4
+; MIPS32R6-NEXT: and $8, $8, $5
+; MIPS32R6-NEXT: and $9, $7, $6
+; MIPS32R6-NEXT: or $9, $9, $8
+; MIPS32R6-NEXT: sc $9, 0($3)
+; MIPS32R6-NEXT: beqzc $9, $BB14_1
+; MIPS32R6-NEXT: # %bb.2: # %entry
+; MIPS32R6-NEXT: and $2, $7, $5
+; MIPS32R6-NEXT: srlv $2, $2, $1
+; MIPS32R6-NEXT: seh $2, $2
+; MIPS32R6-NEXT: # %bb.3: # %entry
+; MIPS32R6-NEXT: jrc $ra
+;
+; MIPS32R6O0-LABEL: AtomicLoadAdd16:
+; MIPS32R6O0: # %bb.0: # %entry
+; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
+; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
+; MIPS32R6O0-NEXT: addu $1, $2, $25
+; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
+; MIPS32R6O0-NEXT: lw $1, %got(z)($1)
+; MIPS32R6O0-NEXT: addiu $2, $zero, -4
+; MIPS32R6O0-NEXT: and $5, $1, $2
+; MIPS32R6O0-NEXT: andi $1, $1, 3
+; MIPS32R6O0-NEXT: sll $9, $1, 3
+; MIPS32R6O0-NEXT: ori $1, $zero, 65535
+; MIPS32R6O0-NEXT: sllv $7, $1, $9
+; MIPS32R6O0-NEXT: nor $8, $zero, $7
+; MIPS32R6O0-NEXT: sllv $6, $4, $9
+; MIPS32R6O0-NEXT: $BB14_1: # %entry
+; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS32R6O0-NEXT: ll $2, 0($5)
+; MIPS32R6O0-NEXT: addu $3, $2, $6
+; MIPS32R6O0-NEXT: and $3, $3, $7
+; MIPS32R6O0-NEXT: and $4, $2, $8
+; MIPS32R6O0-NEXT: or $4, $4, $3
+; MIPS32R6O0-NEXT: sc $4, 0($5)
+; MIPS32R6O0-NEXT: beqzc $4, $BB14_1
+; MIPS32R6O0-NEXT: # %bb.2: # %entry
+; MIPS32R6O0-NEXT: and $1, $2, $7
+; MIPS32R6O0-NEXT: srlv $1, $1, $9
+; MIPS32R6O0-NEXT: seh $1, $1
+; MIPS32R6O0-NEXT: # %bb.3: # %entry
+; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: # %bb.4: # %entry
+; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
+; MIPS32R6O0-NEXT: jrc $ra
+;
+; MIPS4-LABEL: AtomicLoadAdd16:
+; MIPS4: # %bb.0: # %entry
+; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16)))
+; MIPS4-NEXT: daddu $1, $1, $25
+; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
+; MIPS4-NEXT: ld $1, %got_disp(z)($1)
+; MIPS4-NEXT: daddiu $2, $zero, -4
+; MIPS4-NEXT: and $3, $1, $2
+; MIPS4-NEXT: andi $1, $1, 3
+; MIPS4-NEXT: sll $1, $1, 3
+; MIPS4-NEXT: ori $2, $zero, 65535
+; MIPS4-NEXT: sllv $5, $2, $1
+; MIPS4-NEXT: nor $6, $zero, $5
+; MIPS4-NEXT: sllv $4, $4, $1
+; MIPS4-NEXT: .LBB14_1: # %entry
+; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS4-NEXT: ll $7, 0($3)
+; MIPS4-NEXT: addu $8, $7, $4
+; MIPS4-NEXT: and $8, $8, $5
+; MIPS4-NEXT: and $9, $7, $6
+; MIPS4-NEXT: or $9, $9, $8
+; MIPS4-NEXT: sc $9, 0($3)
+; MIPS4-NEXT: beqz $9, .LBB14_1
+; MIPS4-NEXT: nop
+; MIPS4-NEXT: # %bb.2: # %entry
+; MIPS4-NEXT: and $2, $7, $5
+; MIPS4-NEXT: srlv $2, $2, $1
+; MIPS4-NEXT: sll $2, $2, 16
+; MIPS4-NEXT: sra $2, $2, 16
+; MIPS4-NEXT: # %bb.3: # %entry
+; MIPS4-NEXT: jr $ra
+; MIPS4-NEXT: nop
+;
+; MIPS64-LABEL: AtomicLoadAdd16:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16)))
+; MIPS64-NEXT: daddu $1, $1, $25
+; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
+; MIPS64-NEXT: ld $1, %got_disp(z)($1)
+; MIPS64-NEXT: daddiu $2, $zero, -4
+; MIPS64-NEXT: and $3, $1, $2
+; MIPS64-NEXT: andi $1, $1, 3
+; MIPS64-NEXT: sll $1, $1, 3
+; MIPS64-NEXT: ori $2, $zero, 65535
+; MIPS64-NEXT: sllv $5, $2, $1
+; MIPS64-NEXT: nor $6, $zero, $5
+; MIPS64-NEXT: sllv $4, $4, $1
+; MIPS64-NEXT: .LBB14_1: # %entry
+; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64-NEXT: ll $7, 0($3)
+; MIPS64-NEXT: addu $8, $7, $4
+; MIPS64-NEXT: and $8, $8, $5
+; MIPS64-NEXT: and $9, $7, $6
+; MIPS64-NEXT: or $9, $9, $8
+; MIPS64-NEXT: sc $9, 0($3)
+; MIPS64-NEXT: beqz $9, .LBB14_1
+; MIPS64-NEXT: nop
+; MIPS64-NEXT: # %bb.2: # %entry
+; MIPS64-NEXT: and $2, $7, $5
+; MIPS64-NEXT: srlv $2, $2, $1
+; MIPS64-NEXT: sll $2, $2, 16
+; MIPS64-NEXT: sra $2, $2, 16
+; MIPS64-NEXT: # %bb.3: # %entry
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: nop
+;
+; MIPS64R2-LABEL: AtomicLoadAdd16:
+; MIPS64R2: # %bb.0: # %entry
+; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16)))
+; MIPS64R2-NEXT: daddu $1, $1, $25
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16)))
+; MIPS64R2-NEXT: ld $1, %got_disp(z)($1)
+; MIPS64R2-NEXT: daddiu $2, $zero, -4
+; MIPS64R2-NEXT: and $3, $1, $2
+; MIPS64R2-NEXT: andi $1, $1, 3
+; MIPS64R2-NEXT: sll $1, $1, 3
+; MIPS64R2-NEXT: ori $2, $zero, 65535
+; MIPS64R2-NEXT: sllv $5, $2, $1
+; MIPS64R2-NEXT: nor $6, $zero, $5
+; MIPS64R2-NEXT: sllv $4, $4, $1
+; MIPS64R2-NEXT: .LBB14_1: # %entry
+; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
+; MIPS64R2-NEXT: ll $7, 0($3)
+; MIPS64R2-NEXT: addu $8, $7, $4
+; MIPS64R2-NEXT: and $8, $8, $5 +; MIPS64R2-NEXT: and $9, $7, $6 +; MIPS64R2-NEXT: or $9, $9, $8 +; MIPS64R2-NEXT: sc $9, 0($3) +; MIPS64R2-NEXT: beqz $9, .LBB14_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: and $2, $7, $5 +; MIPS64R2-NEXT: srlv $2, $2, $1 +; MIPS64R2-NEXT: seh $2, $2 +; MIPS64R2-NEXT: # %bb.3: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadAdd16: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) +; MIPS64R6-NEXT: ld $1, %got_disp(z)($1) +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $3, $1, $2 +; MIPS64R6-NEXT: andi $1, $1, 3 +; MIPS64R6-NEXT: sll $1, $1, 3 +; MIPS64R6-NEXT: ori $2, $zero, 65535 +; MIPS64R6-NEXT: sllv $5, $2, $1 +; MIPS64R6-NEXT: nor $6, $zero, $5 +; MIPS64R6-NEXT: sllv $4, $4, $1 +; MIPS64R6-NEXT: .LBB14_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $7, 0($3) +; MIPS64R6-NEXT: addu $8, $7, $4 +; MIPS64R6-NEXT: and $8, $8, $5 +; MIPS64R6-NEXT: and $9, $7, $6 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($3) +; MIPS64R6-NEXT: beqzc $9, .LBB14_1 +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: and $2, $7, $5 +; MIPS64R6-NEXT: srlv $2, $2, $1 +; MIPS64R6-NEXT: seh $2, $2 +; MIPS64R6-NEXT: # %bb.3: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadAdd16: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(z)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 2 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 +; MIPS64R6O0-NEXT: .LBB14_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: addu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB14_1 +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seh $1, $1 +; MIPS64R6O0-NEXT: # %bb.3: # %entry +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: # %bb.4: # %entry +; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadAdd16: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(z)($2) +; MM32-NEXT: addiu $2, $zero, -4 +; MM32-NEXT: and $3, $1, $2 +; MM32-NEXT: andi $1, $1, 3 +; MM32-NEXT: sll $1, $1, 3 +; MM32-NEXT: ori $2, $zero, 65535 +; MM32-NEXT: sllv $5, $2, $1 +; MM32-NEXT: nor $6, $zero, $5 +; MM32-NEXT: sllv $4, $4, $1 +; MM32-NEXT: $BB14_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $7, 0($3) +; MM32-NEXT: addu $8, $7, $4 +; MM32-NEXT: and $8, $8, $5 +; MM32-NEXT: and $9, 
$7, $6 +; MM32-NEXT: or $9, $9, $8 +; MM32-NEXT: sc $9, 0($3) +; MM32-NEXT: beqzc $9, $BB14_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: and $2, $7, $5 +; MM32-NEXT: srlv $2, $2, $1 +; MM32-NEXT: seh $2, $2 +; MM32-NEXT: # %bb.3: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadAdd16: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(z)($1) +; O1-NEXT: addiu $2, $zero, -4 +; O1-NEXT: and $3, $1, $2 +; O1-NEXT: andi $1, $1, 3 +; O1-NEXT: sll $1, $1, 3 +; O1-NEXT: ori $2, $zero, 65535 +; O1-NEXT: sllv $5, $2, $1 +; O1-NEXT: nor $6, $zero, $5 +; O1-NEXT: sllv $4, $4, $1 +; O1-NEXT: $BB14_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $7, 0($3) +; O1-NEXT: addu $8, $7, $4 +; O1-NEXT: and $8, $8, $5 +; O1-NEXT: and $9, $7, $6 +; O1-NEXT: or $9, $9, $8 +; O1-NEXT: sc $9, 0($3) +; O1-NEXT: beqz $9, $BB14_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: and $2, $7, $5 +; O1-NEXT: srlv $2, $2, $1 +; O1-NEXT: sll $2, $2, 16 +; O1-NEXT: sra $2, $2, 16 +; O1-NEXT: # %bb.3: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadAdd16: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, %got(z)($1) +; O2-NEXT: addiu $2, $zero, -4 +; O2-NEXT: and $3, $1, $2 +; O2-NEXT: andi $1, $1, 3 +; O2-NEXT: sll $1, $1, 3 +; O2-NEXT: ori $2, $zero, 65535 +; O2-NEXT: sllv $5, $2, $1 +; O2-NEXT: nor $6, $zero, $5 +; O2-NEXT: sllv $4, $4, $1 +; O2-NEXT: $BB14_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $7, 0($3) +; O2-NEXT: addu $8, $7, $4 +; O2-NEXT: and $8, $8, $5 +; O2-NEXT: and $9, $7, $6 +; O2-NEXT: or $9, $9, $8 +; O2-NEXT: sc $9, 0($3) +; O2-NEXT: beqz $9, $BB14_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: and $2, $7, $5 +; O2-NEXT: srlv $2, $2, $1 +; O2-NEXT: sll $2, $2, 16 +; O2-NEXT: sra $2, $2, 16 +; O2-NEXT: # %bb.3: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadAdd16: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: addiu $2, $zero, -4 +; O3-NEXT: lw $1, %got(z)($1) +; O3-NEXT: and $3, $1, $2 +; O3-NEXT: andi $1, $1, 3 +; O3-NEXT: ori $2, $zero, 65535 +; O3-NEXT: sll $1, $1, 3 +; O3-NEXT: sllv $5, $2, $1 +; O3-NEXT: sllv $4, $4, $1 +; O3-NEXT: nor $6, $zero, $5 +; O3-NEXT: $BB14_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $7, 0($3) +; O3-NEXT: addu $8, $7, $4 +; O3-NEXT: and $8, $8, $5 +; O3-NEXT: and $9, $7, $6 +; O3-NEXT: or $9, $9, $8 +; O3-NEXT: sc $9, 0($3) +; O3-NEXT: beqz $9, $BB14_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: and $2, $7, $5 +; O3-NEXT: srlv $2, $2, $1 +; O3-NEXT: sll $2, $2, 16 +; O3-NEXT: sra $2, $2, 16 +; O3-NEXT: # %bb.3: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadAdd16: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(z)($1) +; MIPS32EB-NEXT: addiu $2, $zero, -4 +; MIPS32EB-NEXT: and $3, $1, $2 +; MIPS32EB-NEXT: andi $1, $1, 3 +; MIPS32EB-NEXT: xori $1, $1, 2 +; MIPS32EB-NEXT: sll $1, $1, 3 +; MIPS32EB-NEXT: ori $2, $zero, 65535 +; MIPS32EB-NEXT: sllv $5, $2, $1 +; MIPS32EB-NEXT: nor $6, $zero, $5 +; MIPS32EB-NEXT: sllv $4, $4, $1 +; MIPS32EB-NEXT: $BB14_1: 
# %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $7, 0($3) +; MIPS32EB-NEXT: addu $8, $7, $4 +; MIPS32EB-NEXT: and $8, $8, $5 +; MIPS32EB-NEXT: and $9, $7, $6 +; MIPS32EB-NEXT: or $9, $9, $8 +; MIPS32EB-NEXT: sc $9, 0($3) +; MIPS32EB-NEXT: beqz $9, $BB14_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: and $2, $7, $5 +; MIPS32EB-NEXT: srlv $2, $2, $1 +; MIPS32EB-NEXT: sll $2, $2, 16 +; MIPS32EB-NEXT: sra $2, $2, 16 +; MIPS32EB-NEXT: # %bb.3: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw add i16* @z, i16 %incr monotonic + ret i16 %0 + +} + +; Test that the i16 return value from cmpxchg is recognised as signed, +; so that setCC doesn't end up comparing an unsigned value to a signed +; value. +; The rest of the functions here are testing the atomic expansion, so +; we just match the end of the function. +define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { +; MIPS32-LABEL: foo: +; MIPS32: # %bb.0: +; MIPS32-NEXT: addu $1, $5, $6 +; MIPS32-NEXT: sync +; MIPS32-NEXT: addiu $2, $zero, -4 +; MIPS32-NEXT: and $3, $4, $2 +; MIPS32-NEXT: andi $2, $4, 3 +; MIPS32-NEXT: sll $4, $2, 3 +; MIPS32-NEXT: ori $2, $zero, 65535 +; MIPS32-NEXT: sllv $5, $2, $4 +; MIPS32-NEXT: nor $6, $zero, $5 +; MIPS32-NEXT: andi $2, $1, 65535 +; MIPS32-NEXT: sllv $8, $2, $4 +; MIPS32-NEXT: andi $2, $7, 65535 +; MIPS32-NEXT: sllv $7, $2, $4 +; MIPS32-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $9, 0($3) +; MIPS32-NEXT: and $10, $9, $5 +; MIPS32-NEXT: bne $10, $8, $BB15_3 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS32-NEXT: and $9, $9, $6 +; MIPS32-NEXT: or $9, $9, $7 +; MIPS32-NEXT: sc $9, 0($3) +; MIPS32-NEXT: beqz $9, $BB15_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB15_3: +; MIPS32-NEXT: sync +; MIPS32-NEXT: srlv $2, $10, $4 +; MIPS32-NEXT: sll $2, $2, 16 +; MIPS32-NEXT: sra $2, $2, 16 +; MIPS32-NEXT: # %bb.4: +; MIPS32-NEXT: sll $1, $1, 16 +; MIPS32-NEXT: sra $1, $1, 16 +; MIPS32-NEXT: xor $1, $2, $1 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sync +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: foo: +; MIPS32O0: # %bb.0: +; MIPS32O0-NEXT: addiu $sp, $sp, -8 +; MIPS32O0-NEXT: .cfi_def_cfa_offset 8 +; MIPS32O0-NEXT: move $1, $7 +; MIPS32O0-NEXT: move $3, $4 +; MIPS32O0-NEXT: addu $2, $5, $6 +; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sync +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 +; MIPS32O0-NEXT: ori $3, $zero, 65535 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 65535 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 65535 +; MIPS32O0-NEXT: sllv $8, $1, $9 +; MIPS32O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, $5 +; MIPS32O0-NEXT: bne $3, $6, $BB15_3 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB15_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: $BB15_3: +; MIPS32O0-NEXT: sync +; MIPS32O0-NEXT: srlv $1, $3, $9 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 +; MIPS32O0-NEXT: # %bb.4: +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: # %bb.5: +; MIPS32O0-NEXT: lw $2, 4($sp) # 4-byte 
Folded Reload +; MIPS32O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 +; MIPS32O0-NEXT: xor $1, $2, $1 +; MIPS32O0-NEXT: sltiu $3, $1, 1 +; MIPS32O0-NEXT: sync +; MIPS32O0-NEXT: addiu $sp, $sp, 8 +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: foo: +; MIPS32R2: # %bb.0: +; MIPS32R2-NEXT: addu $1, $5, $6 +; MIPS32R2-NEXT: sync +; MIPS32R2-NEXT: addiu $2, $zero, -4 +; MIPS32R2-NEXT: and $3, $4, $2 +; MIPS32R2-NEXT: andi $2, $4, 3 +; MIPS32R2-NEXT: sll $4, $2, 3 +; MIPS32R2-NEXT: ori $2, $zero, 65535 +; MIPS32R2-NEXT: sllv $5, $2, $4 +; MIPS32R2-NEXT: nor $6, $zero, $5 +; MIPS32R2-NEXT: andi $2, $1, 65535 +; MIPS32R2-NEXT: sllv $8, $2, $4 +; MIPS32R2-NEXT: andi $2, $7, 65535 +; MIPS32R2-NEXT: sllv $7, $2, $4 +; MIPS32R2-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $9, 0($3) +; MIPS32R2-NEXT: and $10, $9, $5 +; MIPS32R2-NEXT: bne $10, $8, $BB15_3 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS32R2-NEXT: and $9, $9, $6 +; MIPS32R2-NEXT: or $9, $9, $7 +; MIPS32R2-NEXT: sc $9, 0($3) +; MIPS32R2-NEXT: beqz $9, $BB15_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: $BB15_3: +; MIPS32R2-NEXT: sync +; MIPS32R2-NEXT: srlv $2, $10, $4 +; MIPS32R2-NEXT: seh $2, $2 +; MIPS32R2-NEXT: # %bb.4: +; MIPS32R2-NEXT: seh $1, $1 +; MIPS32R2-NEXT: xor $1, $2, $1 +; MIPS32R2-NEXT: sltiu $3, $1, 1 +; MIPS32R2-NEXT: sync +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: foo: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: addu $1, $5, $6 +; MIPS32R6-NEXT: sync +; MIPS32R6-NEXT: addiu $2, $zero, -4 +; MIPS32R6-NEXT: and $3, $4, $2 +; MIPS32R6-NEXT: andi $2, $4, 3 +; MIPS32R6-NEXT: sll $4, $2, 3 +; MIPS32R6-NEXT: ori $2, $zero, 65535 +; MIPS32R6-NEXT: sllv $5, $2, $4 +; MIPS32R6-NEXT: nor $6, $zero, $5 +; MIPS32R6-NEXT: andi $2, $1, 65535 +; MIPS32R6-NEXT: sllv $8, $2, $4 +; MIPS32R6-NEXT: andi $2, $7, 65535 +; MIPS32R6-NEXT: sllv $7, $2, $4 +; MIPS32R6-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $9, 0($3) +; MIPS32R6-NEXT: and $10, $9, $5 +; MIPS32R6-NEXT: bnec $10, $8, $BB15_3 +; MIPS32R6-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS32R6-NEXT: and $9, $9, $6 +; MIPS32R6-NEXT: or $9, $9, $7 +; MIPS32R6-NEXT: sc $9, 0($3) +; MIPS32R6-NEXT: beqzc $9, $BB15_1 +; MIPS32R6-NEXT: $BB15_3: +; MIPS32R6-NEXT: sync +; MIPS32R6-NEXT: srlv $2, $10, $4 +; MIPS32R6-NEXT: seh $2, $2 +; MIPS32R6-NEXT: # %bb.4: +; MIPS32R6-NEXT: seh $1, $1 +; MIPS32R6-NEXT: xor $1, $2, $1 +; MIPS32R6-NEXT: sltiu $3, $1, 1 +; MIPS32R6-NEXT: sync +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: foo: +; MIPS32R6O0: # %bb.0: +; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 +; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 8 +; MIPS32R6O0-NEXT: move $1, $7 +; MIPS32R6O0-NEXT: move $3, $4 +; MIPS32R6O0-NEXT: # kill: def $a3 killed $at +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a2 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a1 +; MIPS32R6O0-NEXT: addu $2, $5, $6 +; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sync +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 65535 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 65535 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 65535 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 +; MIPS32R6O0-NEXT: $BB15_1: # =>This 
Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB15_3 +; MIPS32R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB15_1 +; MIPS32R6O0-NEXT: $BB15_3: +; MIPS32R6O0-NEXT: sync +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seh $1, $1 +; MIPS32R6O0-NEXT: # %bb.4: +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: # %bb.5: +; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: seh $1, $1 +; MIPS32R6O0-NEXT: xor $1, $2, $1 +; MIPS32R6O0-NEXT: sltiu $3, $1, 1 +; MIPS32R6O0-NEXT: sync +; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: foo: +; MIPS4: # %bb.0: +; MIPS4-NEXT: sll $1, $6, 0 +; MIPS4-NEXT: sll $2, $5, 0 +; MIPS4-NEXT: addu $1, $2, $1 +; MIPS4-NEXT: sync +; MIPS4-NEXT: sll $2, $7, 0 +; MIPS4-NEXT: daddiu $3, $zero, -4 +; MIPS4-NEXT: and $3, $4, $3 +; MIPS4-NEXT: andi $4, $4, 3 +; MIPS4-NEXT: sll $4, $4, 3 +; MIPS4-NEXT: ori $5, $zero, 65535 +; MIPS4-NEXT: sllv $5, $5, $4 +; MIPS4-NEXT: nor $6, $zero, $5 +; MIPS4-NEXT: andi $7, $1, 65535 +; MIPS4-NEXT: sllv $7, $7, $4 +; MIPS4-NEXT: andi $2, $2, 65535 +; MIPS4-NEXT: sllv $8, $2, $4 +; MIPS4-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $9, 0($3) +; MIPS4-NEXT: and $10, $9, $5 +; MIPS4-NEXT: bne $10, $7, .LBB15_3 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS4-NEXT: and $9, $9, $6 +; MIPS4-NEXT: or $9, $9, $8 +; MIPS4-NEXT: sc $9, 0($3) +; MIPS4-NEXT: beqz $9, .LBB15_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: .LBB15_3: +; MIPS4-NEXT: sync +; MIPS4-NEXT: srlv $2, $10, $4 +; MIPS4-NEXT: sll $2, $2, 16 +; MIPS4-NEXT: sra $2, $2, 16 +; MIPS4-NEXT: # %bb.4: +; MIPS4-NEXT: sll $1, $1, 16 +; MIPS4-NEXT: sra $1, $1, 16 +; MIPS4-NEXT: xor $1, $2, $1 +; MIPS4-NEXT: sltiu $3, $1, 1 +; MIPS4-NEXT: sync +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: foo: +; MIPS64: # %bb.0: +; MIPS64-NEXT: sll $1, $6, 0 +; MIPS64-NEXT: sll $2, $5, 0 +; MIPS64-NEXT: addu $1, $2, $1 +; MIPS64-NEXT: sync +; MIPS64-NEXT: sll $2, $7, 0 +; MIPS64-NEXT: daddiu $3, $zero, -4 +; MIPS64-NEXT: and $3, $4, $3 +; MIPS64-NEXT: andi $4, $4, 3 +; MIPS64-NEXT: sll $4, $4, 3 +; MIPS64-NEXT: ori $5, $zero, 65535 +; MIPS64-NEXT: sllv $5, $5, $4 +; MIPS64-NEXT: nor $6, $zero, $5 +; MIPS64-NEXT: andi $7, $1, 65535 +; MIPS64-NEXT: sllv $7, $7, $4 +; MIPS64-NEXT: andi $2, $2, 65535 +; MIPS64-NEXT: sllv $8, $2, $4 +; MIPS64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $9, 0($3) +; MIPS64-NEXT: and $10, $9, $5 +; MIPS64-NEXT: bne $10, $7, .LBB15_3 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS64-NEXT: and $9, $9, $6 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($3) +; MIPS64-NEXT: beqz $9, .LBB15_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: .LBB15_3: +; MIPS64-NEXT: sync +; MIPS64-NEXT: srlv $2, $10, $4 +; MIPS64-NEXT: sll $2, $2, 16 +; MIPS64-NEXT: sra $2, $2, 16 +; MIPS64-NEXT: # %bb.4: +; MIPS64-NEXT: sll $1, $1, 16 +; MIPS64-NEXT: sra $1, $1, 16 +; MIPS64-NEXT: xor $1, $2, $1 +; MIPS64-NEXT: sltiu $3, $1, 1 +; MIPS64-NEXT: sync +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: foo: +; MIPS64R2: # %bb.0: +; MIPS64R2-NEXT: sll $1, $6, 0 +; MIPS64R2-NEXT: sll $2, $5, 0 +; MIPS64R2-NEXT: 
addu $1, $2, $1 +; MIPS64R2-NEXT: sync +; MIPS64R2-NEXT: sll $2, $7, 0 +; MIPS64R2-NEXT: daddiu $3, $zero, -4 +; MIPS64R2-NEXT: and $3, $4, $3 +; MIPS64R2-NEXT: andi $4, $4, 3 +; MIPS64R2-NEXT: sll $4, $4, 3 +; MIPS64R2-NEXT: ori $5, $zero, 65535 +; MIPS64R2-NEXT: sllv $5, $5, $4 +; MIPS64R2-NEXT: nor $6, $zero, $5 +; MIPS64R2-NEXT: andi $7, $1, 65535 +; MIPS64R2-NEXT: sllv $7, $7, $4 +; MIPS64R2-NEXT: andi $2, $2, 65535 +; MIPS64R2-NEXT: sllv $8, $2, $4 +; MIPS64R2-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $9, 0($3) +; MIPS64R2-NEXT: and $10, $9, $5 +; MIPS64R2-NEXT: bne $10, $7, .LBB15_3 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS64R2-NEXT: and $9, $9, $6 +; MIPS64R2-NEXT: or $9, $9, $8 +; MIPS64R2-NEXT: sc $9, 0($3) +; MIPS64R2-NEXT: beqz $9, .LBB15_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: .LBB15_3: +; MIPS64R2-NEXT: sync +; MIPS64R2-NEXT: srlv $2, $10, $4 +; MIPS64R2-NEXT: seh $2, $2 +; MIPS64R2-NEXT: # %bb.4: +; MIPS64R2-NEXT: seh $1, $1 +; MIPS64R2-NEXT: xor $1, $2, $1 +; MIPS64R2-NEXT: sltiu $3, $1, 1 +; MIPS64R2-NEXT: sync +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: foo: +; MIPS64R6: # %bb.0: +; MIPS64R6-NEXT: sll $1, $6, 0 +; MIPS64R6-NEXT: sll $2, $5, 0 +; MIPS64R6-NEXT: addu $1, $2, $1 +; MIPS64R6-NEXT: sync +; MIPS64R6-NEXT: sll $2, $7, 0 +; MIPS64R6-NEXT: daddiu $3, $zero, -4 +; MIPS64R6-NEXT: and $3, $4, $3 +; MIPS64R6-NEXT: andi $4, $4, 3 +; MIPS64R6-NEXT: sll $4, $4, 3 +; MIPS64R6-NEXT: ori $5, $zero, 65535 +; MIPS64R6-NEXT: sllv $5, $5, $4 +; MIPS64R6-NEXT: nor $6, $zero, $5 +; MIPS64R6-NEXT: andi $7, $1, 65535 +; MIPS64R6-NEXT: sllv $7, $7, $4 +; MIPS64R6-NEXT: andi $2, $2, 65535 +; MIPS64R6-NEXT: sllv $8, $2, $4 +; MIPS64R6-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $9, 0($3) +; MIPS64R6-NEXT: and $10, $9, $5 +; MIPS64R6-NEXT: bnec $10, $7, .LBB15_3 +; MIPS64R6-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS64R6-NEXT: and $9, $9, $6 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($3) +; MIPS64R6-NEXT: beqzc $9, .LBB15_1 +; MIPS64R6-NEXT: .LBB15_3: +; MIPS64R6-NEXT: sync +; MIPS64R6-NEXT: srlv $2, $10, $4 +; MIPS64R6-NEXT: seh $2, $2 +; MIPS64R6-NEXT: # %bb.4: +; MIPS64R6-NEXT: seh $1, $1 +; MIPS64R6-NEXT: xor $1, $2, $1 +; MIPS64R6-NEXT: sltiu $3, $1, 1 +; MIPS64R6-NEXT: sync +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: foo: +; MIPS64R6O0: # %bb.0: +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: .cfi_def_cfa_offset 16 +; MIPS64R6O0-NEXT: move $3, $4 +; MIPS64R6O0-NEXT: move $1, $7 +; MIPS64R6O0-NEXT: sll $1, $1, 0 +; MIPS64R6O0-NEXT: move $2, $6 +; MIPS64R6O0-NEXT: sll $4, $2, 0 +; MIPS64R6O0-NEXT: move $2, $5 +; MIPS64R6O0-NEXT: sll $2, $2, 0 +; MIPS64R6O0-NEXT: addu $2, $2, $4 +; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sync +; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 2 +; MIPS64R6O0-NEXT: sll $9, $3, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 +; MIPS64R6O0-NEXT: andi $2, $2, 65535 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 +; MIPS64R6O0-NEXT: andi $1, $1, 65535 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 +; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB15_3 +; MIPS64R6O0-NEXT: # %bb.2: # in 
Loop: Header=BB15_1 Depth=1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB15_1 +; MIPS64R6O0-NEXT: .LBB15_3: +; MIPS64R6O0-NEXT: sync +; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seh $1, $1 +; MIPS64R6O0-NEXT: # %bb.4: +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: # %bb.5: +; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: seh $1, $1 +; MIPS64R6O0-NEXT: xor $1, $2, $1 +; MIPS64R6O0-NEXT: sltiu $3, $1, 1 +; MIPS64R6O0-NEXT: sync +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: foo: +; MM32: # %bb.0: +; MM32-NEXT: addu16 $3, $5, $6 +; MM32-NEXT: sync +; MM32-NEXT: addiu $1, $zero, -4 +; MM32-NEXT: and $1, $4, $1 +; MM32-NEXT: andi $2, $4, 3 +; MM32-NEXT: sll $4, $2, 3 +; MM32-NEXT: ori $2, $zero, 65535 +; MM32-NEXT: sllv $5, $2, $4 +; MM32-NEXT: nor $6, $zero, $5 +; MM32-NEXT: andi $2, $3, 65535 +; MM32-NEXT: sllv $8, $2, $4 +; MM32-NEXT: andi $2, $7, 65535 +; MM32-NEXT: sllv $7, $2, $4 +; MM32-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $9, 0($1) +; MM32-NEXT: and $10, $9, $5 +; MM32-NEXT: bne $10, $8, $BB15_3 +; MM32-NEXT: nop +; MM32-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MM32-NEXT: and $9, $9, $6 +; MM32-NEXT: or $9, $9, $7 +; MM32-NEXT: sc $9, 0($1) +; MM32-NEXT: beqzc $9, $BB15_1 +; MM32-NEXT: $BB15_3: +; MM32-NEXT: sync +; MM32-NEXT: srlv $2, $10, $4 +; MM32-NEXT: seh $2, $2 +; MM32-NEXT: # %bb.4: +; MM32-NEXT: seh $1, $3 +; MM32-NEXT: xor $1, $2, $1 +; MM32-NEXT: sltiu $3, $1, 1 +; MM32-NEXT: sync +; MM32-NEXT: jrc $ra +; +; O1-LABEL: foo: +; O1: # %bb.0: +; O1-NEXT: addu $1, $5, $6 +; O1-NEXT: sync +; O1-NEXT: addiu $2, $zero, -4 +; O1-NEXT: and $3, $4, $2 +; O1-NEXT: andi $2, $4, 3 +; O1-NEXT: sll $4, $2, 3 +; O1-NEXT: ori $2, $zero, 65535 +; O1-NEXT: sllv $5, $2, $4 +; O1-NEXT: nor $6, $zero, $5 +; O1-NEXT: andi $2, $1, 65535 +; O1-NEXT: sllv $8, $2, $4 +; O1-NEXT: andi $2, $7, 65535 +; O1-NEXT: sllv $7, $2, $4 +; O1-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $9, 0($3) +; O1-NEXT: and $10, $9, $5 +; O1-NEXT: bne $10, $8, $BB15_3 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; O1-NEXT: and $9, $9, $6 +; O1-NEXT: or $9, $9, $7 +; O1-NEXT: sc $9, 0($3) +; O1-NEXT: beqz $9, $BB15_1 +; O1-NEXT: nop +; O1-NEXT: $BB15_3: +; O1-NEXT: sync +; O1-NEXT: srlv $2, $10, $4 +; O1-NEXT: sll $2, $2, 16 +; O1-NEXT: sra $2, $2, 16 +; O1-NEXT: # %bb.4: +; O1-NEXT: sll $1, $1, 16 +; O1-NEXT: sra $1, $1, 16 +; O1-NEXT: xor $1, $2, $1 +; O1-NEXT: sltiu $3, $1, 1 +; O1-NEXT: sync +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: foo: +; O2: # %bb.0: +; O2-NEXT: addu $1, $5, $6 +; O2-NEXT: sync +; O2-NEXT: addiu $2, $zero, -4 +; O2-NEXT: and $3, $4, $2 +; O2-NEXT: andi $2, $4, 3 +; O2-NEXT: sll $4, $2, 3 +; O2-NEXT: ori $2, $zero, 65535 +; O2-NEXT: sllv $5, $2, $4 +; O2-NEXT: nor $6, $zero, $5 +; O2-NEXT: andi $2, $1, 65535 +; O2-NEXT: sllv $8, $2, $4 +; O2-NEXT: andi $2, $7, 65535 +; O2-NEXT: sllv $7, $2, $4 +; O2-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $9, 0($3) +; O2-NEXT: and $10, $9, $5 +; O2-NEXT: bne $10, $8, $BB15_3 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; O2-NEXT: and $9, $9, $6 +; O2-NEXT: or $9, $9, $7 +; O2-NEXT: sc $9, 0($3) +; O2-NEXT: beqz $9, $BB15_1 +; O2-NEXT: nop +; O2-NEXT: $BB15_3: +; O2-NEXT: 
sync +; O2-NEXT: srlv $2, $10, $4 +; O2-NEXT: sll $2, $2, 16 +; O2-NEXT: sra $2, $2, 16 +; O2-NEXT: # %bb.4: +; O2-NEXT: sll $1, $1, 16 +; O2-NEXT: sra $1, $1, 16 +; O2-NEXT: xor $1, $2, $1 +; O2-NEXT: sltiu $3, $1, 1 +; O2-NEXT: sync +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: foo: +; O3: # %bb.0: +; O3-NEXT: addiu $2, $zero, -4 +; O3-NEXT: addu $1, $5, $6 +; O3-NEXT: sync +; O3-NEXT: and $3, $4, $2 +; O3-NEXT: andi $2, $4, 3 +; O3-NEXT: sll $4, $2, 3 +; O3-NEXT: ori $2, $zero, 65535 +; O3-NEXT: sllv $5, $2, $4 +; O3-NEXT: andi $2, $1, 65535 +; O3-NEXT: sll $1, $1, 16 +; O3-NEXT: sllv $8, $2, $4 +; O3-NEXT: andi $2, $7, 65535 +; O3-NEXT: nor $6, $zero, $5 +; O3-NEXT: sra $1, $1, 16 +; O3-NEXT: sllv $7, $2, $4 +; O3-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $9, 0($3) +; O3-NEXT: and $10, $9, $5 +; O3-NEXT: bne $10, $8, $BB15_3 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; O3-NEXT: and $9, $9, $6 +; O3-NEXT: or $9, $9, $7 +; O3-NEXT: sc $9, 0($3) +; O3-NEXT: beqz $9, $BB15_1 +; O3-NEXT: nop +; O3-NEXT: $BB15_3: +; O3-NEXT: sync +; O3-NEXT: srlv $2, $10, $4 +; O3-NEXT: sll $2, $2, 16 +; O3-NEXT: sra $2, $2, 16 +; O3-NEXT: # %bb.4: +; O3-NEXT: sync +; O3-NEXT: xor $1, $2, $1 +; O3-NEXT: jr $ra +; O3-NEXT: sltiu $3, $1, 1 +; +; MIPS32EB-LABEL: foo: +; MIPS32EB: # %bb.0: +; MIPS32EB-NEXT: addu $1, $5, $6 +; MIPS32EB-NEXT: sync +; MIPS32EB-NEXT: addiu $2, $zero, -4 +; MIPS32EB-NEXT: and $3, $4, $2 +; MIPS32EB-NEXT: andi $2, $4, 3 +; MIPS32EB-NEXT: xori $2, $2, 2 +; MIPS32EB-NEXT: sll $4, $2, 3 +; MIPS32EB-NEXT: ori $2, $zero, 65535 +; MIPS32EB-NEXT: sllv $5, $2, $4 +; MIPS32EB-NEXT: nor $6, $zero, $5 +; MIPS32EB-NEXT: andi $2, $1, 65535 +; MIPS32EB-NEXT: sllv $8, $2, $4 +; MIPS32EB-NEXT: andi $2, $7, 65535 +; MIPS32EB-NEXT: sllv $7, $2, $4 +; MIPS32EB-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $9, 0($3) +; MIPS32EB-NEXT: and $10, $9, $5 +; MIPS32EB-NEXT: bne $10, $8, $BB15_3 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 +; MIPS32EB-NEXT: and $9, $9, $6 +; MIPS32EB-NEXT: or $9, $9, $7 +; MIPS32EB-NEXT: sc $9, 0($3) +; MIPS32EB-NEXT: beqz $9, $BB15_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: $BB15_3: +; MIPS32EB-NEXT: sync +; MIPS32EB-NEXT: srlv $2, $10, $4 +; MIPS32EB-NEXT: sll $2, $2, 16 +; MIPS32EB-NEXT: sra $2, $2, 16 +; MIPS32EB-NEXT: # %bb.4: +; MIPS32EB-NEXT: sll $1, $1, 16 +; MIPS32EB-NEXT: sra $1, $1, 16 +; MIPS32EB-NEXT: xor $1, $2, $1 +; MIPS32EB-NEXT: sltiu $3, $1, 1 +; MIPS32EB-NEXT: sync +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop + %desired = add i16 %l, %r + %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst seq_cst + ret {i16, i1} %res +} + +@countsint = common global i32 0, align 4 + +define i32 @CheckSync(i32 signext %v) nounwind noinline { +; MIPS32-LABEL: CheckSync: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: sync +; MIPS32-NEXT: lw $1, %got(countsint)($1) +; MIPS32-NEXT: $BB16_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: addu $3, $2, $4 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB16_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: sync +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: CheckSync: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; 
MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: sync +; MIPS32O0-NEXT: lw $3, %got(countsint)($1) +; MIPS32O0-NEXT: $BB16_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB16_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: sync +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: CheckSync: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: sync +; MIPS32R2-NEXT: lw $1, %got(countsint)($1) +; MIPS32R2-NEXT: $BB16_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: addu $3, $2, $4 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB16_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: sync +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: CheckSync: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: sync +; MIPS32R6-NEXT: lw $1, %got(countsint)($1) +; MIPS32R6-NEXT: $BB16_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: addu $3, $2, $4 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB16_1 +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: sync +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: CheckSync: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: sync +; MIPS32R6O0-NEXT: lw $3, %got(countsint)($1) +; MIPS32R6O0-NEXT: $BB16_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB16_1 +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: sync +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: CheckSync: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) +; MIPS4-NEXT: sync +; MIPS4-NEXT: ld $1, %got_disp(countsint)($1) +; MIPS4-NEXT: .LBB16_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: addu $3, $2, $4 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB16_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: sync +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: CheckSync: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) +; MIPS64-NEXT: sync +; MIPS64-NEXT: ld $1, %got_disp(countsint)($1) +; MIPS64-NEXT: .LBB16_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: addu $3, $2, $4 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB16_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: sync +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: CheckSync: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) +; MIPS64R2-NEXT: daddu $1, $1, $25 
+; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) +; MIPS64R2-NEXT: sync +; MIPS64R2-NEXT: ld $1, %got_disp(countsint)($1) +; MIPS64R2-NEXT: .LBB16_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: addu $3, $2, $4 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB16_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: sync +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: CheckSync: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) +; MIPS64R6-NEXT: sync +; MIPS64R6-NEXT: ld $1, %got_disp(countsint)($1) +; MIPS64R6-NEXT: .LBB16_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: addu $3, $2, $4 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB16_1 +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: sync +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: CheckSync: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: sync +; MIPS64R6O0-NEXT: ld $3, %got_disp(countsint)($1) +; MIPS64R6O0-NEXT: .LBB16_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB16_1 +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: sync +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: CheckSync: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: sync +; MM32-NEXT: lw $1, %got(countsint)($2) +; MM32-NEXT: $BB16_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: addu16 $3, $2, $4 +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB16_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: sync +; MM32-NEXT: jrc $ra +; +; O1-LABEL: CheckSync: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: sync +; O1-NEXT: lw $1, %got(countsint)($1) +; O1-NEXT: $BB16_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: addu $3, $2, $4 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB16_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: sync +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: CheckSync: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: sync +; O2-NEXT: lw $1, %got(countsint)($1) +; O2-NEXT: $BB16_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: addu $3, $2, $4 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB16_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: sync +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: CheckSync: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: sync +; O3-NEXT: lw $1, %got(countsint)($1) +; O3-NEXT: $BB16_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: 
Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: addu $3, $2, $4 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB16_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: sync +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: CheckSync: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: sync +; MIPS32EB-NEXT: lw $1, %got(countsint)($1) +; MIPS32EB-NEXT: $BB16_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $2, 0($1) +; MIPS32EB-NEXT: addu $3, $2, $4 +; MIPS32EB-NEXT: sc $3, 0($1) +; MIPS32EB-NEXT: beqz $3, $BB16_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: sync +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw add i32* @countsint, i32 %v seq_cst + ret i32 %0 +} + +; make sure that this assertion in +; TwoAddressInstructionPass::TryInstructionTransform does not fail: +; +; line 1203: assert(TargetRegisterInfo::isVirtualRegister(regB) && +; +; it failed when MipsDAGToDAGISel::ReplaceUsesWithZeroReg replaced an +; operand of an atomic instruction with register $zero. +@a = external global i32 + +define i32 @zeroreg() nounwind { +; MIPS32-LABEL: zeroreg: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: sync +; MIPS32-NEXT: addiu $2, $zero, 0 +; MIPS32-NEXT: addiu $3, $zero, 1 +; MIPS32-NEXT: lw $1, %got(a)($1) +; MIPS32-NEXT: $BB17_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $4, 0($1) +; MIPS32-NEXT: bne $4, $3, $BB17_3 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS32-NEXT: move $5, $2 +; MIPS32-NEXT: sc $5, 0($1) +; MIPS32-NEXT: beqz $5, $BB17_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB17_3: # %entry +; MIPS32-NEXT: xor $1, $4, $3 +; MIPS32-NEXT: sltiu $2, $1, 1 +; MIPS32-NEXT: sync +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: zeroreg: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: sync +; MIPS32O0-NEXT: lw $4, %got(a)($1) +; MIPS32O0-NEXT: addiu $6, $zero, 0 +; MIPS32O0-NEXT: addiu $2, $zero, 1 +; MIPS32O0-NEXT: move $5, $2 +; MIPS32O0-NEXT: $BB17_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $1, 0($4) +; MIPS32O0-NEXT: bne $1, $5, $BB17_3 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS32O0-NEXT: move $3, $6 +; MIPS32O0-NEXT: sc $3, 0($4) +; MIPS32O0-NEXT: beqz $3, $BB17_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: $BB17_3: # %entry +; MIPS32O0-NEXT: xor $2, $1, $2 +; MIPS32O0-NEXT: sltiu $2, $2, 1 +; MIPS32O0-NEXT: sync +; MIPS32O0-NEXT: addiu $2, $zero, 1 +; MIPS32O0-NEXT: xor $1, $1, $2 +; MIPS32O0-NEXT: sltiu $1, $1, 1 +; MIPS32O0-NEXT: andi $2, $1, 1 +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: zeroreg: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: sync +; MIPS32R2-NEXT: addiu $2, $zero, 0 +; MIPS32R2-NEXT: addiu $3, $zero, 1 +; MIPS32R2-NEXT: lw $1, %got(a)($1) +; MIPS32R2-NEXT: $BB17_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll 
$4, 0($1) +; MIPS32R2-NEXT: bne $4, $3, $BB17_3 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS32R2-NEXT: move $5, $2 +; MIPS32R2-NEXT: sc $5, 0($1) +; MIPS32R2-NEXT: beqz $5, $BB17_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: $BB17_3: # %entry +; MIPS32R2-NEXT: xor $1, $4, $3 +; MIPS32R2-NEXT: sltiu $2, $1, 1 +; MIPS32R2-NEXT: sync +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: zeroreg: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: sync +; MIPS32R6-NEXT: addiu $2, $zero, 0 +; MIPS32R6-NEXT: addiu $3, $zero, 1 +; MIPS32R6-NEXT: lw $1, %got(a)($1) +; MIPS32R6-NEXT: $BB17_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $4, 0($1) +; MIPS32R6-NEXT: bnec $4, $3, $BB17_3 +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS32R6-NEXT: move $5, $2 +; MIPS32R6-NEXT: sc $5, 0($1) +; MIPS32R6-NEXT: beqzc $5, $BB17_1 +; MIPS32R6-NEXT: $BB17_3: # %entry +; MIPS32R6-NEXT: xor $1, $4, $3 +; MIPS32R6-NEXT: sltiu $2, $1, 1 +; MIPS32R6-NEXT: sync +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: zeroreg: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: sync +; MIPS32R6O0-NEXT: lw $4, %got(a)($1) +; MIPS32R6O0-NEXT: addiu $6, $zero, 0 +; MIPS32R6O0-NEXT: addiu $2, $zero, 1 +; MIPS32R6O0-NEXT: move $5, $2 +; MIPS32R6O0-NEXT: $BB17_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $1, 0($4) +; MIPS32R6O0-NEXT: bnec $1, $5, $BB17_3 +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS32R6O0-NEXT: move $3, $6 +; MIPS32R6O0-NEXT: sc $3, 0($4) +; MIPS32R6O0-NEXT: beqzc $3, $BB17_1 +; MIPS32R6O0-NEXT: $BB17_3: # %entry +; MIPS32R6O0-NEXT: xor $1, $1, $2 +; MIPS32R6O0-NEXT: sltiu $2, $1, 1 +; MIPS32R6O0-NEXT: sync +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: zeroreg: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) +; MIPS4-NEXT: sync +; MIPS4-NEXT: addiu $2, $zero, 0 +; MIPS4-NEXT: addiu $3, $zero, 1 +; MIPS4-NEXT: ld $1, %got_disp(a)($1) +; MIPS4-NEXT: .LBB17_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $4, 0($1) +; MIPS4-NEXT: bne $4, $3, .LBB17_3 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS4-NEXT: move $5, $2 +; MIPS4-NEXT: sc $5, 0($1) +; MIPS4-NEXT: beqz $5, .LBB17_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: .LBB17_3: # %entry +; MIPS4-NEXT: xor $1, $4, $3 +; MIPS4-NEXT: sltiu $2, $1, 1 +; MIPS4-NEXT: sync +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: zeroreg: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) +; MIPS64-NEXT: sync +; MIPS64-NEXT: addiu $2, $zero, 0 +; MIPS64-NEXT: addiu $3, $zero, 1 +; MIPS64-NEXT: ld $1, %got_disp(a)($1) +; MIPS64-NEXT: .LBB17_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $4, 0($1) +; MIPS64-NEXT: bne $4, $3, .LBB17_3 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: # in 
Loop: Header=BB17_1 Depth=1 +; MIPS64-NEXT: move $5, $2 +; MIPS64-NEXT: sc $5, 0($1) +; MIPS64-NEXT: beqz $5, .LBB17_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: .LBB17_3: # %entry +; MIPS64-NEXT: xor $1, $4, $3 +; MIPS64-NEXT: sltiu $2, $1, 1 +; MIPS64-NEXT: sync +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: zeroreg: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) +; MIPS64R2-NEXT: sync +; MIPS64R2-NEXT: addiu $2, $zero, 0 +; MIPS64R2-NEXT: addiu $3, $zero, 1 +; MIPS64R2-NEXT: ld $1, %got_disp(a)($1) +; MIPS64R2-NEXT: .LBB17_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $4, 0($1) +; MIPS64R2-NEXT: bne $4, $3, .LBB17_3 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS64R2-NEXT: move $5, $2 +; MIPS64R2-NEXT: sc $5, 0($1) +; MIPS64R2-NEXT: beqz $5, .LBB17_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: .LBB17_3: # %entry +; MIPS64R2-NEXT: xor $1, $4, $3 +; MIPS64R2-NEXT: sltiu $2, $1, 1 +; MIPS64R2-NEXT: sync +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: zeroreg: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) +; MIPS64R6-NEXT: sync +; MIPS64R6-NEXT: addiu $2, $zero, 0 +; MIPS64R6-NEXT: addiu $3, $zero, 1 +; MIPS64R6-NEXT: ld $1, %got_disp(a)($1) +; MIPS64R6-NEXT: .LBB17_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $4, 0($1) +; MIPS64R6-NEXT: bnec $4, $3, .LBB17_3 +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS64R6-NEXT: move $5, $2 +; MIPS64R6-NEXT: sc $5, 0($1) +; MIPS64R6-NEXT: beqzc $5, .LBB17_1 +; MIPS64R6-NEXT: .LBB17_3: # %entry +; MIPS64R6-NEXT: xor $1, $4, $3 +; MIPS64R6-NEXT: sltiu $2, $1, 1 +; MIPS64R6-NEXT: sync +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: zeroreg: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) +; MIPS64R6O0-NEXT: sync +; MIPS64R6O0-NEXT: ld $4, %got_disp(a)($1) +; MIPS64R6O0-NEXT: addiu $6, $zero, 0 +; MIPS64R6O0-NEXT: addiu $2, $zero, 1 +; MIPS64R6O0-NEXT: move $5, $2 +; MIPS64R6O0-NEXT: .LBB17_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $1, 0($4) +; MIPS64R6O0-NEXT: bnec $1, $5, .LBB17_3 +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS64R6O0-NEXT: move $3, $6 +; MIPS64R6O0-NEXT: sc $3, 0($4) +; MIPS64R6O0-NEXT: beqzc $3, .LBB17_1 +; MIPS64R6O0-NEXT: .LBB17_3: # %entry +; MIPS64R6O0-NEXT: xor $1, $1, $2 +; MIPS64R6O0-NEXT: sltiu $2, $1, 1 +; MIPS64R6O0-NEXT: sync +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: zeroreg: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: sync +; MM32-NEXT: li16 $3, 0 +; MM32-NEXT: li16 $4, 1 +; MM32-NEXT: lw $1, %got(a)($2) +; MM32-NEXT: $BB17_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: bne $2, $4, $BB17_3 +; MM32-NEXT: nop +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MM32-NEXT: move $5, $3 +; MM32-NEXT: sc $5, 
0($1) +; MM32-NEXT: beqzc $5, $BB17_1 +; MM32-NEXT: $BB17_3: # %entry +; MM32-NEXT: sync +; MM32-NEXT: xor $1, $2, $4 +; MM32-NEXT: sltiu $2, $1, 1 +; MM32-NEXT: sync +; MM32-NEXT: jrc $ra +; +; O1-LABEL: zeroreg: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: sync +; O1-NEXT: addiu $2, $zero, 0 +; O1-NEXT: addiu $3, $zero, 1 +; O1-NEXT: lw $1, %got(a)($1) +; O1-NEXT: $BB17_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $4, 0($1) +; O1-NEXT: bne $4, $3, $BB17_3 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: # in Loop: Header=BB17_1 Depth=1 +; O1-NEXT: move $5, $2 +; O1-NEXT: sc $5, 0($1) +; O1-NEXT: beqz $5, $BB17_1 +; O1-NEXT: nop +; O1-NEXT: $BB17_3: # %entry +; O1-NEXT: xor $1, $4, $3 +; O1-NEXT: sltiu $2, $1, 1 +; O1-NEXT: sync +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: zeroreg: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: sync +; O2-NEXT: addiu $2, $zero, 0 +; O2-NEXT: addiu $3, $zero, 1 +; O2-NEXT: lw $1, %got(a)($1) +; O2-NEXT: $BB17_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $4, 0($1) +; O2-NEXT: bne $4, $3, $BB17_3 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: # in Loop: Header=BB17_1 Depth=1 +; O2-NEXT: move $5, $2 +; O2-NEXT: sc $5, 0($1) +; O2-NEXT: beqz $5, $BB17_1 +; O2-NEXT: nop +; O2-NEXT: $BB17_3: # %entry +; O2-NEXT: xor $1, $4, $3 +; O2-NEXT: sltiu $2, $1, 1 +; O2-NEXT: sync +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: zeroreg: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: addiu $2, $zero, 0 +; O3-NEXT: addiu $3, $zero, 1 +; O3-NEXT: sync +; O3-NEXT: lw $1, %got(a)($1) +; O3-NEXT: $BB17_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $4, 0($1) +; O3-NEXT: bne $4, $3, $BB17_3 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: # in Loop: Header=BB17_1 Depth=1 +; O3-NEXT: move $5, $2 +; O3-NEXT: sc $5, 0($1) +; O3-NEXT: beqz $5, $BB17_1 +; O3-NEXT: nop +; O3-NEXT: $BB17_3: # %entry +; O3-NEXT: sync +; O3-NEXT: xor $1, $4, $3 +; O3-NEXT: jr $ra +; O3-NEXT: sltiu $2, $1, 1 +; +; MIPS32EB-LABEL: zeroreg: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: sync +; MIPS32EB-NEXT: addiu $2, $zero, 0 +; MIPS32EB-NEXT: addiu $3, $zero, 1 +; MIPS32EB-NEXT: lw $1, %got(a)($1) +; MIPS32EB-NEXT: $BB17_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $4, 0($1) +; MIPS32EB-NEXT: bne $4, $3, $BB17_3 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: # in Loop: Header=BB17_1 Depth=1 +; MIPS32EB-NEXT: move $5, $2 +; MIPS32EB-NEXT: sc $5, 0($1) +; MIPS32EB-NEXT: beqz $5, $BB17_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: $BB17_3: # %entry +; MIPS32EB-NEXT: xor $1, $4, $3 +; MIPS32EB-NEXT: sltiu $2, $1, 1 +; MIPS32EB-NEXT: sync +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %pair0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst seq_cst + %0 = extractvalue { i32, i1 } %pair0, 0 + %1 = icmp eq i32 %0, 1 + %conv = zext i1 %1 to i32 + ret i32 %conv +} + +; Check that MIPS32R6 has the correct offset range. +; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store. 
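+; Note: the atomicrmw below addresses getelementptr(i32, i32* @x, i32 256),
+; i.e. a byte offset of 256 * 4 = 1024 bytes. The R6 ll/sc encodings carry
+; only a signed 9-bit offset (range [-256, 255]), so 1024 could never be
+; folded into the memory operand even once addr+offset folding is
+; implemented; all the checks below therefore expect the address to be
+; materialized first ("addiu $1, $1, 1024") before entering the ll/sc
+; retry loop.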
+define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind { +; MIPS32-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: lui $2, %hi(_gp_disp) +; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32-NEXT: addu $1, $2, $25 +; MIPS32-NEXT: lw $1, %got(x)($1) +; MIPS32-NEXT: addiu $1, $1, 1024 +; MIPS32-NEXT: $BB18_1: # %entry +; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32-NEXT: ll $2, 0($1) +; MIPS32-NEXT: addu $3, $2, $4 +; MIPS32-NEXT: sc $3, 0($1) +; MIPS32-NEXT: beqz $3, $BB18_1 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.2: # %entry +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS32O0-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS32O0: # %bb.0: # %entry +; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32O0-NEXT: addu $1, $2, $25 +; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: addiu $3, $1, 1024 +; MIPS32O0-NEXT: $BB18_1: # %entry +; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB18_1 +; MIPS32O0-NEXT: nop +; MIPS32O0-NEXT: # %bb.2: # %entry +; MIPS32O0-NEXT: jr $ra +; MIPS32O0-NEXT: nop +; +; MIPS32R2-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS32R2: # %bb.0: # %entry +; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R2-NEXT: addu $1, $2, $25 +; MIPS32R2-NEXT: lw $1, %got(x)($1) +; MIPS32R2-NEXT: addiu $1, $1, 1024 +; MIPS32R2-NEXT: $BB18_1: # %entry +; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R2-NEXT: ll $2, 0($1) +; MIPS32R2-NEXT: addu $3, $2, $4 +; MIPS32R2-NEXT: sc $3, 0($1) +; MIPS32R2-NEXT: beqz $3, $BB18_1 +; MIPS32R2-NEXT: nop +; MIPS32R2-NEXT: # %bb.2: # %entry +; MIPS32R2-NEXT: jr $ra +; MIPS32R2-NEXT: nop +; +; MIPS32R6-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS32R6: # %bb.0: # %entry +; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6-NEXT: addu $1, $2, $25 +; MIPS32R6-NEXT: lw $1, %got(x)($1) +; MIPS32R6-NEXT: addiu $1, $1, 1024 +; MIPS32R6-NEXT: $BB18_1: # %entry +; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6-NEXT: ll $2, 0($1) +; MIPS32R6-NEXT: addu $3, $2, $4 +; MIPS32R6-NEXT: sc $3, 0($1) +; MIPS32R6-NEXT: beqzc $3, $BB18_1 +; MIPS32R6-NEXT: nop +; MIPS32R6-NEXT: # %bb.2: # %entry +; MIPS32R6-NEXT: jrc $ra +; +; MIPS32R6O0-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS32R6O0: # %bb.0: # %entry +; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) +; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32R6O0-NEXT: addu $1, $2, $25 +; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: addiu $3, $1, 1024 +; MIPS32R6O0-NEXT: $BB18_1: # %entry +; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB18_1 +; MIPS32R6O0-NEXT: nop +; MIPS32R6O0-NEXT: # %bb.2: # %entry +; MIPS32R6O0-NEXT: jrc $ra +; +; MIPS4-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: daddiu $1, $1, 1024 +; MIPS4-NEXT: .LBB18_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: ll $2, 0($1) +; MIPS4-NEXT: addu $3, $2, $4 +; MIPS4-NEXT: sc $3, 0($1) +; MIPS4-NEXT: beqz $3, 
.LBB18_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: daddiu $1, $1, 1024 +; MIPS64-NEXT: .LBB18_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: ll $2, 0($1) +; MIPS64-NEXT: addu $3, $2, $4 +; MIPS64-NEXT: sc $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB18_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: daddiu $1, $1, 1024 +; MIPS64R2-NEXT: .LBB18_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: ll $2, 0($1) +; MIPS64R2-NEXT: addu $3, $2, $4 +; MIPS64R2-NEXT: sc $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB18_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: daddiu $1, $1, 1024 +; MIPS64R6-NEXT: .LBB18_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: ll $2, 0($1) +; MIPS64R6-NEXT: addu $3, $2, $4 +; MIPS64R6-NEXT: sc $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB18_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) +; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 +; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: daddiu $3, $1, 1024 +; MIPS64R6O0-NEXT: .LBB18_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB18_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; MM32-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MM32: # %bb.0: # %entry +; MM32-NEXT: lui $2, %hi(_gp_disp) +; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) +; MM32-NEXT: addu $2, $2, $25 +; MM32-NEXT: lw $1, %got(x)($2) +; MM32-NEXT: addiu $1, $1, 1024 +; MM32-NEXT: $BB18_1: # %entry +; MM32-NEXT: # =>This Inner Loop Header: Depth=1 +; MM32-NEXT: ll $2, 0($1) +; MM32-NEXT: addu16 $3, $2, $4 +; MM32-NEXT: sc $3, 0($1) +; MM32-NEXT: beqzc $3, $BB18_1 +; MM32-NEXT: # %bb.2: # %entry +; MM32-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadAdd32_OffGt9Bit: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $2, %hi(_gp_disp) +; O1-NEXT: addiu $2, $2, %lo(_gp_disp) +; O1-NEXT: addu $1, $2, $25 +; O1-NEXT: lw $1, %got(x)($1) +; O1-NEXT: addiu $1, $1, 1024 
+; O1-NEXT: $BB18_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: ll $2, 0($1) +; O1-NEXT: addu $3, $2, $4 +; O1-NEXT: sc $3, 0($1) +; O1-NEXT: beqz $3, $BB18_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadAdd32_OffGt9Bit: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $2, %hi(_gp_disp) +; O2-NEXT: addiu $2, $2, %lo(_gp_disp) +; O2-NEXT: addu $1, $2, $25 +; O2-NEXT: lw $1, %got(x)($1) +; O2-NEXT: addiu $1, $1, 1024 +; O2-NEXT: $BB18_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: ll $2, 0($1) +; O2-NEXT: addu $3, $2, $4 +; O2-NEXT: sc $3, 0($1) +; O2-NEXT: beqz $3, $BB18_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadAdd32_OffGt9Bit: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $2, %hi(_gp_disp) +; O3-NEXT: addiu $2, $2, %lo(_gp_disp) +; O3-NEXT: addu $1, $2, $25 +; O3-NEXT: lw $1, %got(x)($1) +; O3-NEXT: addiu $1, $1, 1024 +; O3-NEXT: $BB18_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: ll $2, 0($1) +; O3-NEXT: addu $3, $2, $4 +; O3-NEXT: sc $3, 0($1) +; O3-NEXT: beqz $3, $BB18_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS32EB-LABEL: AtomicLoadAdd32_OffGt9Bit: +; MIPS32EB: # %bb.0: # %entry +; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) +; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) +; MIPS32EB-NEXT: addu $1, $2, $25 +; MIPS32EB-NEXT: lw $1, %got(x)($1) +; MIPS32EB-NEXT: addiu $1, $1, 1024 +; MIPS32EB-NEXT: $BB18_1: # %entry +; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS32EB-NEXT: ll $2, 0($1) +; MIPS32EB-NEXT: addu $3, $2, $4 +; MIPS32EB-NEXT: sc $3, 0($1) +; MIPS32EB-NEXT: beqz $3, $BB18_1 +; MIPS32EB-NEXT: nop +; MIPS32EB-NEXT: # %bb.2: # %entry +; MIPS32EB-NEXT: jr $ra +; MIPS32EB-NEXT: nop +entry: + %0 = atomicrmw add i32* getelementptr(i32, i32* @x, i32 256), i32 %incr monotonic + ret i32 %0 + +} diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll index c8b67eda156f..4005ea17e32b 100644 --- a/llvm/test/CodeGen/Mips/atomic.ll +++ b/llvm/test/CodeGen/Mips/atomic.ll @@ -1,35 +1,35 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS32 -; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS32O0 -; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS32R2 -; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 
-relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS32R6 -; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS32R6O0 -; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS4 -; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS64 -; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS64R2 -; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS64R6 -; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS64R6O0 -; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MM32 ; We want to verify the produced code is well formed all optimization levels, the rest of the tests which ensure correctness. 
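The RUN lines in this hunk pin the Loongson-3 LL/SC workaround off with -mips-fix-loongson3-llsc=0, so the autogenerated assertions keep matching regardless of any default for that option. As the NOTE line says, the assertions come from utils/update_llc_test_checks.py; a minimal sketch of refreshing them after such a RUN-line change, assuming a build tree at build/ (the build path is hypothetical):

    llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
        llvm/test/CodeGen/Mips/atomic.ll
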
-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 -; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 -; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 ; Keep one big-endian check so that we don't reduce testing, but don't add more ; since endianness doesn't affect the body of the atomic operations. -; RUN: llc -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS32EB @x = common global i32 0, align 4 diff --git a/llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll b/llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll new file mode 100644 index 000000000000..48c202940047 --- /dev/null +++ b/llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll @@ -0,0 +1,1377 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=MIPS4 +; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=MIPS64 +; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=MIPS64R2 +; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=MIPS64R6 +; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=MIPS64R6O0 + +; We want to verify the produced code is well formed at all optimization levels; the rest of the tests ensure correctness.
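This new file mirrors atomic64.ll but pins the workaround on (=1). Each RUN pair is shorthand for an llc | FileCheck pipeline once lit substitutes %s with the file itself; expanded by hand, the first pair looks roughly like this (source-tree path assumed, llc and FileCheck taken from a build's bin directory):

    llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu \
        --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs \
        < llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll | \
        FileCheck llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll -check-prefix=MIPS4
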
+; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 +; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 +; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 + +; Keep one big-endian check so that we don't reduce testing, but don't add more +; since endianness doesn't affect the body of the atomic operations. +; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: FileCheck %s -check-prefix=MIPS64EB + +@x = common global i64 0, align 4 + +define i64 @AtomicLoadAdd(i64 signext %incr) nounwind { +; MIPS4-LABEL: AtomicLoadAdd: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB0_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: lld $2, 0($1) +; MIPS4-NEXT: daddu $3, $2, $4 +; MIPS4-NEXT: scd $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB0_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadAdd: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB0_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: lld $2, 0($1) +; MIPS64-NEXT: daddu $3, $2, $4 +; MIPS64-NEXT: scd $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB0_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadAdd: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB0_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: lld $2, 0($1) +; MIPS64R2-NEXT: daddu $3, $2, $4 +; MIPS64R2-NEXT: scd $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB0_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadAdd: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB0_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: lld $2, 0($1) +; MIPS64R6-NEXT: daddu $3, $2, $4 +; MIPS64R6-NEXT: scd $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB0_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadAdd: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, 
$1, %lo(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB0_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: daddu $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadAdd: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) +; O1-NEXT: daddu $1, $1, $25 +; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) +; O1-NEXT: ld $1, %got_disp(x)($1) +; O1-NEXT: .LBB0_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: lld $2, 0($1) +; O1-NEXT: daddu $3, $2, $4 +; O1-NEXT: scd $3, 0($1) +; O1-NEXT: beqz $3, .LBB0_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadAdd: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) +; O2-NEXT: daddu $1, $1, $25 +; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) +; O2-NEXT: ld $1, %got_disp(x)($1) +; O2-NEXT: .LBB0_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: lld $2, 0($1) +; O2-NEXT: daddu $3, $2, $4 +; O2-NEXT: scd $3, 0($1) +; O2-NEXT: beqz $3, .LBB0_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadAdd: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) +; O3-NEXT: daddu $1, $1, $25 +; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) +; O3-NEXT: ld $1, %got_disp(x)($1) +; O3-NEXT: .LBB0_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: lld $2, 0($1) +; O3-NEXT: daddu $3, $2, $4 +; O3-NEXT: scd $3, 0($1) +; O3-NEXT: beqz $3, .LBB0_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS64EB-LABEL: AtomicLoadAdd: +; MIPS64EB: # %bb.0: # %entry +; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64EB-NEXT: daddu $1, $1, $25 +; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) +; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) +; MIPS64EB-NEXT: .LBB0_1: # %entry +; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64EB-NEXT: lld $2, 0($1) +; MIPS64EB-NEXT: daddu $3, $2, $4 +; MIPS64EB-NEXT: scd $3, 0($1) +; MIPS64EB-NEXT: beqz $3, .LBB0_1 +; MIPS64EB-NEXT: nop +; MIPS64EB-NEXT: # %bb.2: # %entry +; MIPS64EB-NEXT: jr $ra +; MIPS64EB-NEXT: nop +entry: + %0 = atomicrmw add i64* @x, i64 %incr monotonic + ret i64 %0 + +} + +define i64 @AtomicLoadSub(i64 signext %incr) nounwind { +; MIPS4-LABEL: AtomicLoadSub: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB1_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: lld $2, 0($1) +; MIPS4-NEXT: dsubu $3, $2, $4 +; MIPS4-NEXT: scd $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB1_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadSub: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB1_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: 
Depth=1 +; MIPS64-NEXT: lld $2, 0($1) +; MIPS64-NEXT: dsubu $3, $2, $4 +; MIPS64-NEXT: scd $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB1_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadSub: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB1_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: lld $2, 0($1) +; MIPS64R2-NEXT: dsubu $3, $2, $4 +; MIPS64R2-NEXT: scd $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB1_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadSub: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB1_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: lld $2, 0($1) +; MIPS64R6-NEXT: dsubu $3, $2, $4 +; MIPS64R6-NEXT: scd $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB1_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadSub: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB1_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: dsubu $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadSub: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) +; O1-NEXT: daddu $1, $1, $25 +; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) +; O1-NEXT: ld $1, %got_disp(x)($1) +; O1-NEXT: .LBB1_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: lld $2, 0($1) +; O1-NEXT: dsubu $3, $2, $4 +; O1-NEXT: scd $3, 0($1) +; O1-NEXT: beqz $3, .LBB1_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadSub: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) +; O2-NEXT: daddu $1, $1, $25 +; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) +; O2-NEXT: ld $1, %got_disp(x)($1) +; O2-NEXT: .LBB1_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: lld $2, 0($1) +; O2-NEXT: dsubu $3, $2, $4 +; O2-NEXT: scd $3, 0($1) +; O2-NEXT: beqz $3, .LBB1_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadSub: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) +; O3-NEXT: daddu $1, $1, $25 +; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) +; O3-NEXT: ld $1, %got_disp(x)($1) +; O3-NEXT: .LBB1_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: lld $2, 0($1) +; O3-NEXT: dsubu $3, $2, $4 +; O3-NEXT: scd $3, 0($1) +; O3-NEXT: beqz $3, .LBB1_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; 
MIPS64EB-LABEL: AtomicLoadSub: +; MIPS64EB: # %bb.0: # %entry +; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64EB-NEXT: daddu $1, $1, $25 +; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) +; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) +; MIPS64EB-NEXT: .LBB1_1: # %entry +; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64EB-NEXT: lld $2, 0($1) +; MIPS64EB-NEXT: dsubu $3, $2, $4 +; MIPS64EB-NEXT: scd $3, 0($1) +; MIPS64EB-NEXT: beqz $3, .LBB1_1 +; MIPS64EB-NEXT: nop +; MIPS64EB-NEXT: # %bb.2: # %entry +; MIPS64EB-NEXT: jr $ra +; MIPS64EB-NEXT: nop +entry: + %0 = atomicrmw sub i64* @x, i64 %incr monotonic + ret i64 %0 + +} + +define i64 @AtomicLoadAnd(i64 signext %incr) nounwind { +; MIPS4-LABEL: AtomicLoadAnd: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB2_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: lld $2, 0($1) +; MIPS4-NEXT: and $3, $2, $4 +; MIPS4-NEXT: scd $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB2_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadAnd: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB2_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: lld $2, 0($1) +; MIPS64-NEXT: and $3, $2, $4 +; MIPS64-NEXT: scd $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB2_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadAnd: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB2_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: lld $2, 0($1) +; MIPS64R2-NEXT: and $3, $2, $4 +; MIPS64R2-NEXT: scd $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB2_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadAnd: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB2_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: lld $2, 0($1) +; MIPS64R6-NEXT: and $3, $2, $4 +; MIPS64R6-NEXT: scd $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB2_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadAnd: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB2_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 +; 
MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadAnd: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) +; O1-NEXT: daddu $1, $1, $25 +; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) +; O1-NEXT: ld $1, %got_disp(x)($1) +; O1-NEXT: .LBB2_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: lld $2, 0($1) +; O1-NEXT: and $3, $2, $4 +; O1-NEXT: scd $3, 0($1) +; O1-NEXT: beqz $3, .LBB2_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadAnd: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) +; O2-NEXT: daddu $1, $1, $25 +; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) +; O2-NEXT: ld $1, %got_disp(x)($1) +; O2-NEXT: .LBB2_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: lld $2, 0($1) +; O2-NEXT: and $3, $2, $4 +; O2-NEXT: scd $3, 0($1) +; O2-NEXT: beqz $3, .LBB2_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadAnd: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) +; O3-NEXT: daddu $1, $1, $25 +; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) +; O3-NEXT: ld $1, %got_disp(x)($1) +; O3-NEXT: .LBB2_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: lld $2, 0($1) +; O3-NEXT: and $3, $2, $4 +; O3-NEXT: scd $3, 0($1) +; O3-NEXT: beqz $3, .LBB2_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS64EB-LABEL: AtomicLoadAnd: +; MIPS64EB: # %bb.0: # %entry +; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64EB-NEXT: daddu $1, $1, $25 +; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) +; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) +; MIPS64EB-NEXT: .LBB2_1: # %entry +; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64EB-NEXT: lld $2, 0($1) +; MIPS64EB-NEXT: and $3, $2, $4 +; MIPS64EB-NEXT: scd $3, 0($1) +; MIPS64EB-NEXT: beqz $3, .LBB2_1 +; MIPS64EB-NEXT: nop +; MIPS64EB-NEXT: # %bb.2: # %entry +; MIPS64EB-NEXT: jr $ra +; MIPS64EB-NEXT: nop +entry: + %0 = atomicrmw and i64* @x, i64 %incr monotonic + ret i64 %0 + +} + +define i64 @AtomicLoadOr(i64 signext %incr) nounwind { +; MIPS4-LABEL: AtomicLoadOr: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB3_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: lld $2, 0($1) +; MIPS4-NEXT: or $3, $2, $4 +; MIPS4-NEXT: scd $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB3_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadOr: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB3_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: lld $2, 0($1) +; MIPS64-NEXT: or $3, $2, $4 +; MIPS64-NEXT: scd $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB3_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadOr: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, 
%hi(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB3_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: lld $2, 0($1) +; MIPS64R2-NEXT: or $3, $2, $4 +; MIPS64R2-NEXT: scd $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB3_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadOr: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB3_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: lld $2, 0($1) +; MIPS64R6-NEXT: or $3, $2, $4 +; MIPS64R6-NEXT: scd $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB3_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadOr: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB3_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: or $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadOr: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) +; O1-NEXT: daddu $1, $1, $25 +; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) +; O1-NEXT: ld $1, %got_disp(x)($1) +; O1-NEXT: .LBB3_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: lld $2, 0($1) +; O1-NEXT: or $3, $2, $4 +; O1-NEXT: scd $3, 0($1) +; O1-NEXT: beqz $3, .LBB3_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadOr: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) +; O2-NEXT: daddu $1, $1, $25 +; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) +; O2-NEXT: ld $1, %got_disp(x)($1) +; O2-NEXT: .LBB3_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: lld $2, 0($1) +; O2-NEXT: or $3, $2, $4 +; O2-NEXT: scd $3, 0($1) +; O2-NEXT: beqz $3, .LBB3_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadOr: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) +; O3-NEXT: daddu $1, $1, $25 +; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) +; O3-NEXT: ld $1, %got_disp(x)($1) +; O3-NEXT: .LBB3_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: lld $2, 0($1) +; O3-NEXT: or $3, $2, $4 +; O3-NEXT: scd $3, 0($1) +; O3-NEXT: beqz $3, .LBB3_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS64EB-LABEL: AtomicLoadOr: +; MIPS64EB: # %bb.0: # %entry +; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64EB-NEXT: daddu $1, $1, $25 +; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) +; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) +; MIPS64EB-NEXT: .LBB3_1: # %entry +; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 +; 
MIPS64EB-NEXT: lld $2, 0($1) +; MIPS64EB-NEXT: or $3, $2, $4 +; MIPS64EB-NEXT: scd $3, 0($1) +; MIPS64EB-NEXT: beqz $3, .LBB3_1 +; MIPS64EB-NEXT: nop +; MIPS64EB-NEXT: # %bb.2: # %entry +; MIPS64EB-NEXT: jr $ra +; MIPS64EB-NEXT: nop +entry: + %0 = atomicrmw or i64* @x, i64 %incr monotonic + ret i64 %0 + +} + +define i64 @AtomicLoadXor(i64 signext %incr) nounwind { +; MIPS4-LABEL: AtomicLoadXor: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB4_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: lld $2, 0($1) +; MIPS4-NEXT: xor $3, $2, $4 +; MIPS4-NEXT: scd $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB4_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadXor: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB4_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: lld $2, 0($1) +; MIPS64-NEXT: xor $3, $2, $4 +; MIPS64-NEXT: scd $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB4_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadXor: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB4_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: lld $2, 0($1) +; MIPS64R2-NEXT: xor $3, $2, $4 +; MIPS64R2-NEXT: scd $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB4_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadXor: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB4_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: lld $2, 0($1) +; MIPS64R6-NEXT: xor $3, $2, $4 +; MIPS64R6-NEXT: scd $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB4_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadXor: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB4_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: xor $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadXor: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) +; O1-NEXT: daddu $1, $1, $25 +; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) +; O1-NEXT: ld $1, %got_disp(x)($1) +; O1-NEXT: .LBB4_1: # %entry +; O1-NEXT: # 
=>This Inner Loop Header: Depth=1 +; O1-NEXT: lld $2, 0($1) +; O1-NEXT: xor $3, $2, $4 +; O1-NEXT: scd $3, 0($1) +; O1-NEXT: beqz $3, .LBB4_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadXor: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) +; O2-NEXT: daddu $1, $1, $25 +; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) +; O2-NEXT: ld $1, %got_disp(x)($1) +; O2-NEXT: .LBB4_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: lld $2, 0($1) +; O2-NEXT: xor $3, $2, $4 +; O2-NEXT: scd $3, 0($1) +; O2-NEXT: beqz $3, .LBB4_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadXor: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) +; O3-NEXT: daddu $1, $1, $25 +; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) +; O3-NEXT: ld $1, %got_disp(x)($1) +; O3-NEXT: .LBB4_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: lld $2, 0($1) +; O3-NEXT: xor $3, $2, $4 +; O3-NEXT: scd $3, 0($1) +; O3-NEXT: beqz $3, .LBB4_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS64EB-LABEL: AtomicLoadXor: +; MIPS64EB: # %bb.0: # %entry +; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64EB-NEXT: daddu $1, $1, $25 +; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) +; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) +; MIPS64EB-NEXT: .LBB4_1: # %entry +; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64EB-NEXT: lld $2, 0($1) +; MIPS64EB-NEXT: xor $3, $2, $4 +; MIPS64EB-NEXT: scd $3, 0($1) +; MIPS64EB-NEXT: beqz $3, .LBB4_1 +; MIPS64EB-NEXT: nop +; MIPS64EB-NEXT: # %bb.2: # %entry +; MIPS64EB-NEXT: jr $ra +; MIPS64EB-NEXT: nop +entry: + %0 = atomicrmw xor i64* @x, i64 %incr monotonic + ret i64 %0 + +} + +define i64 @AtomicLoadNand(i64 signext %incr) nounwind { +; MIPS4-LABEL: AtomicLoadNand: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB5_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: lld $2, 0($1) +; MIPS4-NEXT: and $3, $2, $4 +; MIPS4-NEXT: nor $3, $zero, $3 +; MIPS4-NEXT: scd $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB5_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: nop +; +; MIPS64-LABEL: AtomicLoadNand: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB5_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: lld $2, 0($1) +; MIPS64-NEXT: and $3, $2, $4 +; MIPS64-NEXT: nor $3, $zero, $3 +; MIPS64-NEXT: scd $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB5_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS64R2-LABEL: AtomicLoadNand: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB5_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: lld $2, 0($1) 
+; MIPS64R2-NEXT: and $3, $2, $4 +; MIPS64R2-NEXT: nor $3, $zero, $3 +; MIPS64R2-NEXT: scd $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB5_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: nop +; +; MIPS64R6-LABEL: AtomicLoadNand: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB5_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: lld $2, 0($1) +; MIPS64R6-NEXT: and $3, $2, $4 +; MIPS64R6-NEXT: nor $3, $zero, $3 +; MIPS64R6-NEXT: scd $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB5_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jrc $ra +; +; MIPS64R6O0-LABEL: AtomicLoadNand: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB5_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: nor $1, $zero, $1 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 +; MIPS64R6O0-NEXT: nop +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: jrc $ra +; +; O1-LABEL: AtomicLoadNand: +; O1: # %bb.0: # %entry +; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) +; O1-NEXT: daddu $1, $1, $25 +; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) +; O1-NEXT: ld $1, %got_disp(x)($1) +; O1-NEXT: .LBB5_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: lld $2, 0($1) +; O1-NEXT: and $3, $2, $4 +; O1-NEXT: nor $3, $zero, $3 +; O1-NEXT: scd $3, 0($1) +; O1-NEXT: beqz $3, .LBB5_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: nop +; +; O2-LABEL: AtomicLoadNand: +; O2: # %bb.0: # %entry +; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) +; O2-NEXT: daddu $1, $1, $25 +; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) +; O2-NEXT: ld $1, %got_disp(x)($1) +; O2-NEXT: .LBB5_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: lld $2, 0($1) +; O2-NEXT: and $3, $2, $4 +; O2-NEXT: nor $3, $zero, $3 +; O2-NEXT: scd $3, 0($1) +; O2-NEXT: beqz $3, .LBB5_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: nop +; +; O3-LABEL: AtomicLoadNand: +; O3: # %bb.0: # %entry +; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) +; O3-NEXT: daddu $1, $1, $25 +; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) +; O3-NEXT: ld $1, %got_disp(x)($1) +; O3-NEXT: .LBB5_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: lld $2, 0($1) +; O3-NEXT: and $3, $2, $4 +; O3-NEXT: nor $3, $zero, $3 +; O3-NEXT: scd $3, 0($1) +; O3-NEXT: beqz $3, .LBB5_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: nop +; +; MIPS64EB-LABEL: AtomicLoadNand: +; MIPS64EB: # %bb.0: # %entry +; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64EB-NEXT: daddu $1, $1, $25 +; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) +; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) +; MIPS64EB-NEXT: .LBB5_1: # %entry +; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64EB-NEXT: lld $2, 0($1) +; MIPS64EB-NEXT: and $3, $2, $4 
+; MIPS64EB-NEXT: nor $3, $zero, $3 +; MIPS64EB-NEXT: scd $3, 0($1) +; MIPS64EB-NEXT: beqz $3, .LBB5_1 +; MIPS64EB-NEXT: nop +; MIPS64EB-NEXT: # %bb.2: # %entry +; MIPS64EB-NEXT: jr $ra +; MIPS64EB-NEXT: nop +entry: + %0 = atomicrmw nand i64* @x, i64 %incr monotonic + ret i64 %0 + +} + +define i64 @AtomicSwap64(i64 signext %newval) nounwind { +; MIPS4-LABEL: AtomicSwap64: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: daddiu $sp, $sp, -16 +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) +; MIPS4-NEXT: sd $4, 8($sp) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB6_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: lld $2, 0($1) +; MIPS4-NEXT: move $3, $4 +; MIPS4-NEXT: scd $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB6_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64-LABEL: AtomicSwap64: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) +; MIPS64-NEXT: sd $4, 8($sp) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB6_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: lld $2, 0($1) +; MIPS64-NEXT: move $3, $4 +; MIPS64-NEXT: scd $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB6_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R2-LABEL: AtomicSwap64: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: daddiu $sp, $sp, -16 +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) +; MIPS64R2-NEXT: sd $4, 8($sp) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB6_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: lld $2, 0($1) +; MIPS64R2-NEXT: move $3, $4 +; MIPS64R2-NEXT: scd $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB6_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6-LABEL: AtomicSwap64: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) +; MIPS64R6-NEXT: sd $4, 8($sp) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB6_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: lld $2, 0($1) +; MIPS64R6-NEXT: move $3, $4 +; MIPS64R6-NEXT: scd $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB6_1 +; MIPS64R6-NEXT: nop +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: jr $ra +; MIPS64R6-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6O0-LABEL: AtomicSwap64: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) +; MIPS64R6O0-NEXT: sd $4, 8($sp) +; MIPS64R6O0-NEXT: ld $4, 8($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB6_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: scd 
$1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 +; MIPS64R6O0-NEXT: jrc $ra +; +; O1-LABEL: AtomicSwap64: +; O1: # %bb.0: # %entry +; O1-NEXT: daddiu $sp, $sp, -16 +; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) +; O1-NEXT: daddu $1, $1, $25 +; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) +; O1-NEXT: sd $4, 8($sp) +; O1-NEXT: ld $1, %got_disp(x)($1) +; O1-NEXT: .LBB6_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: lld $2, 0($1) +; O1-NEXT: move $3, $4 +; O1-NEXT: scd $3, 0($1) +; O1-NEXT: beqz $3, .LBB6_1 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: jr $ra +; O1-NEXT: daddiu $sp, $sp, 16 +; +; O2-LABEL: AtomicSwap64: +; O2: # %bb.0: # %entry +; O2-NEXT: daddiu $sp, $sp, -16 +; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) +; O2-NEXT: daddu $1, $1, $25 +; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) +; O2-NEXT: sd $4, 8($sp) +; O2-NEXT: ld $1, %got_disp(x)($1) +; O2-NEXT: .LBB6_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: lld $2, 0($1) +; O2-NEXT: move $3, $4 +; O2-NEXT: scd $3, 0($1) +; O2-NEXT: beqz $3, .LBB6_1 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: jr $ra +; O2-NEXT: daddiu $sp, $sp, 16 +; +; O3-LABEL: AtomicSwap64: +; O3: # %bb.0: # %entry +; O3-NEXT: daddiu $sp, $sp, -16 +; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) +; O3-NEXT: sd $4, 8($sp) +; O3-NEXT: daddu $1, $1, $25 +; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) +; O3-NEXT: ld $1, %got_disp(x)($1) +; O3-NEXT: .LBB6_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: lld $2, 0($1) +; O3-NEXT: move $3, $4 +; O3-NEXT: scd $3, 0($1) +; O3-NEXT: beqz $3, .LBB6_1 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: jr $ra +; O3-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64EB-LABEL: AtomicSwap64: +; MIPS64EB: # %bb.0: # %entry +; MIPS64EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) +; MIPS64EB-NEXT: daddu $1, $1, $25 +; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) +; MIPS64EB-NEXT: sd $4, 8($sp) +; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) +; MIPS64EB-NEXT: .LBB6_1: # %entry +; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64EB-NEXT: lld $2, 0($1) +; MIPS64EB-NEXT: move $3, $4 +; MIPS64EB-NEXT: scd $3, 0($1) +; MIPS64EB-NEXT: beqz $3, .LBB6_1 +; MIPS64EB-NEXT: nop +; MIPS64EB-NEXT: # %bb.2: # %entry +; MIPS64EB-NEXT: jr $ra +; MIPS64EB-NEXT: daddiu $sp, $sp, 16 +entry: + %newval.addr = alloca i64, align 4 + store i64 %newval, i64* %newval.addr, align 4 + %tmp = load i64, i64* %newval.addr, align 4 + %0 = atomicrmw xchg i64* @x, i64 %tmp monotonic + ret i64 %0 + +} + +define i64 @AtomicCmpSwap64(i64 signext %oldval, i64 signext %newval) nounwind { +; MIPS4-LABEL: AtomicCmpSwap64: +; MIPS4: # %bb.0: # %entry +; MIPS4-NEXT: daddiu $sp, $sp, -16 +; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS4-NEXT: daddu $1, $1, $25 +; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS4-NEXT: sd $5, 8($sp) +; MIPS4-NEXT: ld $1, %got_disp(x)($1) +; MIPS4-NEXT: .LBB7_1: # %entry +; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS4-NEXT: lld $2, 0($1) +; MIPS4-NEXT: bne $2, $4, .LBB7_3 +; MIPS4-NEXT: nop +; MIPS4-NEXT: # %bb.2: # %entry +; MIPS4-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS4-NEXT: move $3, $5 +; MIPS4-NEXT: scd $3, 0($1) +; MIPS4-NEXT: beqz $3, .LBB7_1 +; MIPS4-NEXT: nop +; MIPS4-NEXT: .LBB7_3: # %entry 
+; MIPS4-NEXT: sync +; MIPS4-NEXT: jr $ra +; MIPS4-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64-LABEL: AtomicCmpSwap64: +; MIPS64: # %bb.0: # %entry +; MIPS64-NEXT: daddiu $sp, $sp, -16 +; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64-NEXT: daddu $1, $1, $25 +; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64-NEXT: sd $5, 8($sp) +; MIPS64-NEXT: ld $1, %got_disp(x)($1) +; MIPS64-NEXT: .LBB7_1: # %entry +; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64-NEXT: lld $2, 0($1) +; MIPS64-NEXT: bne $2, $4, .LBB7_3 +; MIPS64-NEXT: nop +; MIPS64-NEXT: # %bb.2: # %entry +; MIPS64-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS64-NEXT: move $3, $5 +; MIPS64-NEXT: scd $3, 0($1) +; MIPS64-NEXT: beqz $3, .LBB7_1 +; MIPS64-NEXT: nop +; MIPS64-NEXT: .LBB7_3: # %entry +; MIPS64-NEXT: sync +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R2-LABEL: AtomicCmpSwap64: +; MIPS64R2: # %bb.0: # %entry +; MIPS64R2-NEXT: daddiu $sp, $sp, -16 +; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64R2-NEXT: daddu $1, $1, $25 +; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64R2-NEXT: sd $5, 8($sp) +; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R2-NEXT: .LBB7_1: # %entry +; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R2-NEXT: lld $2, 0($1) +; MIPS64R2-NEXT: bne $2, $4, .LBB7_3 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: # %bb.2: # %entry +; MIPS64R2-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS64R2-NEXT: move $3, $5 +; MIPS64R2-NEXT: scd $3, 0($1) +; MIPS64R2-NEXT: beqz $3, .LBB7_1 +; MIPS64R2-NEXT: nop +; MIPS64R2-NEXT: .LBB7_3: # %entry +; MIPS64R2-NEXT: sync +; MIPS64R2-NEXT: jr $ra +; MIPS64R2-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6-LABEL: AtomicCmpSwap64: +; MIPS64R6: # %bb.0: # %entry +; MIPS64R6-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64R6-NEXT: daddu $1, $1, $25 +; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64R6-NEXT: sd $5, 8($sp) +; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6-NEXT: .LBB7_1: # %entry +; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6-NEXT: lld $2, 0($1) +; MIPS64R6-NEXT: bnec $2, $4, .LBB7_3 +; MIPS64R6-NEXT: # %bb.2: # %entry +; MIPS64R6-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS64R6-NEXT: move $3, $5 +; MIPS64R6-NEXT: scd $3, 0($1) +; MIPS64R6-NEXT: beqzc $3, .LBB7_1 +; MIPS64R6-NEXT: .LBB7_3: # %entry +; MIPS64R6-NEXT: sync +; MIPS64R6-NEXT: jr $ra +; MIPS64R6-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64R6O0-LABEL: AtomicCmpSwap64: +; MIPS64R6O0: # %bb.0: # %entry +; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 +; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64R6O0-NEXT: daddu $1, $1, $25 +; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64R6O0-NEXT: sd $5, 8($sp) +; MIPS64R6O0-NEXT: ld $5, 8($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) +; MIPS64R6O0-NEXT: .LBB7_1: # %entry +; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 +; MIPS64R6O0-NEXT: # %bb.2: # %entry +; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 +; MIPS64R6O0-NEXT: .LBB7_3: # %entry +; MIPS64R6O0-NEXT: sync +; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 +; MIPS64R6O0-NEXT: jrc $ra +; +; O1-LABEL: AtomicCmpSwap64: +; O1: # %bb.0: # %entry +; O1-NEXT: daddiu $sp, $sp, -16 +; 
O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; O1-NEXT: daddu $1, $1, $25 +; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; O1-NEXT: sd $5, 8($sp) +; O1-NEXT: ld $1, %got_disp(x)($1) +; O1-NEXT: .LBB7_1: # %entry +; O1-NEXT: # =>This Inner Loop Header: Depth=1 +; O1-NEXT: lld $2, 0($1) +; O1-NEXT: bne $2, $4, .LBB7_3 +; O1-NEXT: nop +; O1-NEXT: # %bb.2: # %entry +; O1-NEXT: # in Loop: Header=BB7_1 Depth=1 +; O1-NEXT: move $3, $5 +; O1-NEXT: scd $3, 0($1) +; O1-NEXT: beqz $3, .LBB7_1 +; O1-NEXT: nop +; O1-NEXT: .LBB7_3: # %entry +; O1-NEXT: sync +; O1-NEXT: jr $ra +; O1-NEXT: daddiu $sp, $sp, 16 +; +; O2-LABEL: AtomicCmpSwap64: +; O2: # %bb.0: # %entry +; O2-NEXT: daddiu $sp, $sp, -16 +; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; O2-NEXT: daddu $1, $1, $25 +; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; O2-NEXT: sd $5, 8($sp) +; O2-NEXT: ld $1, %got_disp(x)($1) +; O2-NEXT: .LBB7_1: # %entry +; O2-NEXT: # =>This Inner Loop Header: Depth=1 +; O2-NEXT: lld $2, 0($1) +; O2-NEXT: bne $2, $4, .LBB7_3 +; O2-NEXT: nop +; O2-NEXT: # %bb.2: # %entry +; O2-NEXT: # in Loop: Header=BB7_1 Depth=1 +; O2-NEXT: move $3, $5 +; O2-NEXT: scd $3, 0($1) +; O2-NEXT: beqz $3, .LBB7_1 +; O2-NEXT: nop +; O2-NEXT: .LBB7_3: # %entry +; O2-NEXT: sync +; O2-NEXT: jr $ra +; O2-NEXT: daddiu $sp, $sp, 16 +; +; O3-LABEL: AtomicCmpSwap64: +; O3: # %bb.0: # %entry +; O3-NEXT: daddiu $sp, $sp, -16 +; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; O3-NEXT: sd $5, 8($sp) +; O3-NEXT: daddu $1, $1, $25 +; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; O3-NEXT: ld $1, %got_disp(x)($1) +; O3-NEXT: .LBB7_1: # %entry +; O3-NEXT: # =>This Inner Loop Header: Depth=1 +; O3-NEXT: lld $2, 0($1) +; O3-NEXT: bne $2, $4, .LBB7_3 +; O3-NEXT: nop +; O3-NEXT: # %bb.2: # %entry +; O3-NEXT: # in Loop: Header=BB7_1 Depth=1 +; O3-NEXT: move $3, $5 +; O3-NEXT: scd $3, 0($1) +; O3-NEXT: beqz $3, .LBB7_1 +; O3-NEXT: nop +; O3-NEXT: .LBB7_3: # %entry +; O3-NEXT: sync +; O3-NEXT: jr $ra +; O3-NEXT: daddiu $sp, $sp, 16 +; +; MIPS64EB-LABEL: AtomicCmpSwap64: +; MIPS64EB: # %bb.0: # %entry +; MIPS64EB-NEXT: daddiu $sp, $sp, -16 +; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64EB-NEXT: daddu $1, $1, $25 +; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) +; MIPS64EB-NEXT: sd $5, 8($sp) +; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) +; MIPS64EB-NEXT: .LBB7_1: # %entry +; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 +; MIPS64EB-NEXT: lld $2, 0($1) +; MIPS64EB-NEXT: bne $2, $4, .LBB7_3 +; MIPS64EB-NEXT: nop +; MIPS64EB-NEXT: # %bb.2: # %entry +; MIPS64EB-NEXT: # in Loop: Header=BB7_1 Depth=1 +; MIPS64EB-NEXT: move $3, $5 +; MIPS64EB-NEXT: scd $3, 0($1) +; MIPS64EB-NEXT: beqz $3, .LBB7_1 +; MIPS64EB-NEXT: nop +; MIPS64EB-NEXT: .LBB7_3: # %entry +; MIPS64EB-NEXT: sync +; MIPS64EB-NEXT: jr $ra +; MIPS64EB-NEXT: daddiu $sp, $sp, 16 +entry: + %newval.addr = alloca i64, align 4 + store i64 %newval, i64* %newval.addr, align 4 + %tmp = load i64, i64* %newval.addr, align 4 + %0 = cmpxchg i64* @x, i64 %oldval, i64 %tmp monotonic monotonic + %1 = extractvalue { i64, i1 } %0, 0 + ret i64 %1 + +} diff --git a/llvm/test/CodeGen/Mips/atomic64.ll b/llvm/test/CodeGen/Mips/atomic64.ll index d27c9ac42e05..6e647211d15c 100644 --- a/llvm/test/CodeGen/Mips/atomic64.ll +++ b/llvm/test/CodeGen/Mips/atomic64.ll @@ -1,23 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=mips64el-unknown-linux-gnu 
--disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS4 -; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS64 -; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS64R2 -; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS64R6 -; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS64R6O0 ; We want to verify the produced code is well formed all optimization levels, the rest of the test which ensure correctness. -; RUN: llc -mtriple=mips64el-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 -; RUN: llc -mtriple=mips64el-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 -; RUN: llc -mtriple=mips64el-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 ; Keep one big-endian check so that we don't reduce testing, but don't add more ; since endianness doesn't affect the body of the atomic operations. 
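The endianness comment holds because these are whole-word lld/scd loops with no byte-lane shifts, unlike the subword (i8/i16) atomics, where the shift/mask sequence is endian-dependent. A rough way to confirm it, assuming llc on PATH and this test file saved locally (file and output names hypothetical):

    llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu --disable-machine-licm \
        -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < atomic64.ll > eb.s
    llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm \
        -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < atomic64.ll > el.s
    diff eb.s el.s   # expect the ll/sc loop bodies to be identical
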
-; RUN: llc -mtriple=mips64-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ +; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -check-prefix=MIPS64EB @x = common global i64 0, align 4 diff --git a/llvm/test/DebugInfo/Symbolize/ELF/loongarch-empty-name-symbol.s b/llvm/test/DebugInfo/Symbolize/ELF/loongarch-empty-name-symbol.s new file mode 100644 index 000000000000..ea58521cb4d6 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/loongarch-empty-name-symbol.s @@ -0,0 +1,26 @@ +# REQUIRES: loongarch-registered-target +## Ignore empty name symbols. + +# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t +# RUN: llvm-readelf -s %t | FileCheck %s --check-prefix=SYM + +# SYM: 0000000000000004 0 NOTYPE LOCAL DEFAULT [[#]] {{$}} +# SYM: 0000000000000000 0 NOTYPE GLOBAL DEFAULT [[#]] foo + +## Make sure we test at an address larger than or equal to an empty name symbol. +# RUN: llvm-symbolizer --obj=%t 0 4 | FileCheck %s + +# CHECK: foo +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: foo +# CHECK-NEXT: ??:0:0 + +.globl foo +foo: + nop + .file 1 "/tmp" "a.s" + .loc 1 1 0 + nop + +.section .debug_line,"",@progbits diff --git a/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll new file mode 100644 index 000000000000..8ff055f13ad1 --- /dev/null +++ b/llvm/test/DebugInfo/X86/dbg-value-no-crash.ll @@ -0,0 +1,39 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=avx512bw,avx512vl -o - %s + +;; Check this won't result in crash. +define <8 x i32> @foo(ptr %0, <8 x i32> %1, i8 %2, i8 %3) { + %5 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %1, <8 x i32> zeroinitializer) + %6 = add nsw <8 x i32> %1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + call void @llvm.dbg.value(metadata <8 x i32> %6, metadata !4, metadata !DIExpression()), !dbg !15 + %7 = bitcast i8 %2 to <8 x i1> + %8 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %5 + %9 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %8, <8 x i32> zeroinitializer) + %10 = bitcast i8 %3 to <8 x i1> + %11 = select <8 x i1> %10, <8 x i32> %9, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + ret <8 x i32> %11 +} + +declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2) +!1 = !DIFile(filename: "a.cpp", directory: "/") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !DILocalVariable(name: "a", arg: 2, scope: !5, file: !1, line: 12, type: !11) +!5 = distinct !DISubprogram(name: "foo", scope: !6, file: !1, line: 12, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, declaration: !9, retainedNodes: !10) +!6 = !DINamespace(name: "ns1", scope: null) +!7 = !DISubroutineType(types: !8) +!8 = !{null} +!9 = !DISubprogram(name: "foo", scope: !6, file: !1, line: 132, type: !7, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +!10 = !{!4} +!11 = !DICompositeType(tag: DW_TAG_array_type, baseType: !12, size: 256, flags: DIFlagVector, elements: !13) +!12 = !DIBasicType(name: "long long", size: 64, encoding: DW_ATE_signed) +!13 = !{!14} +!14 = !DISubrange(count: 4) +!15 = !DILocation(line: 0, scope: !5, inlinedAt: !16) +!16 = !DILocation(line: 
18, scope: !17) +!17 = distinct !DISubprogram(name: "foo", scope: null, file: !1, type: !7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s index caeae4fa441f..56f391b7d235 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s @@ -1,3 +1,4 @@ +# UNSUPPORTED: loongarch64 # RUN: rm -rf %t && mkdir -p %t # RUN: llvm-mc -triple=x86_64-apple-macos10.9 -filetype=obj \ # RUN: -o %t/helper.o %S/Inputs/MachO_GOTAndStubsOptimizationHelper.s diff --git a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg index e2535ef1dbfd..09f1a2ab6c6d 100644 --- a/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg +++ b/llvm/test/ExecutionEngine/MCJIT/lit.local.cfg @@ -1,7 +1,8 @@ root = config.root targets = root.targets if ('X86' in targets) | ('AArch64' in targets) | ('ARM' in targets) | \ - ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets): + ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets) | \ + ('LoongArch' in targets): config.unsupported = False else: config.unsupported = True @@ -9,7 +10,7 @@ else: # FIXME: autoconf and cmake produce different arch names. We should normalize # them before getting here. if root.host_arch not in ['i386', 'x86', 'x86_64', 'AMD64', - 'AArch64', 'ARM', 'Mips', + 'AArch64', 'ARM', 'Mips', 'loongarch64', 'PowerPC', 'ppc64', 'ppc64le', 'SystemZ']: config.unsupported = True diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/llvm/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll index 7a1731e74ff5..3023f74816b4 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: loongarch64 ; RUN: %lli -jit-kind=mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null ; XFAIL: windows-gnu,windows-msvc ; UNSUPPORTED: powerpc64-unknown-linux-gnu diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll b/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll index e25fd710b5c3..ccc138922802 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: loongarch64 ; REQUIRES: cxx-shared-library ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s ; XFAIL: arm, cygwin, windows-msvc, windows-gnu diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll index f458ab79f984..117df54e2055 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: loongarch64 ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null ; XFAIL: windows-gnu,windows-msvc ; UNSUPPORTED: powerpc64-unknown-linux-gnu diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll index b8684a17abc1..a0bc1c2f266f 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: 
loongarch64 ; RUN: %lli -jit-kind=mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s ; XFAIL: windows-gnu,windows-msvc ; UNSUPPORTED: powerpc64-unknown-linux-gnu diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll index 060b5e132ad6..05c3e96f2c51 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: loongarch64 ; RUN: %lli -jit-kind=mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s ; XFAIL: windows-gnu,windows-msvc ; UNSUPPORTED: powerpc64-unknown-linux-gnu diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll index 6e60396e29b4..c6073ea02061 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: loongarch64 ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null ; XFAIL: windows-gnu,windows-msvc ; UNSUPPORTED: powerpc64-unknown-linux-gnu diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll index b6fae4600d8b..78b9be1cb8d1 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: loongarch64 ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null ; XFAIL: windows-gnu,windows-msvc ; UNSUPPORTED: powerpc64-unknown-linux-gnu diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll index 34f72bc9365a..699d4a5dec2b 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: loongarch64 ; RUN: %lli -jit-kind=mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ ; RUN: -relocation-model=pic -code-model=small %s > /dev/null ; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, windows-gnu, windows-msvc diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll index 9e76601c963d..94e3a94e56a1 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: loongarch64 ; RUN: %lli -jit-kind=mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s ; XFAIL: windows-gnu,windows-msvc ; UNSUPPORTED: powerpc64-unknown-linux-gnu diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll index 20f232add47d..5a92c235392e 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll @@ -1,3 +1,4 @@ +;UNSUPPORTED: loongarch64 ; RUN: %lli -jit-kind=mcjit 
-remote-mcjit -mcjit-remote-process=lli-child-target%exeext \ ; RUN: -O0 -relocation-model=pic -code-model=small %s ; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, windows-gnu, windows-msvc diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/hello-g.ll b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/hello-g.ll new file mode 100644 index 000000000000..0a920808d49a --- /dev/null +++ b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/hello-g.ll @@ -0,0 +1,33 @@ +; REQUIRES: asserts +; RUN: %lli --jit-kind=mcjit %s > /dev/null +@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1 + +; Function Attrs: noinline nounwind optnone +define signext i32 @main() !dbg !8 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + %call = call signext i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0)), !dbg !12 + ret i32 0, !dbg !13 +} + +declare signext i32 @printf(i8*, ...) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.ident = !{!7} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.1", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "hello.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"PIC Level", i32 1} +!7 = !{!"clang version 10.0.1"} +!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !9, scopeLine: 4, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!9 = !DISubroutineType(types: !10) +!10 = !{!11} +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !DILocation(line: 5, column: 3, scope: !8) +!13 = !DILocation(line: 6, column: 3, scope: !8) diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg new file mode 100644 index 000000000000..2b5a4893e686 --- /dev/null +++ b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/MC/Disassembler/LoongArch/lit.local.cfg b/llvm/test/MC/Disassembler/LoongArch/lit.local.cfg new file mode 100644 index 000000000000..6223fc691edc --- /dev/null +++ b/llvm/test/MC/Disassembler/LoongArch/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/MC/Disassembler/LoongArch/simd.txt b/llvm/test/MC/Disassembler/LoongArch/simd.txt new file mode 100644 index 000000000000..90da1700a6d8 --- /dev/null +++ b/llvm/test/MC/Disassembler/LoongArch/simd.txt @@ -0,0 +1,1361 @@ +# RUN: llvm-mc --disassemble %s -triple=loongarch64-unknown-linux -mattr=+lsx,+lasx | FileCheck %s + +0xcf 0x2a 0x19 0x09 # CHECK: vfmadd.s $vr15, $vr22, $vr10, $vr18 +0x01 0x30 0x25 0x09 # CHECK: vfmadd.d $vr1, $vr0, $vr12, $vr10 +0x50 0x36 0x54 0x09 # CHECK: vfmsub.s $vr16, $vr18, $vr13, $vr8 +0xb9 0x05 0x6a 0x09 # CHECK: vfmsub.d $vr25, $vr13, $vr1, $vr20 +0x56 0x44 0x9b 0x09 # CHECK: vfnmadd.s $vr22, $vr2, $vr17, $vr22 +0xbc 0x0b 0xa7 0x09 # CHECK: vfnmadd.d $vr28, $vr29, $vr2, $vr14 +0x93 0x44 0xdc 0x09 # CHECK: vfnmsub.s $vr19, $vr4, $vr17, $vr24 +0xd8 0x72 0xef 0x09 # CHECK: vfnmsub.d $vr24, $vr22, $vr28, $vr30 +0x8f 0xa7 0x17 0x0a # CHECK: xvfmadd.s $xr15, $xr28, $xr9, $xr15 +0x05 0x33 0x25 0x0a # 
CHECK: xvfmadd.d $xr5, $xr24, $xr12, $xr10 +0x14 0x6c 0x5d 0x0a # CHECK: xvfmsub.s $xr20, $xr0, $xr27, $xr26 +0x0d 0x65 0x6d 0x0a # CHECK: xvfmsub.d $xr13, $xr8, $xr25, $xr26 +0xce 0x59 0x94 0x0a # CHECK: xvfnmadd.s $xr14, $xr14, $xr22, $xr8 +0x39 0x02 0xa2 0x0a # CHECK: xvfnmadd.d $xr25, $xr17, $xr0, $xr4 +0x6b 0x80 0xd5 0x0a # CHECK: xvfnmsub.s $xr11, $xr3, $xr0, $xr11 +0x62 0x60 0xeb 0x0a # CHECK: xvfnmsub.d $xr2, $xr3, $xr24, $xr22 +0xfa 0x6d 0x52 0x0c # CHECK: vfcmp.ceq.s $vr26, $vr15, $vr27 +0xb5 0x06 0x62 0x0c # CHECK: vfcmp.ceq.d $vr21, $vr21, $vr1 +0x28 0x4d 0x92 0x0c # CHECK: xvfcmp.ceq.s $xr8, $xr9, $xr19 +0x19 0x72 0xa2 0x0c # CHECK: xvfcmp.ceq.d $xr25, $xr16, $xr28 +0xf4 0xf6 0x14 0x0d # CHECK: vbitsel.v $vr20, $vr23, $vr29, $vr9 +0x47 0xf3 0x2b 0x0d # CHECK: xvbitsel.v $xr7, $xr26, $xr28, $xr23 +0x8b 0x9c 0x54 0x0d # CHECK: vshuf.b $vr11, $vr4, $vr7, $vr9 +0xb0 0x2a 0x66 0x0d # CHECK: xvshuf.b $xr16, $xr21, $xr10, $xr12 +0x3c 0x0b 0x38 0x2c # CHECK: vld $vr28, $r25, -510 +0xdc 0x3d 0x48 0x2c # CHECK: vst $vr28, $r14, 527 +0xcb 0x00 0x88 0x2c # CHECK: xvld $xr11, $r6, 512 +0xed 0xfc 0xd2 0x2c # CHECK: xvst $xr13, $r7, 1215 +0x28 0xfd 0x14 0x30 # CHECK: vldrepl.d $vr8, $r9, -1544 +0x22 0xd9 0x2e 0x30 # CHECK: vldrepl.w $vr2, $r9, -296 +0xfc 0xfa 0x41 0x30 # CHECK: vldrepl.h $vr28, $r23, 252 +0x25 0xad 0xb4 0x30 # CHECK: vldrepl.b $vr5, $r9, -725 +0x57 0x57 0x15 0x31 # CHECK: vstelm.d $vr23, $r26, 680, 1 +0xfe 0x8e 0x26 0x31 # CHECK: vstelm.w $vr30, $r23, -372, 1 +0xcb 0x3c 0x5c 0x31 # CHECK: vstelm.h $vr11, $r6, 30, 7 +0xe3 0xb1 0xb8 0x31 # CHECK: vstelm.b $vr3, $r15, 44, 14 +0x18 0xa5 0x11 0x32 # CHECK: xvldrepl.d $xr24, $r8, 840 +0x0e 0xef 0x21 0x32 # CHECK: xvldrepl.w $xr14, $r24, 492 +0x32 0x49 0x46 0x32 # CHECK: xvldrepl.h $xr18, $r9, 804 +0xa6 0xaf 0x8c 0x32 # CHECK: xvldrepl.b $xr6, $r29, 811 +0x75 0x94 0x13 0x33 # CHECK: xvstelm.d $xr21, $sp, -216, 0 +0xbf 0xab 0x21 0x33 # CHECK: xvstelm.w $xr31, $r29, 424, 0 +0xee 0xb4 0x50 0x33 # CHECK: xvstelm.h $xr14, $r7, 90, 4 +0x15 0xef 0xa3 0x33 # CHECK: xvstelm.b $xr21, $r24, -5, 8 +0x9d 0x78 0x40 0x38 # CHECK: vldx $vr29, $r4, $r30 +0x9f 0x77 0x44 0x38 # CHECK: vstx $vr31, $r28, $r29 +0xc8 0x63 0x48 0x38 # CHECK: xvldx $xr8, $r30, $r24 +0x22 0x75 0x4c 0x38 # CHECK: xvstx $xr2, $r9, $r29 +0x5c 0x5f 0x00 0x70 # CHECK: vseq.b $vr28, $vr26, $vr23 +0x2a 0x94 0x00 0x70 # CHECK: vseq.h $vr10, $vr1, $vr5 +0x63 0x47 0x01 0x70 # CHECK: vseq.w $vr3, $vr27, $vr17 +0x65 0x8c 0x01 0x70 # CHECK: vseq.d $vr5, $vr3, $vr3 +0x3d 0x1d 0x02 0x70 # CHECK: vsle.b $vr29, $vr9, $vr7 +0x05 0xa7 0x02 0x70 # CHECK: vsle.h $vr5, $vr24, $vr9 +0xd1 0x53 0x03 0x70 # CHECK: vsle.w $vr17, $vr30, $vr20 +0xdb 0xb4 0x03 0x70 # CHECK: vsle.d $vr27, $vr6, $vr13 +0x7e 0x29 0x04 0x70 # CHECK: vsle.bu $vr30, $vr11, $vr10 +0xb3 0xff 0x04 0x70 # CHECK: vsle.hu $vr19, $vr29, $vr31 +0x50 0x52 0x05 0x70 # CHECK: vsle.wu $vr16, $vr18, $vr20 +0x3f 0xa2 0x05 0x70 # CHECK: vsle.du $vr31, $vr17, $vr8 +0xfa 0x14 0x06 0x70 # CHECK: vslt.b $vr26, $vr7, $vr5 +0x4e 0xd0 0x06 0x70 # CHECK: vslt.h $vr14, $vr2, $vr20 +0xae 0x64 0x07 0x70 # CHECK: vslt.w $vr14, $vr5, $vr25 +0x3a 0xe5 0x07 0x70 # CHECK: vslt.d $vr26, $vr9, $vr25 +0x5f 0x3a 0x08 0x70 # CHECK: vslt.bu $vr31, $vr18, $vr14 +0xe5 0x95 0x08 0x70 # CHECK: vslt.hu $vr5, $vr15, $vr5 +0x9f 0x37 0x09 0x70 # CHECK: vslt.wu $vr31, $vr28, $vr13 +0x6b 0xda 0x09 0x70 # CHECK: vslt.du $vr11, $vr19, $vr22 +0x9a 0x7e 0x0a 0x70 # CHECK: vadd.b $vr26, $vr20, $vr31 +0x2b 0xf7 0x0a 0x70 # CHECK: vadd.h $vr11, $vr25, $vr29 +0x27 0x37 0x0b 0x70 
# CHECK: vadd.w $vr7, $vr25, $vr13 +0xb0 0xc1 0x0b 0x70 # CHECK: vadd.d $vr16, $vr13, $vr16 +0x6c 0x54 0x0c 0x70 # CHECK: vsub.b $vr12, $vr3, $vr21 +0xaf 0xe5 0x0c 0x70 # CHECK: vsub.h $vr15, $vr13, $vr25 +0x14 0x66 0x0d 0x70 # CHECK: vsub.w $vr20, $vr16, $vr25 +0x73 0x9c 0x0d 0x70 # CHECK: vsub.d $vr19, $vr3, $vr7 +0xce 0x17 0x46 0x70 # CHECK: vsadd.b $vr14, $vr30, $vr5 +0x2a 0xbc 0x46 0x70 # CHECK: vsadd.h $vr10, $vr1, $vr15 +0xf3 0x2b 0x47 0x70 # CHECK: vsadd.w $vr19, $vr31, $vr10 +0x7a 0xf2 0x47 0x70 # CHECK: vsadd.d $vr26, $vr19, $vr28 +0x78 0x1c 0x48 0x70 # CHECK: vssub.b $vr24, $vr3, $vr7 +0x9f 0xe0 0x48 0x70 # CHECK: vssub.h $vr31, $vr4, $vr24 +0x7d 0x33 0x49 0x70 # CHECK: vssub.w $vr29, $vr27, $vr12 +0x17 0xa6 0x49 0x70 # CHECK: vssub.d $vr23, $vr16, $vr9 +0xba 0x13 0x4a 0x70 # CHECK: vsadd.bu $vr26, $vr29, $vr4 +0xef 0xa4 0x4a 0x70 # CHECK: vsadd.hu $vr15, $vr7, $vr9 +0x4d 0x42 0x4b 0x70 # CHECK: vsadd.wu $vr13, $vr18, $vr16 +0xa4 0x80 0x4b 0x70 # CHECK: vsadd.du $vr4, $vr5, $vr0 +0x3b 0x36 0x4c 0x70 # CHECK: vssub.bu $vr27, $vr17, $vr13 +0x05 0x85 0x4c 0x70 # CHECK: vssub.hu $vr5, $vr8, $vr1 +0x0e 0x59 0x4d 0x70 # CHECK: vssub.wu $vr14, $vr8, $vr22 +0x31 0xa1 0x4d 0x70 # CHECK: vssub.du $vr17, $vr9, $vr8 +0x77 0x0a 0x54 0x70 # CHECK: vhaddw.h.b $vr23, $vr19, $vr2 +0x1a 0xea 0x54 0x70 # CHECK: vhaddw.w.h $vr26, $vr16, $vr26 +0xe0 0x6f 0x55 0x70 # CHECK: vhaddw.d.w $vr0, $vr31, $vr27 +0xb9 0xe5 0x55 0x70 # CHECK: vhaddw.q.d $vr25, $vr13, $vr25 +0xe9 0x16 0x56 0x70 # CHECK: vhsubw.h.b $vr9, $vr23, $vr5 +0xaf 0xeb 0x56 0x70 # CHECK: vhsubw.w.h $vr15, $vr29, $vr26 +0x80 0x4b 0x57 0x70 # CHECK: vhsubw.d.w $vr0, $vr28, $vr18 +0x2e 0xa3 0x57 0x70 # CHECK: vhsubw.q.d $vr14, $vr25, $vr8 +0x01 0x56 0x58 0x70 # CHECK: vhaddw.hu.bu $vr1, $vr16, $vr21 +0xbc 0xf6 0x58 0x70 # CHECK: vhaddw.wu.hu $vr28, $vr21, $vr29 +0x9d 0x42 0x59 0x70 # CHECK: vhaddw.du.wu $vr29, $vr20, $vr16 +0x42 0xf1 0x59 0x70 # CHECK: vhaddw.qu.du $vr2, $vr10, $vr28 +0x7f 0x78 0x5a 0x70 # CHECK: vhsubw.hu.bu $vr31, $vr3, $vr30 +0x25 0xad 0x5a 0x70 # CHECK: vhsubw.wu.hu $vr5, $vr9, $vr11 +0xf7 0x5b 0x5b 0x70 # CHECK: vhsubw.du.wu $vr23, $vr31, $vr22 +0x84 0xcb 0x5b 0x70 # CHECK: vhsubw.qu.du $vr4, $vr28, $vr18 +0xb2 0x2d 0x5c 0x70 # CHECK: vadda.b $vr18, $vr13, $vr11 +0xd1 0xb1 0x5c 0x70 # CHECK: vadda.h $vr17, $vr14, $vr12 +0x76 0x0d 0x5d 0x70 # CHECK: vadda.w $vr22, $vr11, $vr3 +0x18 0xbf 0x5d 0x70 # CHECK: vadda.d $vr24, $vr24, $vr15 +0x77 0x46 0x60 0x70 # CHECK: vabsd.b $vr23, $vr19, $vr17 +0xee 0xb7 0x60 0x70 # CHECK: vabsd.h $vr14, $vr31, $vr13 +0x38 0x24 0x61 0x70 # CHECK: vabsd.w $vr24, $vr1, $vr9 +0x9f 0x82 0x61 0x70 # CHECK: vabsd.d $vr31, $vr20, $vr0 +0x97 0x75 0x62 0x70 # CHECK: vabsd.bu $vr23, $vr12, $vr29 +0x72 0x86 0x62 0x70 # CHECK: vabsd.hu $vr18, $vr19, $vr1 +0xad 0x72 0x63 0x70 # CHECK: vabsd.wu $vr13, $vr21, $vr28 +0x50 0xaf 0x63 0x70 # CHECK: vabsd.du $vr16, $vr26, $vr11 +0xa1 0x6e 0x64 0x70 # CHECK: vavg.b $vr1, $vr21, $vr27 +0x54 0xbf 0x64 0x70 # CHECK: vavg.h $vr20, $vr26, $vr15 +0x5d 0x0e 0x65 0x70 # CHECK: vavg.w $vr29, $vr18, $vr3 +0xf3 0xfd 0x65 0x70 # CHECK: vavg.d $vr19, $vr15, $vr31 +0x6b 0x45 0x66 0x70 # CHECK: vavg.bu $vr11, $vr11, $vr17 +0x9e 0xb7 0x66 0x70 # CHECK: vavg.hu $vr30, $vr28, $vr13 +0xe7 0x28 0x67 0x70 # CHECK: vavg.wu $vr7, $vr7, $vr10 +0xf9 0xb0 0x67 0x70 # CHECK: vavg.du $vr25, $vr7, $vr12 +0xbd 0x1d 0x68 0x70 # CHECK: vavgr.b $vr29, $vr13, $vr7 +0x85 0xcf 0x68 0x70 # CHECK: vavgr.h $vr5, $vr28, $vr19 +0xf3 0x39 0x69 0x70 # CHECK: vavgr.w $vr19, $vr15, $vr14 +0x03 0x88 0x69 
0x70 # CHECK: vavgr.d $vr3, $vr0, $vr2 +0x77 0x7d 0x6a 0x70 # CHECK: vavgr.bu $vr23, $vr11, $vr31 +0x79 0xa2 0x6a 0x70 # CHECK: vavgr.hu $vr25, $vr19, $vr8 +0x3e 0x33 0x6b 0x70 # CHECK: vavgr.wu $vr30, $vr25, $vr12 +0x99 0xe6 0x6b 0x70 # CHECK: vavgr.du $vr25, $vr20, $vr25 +0x5c 0x6b 0x70 0x70 # CHECK: vmax.b $vr28, $vr26, $vr26 +0xa8 0xad 0x70 0x70 # CHECK: vmax.h $vr8, $vr13, $vr11 +0x95 0x7f 0x71 0x70 # CHECK: vmax.w $vr21, $vr28, $vr31 +0xc1 0xeb 0x71 0x70 # CHECK: vmax.d $vr1, $vr30, $vr26 +0xca 0x25 0x72 0x70 # CHECK: vmin.b $vr10, $vr14, $vr9 +0x6a 0xd5 0x72 0x70 # CHECK: vmin.h $vr10, $vr11, $vr21 +0x1a 0x30 0x73 0x70 # CHECK: vmin.w $vr26, $vr0, $vr12 +0x53 0x82 0x73 0x70 # CHECK: vmin.d $vr19, $vr18, $vr0 +0x22 0x73 0x74 0x70 # CHECK: vmax.bu $vr2, $vr25, $vr28 +0xc9 0xfa 0x74 0x70 # CHECK: vmax.hu $vr9, $vr22, $vr30 +0x35 0x6f 0x75 0x70 # CHECK: vmax.wu $vr21, $vr25, $vr27 +0xc3 0xe5 0x75 0x70 # CHECK: vmax.du $vr3, $vr14, $vr25 +0xf8 0x6c 0x76 0x70 # CHECK: vmin.bu $vr24, $vr7, $vr27 +0x92 0xf7 0x76 0x70 # CHECK: vmin.hu $vr18, $vr28, $vr29 +0x9a 0x08 0x77 0x70 # CHECK: vmin.wu $vr26, $vr4, $vr2 +0x0d 0x90 0x77 0x70 # CHECK: vmin.du $vr13, $vr0, $vr4 +0xa1 0x5e 0x84 0x70 # CHECK: vmul.b $vr1, $vr21, $vr23 +0xa9 0xe6 0x84 0x70 # CHECK: vmul.h $vr9, $vr21, $vr25 +0x10 0x71 0x85 0x70 # CHECK: vmul.w $vr16, $vr8, $vr28 +0x24 0xae 0x85 0x70 # CHECK: vmul.d $vr4, $vr17, $vr11 +0x0c 0x23 0x86 0x70 # CHECK: vmuh.b $vr12, $vr24, $vr8 +0xa6 0xe2 0x86 0x70 # CHECK: vmuh.h $vr6, $vr21, $vr24 +0xab 0x7b 0x87 0x70 # CHECK: vmuh.w $vr11, $vr29, $vr30 +0x21 0xe6 0x87 0x70 # CHECK: vmuh.d $vr1, $vr17, $vr25 +0xbd 0x2b 0x88 0x70 # CHECK: vmuh.bu $vr29, $vr29, $vr10 +0x38 0xd5 0x88 0x70 # CHECK: vmuh.hu $vr24, $vr9, $vr21 +0x8f 0x4e 0x89 0x70 # CHECK: vmuh.wu $vr15, $vr20, $vr19 +0x80 0x87 0x89 0x70 # CHECK: vmuh.du $vr0, $vr28, $vr1 +0x1b 0x10 0xa8 0x70 # CHECK: vmadd.b $vr27, $vr0, $vr4 +0x93 0xf2 0xa8 0x70 # CHECK: vmadd.h $vr19, $vr20, $vr28 +0xef 0x0c 0xa9 0x70 # CHECK: vmadd.w $vr15, $vr7, $vr3 +0x39 0xfb 0xa9 0x70 # CHECK: vmadd.d $vr25, $vr25, $vr30 +0x38 0x6b 0xaa 0x70 # CHECK: vmsub.b $vr24, $vr25, $vr26 +0x0c 0xb4 0xaa 0x70 # CHECK: vmsub.h $vr12, $vr0, $vr13 +0x1a 0x62 0xab 0x70 # CHECK: vmsub.w $vr26, $vr16, $vr24 +0x4d 0xa1 0xab 0x70 # CHECK: vmsub.d $vr13, $vr10, $vr8 +0x92 0x57 0xe0 0x70 # CHECK: vdiv.b $vr18, $vr28, $vr21 +0x11 0x87 0xe0 0x70 # CHECK: vdiv.h $vr17, $vr24, $vr1 +0x43 0x59 0xe1 0x70 # CHECK: vdiv.w $vr3, $vr10, $vr22 +0xaf 0xa1 0xe1 0x70 # CHECK: vdiv.d $vr15, $vr13, $vr8 +0x33 0x53 0xe2 0x70 # CHECK: vmod.b $vr19, $vr25, $vr20 +0x02 0xdb 0xe2 0x70 # CHECK: vmod.h $vr2, $vr24, $vr22 +0x5f 0x02 0xe3 0x70 # CHECK: vmod.w $vr31, $vr18, $vr0 +0x1f 0x88 0xe3 0x70 # CHECK: vmod.d $vr31, $vr0, $vr2 +0x8f 0x0c 0xe4 0x70 # CHECK: vdiv.bu $vr15, $vr4, $vr3 +0xf1 0xf4 0xe4 0x70 # CHECK: vdiv.hu $vr17, $vr7, $vr29 +0x5b 0x0d 0xe5 0x70 # CHECK: vdiv.wu $vr27, $vr10, $vr3 +0x08 0xeb 0xe5 0x70 # CHECK: vdiv.du $vr8, $vr24, $vr26 +0xca 0x62 0xe6 0x70 # CHECK: vmod.bu $vr10, $vr22, $vr24 +0xf3 0xe3 0xe6 0x70 # CHECK: vmod.hu $vr19, $vr31, $vr24 +0x1a 0x37 0xe7 0x70 # CHECK: vmod.wu $vr26, $vr24, $vr13 +0x74 0xaa 0xe7 0x70 # CHECK: vmod.du $vr20, $vr19, $vr10 +0x5c 0x7a 0xe8 0x70 # CHECK: vsll.b $vr28, $vr18, $vr30 +0x96 0xf8 0xe8 0x70 # CHECK: vsll.h $vr22, $vr4, $vr30 +0x21 0x23 0xe9 0x70 # CHECK: vsll.w $vr1, $vr25, $vr8 +0x5f 0xbe 0xe9 0x70 # CHECK: vsll.d $vr31, $vr18, $vr15 +0x85 0x41 0xea 0x70 # CHECK: vsrl.b $vr5, $vr12, $vr16 +0xa9 0xf0 0xea 0x70 # CHECK: vsrl.h $vr9, $vr5, 
$vr28 +0x1e 0x06 0xeb 0x70 # CHECK: vsrl.w $vr30, $vr16, $vr1 +0xfc 0xee 0xeb 0x70 # CHECK: vsrl.d $vr28, $vr23, $vr27 +0x2f 0x66 0xec 0x70 # CHECK: vsra.b $vr15, $vr17, $vr25 +0x00 0x95 0xec 0x70 # CHECK: vsra.h $vr0, $vr8, $vr5 +0x3d 0x1d 0xed 0x70 # CHECK: vsra.w $vr29, $vr9, $vr7 +0x76 0xcc 0xed 0x70 # CHECK: vsra.d $vr22, $vr3, $vr19 +0x08 0x22 0xee 0x70 # CHECK: vrotr.b $vr8, $vr16, $vr8 +0xae 0xac 0xee 0x70 # CHECK: vrotr.h $vr14, $vr5, $vr11 +0x91 0x67 0xef 0x70 # CHECK: vrotr.w $vr17, $vr28, $vr25 +0x92 0xcf 0xef 0x70 # CHECK: vrotr.d $vr18, $vr28, $vr19 +0x61 0x47 0xf0 0x70 # CHECK: vsrlr.b $vr1, $vr27, $vr17 +0xda 0xa9 0xf0 0x70 # CHECK: vsrlr.h $vr26, $vr14, $vr10 +0xa3 0x63 0xf1 0x70 # CHECK: vsrlr.w $vr3, $vr29, $vr24 +0x97 0xa8 0xf1 0x70 # CHECK: vsrlr.d $vr23, $vr4, $vr10 +0x59 0x54 0xf2 0x70 # CHECK: vsrar.b $vr25, $vr2, $vr21 +0x64 0xd1 0xf2 0x70 # CHECK: vsrar.h $vr4, $vr11, $vr20 +0xab 0x76 0xf3 0x70 # CHECK: vsrar.w $vr11, $vr21, $vr29 +0xbd 0x88 0xf3 0x70 # CHECK: vsrar.d $vr29, $vr5, $vr2 +0xd8 0xf5 0xf4 0x70 # CHECK: vsrln.b.h $vr24, $vr14, $vr29 +0xda 0x42 0xf5 0x70 # CHECK: vsrln.h.w $vr26, $vr22, $vr16 +0xf1 0x8b 0xf5 0x70 # CHECK: vsrln.w.d $vr17, $vr31, $vr2 +0x1f 0xdc 0xf6 0x70 # CHECK: vsran.b.h $vr31, $vr0, $vr23 +0x94 0x75 0xf7 0x70 # CHECK: vsran.h.w $vr20, $vr12, $vr29 +0x22 0x88 0xf7 0x70 # CHECK: vsran.w.d $vr2, $vr1, $vr2 +0x93 0x83 0xf8 0x70 # CHECK: vsrlrn.b.h $vr19, $vr28, $vr0 +0xb7 0x3b 0xf9 0x70 # CHECK: vsrlrn.h.w $vr23, $vr29, $vr14 +0x45 0x97 0xf9 0x70 # CHECK: vsrlrn.w.d $vr5, $vr26, $vr5 +0xf1 0x9d 0xfa 0x70 # CHECK: vsrarn.b.h $vr17, $vr15, $vr7 +0x4c 0x75 0xfb 0x70 # CHECK: vsrarn.h.w $vr12, $vr10, $vr29 +0x58 0xef 0xfb 0x70 # CHECK: vsrarn.w.d $vr24, $vr26, $vr27 +0x81 0xb9 0xfc 0x70 # CHECK: vssrln.b.h $vr1, $vr12, $vr14 +0x0b 0x49 0xfd 0x70 # CHECK: vssrln.h.w $vr11, $vr8, $vr18 +0xff 0x99 0xfd 0x70 # CHECK: vssrln.w.d $vr31, $vr15, $vr6 +0xad 0xe0 0xfe 0x70 # CHECK: vssran.b.h $vr13, $vr5, $vr24 +0x44 0x1f 0xff 0x70 # CHECK: vssran.h.w $vr4, $vr26, $vr7 +0x59 0x99 0xff 0x70 # CHECK: vssran.w.d $vr25, $vr10, $vr6 +0x9c 0x9b 0x00 0x71 # CHECK: vssrlrn.b.h $vr28, $vr28, $vr6 +0xef 0x46 0x01 0x71 # CHECK: vssrlrn.h.w $vr15, $vr23, $vr17 +0x2c 0x89 0x01 0x71 # CHECK: vssrlrn.w.d $vr12, $vr9, $vr2 +0x21 0xc7 0x02 0x71 # CHECK: vssrarn.b.h $vr1, $vr25, $vr17 +0x23 0x5d 0x03 0x71 # CHECK: vssrarn.h.w $vr3, $vr9, $vr23 +0x2e 0xed 0x03 0x71 # CHECK: vssrarn.w.d $vr14, $vr9, $vr27 +0x10 0xbf 0x04 0x71 # CHECK: vssrln.bu.h $vr16, $vr24, $vr15 +0xf5 0x7a 0x05 0x71 # CHECK: vssrln.hu.w $vr21, $vr23, $vr30 +0x0c 0xf9 0x05 0x71 # CHECK: vssrln.wu.d $vr12, $vr8, $vr30 +0x45 0xb2 0x06 0x71 # CHECK: vssran.bu.h $vr5, $vr18, $vr12 +0xe0 0x70 0x07 0x71 # CHECK: vssran.hu.w $vr0, $vr7, $vr28 +0x65 0xa1 0x07 0x71 # CHECK: vssran.wu.d $vr5, $vr11, $vr8 +0x32 0x8f 0x08 0x71 # CHECK: vssrlrn.bu.h $vr18, $vr25, $vr3 +0x33 0x50 0x09 0x71 # CHECK: vssrlrn.hu.w $vr19, $vr1, $vr20 +0xc6 0xcb 0x09 0x71 # CHECK: vssrlrn.wu.d $vr6, $vr30, $vr18 +0xac 0x8d 0x0a 0x71 # CHECK: vssrarn.bu.h $vr12, $vr13, $vr3 +0xb2 0x50 0x0b 0x71 # CHECK: vssrarn.hu.w $vr18, $vr5, $vr20 +0x17 0xd5 0x0b 0x71 # CHECK: vssrarn.wu.d $vr23, $vr8, $vr21 +0x4e 0x7c 0x0c 0x71 # CHECK: vbitclr.b $vr14, $vr2, $vr31 +0x31 0xa3 0x0c 0x71 # CHECK: vbitclr.h $vr17, $vr25, $vr8 +0x72 0x0d 0x0d 0x71 # CHECK: vbitclr.w $vr18, $vr11, $vr3 +0xff 0xf5 0x0d 0x71 # CHECK: vbitclr.d $vr31, $vr15, $vr29 +0xa8 0x43 0x0e 0x71 # CHECK: vbitset.b $vr8, $vr29, $vr16 +0x25 0xc6 0x0e 0x71 # CHECK: vbitset.h $vr5, $vr17, 
$vr17 +0x65 0x16 0x0f 0x71 # CHECK: vbitset.w $vr5, $vr19, $vr5 +0x65 0xab 0x0f 0x71 # CHECK: vbitset.d $vr5, $vr27, $vr10 +0xb0 0x20 0x10 0x71 # CHECK: vbitrev.b $vr16, $vr5, $vr8 +0xac 0xb3 0x10 0x71 # CHECK: vbitrev.h $vr12, $vr29, $vr12 +0xc3 0x39 0x11 0x71 # CHECK: vbitrev.w $vr3, $vr14, $vr14 +0x7f 0xbb 0x11 0x71 # CHECK: vbitrev.d $vr31, $vr27, $vr14 +0x16 0x4f 0x16 0x71 # CHECK: vpackev.b $vr22, $vr24, $vr19 +0x5c 0xc8 0x16 0x71 # CHECK: vpackev.h $vr28, $vr2, $vr18 +0x75 0x10 0x17 0x71 # CHECK: vpackev.w $vr21, $vr3, $vr4 +0xb8 0xae 0x17 0x71 # CHECK: vpackev.d $vr24, $vr21, $vr11 +0xec 0x6b 0x18 0x71 # CHECK: vpackod.b $vr12, $vr31, $vr26 +0x79 0xc0 0x18 0x71 # CHECK: vpackod.h $vr25, $vr3, $vr16 +0x55 0x3e 0x19 0x71 # CHECK: vpackod.w $vr21, $vr18, $vr15 +0x62 0x80 0x19 0x71 # CHECK: vpackod.d $vr2, $vr3, $vr0 +0x08 0x71 0x1a 0x71 # CHECK: vilvl.b $vr8, $vr8, $vr28 +0x14 0xfc 0x1a 0x71 # CHECK: vilvl.h $vr20, $vr0, $vr31 +0x4b 0x45 0x1b 0x71 # CHECK: vilvl.w $vr11, $vr10, $vr17 +0xe7 0x84 0x1b 0x71 # CHECK: vilvl.d $vr7, $vr7, $vr1 +0x6b 0x05 0x1c 0x71 # CHECK: vilvh.b $vr11, $vr11, $vr1 +0xe0 0xb7 0x1c 0x71 # CHECK: vilvh.h $vr0, $vr31, $vr13 +0xbc 0x1e 0x1d 0x71 # CHECK: vilvh.w $vr28, $vr21, $vr7 +0x77 0xcc 0x1d 0x71 # CHECK: vilvh.d $vr23, $vr3, $vr19 +0xa1 0x22 0x1e 0x71 # CHECK: vpickev.b $vr1, $vr21, $vr8 +0x30 0xa4 0x1e 0x71 # CHECK: vpickev.h $vr16, $vr1, $vr9 +0xad 0x11 0x1f 0x71 # CHECK: vpickev.w $vr13, $vr13, $vr4 +0xcb 0xfb 0x1f 0x71 # CHECK: vpickev.d $vr11, $vr30, $vr30 +0x67 0x35 0x20 0x71 # CHECK: vpickod.b $vr7, $vr11, $vr13 +0x72 0x84 0x20 0x71 # CHECK: vpickod.h $vr18, $vr3, $vr1 +0x03 0x4e 0x21 0x71 # CHECK: vpickod.w $vr3, $vr16, $vr19 +0xac 0xd5 0x21 0x71 # CHECK: vpickod.d $vr12, $vr13, $vr21 +0x2f 0x4e 0x22 0x71 # CHECK: vreplve.b $vr15, $vr17, $r19 +0xee 0x92 0x22 0x71 # CHECK: vreplve.h $vr14, $vr23, $r4 +0x7d 0x6e 0x23 0x71 # CHECK: vreplve.w $vr29, $vr19, $r27 +0x8d 0xd2 0x23 0x71 # CHECK: vreplve.d $vr13, $vr20, $r20 +0x59 0x54 0x26 0x71 # CHECK: vand.v $vr25, $vr2, $vr21 +0x64 0xc3 0x26 0x71 # CHECK: vor.v $vr4, $vr27, $vr16 +0x3e 0x13 0x27 0x71 # CHECK: vxor.v $vr30, $vr25, $vr4 +0x49 0xd8 0x27 0x71 # CHECK: vnor.v $vr9, $vr2, $vr22 +0x54 0x13 0x28 0x71 # CHECK: vandn.v $vr20, $vr26, $vr4 +0xa6 0xfa 0x28 0x71 # CHECK: vorn.v $vr6, $vr21, $vr30 +0x2b 0x35 0x2b 0x71 # CHECK: vfrstp.b $vr11, $vr9, $vr13 +0x55 0xdb 0x2b 0x71 # CHECK: vfrstp.h $vr21, $vr26, $vr22 +0xe9 0x40 0x2d 0x71 # CHECK: vadd.q $vr9, $vr7, $vr16 +0x22 0xc0 0x2d 0x71 # CHECK: vsub.q $vr2, $vr1, $vr16 +0x42 0x38 0x2e 0x71 # CHECK: vsigncov.b $vr2, $vr2, $vr14 +0xb5 0xb6 0x2e 0x71 # CHECK: vsigncov.h $vr21, $vr21, $vr13 +0xf5 0x14 0x2f 0x71 # CHECK: vsigncov.w $vr21, $vr7, $vr5 +0x4a 0x8d 0x2f 0x71 # CHECK: vsigncov.d $vr10, $vr10, $vr3 +0x8a 0x84 0x30 0x71 # CHECK: vfadd.s $vr10, $vr4, $vr1 +0x6f 0x0b 0x31 0x71 # CHECK: vfadd.d $vr15, $vr27, $vr2 +0x0e 0xa6 0x32 0x71 # CHECK: vfsub.s $vr14, $vr16, $vr9 +0x24 0x20 0x33 0x71 # CHECK: vfsub.d $vr4, $vr1, $vr8 +0x40 0x9a 0x38 0x71 # CHECK: vfmul.s $vr0, $vr18, $vr6 +0xfb 0x7b 0x39 0x71 # CHECK: vfmul.d $vr27, $vr31, $vr30 +0xe3 0x98 0x3a 0x71 # CHECK: vfdiv.s $vr3, $vr7, $vr6 +0xd0 0x78 0x3b 0x71 # CHECK: vfdiv.d $vr16, $vr6, $vr30 +0xd2 0xa3 0x3c 0x71 # CHECK: vfmax.s $vr18, $vr30, $vr8 +0x13 0x61 0x3d 0x71 # CHECK: vfmax.d $vr19, $vr8, $vr24 +0x58 0x9b 0x3e 0x71 # CHECK: vfmin.s $vr24, $vr26, $vr6 +0x30 0x07 0x3f 0x71 # CHECK: vfmin.d $vr16, $vr25, $vr1 +0xe8 0xb8 0x40 0x71 # CHECK: vfmaxa.s $vr8, $vr7, $vr14 +0x0a 0x11 0x41 0x71 # 
CHECK: vfmaxa.d $vr10, $vr8, $vr4 +0xd0 0xc8 0x42 0x71 # CHECK: vfmina.s $vr16, $vr6, $vr18 +0xfa 0x38 0x43 0x71 # CHECK: vfmina.d $vr26, $vr7, $vr14 +0x9e 0x60 0x46 0x71 # CHECK: vfcvt.h.s $vr30, $vr4, $vr24 +0x30 0x92 0x46 0x71 # CHECK: vfcvt.s.d $vr16, $vr17, $vr4 +0xf9 0x2a 0x48 0x71 # CHECK: vffint.s.l $vr25, $vr23, $vr10 +0xc9 0xee 0x49 0x71 # CHECK: vftint.w.d $vr9, $vr22, $vr27 +0x5f 0x75 0x4a 0x71 # CHECK: vftintrm.w.d $vr31, $vr10, $vr29 +0xb7 0xbd 0x4a 0x71 # CHECK: vftintrp.w.d $vr23, $vr13, $vr15 +0x32 0x19 0x4b 0x71 # CHECK: vftintrz.w.d $vr18, $vr9, $vr6 +0x95 0xf9 0x4b 0x71 # CHECK: vftintrne.w.d $vr21, $vr12, $vr30 +0x63 0x89 0x7a 0x71 # CHECK: vshuf.h $vr3, $vr11, $vr2 +0x95 0x74 0x7b 0x71 # CHECK: vshuf.w $vr21, $vr4, $vr29 +0xeb 0xca 0x7b 0x71 # CHECK: vshuf.d $vr11, $vr23, $vr18 +0xdb 0x1d 0x80 0x72 # CHECK: vseqi.b $vr27, $vr14, 7 +0x77 0xeb 0x80 0x72 # CHECK: vseqi.h $vr23, $vr27, -6 +0x08 0x41 0x81 0x72 # CHECK: vseqi.w $vr8, $vr8, -16 +0xab 0x94 0x81 0x72 # CHECK: vseqi.d $vr11, $vr5, 5 +0x68 0x1f 0x82 0x72 # CHECK: vslei.b $vr8, $vr27, 7 +0xbb 0xef 0x82 0x72 # CHECK: vslei.h $vr27, $vr29, -5 +0xb7 0x75 0x83 0x72 # CHECK: vslei.w $vr23, $vr13, -3 +0xe5 0xe1 0x83 0x72 # CHECK: vslei.d $vr5, $vr15, -8 +0x5d 0x25 0x84 0x72 # CHECK: vslei.bu $vr29, $vr10, 9 +0x5d 0xae 0x84 0x72 # CHECK: vslei.hu $vr29, $vr18, 11 +0x28 0x08 0x85 0x72 # CHECK: vslei.wu $vr8, $vr1, 2 +0xb0 0xa8 0x85 0x72 # CHECK: vslei.du $vr16, $vr5, 10 +0x88 0x78 0x86 0x72 # CHECK: vslti.b $vr8, $vr4, -2 +0xfa 0xc8 0x86 0x72 # CHECK: vslti.h $vr26, $vr7, -14 +0x1c 0x31 0x87 0x72 # CHECK: vslti.w $vr28, $vr8, 12 +0x64 0xa7 0x87 0x72 # CHECK: vslti.d $vr4, $vr27, 9 +0xca 0x49 0x88 0x72 # CHECK: vslti.bu $vr10, $vr14, 18 +0x9c 0xfb 0x88 0x72 # CHECK: vslti.hu $vr28, $vr28, 30 +0x6f 0x6f 0x89 0x72 # CHECK: vslti.wu $vr15, $vr27, 27 +0x3e 0xce 0x89 0x72 # CHECK: vslti.du $vr30, $vr17, 19 +0x26 0x48 0x8a 0x72 # CHECK: vaddi.bu $vr6, $vr1, 18 +0xcc 0x95 0x8a 0x72 # CHECK: vaddi.hu $vr12, $vr14, 5 +0x1c 0x68 0x8b 0x72 # CHECK: vaddi.wu $vr28, $vr0, 26 +0xaa 0x88 0x8b 0x72 # CHECK: vaddi.du $vr10, $vr5, 2 +0x96 0x0b 0x8c 0x72 # CHECK: vsubi.bu $vr22, $vr28, 2 +0xc0 0xfe 0x8c 0x72 # CHECK: vsubi.hu $vr0, $vr22, 31 +0xd4 0x14 0x8d 0x72 # CHECK: vsubi.wu $vr20, $vr6, 5 +0x72 0x85 0x8d 0x72 # CHECK: vsubi.du $vr18, $vr11, 1 +0x44 0x13 0x8e 0x72 # CHECK: vbsll.v $vr4, $vr26, 4 +0xe7 0xbf 0x8e 0x72 # CHECK: vbsrl.v $vr7, $vr31, 15 +0xf3 0x39 0x90 0x72 # CHECK: vmaxi.b $vr19, $vr15, 14 +0x79 0xd0 0x90 0x72 # CHECK: vmaxi.h $vr25, $vr3, -12 +0x34 0x17 0x91 0x72 # CHECK: vmaxi.w $vr20, $vr25, 5 +0x49 0xb1 0x91 0x72 # CHECK: vmaxi.d $vr9, $vr10, 12 +0xbe 0x72 0x92 0x72 # CHECK: vmini.b $vr30, $vr21, -4 +0x8b 0xf7 0x92 0x72 # CHECK: vmini.h $vr11, $vr28, -3 +0x26 0x5f 0x93 0x72 # CHECK: vmini.w $vr6, $vr25, -9 +0x1c 0x89 0x93 0x72 # CHECK: vmini.d $vr28, $vr8, 2 +0x0d 0x4f 0x94 0x72 # CHECK: vmaxi.bu $vr13, $vr24, 19 +0x23 0xd8 0x94 0x72 # CHECK: vmaxi.hu $vr3, $vr1, 22 +0x61 0x5c 0x95 0x72 # CHECK: vmaxi.wu $vr1, $vr3, 23 +0x46 0xd6 0x95 0x72 # CHECK: vmaxi.du $vr6, $vr18, 21 +0x4a 0x50 0x96 0x72 # CHECK: vmini.bu $vr10, $vr2, 20 +0x31 0xbe 0x96 0x72 # CHECK: vmini.hu $vr17, $vr17, 15 +0x7a 0x5f 0x97 0x72 # CHECK: vmini.wu $vr26, $vr27, 23 +0x6c 0xa3 0x97 0x72 # CHECK: vmini.du $vr12, $vr27, 8 +0x1a 0x25 0x9a 0x72 # CHECK: vfrstpi.b $vr26, $vr8, 9 +0x50 0xd0 0x9a 0x72 # CHECK: vfrstpi.h $vr16, $vr2, 20 +0x25 0x02 0x9c 0x72 # CHECK: vclo.b $vr5, $vr17 +0x88 0x04 0x9c 0x72 # CHECK: vclo.h $vr8, $vr4 +0xa1 0x09 0x9c 0x72 # 
CHECK: vclo.w $vr1, $vr13 +0xe0 0x0e 0x9c 0x72 # CHECK: vclo.d $vr0, $vr23 +0x24 0x13 0x9c 0x72 # CHECK: vclz.b $vr4, $vr25 +0x21 0x17 0x9c 0x72 # CHECK: vclz.h $vr1, $vr25 +0xa1 0x18 0x9c 0x72 # CHECK: vclz.w $vr1, $vr5 +0x30 0x1e 0x9c 0x72 # CHECK: vclz.d $vr16, $vr17 +0x64 0x20 0x9c 0x72 # CHECK: vpcnt.b $vr4, $vr3 +0x2f 0x26 0x9c 0x72 # CHECK: vpcnt.h $vr15, $vr17 +0x0d 0x29 0x9c 0x72 # CHECK: vpcnt.w $vr13, $vr8 +0x00 0x2d 0x9c 0x72 # CHECK: vpcnt.d $vr0, $vr8 +0x0e 0x33 0x9c 0x72 # CHECK: vneg.b $vr14, $vr24 +0xf8 0x34 0x9c 0x72 # CHECK: vneg.h $vr24, $vr7 +0xb3 0x38 0x9c 0x72 # CHECK: vneg.w $vr19, $vr5 +0x83 0x3f 0x9c 0x72 # CHECK: vneg.d $vr3, $vr28 +0x3f 0x43 0x9c 0x72 # CHECK: vmskltz.b $vr31, $vr25 +0x89 0x46 0x9c 0x72 # CHECK: vmskltz.h $vr9, $vr20 +0x56 0x4b 0x9c 0x72 # CHECK: vmskltz.w $vr22, $vr26 +0x5c 0x4d 0x9c 0x72 # CHECK: vmskltz.d $vr28, $vr10 +0xa7 0x50 0x9c 0x72 # CHECK: vmskgez.b $vr7, $vr5 +0x94 0x61 0x9c 0x72 # CHECK: vmsknz.b $vr20, $vr12 +0xc5 0x99 0x9c 0x72 # CHECK: vseteqz.v $fcc5, $vr14 +0x02 0x9d 0x9c 0x72 # CHECK: vsetnez.v $fcc2, $vr8 +0x80 0xa2 0x9c 0x72 # CHECK: vsetanyeqz.b $fcc0, $vr20 +0x04 0xa6 0x9c 0x72 # CHECK: vsetanyeqz.h $fcc4, $vr16 +0x47 0xa8 0x9c 0x72 # CHECK: vsetanyeqz.w $fcc7, $vr2 +0x84 0xad 0x9c 0x72 # CHECK: vsetanyeqz.d $fcc4, $vr12 +0x07 0xb0 0x9c 0x72 # CHECK: vsetallnez.b $fcc7, $vr0 +0x62 0xb5 0x9c 0x72 # CHECK: vsetallnez.h $fcc2, $vr11 +0x26 0xbb 0x9c 0x72 # CHECK: vsetallnez.w $fcc6, $vr25 +0xe7 0xbf 0x9c 0x72 # CHECK: vsetallnez.d $fcc7, $vr31 +0x8e 0xc7 0x9c 0x72 # CHECK: vflogb.s $vr14, $vr28 +0x3d 0xc9 0x9c 0x72 # CHECK: vflogb.d $vr29, $vr9 +0xa3 0xd5 0x9c 0x72 # CHECK: vfclass.s $vr3, $vr13 +0xe5 0xd9 0x9c 0x72 # CHECK: vfclass.d $vr5, $vr15 +0x73 0xe7 0x9c 0x72 # CHECK: vfsqrt.s $vr19, $vr27 +0x7f 0xe8 0x9c 0x72 # CHECK: vfsqrt.d $vr31, $vr3 +0x18 0xf6 0x9c 0x72 # CHECK: vfrecip.s $vr24, $vr16 +0x77 0xfa 0x9c 0x72 # CHECK: vfrecip.d $vr23, $vr19 +0xf2 0x05 0x9d 0x72 # CHECK: vfrsqrt.s $vr18, $vr15 +0xf2 0x0b 0x9d 0x72 # CHECK: vfrsqrt.d $vr18, $vr31 +0x7a 0x35 0x9d 0x72 # CHECK: vfrint.s $vr26, $vr11 +0x58 0x3a 0x9d 0x72 # CHECK: vfrint.d $vr24, $vr18 +0x65 0x44 0x9d 0x72 # CHECK: vfrintrm.s $vr5, $vr3 +0x57 0x49 0x9d 0x72 # CHECK: vfrintrm.d $vr23, $vr10 +0x54 0x54 0x9d 0x72 # CHECK: vfrintrp.s $vr20, $vr2 +0x3e 0x5a 0x9d 0x72 # CHECK: vfrintrp.d $vr30, $vr17 +0xd3 0x64 0x9d 0x72 # CHECK: vfrintrz.s $vr19, $vr6 +0x50 0x6a 0x9d 0x72 # CHECK: vfrintrz.d $vr16, $vr18 +0x08 0x77 0x9d 0x72 # CHECK: vfrintrne.s $vr8, $vr24 +0xa6 0x78 0x9d 0x72 # CHECK: vfrintrne.d $vr6, $vr5 +0xc4 0xe8 0x9d 0x72 # CHECK: vfcvtl.s.h $vr4, $vr6 +0xf0 0xec 0x9d 0x72 # CHECK: vfcvth.s.h $vr16, $vr7 +0x50 0xf1 0x9d 0x72 # CHECK: vfcvtl.d.s $vr16, $vr10 +0x3c 0xf7 0x9d 0x72 # CHECK: vfcvth.d.s $vr28, $vr25 +0x1c 0x02 0x9e 0x72 # CHECK: vffint.s.w $vr28, $vr16 +0xe4 0x07 0x9e 0x72 # CHECK: vffint.s.wu $vr4, $vr31 +0x32 0x0b 0x9e 0x72 # CHECK: vffint.d.l $vr18, $vr25 +0x38 0x0e 0x9e 0x72 # CHECK: vffint.d.lu $vr24, $vr17 +0x62 0x13 0x9e 0x72 # CHECK: vffintl.d.w $vr2, $vr27 +0x04 0x16 0x9e 0x72 # CHECK: vffinth.d.w $vr4, $vr16 +0x11 0x30 0x9e 0x72 # CHECK: vftint.w.s $vr17, $vr0 +0x57 0x36 0x9e 0x72 # CHECK: vftint.l.d $vr23, $vr18 +0x97 0x38 0x9e 0x72 # CHECK: vftintrm.w.s $vr23, $vr4 +0xde 0x3d 0x9e 0x72 # CHECK: vftintrm.l.d $vr30, $vr14 +0x07 0x40 0x9e 0x72 # CHECK: vftintrp.w.s $vr7, $vr0 +0x9c 0x46 0x9e 0x72 # CHECK: vftintrp.l.d $vr28, $vr20 +0xfc 0x4b 0x9e 0x72 # CHECK: vftintrz.w.s $vr28, $vr31 +0x12 0x4c 0x9e 0x72 # CHECK: vftintrz.l.d 
$vr18, $vr0 +0x2e 0x52 0x9e 0x72 # CHECK: vftintrne.w.s $vr14, $vr17 +0x56 0x56 0x9e 0x72 # CHECK: vftintrne.l.d $vr22, $vr18 +0x3a 0x5b 0x9e 0x72 # CHECK: vftint.wu.s $vr26, $vr25 +0x69 0x5f 0x9e 0x72 # CHECK: vftint.lu.d $vr9, $vr27 +0xda 0x72 0x9e 0x72 # CHECK: vftintrz.wu.s $vr26, $vr22 +0x9d 0x76 0x9e 0x72 # CHECK: vftintrz.lu.d $vr29, $vr20 +0x36 0x80 0x9e 0x72 # CHECK: vftintl.l.s $vr22, $vr1 +0x0d 0x87 0x9e 0x72 # CHECK: vftinth.l.s $vr13, $vr24 +0x68 0x8b 0x9e 0x72 # CHECK: vftintrml.l.s $vr8, $vr27 +0x92 0x8f 0x9e 0x72 # CHECK: vftintrmh.l.s $vr18, $vr28 +0x9b 0x93 0x9e 0x72 # CHECK: vftintrpl.l.s $vr27, $vr28 +0xf4 0x94 0x9e 0x72 # CHECK: vftintrph.l.s $vr20, $vr7 +0x46 0x98 0x9e 0x72 # CHECK: vftintrzl.l.s $vr6, $vr2 +0xd5 0x9c 0x9e 0x72 # CHECK: vftintrzh.l.s $vr21, $vr6 +0x79 0xa0 0x9e 0x72 # CHECK: vftintrnel.l.s $vr25, $vr3 +0xa7 0xa4 0x9e 0x72 # CHECK: vftintrneh.l.s $vr7, $vr5 +0x49 0xe0 0x9e 0x72 # CHECK: vexth.h.b $vr9, $vr2 +0x64 0xe7 0x9e 0x72 # CHECK: vexth.w.h $vr4, $vr27 +0x37 0xe8 0x9e 0x72 # CHECK: vexth.d.w $vr23, $vr1 +0xcf 0xec 0x9e 0x72 # CHECK: vexth.q.d $vr15, $vr6 +0x43 0xf0 0x9e 0x72 # CHECK: vexth.hu.bu $vr3, $vr2 +0x5f 0xf7 0x9e 0x72 # CHECK: vexth.wu.hu $vr31, $vr26 +0xea 0xfb 0x9e 0x72 # CHECK: vexth.du.wu $vr10, $vr31 +0x1c 0xfd 0x9e 0x72 # CHECK: vexth.qu.du $vr28, $vr8 +0x6f 0x00 0x9f 0x72 # CHECK: vreplgr2vr.b $vr15, $sp +0xea 0x06 0x9f 0x72 # CHECK: vreplgr2vr.h $vr10, $r23 +0x19 0x0a 0x9f 0x72 # CHECK: vreplgr2vr.w $vr25, $r16 +0xfb 0x0c 0x9f 0x72 # CHECK: vreplgr2vr.d $vr27, $r7 +0x18 0x3f 0xa0 0x72 # CHECK: vrotri.b $vr24, $vr24, 7 +0xa1 0x40 0xa0 0x72 # CHECK: vrotri.h $vr1, $vr5, 0 +0x0a 0xb1 0xa0 0x72 # CHECK: vrotri.w $vr10, $vr8, 12 +0xbe 0xab 0xa1 0x72 # CHECK: vrotri.d $vr30, $vr29, 42 +0x01 0x2e 0xa4 0x72 # CHECK: vsrlri.b $vr1, $vr16, 3 +0xbc 0x46 0xa4 0x72 # CHECK: vsrlri.h $vr28, $vr21, 1 +0x92 0xbc 0xa4 0x72 # CHECK: vsrlri.w $vr18, $vr4, 15 +0x7e 0x4c 0xa5 0x72 # CHECK: vsrlri.d $vr30, $vr3, 19 +0x0d 0x3c 0xa8 0x72 # CHECK: vsrari.b $vr13, $vr0, 7 +0x31 0x59 0xa8 0x72 # CHECK: vsrari.h $vr17, $vr9, 6 +0x83 0x9b 0xa8 0x72 # CHECK: vsrari.w $vr3, $vr28, 6 +0x44 0x88 0xa9 0x72 # CHECK: vsrari.d $vr4, $vr2, 34 +0x08 0x91 0xeb 0x72 # CHECK: vinsgr2vr.b $vr8, $r8, 4 +0xed 0xc4 0xeb 0x72 # CHECK: vinsgr2vr.h $vr13, $r7, 1 +0xc4 0xec 0xeb 0x72 # CHECK: vinsgr2vr.w $vr4, $r6, 3 +0xf7 0xf3 0xeb 0x72 # CHECK: vinsgr2vr.d $vr23, $r31, 0 +0x18 0xaa 0xef 0x72 # CHECK: vpickve2gr.b $r24, $vr16, 10 +0x31 0xcf 0xef 0x72 # CHECK: vpickve2gr.h $r17, $vr25, 3 +0x9e 0xeb 0xef 0x72 # CHECK: vpickve2gr.w $r30, $vr28, 2 +0x39 0xf5 0xef 0x72 # CHECK: vpickve2gr.d $r25, $vr9, 1 +0xdf 0x89 0xf3 0x72 # CHECK: vpickve2gr.bu $r31, $vr14, 2 +0x2c 0xd8 0xf3 0x72 # CHECK: vpickve2gr.hu $r12, $vr1, 6 +0x2a 0xe6 0xf3 0x72 # CHECK: vpickve2gr.wu $r10, $vr17, 1 +0x1a 0xf5 0xf3 0x72 # CHECK: vpickve2gr.du $r26, $vr8, 1 +0xc3 0xb0 0xf7 0x72 # CHECK: vreplvei.b $vr3, $vr6, 12 +0xb6 0xdf 0xf7 0x72 # CHECK: vreplvei.h $vr22, $vr29, 7 +0x51 0xe7 0xf7 0x72 # CHECK: vreplvei.w $vr17, $vr26, 1 +0x20 0xf6 0xf7 0x72 # CHECK: vreplvei.d $vr0, $vr17, 1 +0xd9 0x29 0x08 0x73 # CHECK: vsllwil.h.b $vr25, $vr14, 2 +0xb8 0x44 0x08 0x73 # CHECK: vsllwil.w.h $vr24, $vr5, 1 +0xd9 0xa1 0x08 0x73 # CHECK: vsllwil.d.w $vr25, $vr14, 8 +0xc3 0x02 0x09 0x73 # CHECK: vextl.q.d $vr3, $vr22 +0x2b 0x2f 0x0c 0x73 # CHECK: vsllwil.hu.bu $vr11, $vr25, 3 +0x42 0x6b 0x0c 0x73 # CHECK: vsllwil.wu.hu $vr2, $vr26, 10 +0x32 0xf1 0x0c 0x73 # CHECK: vsllwil.du.wu $vr18, $vr9, 28 +0x2d 0x03 0x0d 0x73 # CHECK: 
vextl.qu.du $vr13, $vr25 +0x1d 0x3b 0x10 0x73 # CHECK: vbitclri.b $vr29, $vr24, 6 +0xfb 0x55 0x10 0x73 # CHECK: vbitclri.h $vr27, $vr15, 5 +0x4b 0xa1 0x10 0x73 # CHECK: vbitclri.w $vr11, $vr10, 8 +0xe4 0x3c 0x11 0x73 # CHECK: vbitclri.d $vr4, $vr7, 15 +0x98 0x2e 0x14 0x73 # CHECK: vbitseti.b $vr24, $vr20, 3 +0x06 0x61 0x14 0x73 # CHECK: vbitseti.h $vr6, $vr8, 8 +0x35 0xe1 0x14 0x73 # CHECK: vbitseti.w $vr21, $vr9, 24 +0x5c 0x7a 0x15 0x73 # CHECK: vbitseti.d $vr28, $vr18, 30 +0xf3 0x23 0x18 0x73 # CHECK: vbitrevi.b $vr19, $vr31, 0 +0x32 0x40 0x18 0x73 # CHECK: vbitrevi.h $vr18, $vr1, 0 +0xd9 0xc8 0x18 0x73 # CHECK: vbitrevi.w $vr25, $vr6, 18 +0x68 0x5b 0x19 0x73 # CHECK: vbitrevi.d $vr8, $vr27, 22 +0x95 0x2b 0x24 0x73 # CHECK: vsat.b $vr21, $vr28, 2 +0xa6 0x70 0x24 0x73 # CHECK: vsat.h $vr6, $vr5, 12 +0xc3 0xc3 0x24 0x73 # CHECK: vsat.w $vr3, $vr30, 16 +0xe0 0x63 0x25 0x73 # CHECK: vsat.d $vr0, $vr31, 24 +0x94 0x2a 0x28 0x73 # CHECK: vsat.bu $vr20, $vr20, 2 +0xc8 0x70 0x28 0x73 # CHECK: vsat.hu $vr8, $vr6, 12 +0x92 0xea 0x28 0x73 # CHECK: vsat.wu $vr18, $vr20, 26 +0xca 0x84 0x29 0x73 # CHECK: vsat.du $vr10, $vr6, 33 +0x64 0x2e 0x2c 0x73 # CHECK: vslli.b $vr4, $vr19, 3 +0xe3 0x7a 0x2c 0x73 # CHECK: vslli.h $vr3, $vr23, 14 +0xb6 0x9a 0x2c 0x73 # CHECK: vslli.w $vr22, $vr21, 6 +0xf7 0x91 0x2d 0x73 # CHECK: vslli.d $vr23, $vr15, 36 +0x25 0x33 0x30 0x73 # CHECK: vsrli.b $vr5, $vr25, 4 +0xc9 0x65 0x30 0x73 # CHECK: vsrli.h $vr9, $vr14, 9 +0x07 0xb3 0x30 0x73 # CHECK: vsrli.w $vr7, $vr24, 12 +0x4f 0xfe 0x31 0x73 # CHECK: vsrli.d $vr15, $vr18, 63 +0x26 0x2c 0x34 0x73 # CHECK: vsrai.b $vr6, $vr1, 3 +0xa7 0x4f 0x34 0x73 # CHECK: vsrai.h $vr7, $vr29, 3 +0x7f 0xf7 0x34 0x73 # CHECK: vsrai.w $vr31, $vr27, 29 +0xdc 0xe3 0x35 0x73 # CHECK: vsrai.d $vr28, $vr30, 56 +0x42 0x4b 0x40 0x73 # CHECK: vsrlni.b.h $vr2, $vr26, 2 +0xdf 0x8d 0x40 0x73 # CHECK: vsrlni.h.w $vr31, $vr14, 3 +0x93 0x84 0x41 0x73 # CHECK: vsrlni.w.d $vr19, $vr4, 33 +0x7f 0xfc 0x42 0x73 # CHECK: vsrlni.d.q $vr31, $vr3, 63 +0x5a 0x42 0x44 0x73 # CHECK: vsrlrni.b.h $vr26, $vr18, 0 +0xd2 0x96 0x44 0x73 # CHECK: vsrlrni.h.w $vr18, $vr22, 5 +0x78 0x55 0x45 0x73 # CHECK: vsrlrni.w.d $vr24, $vr11, 21 +0x66 0x95 0x46 0x73 # CHECK: vsrlrni.d.q $vr6, $vr11, 37 +0xa3 0x56 0x48 0x73 # CHECK: vssrlni.b.h $vr3, $vr21, 5 +0x26 0xc0 0x48 0x73 # CHECK: vssrlni.h.w $vr6, $vr1, 16 +0xa4 0x6e 0x49 0x73 # CHECK: vssrlni.w.d $vr4, $vr21, 27 +0x48 0x7a 0x4b 0x73 # CHECK: vssrlni.d.q $vr8, $vr18, 94 +0x46 0x54 0x4c 0x73 # CHECK: vssrlni.bu.h $vr6, $vr2, 5 +0xbd 0x8b 0x4c 0x73 # CHECK: vssrlni.hu.w $vr29, $vr29, 2 +0x9c 0xbe 0x4d 0x73 # CHECK: vssrlni.wu.d $vr28, $vr20, 47 +0x56 0x49 0x4f 0x73 # CHECK: vssrlni.du.q $vr22, $vr10, 82 +0x31 0x6b 0x50 0x73 # CHECK: vssrlrni.b.h $vr17, $vr25, 10 +0xb5 0x83 0x50 0x73 # CHECK: vssrlrni.h.w $vr21, $vr29, 0 +0xe9 0xfd 0x51 0x73 # CHECK: vssrlrni.w.d $vr9, $vr15, 63 +0x24 0xd4 0x53 0x73 # CHECK: vssrlrni.d.q $vr4, $vr1, 117 +0xb9 0x4d 0x54 0x73 # CHECK: vssrlrni.bu.h $vr25, $vr13, 3 +0x9e 0x9f 0x54 0x73 # CHECK: vssrlrni.hu.w $vr30, $vr28, 7 +0x70 0x2f 0x55 0x73 # CHECK: vssrlrni.wu.d $vr16, $vr27, 11 +0xb4 0xfd 0x56 0x73 # CHECK: vssrlrni.du.q $vr20, $vr13, 63 +0x23 0x53 0x58 0x73 # CHECK: vsrani.b.h $vr3, $vr25, 4 +0xac 0xc5 0x58 0x73 # CHECK: vsrani.h.w $vr12, $vr13, 17 +0xc2 0x64 0x59 0x73 # CHECK: vsrani.w.d $vr2, $vr6, 25 +0x0c 0xa5 0x5b 0x73 # CHECK: vsrani.d.q $vr12, $vr8, 105 +0xbb 0x4a 0x5c 0x73 # CHECK: vsrarni.b.h $vr27, $vr21, 2 +0x6d 0x80 0x5c 0x73 # CHECK: vsrarni.h.w $vr13, $vr3, 0 +0xe9 0xab 0x5d 0x73 # 
CHECK: vsrarni.w.d $vr9, $vr31, 42 +0xb9 0xec 0x5e 0x73 # CHECK: vsrarni.d.q $vr25, $vr5, 59 +0xe8 0x70 0x60 0x73 # CHECK: vssrani.b.h $vr8, $vr7, 12 +0x55 0xfa 0x60 0x73 # CHECK: vssrani.h.w $vr21, $vr18, 30 +0xf7 0xcc 0x61 0x73 # CHECK: vssrani.w.d $vr23, $vr7, 51 +0xcc 0x21 0x62 0x73 # CHECK: vssrani.d.q $vr12, $vr14, 8 +0xb3 0x70 0x64 0x73 # CHECK: vssrani.bu.h $vr19, $vr5, 12 +0x3b 0xbf 0x64 0x73 # CHECK: vssrani.hu.w $vr27, $vr25, 15 +0x98 0xab 0x65 0x73 # CHECK: vssrani.wu.d $vr24, $vr28, 42 +0xe4 0xfe 0x66 0x73 # CHECK: vssrani.du.q $vr4, $vr23, 63 +0x1a 0x41 0x68 0x73 # CHECK: vssrarni.b.h $vr26, $vr8, 0 +0x64 0xe4 0x68 0x73 # CHECK: vssrarni.h.w $vr4, $vr3, 25 +0x20 0x4f 0x69 0x73 # CHECK: vssrarni.w.d $vr0, $vr25, 19 +0x74 0xa9 0x6b 0x73 # CHECK: vssrarni.d.q $vr20, $vr11, 106 +0x99 0x67 0x6c 0x73 # CHECK: vssrarni.bu.h $vr25, $vr28, 9 +0xf4 0xb2 0x6c 0x73 # CHECK: vssrarni.hu.w $vr20, $vr23, 12 +0xfc 0xea 0x6d 0x73 # CHECK: vssrarni.wu.d $vr28, $vr23, 58 +0xc1 0x75 0x6f 0x73 # CHECK: vssrarni.du.q $vr1, $vr14, 93 +0x6f 0x1f 0x80 0x73 # CHECK: vextrins.d $vr15, $vr27, 7 +0x13 0x4c 0x86 0x73 # CHECK: vextrins.w $vr19, $vr0, 147 +0x3d 0x15 0x89 0x73 # CHECK: vextrins.h $vr29, $vr9, 69 +0xa0 0x5e 0x8c 0x73 # CHECK: vextrins.b $vr0, $vr21, 23 +0x53 0xf1 0x92 0x73 # CHECK: vshuf4i.b $vr19, $vr10, 188 +0x2f 0x2c 0x96 0x73 # CHECK: vshuf4i.h $vr15, $vr1, 139 +0xa3 0x08 0x9a 0x73 # CHECK: vshuf4i.w $vr3, $vr5, 130 +0xa8 0x0f 0x9e 0x73 # CHECK: vshuf4i.d $vr8, $vr29, 131 +0x30 0xa3 0xc6 0x73 # CHECK: vbitseli.b $vr16, $vr25, 168 +0xe4 0xe6 0xd1 0x73 # CHECK: vandi.b $vr4, $vr23, 121 +0x47 0xf1 0xd6 0x73 # CHECK: vori.b $vr7, $vr10, 188 +0x49 0x63 0xdb 0x73 # CHECK: vxori.b $vr9, $vr26, 216 +0x84 0x6f 0xdf 0x73 # CHECK: vnori.b $vr4, $vr28, 219 +0x56 0x2c 0xe2 0x73 # CHECK: vldi $vr22, -3742 +0xae 0x43 0xe4 0x73 # CHECK: vpermi.w $vr14, $vr29, 16 +0xeb 0x56 0x00 0x74 # CHECK: xvseq.b $xr11, $xr23, $xr21 +0x46 0xed 0x00 0x74 # CHECK: xvseq.h $xr6, $xr10, $xr27 +0x73 0x57 0x01 0x74 # CHECK: xvseq.w $xr19, $xr27, $xr21 +0x92 0x88 0x01 0x74 # CHECK: xvseq.d $xr18, $xr4, $xr2 +0x53 0x15 0x02 0x74 # CHECK: xvsle.b $xr19, $xr10, $xr5 +0x2a 0xbb 0x02 0x74 # CHECK: xvsle.h $xr10, $xr25, $xr14 +0xf1 0x4a 0x03 0x74 # CHECK: xvsle.w $xr17, $xr23, $xr18 +0xef 0xa4 0x03 0x74 # CHECK: xvsle.d $xr15, $xr7, $xr9 +0xc5 0x3d 0x04 0x74 # CHECK: xvsle.bu $xr5, $xr14, $xr15 +0x29 0xe7 0x04 0x74 # CHECK: xvsle.hu $xr9, $xr25, $xr25 +0xfc 0x43 0x05 0x74 # CHECK: xvsle.wu $xr28, $xr31, $xr16 +0x11 0xe3 0x05 0x74 # CHECK: xvsle.du $xr17, $xr24, $xr24 +0x92 0x67 0x06 0x74 # CHECK: xvslt.b $xr18, $xr28, $xr25 +0xdd 0x88 0x06 0x74 # CHECK: xvslt.h $xr29, $xr6, $xr2 +0x4e 0x15 0x07 0x74 # CHECK: xvslt.w $xr14, $xr10, $xr5 +0xd3 0xbf 0x07 0x74 # CHECK: xvslt.d $xr19, $xr30, $xr15 +0xce 0x6c 0x08 0x74 # CHECK: xvslt.bu $xr14, $xr6, $xr27 +0x5b 0x97 0x08 0x74 # CHECK: xvslt.hu $xr27, $xr26, $xr5 +0x26 0x29 0x09 0x74 # CHECK: xvslt.wu $xr6, $xr9, $xr10 +0x8d 0xf1 0x09 0x74 # CHECK: xvslt.du $xr13, $xr12, $xr28 +0xc0 0x0c 0x0a 0x74 # CHECK: xvadd.b $xr0, $xr6, $xr3 +0x68 0xa9 0x0a 0x74 # CHECK: xvadd.h $xr8, $xr11, $xr10 +0xc5 0x54 0x0b 0x74 # CHECK: xvadd.w $xr5, $xr6, $xr21 +0xa4 0xaa 0x0b 0x74 # CHECK: xvadd.d $xr4, $xr21, $xr10 +0x10 0x78 0x0c 0x74 # CHECK: xvsub.b $xr16, $xr0, $xr30 +0x7c 0xc9 0x0c 0x74 # CHECK: xvsub.h $xr28, $xr11, $xr18 +0x4d 0x34 0x0d 0x74 # CHECK: xvsub.w $xr13, $xr2, $xr13 +0x20 0xd7 0x0d 0x74 # CHECK: xvsub.d $xr0, $xr25, $xr21 +0xc8 0x2f 0x1e 0x74 # CHECK: xvaddwev.h.b $xr8, $xr30, $xr11 +0xca 
0x97 0x1e 0x74 # CHECK: xvaddwev.w.h $xr10, $xr30, $xr5 +0x34 0x07 0x1f 0x74 # CHECK: xvaddwev.d.w $xr20, $xr25, $xr1 +0x16 0xe3 0x1f 0x74 # CHECK: xvaddwev.q.d $xr22, $xr24, $xr24 +0x21 0x07 0x20 0x74 # CHECK: xvsubwev.h.b $xr1, $xr25, $xr1 +0xc4 0xaf 0x20 0x74 # CHECK: xvsubwev.w.h $xr4, $xr30, $xr11 +0x46 0x48 0x21 0x74 # CHECK: xvsubwev.d.w $xr6, $xr2, $xr18 +0x60 0xfd 0x21 0x74 # CHECK: xvsubwev.q.d $xr0, $xr11, $xr31 +0x84 0x64 0x22 0x74 # CHECK: xvaddwod.h.b $xr4, $xr4, $xr25 +0x2c 0xf7 0x22 0x74 # CHECK: xvaddwod.w.h $xr12, $xr25, $xr29 +0xd0 0x4e 0x23 0x74 # CHECK: xvaddwod.d.w $xr16, $xr22, $xr19 +0x37 0xbb 0x23 0x74 # CHECK: xvaddwod.q.d $xr23, $xr25, $xr14 +0x01 0x22 0x24 0x74 # CHECK: xvsubwod.h.b $xr1, $xr16, $xr8 +0x65 0xa1 0x24 0x74 # CHECK: xvsubwod.w.h $xr5, $xr11, $xr8 +0xf4 0x00 0x25 0x74 # CHECK: xvsubwod.d.w $xr20, $xr7, $xr0 +0xf1 0xd2 0x25 0x74 # CHECK: xvsubwod.q.d $xr17, $xr23, $xr20 +0x4f 0x7d 0x2e 0x74 # CHECK: xvaddwev.h.bu $xr15, $xr10, $xr31 +0x15 0xf3 0x2e 0x74 # CHECK: xvaddwev.w.hu $xr21, $xr24, $xr28 +0xe9 0x3b 0x2f 0x74 # CHECK: xvaddwev.d.wu $xr9, $xr31, $xr14 +0x39 0xa0 0x2f 0x74 # CHECK: xvaddwev.q.du $xr25, $xr1, $xr8 +0xfe 0x37 0x30 0x74 # CHECK: xvsubwev.h.bu $xr30, $xr31, $xr13 +0x81 0x87 0x30 0x74 # CHECK: xvsubwev.w.hu $xr1, $xr28, $xr1 +0xfd 0x76 0x31 0x74 # CHECK: xvsubwev.d.wu $xr29, $xr23, $xr29 +0x0d 0xee 0x31 0x74 # CHECK: xvsubwev.q.du $xr13, $xr16, $xr27 +0xad 0x0b 0x32 0x74 # CHECK: xvaddwod.h.bu $xr13, $xr29, $xr2 +0x4e 0xb5 0x32 0x74 # CHECK: xvaddwod.w.hu $xr14, $xr10, $xr13 +0x5e 0x2b 0x33 0x74 # CHECK: xvaddwod.d.wu $xr30, $xr26, $xr10 +0xa2 0x81 0x33 0x74 # CHECK: xvaddwod.q.du $xr2, $xr13, $xr0 +0xc6 0x16 0x34 0x74 # CHECK: xvsubwod.h.bu $xr6, $xr22, $xr5 +0xb3 0xa2 0x34 0x74 # CHECK: xvsubwod.w.hu $xr19, $xr21, $xr8 +0x70 0x79 0x35 0x74 # CHECK: xvsubwod.d.wu $xr16, $xr11, $xr30 +0x41 0xa7 0x35 0x74 # CHECK: xvsubwod.q.du $xr1, $xr26, $xr9 +0xa5 0x09 0x3e 0x74 # CHECK: xvaddwev.h.bu.b $xr5, $xr13, $xr2 +0xb1 0xd2 0x3e 0x74 # CHECK: xvaddwev.w.hu.h $xr17, $xr21, $xr20 +0x6b 0x4f 0x3f 0x74 # CHECK: xvaddwev.d.wu.w $xr11, $xr27, $xr19 +0xb4 0xf6 0x3f 0x74 # CHECK: xvaddwev.q.du.d $xr20, $xr21, $xr29 +0xc1 0x38 0x40 0x74 # CHECK: xvaddwod.h.bu.b $xr1, $xr6, $xr14 +0xa7 0xaf 0x40 0x74 # CHECK: xvaddwod.w.hu.h $xr7, $xr29, $xr11 +0x50 0x39 0x41 0x74 # CHECK: xvaddwod.d.wu.w $xr16, $xr10, $xr14 +0x6a 0xdd 0x41 0x74 # CHECK: xvaddwod.q.du.d $xr10, $xr11, $xr23 +0x58 0x71 0x46 0x74 # CHECK: xvsadd.b $xr24, $xr10, $xr28 +0x53 0xc6 0x46 0x74 # CHECK: xvsadd.h $xr19, $xr18, $xr17 +0xc2 0x30 0x47 0x74 # CHECK: xvsadd.w $xr2, $xr6, $xr12 +0x4f 0xf6 0x47 0x74 # CHECK: xvsadd.d $xr15, $xr18, $xr29 +0xaf 0x43 0x48 0x74 # CHECK: xvssub.b $xr15, $xr29, $xr16 +0x7c 0xa4 0x48 0x74 # CHECK: xvssub.h $xr28, $xr3, $xr9 +0x88 0x3e 0x49 0x74 # CHECK: xvssub.w $xr8, $xr20, $xr15 +0x17 0xcd 0x49 0x74 # CHECK: xvssub.d $xr23, $xr8, $xr19 +0x8c 0x40 0x4a 0x74 # CHECK: xvsadd.bu $xr12, $xr4, $xr16 +0x49 0xd3 0x4a 0x74 # CHECK: xvsadd.hu $xr9, $xr26, $xr20 +0xfe 0x71 0x4b 0x74 # CHECK: xvsadd.wu $xr30, $xr15, $xr28 +0xaf 0xf1 0x4b 0x74 # CHECK: xvsadd.du $xr15, $xr13, $xr28 +0x6a 0x3c 0x4c 0x74 # CHECK: xvssub.bu $xr10, $xr3, $xr15 +0x80 0x89 0x4c 0x74 # CHECK: xvssub.hu $xr0, $xr12, $xr2 +0x5e 0x5d 0x4d 0x74 # CHECK: xvssub.wu $xr30, $xr10, $xr23 +0xc9 0xbb 0x4d 0x74 # CHECK: xvssub.du $xr9, $xr30, $xr14 +0xb9 0x48 0x54 0x74 # CHECK: xvhaddw.h.b $xr25, $xr5, $xr18 +0x87 0xce 0x54 0x74 # CHECK: xvhaddw.w.h $xr7, $xr20, $xr19 +0xb7 0x10 0x55 0x74 # CHECK: 
xvhaddw.d.w $xr23, $xr5, $xr4 +0xf1 0xe4 0x55 0x74 # CHECK: xvhaddw.q.d $xr17, $xr7, $xr25 +0x5d 0x4e 0x56 0x74 # CHECK: xvhsubw.h.b $xr29, $xr18, $xr19 +0x9e 0x8f 0x56 0x74 # CHECK: xvhsubw.w.h $xr30, $xr28, $xr3 +0x25 0x35 0x57 0x74 # CHECK: xvhsubw.d.w $xr5, $xr9, $xr13 +0x94 0xf5 0x57 0x74 # CHECK: xvhsubw.q.d $xr20, $xr12, $xr29 +0x4b 0x1d 0x58 0x74 # CHECK: xvhaddw.hu.bu $xr11, $xr10, $xr7 +0xb0 0xd6 0x58 0x74 # CHECK: xvhaddw.wu.hu $xr16, $xr21, $xr21 +0xf1 0x23 0x59 0x74 # CHECK: xvhaddw.du.wu $xr17, $xr31, $xr8 +0x82 0xac 0x59 0x74 # CHECK: xvhaddw.qu.du $xr2, $xr4, $xr11 +0xd5 0x21 0x5a 0x74 # CHECK: xvhsubw.hu.bu $xr21, $xr14, $xr8 +0x19 0xec 0x5a 0x74 # CHECK: xvhsubw.wu.hu $xr25, $xr0, $xr27 +0x04 0x7a 0x5b 0x74 # CHECK: xvhsubw.du.wu $xr4, $xr16, $xr30 +0x2b 0x99 0x5b 0x74 # CHECK: xvhsubw.qu.du $xr11, $xr9, $xr6 +0xae 0x6a 0x5c 0x74 # CHECK: xvadda.b $xr14, $xr21, $xr26 +0xd5 0xd7 0x5c 0x74 # CHECK: xvadda.h $xr21, $xr30, $xr21 +0x7f 0x4e 0x5d 0x74 # CHECK: xvadda.w $xr31, $xr19, $xr19 +0x89 0xfc 0x5d 0x74 # CHECK: xvadda.d $xr9, $xr4, $xr31 +0x74 0x36 0x60 0x74 # CHECK: xvabsd.b $xr20, $xr19, $xr13 +0xf4 0xa8 0x60 0x74 # CHECK: xvabsd.h $xr20, $xr7, $xr10 +0xf7 0x03 0x61 0x74 # CHECK: xvabsd.w $xr23, $xr31, $xr0 +0x27 0xba 0x61 0x74 # CHECK: xvabsd.d $xr7, $xr17, $xr14 +0xec 0x1a 0x62 0x74 # CHECK: xvabsd.bu $xr12, $xr23, $xr6 +0xd0 0xcf 0x62 0x74 # CHECK: xvabsd.hu $xr16, $xr30, $xr19 +0xb3 0x68 0x63 0x74 # CHECK: xvabsd.wu $xr19, $xr5, $xr26 +0x80 0x9d 0x63 0x74 # CHECK: xvabsd.du $xr0, $xr12, $xr7 +0xf7 0x67 0x64 0x74 # CHECK: xvavg.b $xr23, $xr31, $xr25 +0x5b 0xec 0x64 0x74 # CHECK: xvavg.h $xr27, $xr2, $xr27 +0x14 0x40 0x65 0x74 # CHECK: xvavg.w $xr20, $xr0, $xr16 +0x2d 0xa9 0x65 0x74 # CHECK: xvavg.d $xr13, $xr9, $xr10 +0xdf 0x13 0x66 0x74 # CHECK: xvavg.bu $xr31, $xr30, $xr4 +0x36 0x96 0x66 0x74 # CHECK: xvavg.hu $xr22, $xr17, $xr5 +0xb5 0x47 0x67 0x74 # CHECK: xvavg.wu $xr21, $xr29, $xr17 +0xab 0xf4 0x67 0x74 # CHECK: xvavg.du $xr11, $xr5, $xr29 +0xb7 0x35 0x68 0x74 # CHECK: xvavgr.b $xr23, $xr13, $xr13 +0x9e 0xfe 0x68 0x74 # CHECK: xvavgr.h $xr30, $xr20, $xr31 +0x9d 0x27 0x69 0x74 # CHECK: xvavgr.w $xr29, $xr28, $xr9 +0x95 0xa2 0x69 0x74 # CHECK: xvavgr.d $xr21, $xr20, $xr8 +0x20 0x11 0x6a 0x74 # CHECK: xvavgr.bu $xr0, $xr9, $xr4 +0x03 0xec 0x6a 0x74 # CHECK: xvavgr.hu $xr3, $xr0, $xr27 +0xc2 0x57 0x6b 0x74 # CHECK: xvavgr.wu $xr2, $xr30, $xr21 +0xb6 0xc6 0x6b 0x74 # CHECK: xvavgr.du $xr22, $xr21, $xr17 +0x81 0x4e 0x70 0x74 # CHECK: xvmax.b $xr1, $xr20, $xr19 +0x20 0xba 0x70 0x74 # CHECK: xvmax.h $xr0, $xr17, $xr14 +0x00 0x41 0x71 0x74 # CHECK: xvmax.w $xr0, $xr8, $xr16 +0xf0 0xc2 0x71 0x74 # CHECK: xvmax.d $xr16, $xr23, $xr16 +0xd4 0x38 0x72 0x74 # CHECK: xvmin.b $xr20, $xr6, $xr14 +0x64 0xe0 0x72 0x74 # CHECK: xvmin.h $xr4, $xr3, $xr24 +0x45 0x5c 0x73 0x74 # CHECK: xvmin.w $xr5, $xr2, $xr23 +0xff 0xea 0x73 0x74 # CHECK: xvmin.d $xr31, $xr23, $xr26 +0xae 0x0d 0x74 0x74 # CHECK: xvmax.bu $xr14, $xr13, $xr3 +0x36 0x92 0x74 0x74 # CHECK: xvmax.hu $xr22, $xr17, $xr4 +0xb1 0x75 0x75 0x74 # CHECK: xvmax.wu $xr17, $xr13, $xr29 +0x4d 0x80 0x75 0x74 # CHECK: xvmax.du $xr13, $xr2, $xr0 +0xf2 0x6f 0x76 0x74 # CHECK: xvmin.bu $xr18, $xr31, $xr27 +0x42 0xb9 0x76 0x74 # CHECK: xvmin.hu $xr2, $xr10, $xr14 +0x1f 0x69 0x77 0x74 # CHECK: xvmin.wu $xr31, $xr8, $xr26 +0x4c 0xa7 0x77 0x74 # CHECK: xvmin.du $xr12, $xr26, $xr9 +0x5a 0x0c 0x84 0x74 # CHECK: xvmul.b $xr26, $xr2, $xr3 +0xb0 0x97 0x84 0x74 # CHECK: xvmul.h $xr16, $xr29, $xr5 +0x33 0x0c 0x85 0x74 # CHECK: xvmul.w $xr19, 
$xr1, $xr3 +0xef 0x81 0x85 0x74 # CHECK: xvmul.d $xr15, $xr15, $xr0 +0x89 0x25 0x86 0x74 # CHECK: xvmuh.b $xr9, $xr12, $xr9 +0xe8 0xc2 0x86 0x74 # CHECK: xvmuh.h $xr8, $xr23, $xr16 +0xdd 0x2c 0x87 0x74 # CHECK: xvmuh.w $xr29, $xr6, $xr11 +0x43 0x9e 0x87 0x74 # CHECK: xvmuh.d $xr3, $xr18, $xr7 +0xe3 0x4c 0x88 0x74 # CHECK: xvmuh.bu $xr3, $xr7, $xr19 +0x2d 0xc8 0x88 0x74 # CHECK: xvmuh.hu $xr13, $xr1, $xr18 +0xaf 0x42 0x89 0x74 # CHECK: xvmuh.wu $xr15, $xr21, $xr16 +0x4b 0xcd 0x89 0x74 # CHECK: xvmuh.du $xr11, $xr10, $xr19 +0x84 0x25 0x90 0x74 # CHECK: xvmulwev.h.b $xr4, $xr12, $xr9 +0x6a 0xd0 0x90 0x74 # CHECK: xvmulwev.w.h $xr10, $xr3, $xr20 +0xc4 0x4a 0x91 0x74 # CHECK: xvmulwev.d.w $xr4, $xr22, $xr18 +0xb4 0xee 0x91 0x74 # CHECK: xvmulwev.q.d $xr20, $xr21, $xr27 +0xe5 0x00 0x92 0x74 # CHECK: xvmulwod.h.b $xr5, $xr7, $xr0 +0x93 0xaf 0x92 0x74 # CHECK: xvmulwod.w.h $xr19, $xr28, $xr11 +0xf3 0x40 0x93 0x74 # CHECK: xvmulwod.d.w $xr19, $xr7, $xr16 +0x8b 0xb5 0x93 0x74 # CHECK: xvmulwod.q.d $xr11, $xr12, $xr13 +0x56 0x04 0x98 0x74 # CHECK: xvmulwev.h.bu $xr22, $xr2, $xr1 +0x62 0x90 0x98 0x74 # CHECK: xvmulwev.w.hu $xr2, $xr3, $xr4 +0x82 0x65 0x99 0x74 # CHECK: xvmulwev.d.wu $xr2, $xr12, $xr25 +0xb6 0xc7 0x99 0x74 # CHECK: xvmulwev.q.du $xr22, $xr29, $xr17 +0x29 0x01 0x9a 0x74 # CHECK: xvmulwod.h.bu $xr9, $xr9, $xr0 +0x54 0xc0 0x9a 0x74 # CHECK: xvmulwod.w.hu $xr20, $xr2, $xr16 +0x61 0x61 0x9b 0x74 # CHECK: xvmulwod.d.wu $xr1, $xr11, $xr24 +0x53 0xd8 0x9b 0x74 # CHECK: xvmulwod.q.du $xr19, $xr2, $xr22 +0xb6 0x63 0xa0 0x74 # CHECK: xvmulwev.h.bu.b $xr22, $xr29, $xr24 +0xc1 0xae 0xa0 0x74 # CHECK: xvmulwev.w.hu.h $xr1, $xr22, $xr11 +0x8c 0x31 0xa1 0x74 # CHECK: xvmulwev.d.wu.w $xr12, $xr12, $xr12 +0x20 0xde 0xa1 0x74 # CHECK: xvmulwev.q.du.d $xr0, $xr17, $xr23 +0x1a 0x5e 0xa2 0x74 # CHECK: xvmulwod.h.bu.b $xr26, $xr16, $xr23 +0x9f 0xa5 0xa2 0x74 # CHECK: xvmulwod.w.hu.h $xr31, $xr12, $xr9 +0x75 0x4f 0xa3 0x74 # CHECK: xvmulwod.d.wu.w $xr21, $xr27, $xr19 +0xa7 0xac 0xa3 0x74 # CHECK: xvmulwod.q.du.d $xr7, $xr5, $xr11 +0x76 0x3d 0xa8 0x74 # CHECK: xvmadd.b $xr22, $xr11, $xr15 +0xc3 0xe7 0xa8 0x74 # CHECK: xvmadd.h $xr3, $xr30, $xr25 +0x41 0x16 0xa9 0x74 # CHECK: xvmadd.w $xr1, $xr18, $xr5 +0xb0 0xae 0xa9 0x74 # CHECK: xvmadd.d $xr16, $xr21, $xr11 +0x8b 0x29 0xaa 0x74 # CHECK: xvmsub.b $xr11, $xr12, $xr10 +0x70 0x85 0xaa 0x74 # CHECK: xvmsub.h $xr16, $xr11, $xr1 +0xaf 0x56 0xab 0x74 # CHECK: xvmsub.w $xr15, $xr21, $xr21 +0x6c 0x91 0xab 0x74 # CHECK: xvmsub.d $xr12, $xr11, $xr4 +0xf5 0x18 0xac 0x74 # CHECK: xvmaddwev.h.b $xr21, $xr7, $xr6 +0xb0 0xb7 0xac 0x74 # CHECK: xvmaddwev.w.h $xr16, $xr29, $xr13 +0x27 0x7b 0xad 0x74 # CHECK: xvmaddwev.d.w $xr7, $xr25, $xr30 +0x73 0xa0 0xad 0x74 # CHECK: xvmaddwev.q.d $xr19, $xr3, $xr8 +0x74 0x33 0xae 0x74 # CHECK: xvmaddwod.h.b $xr20, $xr27, $xr12 +0xa0 0xb6 0xae 0x74 # CHECK: xvmaddwod.w.h $xr0, $xr21, $xr13 +0xb9 0x7d 0xaf 0x74 # CHECK: xvmaddwod.d.w $xr25, $xr13, $xr31 +0x5a 0xc3 0xaf 0x74 # CHECK: xvmaddwod.q.d $xr26, $xr26, $xr16 +0x52 0x57 0xb4 0x74 # CHECK: xvmaddwev.h.bu $xr18, $xr26, $xr21 +0x0e 0x96 0xb4 0x74 # CHECK: xvmaddwev.w.hu $xr14, $xr16, $xr5 +0xb3 0x53 0xb5 0x74 # CHECK: xvmaddwev.d.wu $xr19, $xr29, $xr20 +0xaf 0xc7 0xb5 0x74 # CHECK: xvmaddwev.q.du $xr15, $xr29, $xr17 +0x4d 0x07 0xb6 0x74 # CHECK: xvmaddwod.h.bu $xr13, $xr26, $xr1 +0x2f 0xc3 0xb6 0x74 # CHECK: xvmaddwod.w.hu $xr15, $xr25, $xr16 +0x97 0x24 0xb7 0x74 # CHECK: xvmaddwod.d.wu $xr23, $xr4, $xr9 +0xdd 0xc6 0xb7 0x74 # CHECK: xvmaddwod.q.du $xr29, $xr22, $xr17 +0x37 0x18 0xbc 
0x74 # CHECK: xvmaddwev.h.bu.b $xr23, $xr1, $xr6 +0x64 0xb3 0xbc 0x74 # CHECK: xvmaddwev.w.hu.h $xr4, $xr27, $xr12 +0x40 0x14 0xbd 0x74 # CHECK: xvmaddwev.d.wu.w $xr0, $xr2, $xr5 +0xe9 0x87 0xbd 0x74 # CHECK: xvmaddwev.q.du.d $xr9, $xr31, $xr1 +0x69 0x52 0xbe 0x74 # CHECK: xvmaddwod.h.bu.b $xr9, $xr19, $xr20 +0xa7 0xb4 0xbe 0x74 # CHECK: xvmaddwod.w.hu.h $xr7, $xr5, $xr13 +0x6a 0x07 0xbf 0x74 # CHECK: xvmaddwod.d.wu.w $xr10, $xr27, $xr1 +0x79 0x82 0xbf 0x74 # CHECK: xvmaddwod.q.du.d $xr25, $xr19, $xr0 +0xe3 0x0b 0xe0 0x74 # CHECK: xvdiv.b $xr3, $xr31, $xr2 +0x81 0xc5 0xe0 0x74 # CHECK: xvdiv.h $xr1, $xr12, $xr17 +0x0d 0x30 0xe1 0x74 # CHECK: xvdiv.w $xr13, $xr0, $xr12 +0xb1 0xac 0xe1 0x74 # CHECK: xvdiv.d $xr17, $xr5, $xr11 +0x36 0x06 0xe2 0x74 # CHECK: xvmod.b $xr22, $xr17, $xr1 +0xbc 0xb0 0xe2 0x74 # CHECK: xvmod.h $xr28, $xr5, $xr12 +0x7d 0x3a 0xe3 0x74 # CHECK: xvmod.w $xr29, $xr19, $xr14 +0x11 0x99 0xe3 0x74 # CHECK: xvmod.d $xr17, $xr8, $xr6 +0xd7 0x08 0xe4 0x74 # CHECK: xvdiv.bu $xr23, $xr6, $xr2 +0xe9 0x83 0xe4 0x74 # CHECK: xvdiv.hu $xr9, $xr31, $xr0 +0x2f 0x10 0xe5 0x74 # CHECK: xvdiv.wu $xr15, $xr1, $xr4 +0xae 0xaf 0xe5 0x74 # CHECK: xvdiv.du $xr14, $xr29, $xr11 +0x84 0x7d 0xe6 0x74 # CHECK: xvmod.bu $xr4, $xr12, $xr31 +0x96 0xad 0xe6 0x74 # CHECK: xvmod.hu $xr22, $xr12, $xr11 +0xf5 0x2a 0xe7 0x74 # CHECK: xvmod.wu $xr21, $xr23, $xr10 +0xb5 0xfe 0xe7 0x74 # CHECK: xvmod.du $xr21, $xr21, $xr31 +0x50 0x2d 0xe8 0x74 # CHECK: xvsll.b $xr16, $xr10, $xr11 +0x4c 0xed 0xe8 0x74 # CHECK: xvsll.h $xr12, $xr10, $xr27 +0x5e 0x68 0xe9 0x74 # CHECK: xvsll.w $xr30, $xr2, $xr26 +0xa8 0xc6 0xe9 0x74 # CHECK: xvsll.d $xr8, $xr21, $xr17 +0x1b 0x4b 0xea 0x74 # CHECK: xvsrl.b $xr27, $xr24, $xr18 +0xf1 0xe3 0xea 0x74 # CHECK: xvsrl.h $xr17, $xr31, $xr24 +0x65 0x10 0xeb 0x74 # CHECK: xvsrl.w $xr5, $xr3, $xr4 +0xd5 0xa0 0xeb 0x74 # CHECK: xvsrl.d $xr21, $xr6, $xr8 +0x9c 0x57 0xec 0x74 # CHECK: xvsra.b $xr28, $xr28, $xr21 +0x93 0xe8 0xec 0x74 # CHECK: xvsra.h $xr19, $xr4, $xr26 +0x8d 0x06 0xed 0x74 # CHECK: xvsra.w $xr13, $xr20, $xr1 +0x00 0xc9 0xed 0x74 # CHECK: xvsra.d $xr0, $xr8, $xr18 +0xc8 0x73 0xee 0x74 # CHECK: xvrotr.b $xr8, $xr30, $xr28 +0x71 0x82 0xee 0x74 # CHECK: xvrotr.h $xr17, $xr19, $xr0 +0x8f 0x5f 0xef 0x74 # CHECK: xvrotr.w $xr15, $xr28, $xr23 +0x5f 0xd4 0xef 0x74 # CHECK: xvrotr.d $xr31, $xr2, $xr21 +0x54 0x2f 0xf0 0x74 # CHECK: xvsrlr.b $xr20, $xr26, $xr11 +0x4d 0x9e 0xf0 0x74 # CHECK: xvsrlr.h $xr13, $xr18, $xr7 +0x3c 0x0c 0xf1 0x74 # CHECK: xvsrlr.w $xr28, $xr1, $xr3 +0x66 0xb8 0xf1 0x74 # CHECK: xvsrlr.d $xr6, $xr3, $xr14 +0x0a 0x45 0xf2 0x74 # CHECK: xvsrar.b $xr10, $xr8, $xr17 +0x5f 0xac 0xf2 0x74 # CHECK: xvsrar.h $xr31, $xr2, $xr11 +0x0d 0x15 0xf3 0x74 # CHECK: xvsrar.w $xr13, $xr8, $xr5 +0x4c 0x82 0xf3 0x74 # CHECK: xvsrar.d $xr12, $xr18, $xr0 +0xcf 0xbc 0xf4 0x74 # CHECK: xvsrln.b.h $xr15, $xr6, $xr15 +0x76 0x46 0xf5 0x74 # CHECK: xvsrln.h.w $xr22, $xr19, $xr17 +0xe4 0x94 0xf5 0x74 # CHECK: xvsrln.w.d $xr4, $xr7, $xr5 +0x63 0xde 0xf6 0x74 # CHECK: xvsran.b.h $xr3, $xr19, $xr23 +0xd0 0x04 0xf7 0x74 # CHECK: xvsran.h.w $xr16, $xr6, $xr1 +0x1b 0x82 0xf7 0x74 # CHECK: xvsran.w.d $xr27, $xr16, $xr0 +0x22 0xa5 0xf8 0x74 # CHECK: xvsrlrn.b.h $xr2, $xr9, $xr9 +0x70 0x4d 0xf9 0x74 # CHECK: xvsrlrn.h.w $xr16, $xr11, $xr19 +0x3d 0xbf 0xf9 0x74 # CHECK: xvsrlrn.w.d $xr29, $xr25, $xr15 +0x8d 0xb6 0xfa 0x74 # CHECK: xvsrarn.b.h $xr13, $xr20, $xr13 +0xcd 0x06 0xfb 0x74 # CHECK: xvsrarn.h.w $xr13, $xr22, $xr1 +0x8d 0x89 0xfb 0x74 # CHECK: xvsrarn.w.d $xr13, $xr12, $xr2 +0x73 0xaa 0xfc 0x74 # 
CHECK: xvssrln.b.h $xr19, $xr19, $xr10 +0x0c 0x47 0xfd 0x74 # CHECK: xvssrln.h.w $xr12, $xr24, $xr17 +0xc7 0xbb 0xfd 0x74 # CHECK: xvssrln.w.d $xr7, $xr30, $xr14 +0x26 0xdd 0xfe 0x74 # CHECK: xvssran.b.h $xr6, $xr9, $xr23 +0x2d 0x09 0xff 0x74 # CHECK: xvssran.h.w $xr13, $xr9, $xr2 +0x52 0x87 0xff 0x74 # CHECK: xvssran.w.d $xr18, $xr26, $xr1 +0x38 0xde 0x00 0x75 # CHECK: xvssrlrn.b.h $xr24, $xr17, $xr23 +0x8a 0x21 0x01 0x75 # CHECK: xvssrlrn.h.w $xr10, $xr12, $xr8 +0x7e 0x9b 0x01 0x75 # CHECK: xvssrlrn.w.d $xr30, $xr27, $xr6 +0x74 0xff 0x02 0x75 # CHECK: xvssrarn.b.h $xr20, $xr27, $xr31 +0xf8 0x5e 0x03 0x75 # CHECK: xvssrarn.h.w $xr24, $xr23, $xr23 +0xa8 0xe7 0x03 0x75 # CHECK: xvssrarn.w.d $xr8, $xr29, $xr25 +0x8e 0xc4 0x04 0x75 # CHECK: xvssrln.bu.h $xr14, $xr4, $xr17 +0x9c 0x2a 0x05 0x75 # CHECK: xvssrln.hu.w $xr28, $xr20, $xr10 +0x0a 0xd1 0x05 0x75 # CHECK: xvssrln.wu.d $xr10, $xr8, $xr20 +0x92 0xdf 0x06 0x75 # CHECK: xvssran.bu.h $xr18, $xr28, $xr23 +0x79 0x62 0x07 0x75 # CHECK: xvssran.hu.w $xr25, $xr19, $xr24 +0xb0 0xcb 0x07 0x75 # CHECK: xvssran.wu.d $xr16, $xr29, $xr18 +0x62 0xba 0x08 0x75 # CHECK: xvssrlrn.bu.h $xr2, $xr19, $xr14 +0x06 0x48 0x09 0x75 # CHECK: xvssrlrn.hu.w $xr6, $xr0, $xr18 +0x9e 0xfc 0x09 0x75 # CHECK: xvssrlrn.wu.d $xr30, $xr4, $xr31 +0x90 0xa3 0x0a 0x75 # CHECK: xvssrarn.bu.h $xr16, $xr28, $xr8 +0x4b 0x18 0x0b 0x75 # CHECK: xvssrarn.hu.w $xr11, $xr2, $xr6 +0xd6 0xb0 0x0b 0x75 # CHECK: xvssrarn.wu.d $xr22, $xr6, $xr12 +0x04 0x42 0x0c 0x75 # CHECK: xvbitclr.b $xr4, $xr16, $xr16 +0xf0 0xeb 0x0c 0x75 # CHECK: xvbitclr.h $xr16, $xr31, $xr26 +0x58 0x50 0x0d 0x75 # CHECK: xvbitclr.w $xr24, $xr2, $xr20 +0x92 0xf9 0x0d 0x75 # CHECK: xvbitclr.d $xr18, $xr12, $xr30 +0x7a 0x5f 0x0e 0x75 # CHECK: xvbitset.b $xr26, $xr27, $xr23 +0x73 0xae 0x0e 0x75 # CHECK: xvbitset.h $xr19, $xr19, $xr11 +0x27 0x49 0x0f 0x75 # CHECK: xvbitset.w $xr7, $xr9, $xr18 +0xc6 0x8f 0x0f 0x75 # CHECK: xvbitset.d $xr6, $xr30, $xr3 +0xbe 0x1d 0x10 0x75 # CHECK: xvbitrev.b $xr30, $xr13, $xr7 +0x6c 0xa0 0x10 0x75 # CHECK: xvbitrev.h $xr12, $xr3, $xr8 +0x88 0x52 0x11 0x75 # CHECK: xvbitrev.w $xr8, $xr20, $xr20 +0xfc 0xc4 0x11 0x75 # CHECK: xvbitrev.d $xr28, $xr7, $xr17 +0x5d 0x32 0x16 0x75 # CHECK: xvpackev.b $xr29, $xr18, $xr12 +0x66 0xc5 0x16 0x75 # CHECK: xvpackev.h $xr6, $xr11, $xr17 +0x42 0x78 0x17 0x75 # CHECK: xvpackev.w $xr2, $xr2, $xr30 +0xfa 0xd5 0x17 0x75 # CHECK: xvpackev.d $xr26, $xr15, $xr21 +0x33 0x46 0x18 0x75 # CHECK: xvpackod.b $xr19, $xr17, $xr17 +0x0f 0x8d 0x18 0x75 # CHECK: xvpackod.h $xr15, $xr8, $xr3 +0xed 0x31 0x19 0x75 # CHECK: xvpackod.w $xr13, $xr15, $xr12 +0x65 0xe8 0x19 0x75 # CHECK: xvpackod.d $xr5, $xr3, $xr26 +0x3b 0x05 0x1a 0x75 # CHECK: xvilvl.b $xr27, $xr9, $xr1 +0x1d 0x85 0x1a 0x75 # CHECK: xvilvl.h $xr29, $xr8, $xr1 +0x09 0x1d 0x1b 0x75 # CHECK: xvilvl.w $xr9, $xr8, $xr7 +0xf9 0xc8 0x1b 0x75 # CHECK: xvilvl.d $xr25, $xr7, $xr18 +0x07 0x6b 0x1c 0x75 # CHECK: xvilvh.b $xr7, $xr24, $xr26 +0x86 0xf2 0x1c 0x75 # CHECK: xvilvh.h $xr6, $xr20, $xr28 +0xad 0x30 0x1d 0x75 # CHECK: xvilvh.w $xr13, $xr5, $xr12 +0xa1 0xfe 0x1d 0x75 # CHECK: xvilvh.d $xr1, $xr21, $xr31 +0xb1 0x7d 0x1e 0x75 # CHECK: xvpickev.b $xr17, $xr13, $xr31 +0x04 0xb9 0x1e 0x75 # CHECK: xvpickev.h $xr4, $xr8, $xr14 +0x0a 0x2d 0x1f 0x75 # CHECK: xvpickev.w $xr10, $xr8, $xr11 +0x9a 0xa2 0x1f 0x75 # CHECK: xvpickev.d $xr26, $xr20, $xr8 +0xb3 0x6e 0x20 0x75 # CHECK: xvpickod.b $xr19, $xr21, $xr27 +0xbc 0xcc 0x20 0x75 # CHECK: xvpickod.h $xr28, $xr5, $xr19 +0x55 0x5a 0x21 0x75 # CHECK: xvpickod.w $xr21, $xr18, $xr22 
+0xfc 0xc8 0x21 0x75 # CHECK: xvpickod.d $xr28, $xr7, $xr18 +0x86 0x66 0x22 0x75 # CHECK: xvreplve.b $xr6, $xr20, $r25 +0xfb 0xb8 0x22 0x75 # CHECK: xvreplve.h $xr27, $xr7, $r14 +0x81 0x3c 0x23 0x75 # CHECK: xvreplve.w $xr1, $xr4, $r15 +0x8c 0xc1 0x23 0x75 # CHECK: xvreplve.d $xr12, $xr12, $r16 +0x61 0x74 0x26 0x75 # CHECK: xvand.v $xr1, $xr3, $xr29 +0x77 0xd1 0x26 0x75 # CHECK: xvor.v $xr23, $xr11, $xr20 +0x3f 0x78 0x27 0x75 # CHECK: xvxor.v $xr31, $xr1, $xr30 +0x5d 0xb7 0x27 0x75 # CHECK: xvnor.v $xr29, $xr26, $xr13 +0xc9 0x01 0x28 0x75 # CHECK: xvandn.v $xr9, $xr14, $xr0 +0x19 0xb1 0x28 0x75 # CHECK: xvorn.v $xr25, $xr8, $xr12 +0x55 0x6b 0x2b 0x75 # CHECK: xvfrstp.b $xr21, $xr26, $xr26 +0x24 0x8a 0x2b 0x75 # CHECK: xvfrstp.h $xr4, $xr17, $xr2 +0x9d 0x47 0x2d 0x75 # CHECK: xvadd.q $xr29, $xr28, $xr17 +0x5d 0xec 0x2d 0x75 # CHECK: xvsub.q $xr29, $xr2, $xr27 +0x92 0x1f 0x2e 0x75 # CHECK: xvsigncov.b $xr18, $xr28, $xr7 +0x92 0xc5 0x2e 0x75 # CHECK: xvsigncov.h $xr18, $xr12, $xr17 +0x3a 0x00 0x2f 0x75 # CHECK: xvsigncov.w $xr26, $xr1, $xr0 +0x6a 0xbb 0x2f 0x75 # CHECK: xvsigncov.d $xr10, $xr27, $xr14 +0x2f 0xa3 0x30 0x75 # CHECK: xvfadd.s $xr15, $xr25, $xr8 +0xd3 0x54 0x31 0x75 # CHECK: xvfadd.d $xr19, $xr6, $xr21 +0xda 0x98 0x32 0x75 # CHECK: xvfsub.s $xr26, $xr6, $xr6 +0x09 0x54 0x33 0x75 # CHECK: xvfsub.d $xr9, $xr0, $xr21 +0x06 0xb9 0x38 0x75 # CHECK: xvfmul.s $xr6, $xr8, $xr14 +0xab 0x6a 0x39 0x75 # CHECK: xvfmul.d $xr11, $xr21, $xr26 +0xeb 0x98 0x3a 0x75 # CHECK: xvfdiv.s $xr11, $xr7, $xr6 +0x40 0x13 0x3b 0x75 # CHECK: xvfdiv.d $xr0, $xr26, $xr4 +0x27 0x91 0x3c 0x75 # CHECK: xvfmax.s $xr7, $xr9, $xr4 +0x40 0x53 0x3d 0x75 # CHECK: xvfmax.d $xr0, $xr26, $xr20 +0x48 0xe9 0x3e 0x75 # CHECK: xvfmin.s $xr8, $xr10, $xr26 +0xc2 0x66 0x3f 0x75 # CHECK: xvfmin.d $xr2, $xr22, $xr25 +0x91 0x84 0x40 0x75 # CHECK: xvfmaxa.s $xr17, $xr4, $xr1 +0xfb 0x26 0x41 0x75 # CHECK: xvfmaxa.d $xr27, $xr23, $xr9 +0x75 0xec 0x42 0x75 # CHECK: xvfmina.s $xr21, $xr3, $xr27 +0xc7 0x10 0x43 0x75 # CHECK: xvfmina.d $xr7, $xr6, $xr4 +0x49 0x51 0x46 0x75 # CHECK: xvfcvt.h.s $xr9, $xr10, $xr20 +0xe5 0xd6 0x46 0x75 # CHECK: xvfcvt.s.d $xr5, $xr23, $xr21 +0x1c 0x2b 0x48 0x75 # CHECK: xvffint.s.l $xr28, $xr24, $xr10 +0x06 0x87 0x49 0x75 # CHECK: xvftint.w.d $xr6, $xr24, $xr1 +0x5b 0x7b 0x4a 0x75 # CHECK: xvftintrm.w.d $xr27, $xr26, $xr30 +0x9f 0x85 0x4a 0x75 # CHECK: xvftintrp.w.d $xr31, $xr12, $xr1 +0xab 0x56 0x4b 0x75 # CHECK: xvftintrz.w.d $xr11, $xr21, $xr21 +0x0f 0xf1 0x4b 0x75 # CHECK: xvftintrne.w.d $xr15, $xr8, $xr28 +0xb4 0x8e 0x7a 0x75 # CHECK: xvshuf.h $xr20, $xr21, $xr3 +0x56 0x7c 0x7b 0x75 # CHECK: xvshuf.w $xr22, $xr2, $xr31 +0x6f 0xe8 0x7b 0x75 # CHECK: xvshuf.d $xr15, $xr3, $xr26 +0xf5 0x62 0x7d 0x75 # CHECK: xvperm.w $xr21, $xr23, $xr24 +0xbc 0x04 0x80 0x76 # CHECK: xvseqi.b $xr28, $xr5, 1 +0x33 0xed 0x80 0x76 # CHECK: xvseqi.h $xr19, $xr9, -5 +0x48 0x7a 0x81 0x76 # CHECK: xvseqi.w $xr8, $xr18, -2 +0xc2 0xf2 0x81 0x76 # CHECK: xvseqi.d $xr2, $xr22, -4 +0xa4 0x5a 0x82 0x76 # CHECK: xvslei.b $xr4, $xr21, -10 +0x91 0xd2 0x82 0x76 # CHECK: xvslei.h $xr17, $xr20, -12 +0x89 0x66 0x83 0x76 # CHECK: xvslei.w $xr9, $xr20, -7 +0xd3 0xab 0x83 0x76 # CHECK: xvslei.d $xr19, $xr30, 10 +0x44 0x07 0x84 0x76 # CHECK: xvslei.bu $xr4, $xr26, 1 +0x0b 0x91 0x84 0x76 # CHECK: xvslei.hu $xr11, $xr8, 4 +0x92 0x7d 0x85 0x76 # CHECK: xvslei.wu $xr18, $xr12, 31 +0xfe 0xe8 0x85 0x76 # CHECK: xvslei.du $xr30, $xr7, 26 +0xab 0x0b 0x86 0x76 # CHECK: xvslti.b $xr11, $xr29, 2 +0x66 0xa3 0x86 0x76 # CHECK: xvslti.h $xr6, $xr27, 8 +0xf5 0x06 
0x87 0x76 # CHECK: xvslti.w $xr21, $xr23, 1 +0xf2 0xef 0x87 0x76 # CHECK: xvslti.d $xr18, $xr31, -5 +0x9b 0x45 0x88 0x76 # CHECK: xvslti.bu $xr27, $xr12, 17 +0xd2 0xb1 0x88 0x76 # CHECK: xvslti.hu $xr18, $xr14, 12 +0x84 0x39 0x89 0x76 # CHECK: xvslti.wu $xr4, $xr12, 14 +0x1a 0xe0 0x89 0x76 # CHECK: xvslti.du $xr26, $xr0, 24 +0x5e 0x14 0x8a 0x76 # CHECK: xvaddi.bu $xr30, $xr2, 5 +0x36 0xa6 0x8a 0x76 # CHECK: xvaddi.hu $xr22, $xr17, 9 +0x43 0x77 0x8b 0x76 # CHECK: xvaddi.wu $xr3, $xr26, 29 +0x80 0xfa 0x8b 0x76 # CHECK: xvaddi.du $xr0, $xr20, 30 +0x80 0x1e 0x8c 0x76 # CHECK: xvsubi.bu $xr0, $xr20, 7 +0x04 0xcb 0x8c 0x76 # CHECK: xvsubi.hu $xr4, $xr24, 18 +0x41 0x6b 0x8d 0x76 # CHECK: xvsubi.wu $xr1, $xr26, 26 +0x89 0xa3 0x8d 0x76 # CHECK: xvsubi.du $xr9, $xr28, 8 +0xa0 0x22 0x8e 0x76 # CHECK: xvbsll.v $xr0, $xr21, 8 +0x04 0xf1 0x8e 0x76 # CHECK: xvbsrl.v $xr4, $xr8, 28 +0x28 0x48 0x90 0x76 # CHECK: xvmaxi.b $xr8, $xr1, -14 +0x93 0xc1 0x90 0x76 # CHECK: xvmaxi.h $xr19, $xr12, -16 +0x3b 0x14 0x91 0x76 # CHECK: xvmaxi.w $xr27, $xr1, 5 +0xe6 0x8c 0x91 0x76 # CHECK: xvmaxi.d $xr6, $xr7, 3 +0xca 0x14 0x92 0x76 # CHECK: xvmini.b $xr10, $xr6, 5 +0x48 0xd2 0x92 0x76 # CHECK: xvmini.h $xr8, $xr18, -12 +0xbf 0x65 0x93 0x76 # CHECK: xvmini.w $xr31, $xr13, -7 +0x6f 0xa7 0x93 0x76 # CHECK: xvmini.d $xr15, $xr27, 9 +0x25 0x5a 0x94 0x76 # CHECK: xvmaxi.bu $xr5, $xr17, 22 +0x66 0x90 0x94 0x76 # CHECK: xvmaxi.hu $xr6, $xr3, 4 +0x9a 0x45 0x95 0x76 # CHECK: xvmaxi.wu $xr26, $xr12, 17 +0x7e 0xf9 0x95 0x76 # CHECK: xvmaxi.du $xr30, $xr11, 30 +0x0f 0x1d 0x96 0x76 # CHECK: xvmini.bu $xr15, $xr8, 7 +0x32 0x87 0x96 0x76 # CHECK: xvmini.hu $xr18, $xr25, 1 +0x90 0x03 0x97 0x76 # CHECK: xvmini.wu $xr16, $xr28, 0 +0x6a 0xf6 0x97 0x76 # CHECK: xvmini.du $xr10, $xr19, 29 +0x28 0x0b 0x9a 0x76 # CHECK: xvfrstpi.b $xr8, $xr25, 2 +0x7c 0xea 0x9a 0x76 # CHECK: xvfrstpi.h $xr28, $xr19, 26 +0x02 0x01 0x9c 0x76 # CHECK: xvclo.b $xr2, $xr8 +0x2a 0x05 0x9c 0x76 # CHECK: xvclo.h $xr10, $xr9 +0xe2 0x0b 0x9c 0x76 # CHECK: xvclo.w $xr2, $xr31 +0x15 0x0f 0x9c 0x76 # CHECK: xvclo.d $xr21, $xr24 +0x0d 0x13 0x9c 0x76 # CHECK: xvclz.b $xr13, $xr24 +0xe4 0x17 0x9c 0x76 # CHECK: xvclz.h $xr4, $xr31 +0x27 0x18 0x9c 0x76 # CHECK: xvclz.w $xr7, $xr1 +0xcd 0x1e 0x9c 0x76 # CHECK: xvclz.d $xr13, $xr22 +0x49 0x23 0x9c 0x76 # CHECK: xvpcnt.b $xr9, $xr26 +0x6a 0x24 0x9c 0x76 # CHECK: xvpcnt.h $xr10, $xr3 +0xf8 0x28 0x9c 0x76 # CHECK: xvpcnt.w $xr24, $xr7 +0x05 0x2d 0x9c 0x76 # CHECK: xvpcnt.d $xr5, $xr8 +0x73 0x31 0x9c 0x76 # CHECK: xvneg.b $xr19, $xr11 +0xb5 0x36 0x9c 0x76 # CHECK: xvneg.h $xr21, $xr21 +0x33 0x3a 0x9c 0x76 # CHECK: xvneg.w $xr19, $xr17 +0xbf 0x3f 0x9c 0x76 # CHECK: xvneg.d $xr31, $xr29 +0x76 0x43 0x9c 0x76 # CHECK: xvmskltz.b $xr22, $xr27 +0x05 0x44 0x9c 0x76 # CHECK: xvmskltz.h $xr5, $xr0 +0x98 0x4b 0x9c 0x76 # CHECK: xvmskltz.w $xr24, $xr28 +0x59 0x4c 0x9c 0x76 # CHECK: xvmskltz.d $xr25, $xr2 +0xde 0x53 0x9c 0x76 # CHECK: xvmskgez.b $xr30, $xr30 +0x85 0x62 0x9c 0x76 # CHECK: xvmsknz.b $xr5, $xr20 +0x21 0x9b 0x9c 0x76 # CHECK: xvseteqz.v $fcc1, $xr25 +0xa5 0x9d 0x9c 0x76 # CHECK: xvsetnez.v $fcc5, $xr13 +0x80 0xa0 0x9c 0x76 # CHECK: xvsetanyeqz.b $fcc0, $xr4 +0xe0 0xa7 0x9c 0x76 # CHECK: xvsetanyeqz.h $fcc0, $xr31 +0xc2 0xab 0x9c 0x76 # CHECK: xvsetanyeqz.w $fcc2, $xr30 +0xe3 0xaf 0x9c 0x76 # CHECK: xvsetanyeqz.d $fcc3, $xr31 +0xa1 0xb2 0x9c 0x76 # CHECK: xvsetallnez.b $fcc1, $xr21 +0xa0 0xb6 0x9c 0x76 # CHECK: xvsetallnez.h $fcc0, $xr21 +0x00 0xb8 0x9c 0x76 # CHECK: xvsetallnez.w $fcc0, $xr0 +0xe1 0xbf 0x9c 0x76 # CHECK: 
xvsetallnez.d $fcc1, $xr31 +0x95 0xc4 0x9c 0x76 # CHECK: xvflogb.s $xr21, $xr4 +0x88 0xca 0x9c 0x76 # CHECK: xvflogb.d $xr8, $xr20 +0xaf 0xd7 0x9c 0x76 # CHECK: xvfclass.s $xr15, $xr29 +0xc7 0xd9 0x9c 0x76 # CHECK: xvfclass.d $xr7, $xr14 +0x7c 0xe6 0x9c 0x76 # CHECK: xvfsqrt.s $xr28, $xr19 +0xeb 0xeb 0x9c 0x76 # CHECK: xvfsqrt.d $xr11, $xr31 +0xe6 0xf6 0x9c 0x76 # CHECK: xvfrecip.s $xr6, $xr23 +0x00 0xfb 0x9c 0x76 # CHECK: xvfrecip.d $xr0, $xr24 +0x08 0x06 0x9d 0x76 # CHECK: xvfrsqrt.s $xr8, $xr16 +0x2f 0x0a 0x9d 0x76 # CHECK: xvfrsqrt.d $xr15, $xr17 +0x24 0x37 0x9d 0x76 # CHECK: xvfrint.s $xr4, $xr25 +0x81 0x3a 0x9d 0x76 # CHECK: xvfrint.d $xr1, $xr20 +0x1d 0x46 0x9d 0x76 # CHECK: xvfrintrm.s $xr29, $xr16 +0x44 0x49 0x9d 0x76 # CHECK: xvfrintrm.d $xr4, $xr10 +0xed 0x57 0x9d 0x76 # CHECK: xvfrintrp.s $xr13, $xr31 +0x74 0x59 0x9d 0x76 # CHECK: xvfrintrp.d $xr20, $xr11 +0xbb 0x65 0x9d 0x76 # CHECK: xvfrintrz.s $xr27, $xr13 +0x31 0x6b 0x9d 0x76 # CHECK: xvfrintrz.d $xr17, $xr25 +0x0e 0x75 0x9d 0x76 # CHECK: xvfrintrne.s $xr14, $xr8 +0x57 0x7b 0x9d 0x76 # CHECK: xvfrintrne.d $xr23, $xr26 +0xe4 0xea 0x9d 0x76 # CHECK: xvfcvtl.s.h $xr4, $xr23 +0x6e 0xed 0x9d 0x76 # CHECK: xvfcvth.s.h $xr14, $xr11 +0xfa 0xf3 0x9d 0x76 # CHECK: xvfcvtl.d.s $xr26, $xr31 +0x8d 0xf7 0x9d 0x76 # CHECK: xvfcvth.d.s $xr13, $xr28 +0x8e 0x03 0x9e 0x76 # CHECK: xvffint.s.w $xr14, $xr28 +0x00 0x05 0x9e 0x76 # CHECK: xvffint.s.wu $xr0, $xr8 +0x65 0x0b 0x9e 0x76 # CHECK: xvffint.d.l $xr5, $xr27 +0x5d 0x0e 0x9e 0x76 # CHECK: xvffint.d.lu $xr29, $xr18 +0x89 0x12 0x9e 0x76 # CHECK: xvffintl.d.w $xr9, $xr20 +0xab 0x15 0x9e 0x76 # CHECK: xvffinth.d.w $xr11, $xr13 +0x86 0x30 0x9e 0x76 # CHECK: xvftint.w.s $xr6, $xr4 +0xcb 0x36 0x9e 0x76 # CHECK: xvftint.l.d $xr11, $xr22 +0xb4 0x3a 0x9e 0x76 # CHECK: xvftintrm.w.s $xr20, $xr21 +0x7c 0x3f 0x9e 0x76 # CHECK: xvftintrm.l.d $xr28, $xr27 +0x0e 0x42 0x9e 0x76 # CHECK: xvftintrp.w.s $xr14, $xr16 +0x2e 0x47 0x9e 0x76 # CHECK: xvftintrp.l.d $xr14, $xr25 +0xc5 0x4b 0x9e 0x76 # CHECK: xvftintrz.w.s $xr5, $xr30 +0x6b 0x4e 0x9e 0x76 # CHECK: xvftintrz.l.d $xr11, $xr19 +0xfb 0x52 0x9e 0x76 # CHECK: xvftintrne.w.s $xr27, $xr23 +0xbb 0x55 0x9e 0x76 # CHECK: xvftintrne.l.d $xr27, $xr13 +0x5c 0x58 0x9e 0x76 # CHECK: xvftint.wu.s $xr28, $xr2 +0x9b 0x5d 0x9e 0x76 # CHECK: xvftint.lu.d $xr27, $xr12 +0xb5 0x73 0x9e 0x76 # CHECK: xvftintrz.wu.s $xr21, $xr29 +0x53 0x74 0x9e 0x76 # CHECK: xvftintrz.lu.d $xr19, $xr2 +0x42 0x82 0x9e 0x76 # CHECK: xvftintl.l.s $xr2, $xr18 +0xc8 0x87 0x9e 0x76 # CHECK: xvftinth.l.s $xr8, $xr30 +0x2d 0x8a 0x9e 0x76 # CHECK: xvftintrml.l.s $xr13, $xr17 +0x5e 0x8f 0x9e 0x76 # CHECK: xvftintrmh.l.s $xr30, $xr26 +0x4b 0x93 0x9e 0x76 # CHECK: xvftintrpl.l.s $xr11, $xr26 +0x7e 0x95 0x9e 0x76 # CHECK: xvftintrph.l.s $xr30, $xr11 +0xf9 0x98 0x9e 0x76 # CHECK: xvftintrzl.l.s $xr25, $xr7 +0xac 0x9c 0x9e 0x76 # CHECK: xvftintrzh.l.s $xr12, $xr5 +0x08 0xa3 0x9e 0x76 # CHECK: xvftintrnel.l.s $xr8, $xr24 +0x19 0xa7 0x9e 0x76 # CHECK: xvftintrneh.l.s $xr25, $xr24 +0xb7 0xe0 0x9e 0x76 # CHECK: xvexth.h.b $xr23, $xr5 +0xd9 0xe4 0x9e 0x76 # CHECK: xvexth.w.h $xr25, $xr6 +0x67 0xeb 0x9e 0x76 # CHECK: xvexth.d.w $xr7, $xr27 +0x4e 0xed 0x9e 0x76 # CHECK: xvexth.q.d $xr14, $xr10 +0xa0 0xf2 0x9e 0x76 # CHECK: xvexth.hu.bu $xr0, $xr21 +0xcf 0xf6 0x9e 0x76 # CHECK: xvexth.wu.hu $xr15, $xr22 +0xf8 0xf9 0x9e 0x76 # CHECK: xvexth.du.wu $xr24, $xr15 +0x44 0xfc 0x9e 0x76 # CHECK: xvexth.qu.du $xr4, $xr2 +0xd5 0x00 0x9f 0x76 # CHECK: xvreplgr2vr.b $xr21, $r6 +0x2b 0x04 0x9f 0x76 # CHECK: xvreplgr2vr.h $xr11, 
$ra +0xcd 0x0a 0x9f 0x76 # CHECK: xvreplgr2vr.w $xr13, $r22 +0x29 0x0e 0x9f 0x76 # CHECK: xvreplgr2vr.d $xr9, $r17 +0x12 0x12 0x9f 0x76 # CHECK: vext2xv.h.b $xr18, $xr16 +0xe3 0x16 0x9f 0x76 # CHECK: vext2xv.w.b $xr3, $xr23 +0x1e 0x1a 0x9f 0x76 # CHECK: vext2xv.d.b $xr30, $xr16 +0xfc 0x1e 0x9f 0x76 # CHECK: vext2xv.w.h $xr28, $xr23 +0x24 0x20 0x9f 0x76 # CHECK: vext2xv.d.h $xr4, $xr1 +0x97 0x25 0x9f 0x76 # CHECK: vext2xv.d.w $xr23, $xr12 +0xa0 0x28 0x9f 0x76 # CHECK: vext2xv.hu.bu $xr0, $xr5 +0x81 0x2c 0x9f 0x76 # CHECK: vext2xv.wu.bu $xr1, $xr4 +0x71 0x31 0x9f 0x76 # CHECK: vext2xv.du.bu $xr17, $xr11 +0x1c 0x34 0x9f 0x76 # CHECK: vext2xv.wu.hu $xr28, $xr0 +0x3a 0x3b 0x9f 0x76 # CHECK: vext2xv.du.hu $xr26, $xr25 +0xdd 0x3d 0x9f 0x76 # CHECK: vext2xv.du.wu $xr29, $xr14 +0xc3 0xb6 0x9f 0x76 # CHECK: xvhseli.d $xr3, $xr22, 13 +0xc0 0x29 0xa0 0x76 # CHECK: xvrotri.b $xr0, $xr14, 2 +0xe0 0x6c 0xa0 0x76 # CHECK: xvrotri.h $xr0, $xr7, 11 +0x38 0x8c 0xa0 0x76 # CHECK: xvrotri.w $xr24, $xr1, 3 +0xff 0x40 0xa1 0x76 # CHECK: xvrotri.d $xr31, $xr7, 16 +0x74 0x26 0xa4 0x76 # CHECK: xvsrlri.b $xr20, $xr19, 1 +0x3c 0x6c 0xa4 0x76 # CHECK: xvsrlri.h $xr28, $xr1, 11 +0x59 0xec 0xa4 0x76 # CHECK: xvsrlri.w $xr25, $xr2, 27 +0x3d 0x19 0xa5 0x76 # CHECK: xvsrlri.d $xr29, $xr9, 6 +0xa7 0x28 0xa8 0x76 # CHECK: xvsrari.b $xr7, $xr5, 2 +0x40 0x65 0xa8 0x76 # CHECK: xvsrari.h $xr0, $xr10, 9 +0x11 0xab 0xa8 0x76 # CHECK: xvsrari.w $xr17, $xr24, 10 +0xc7 0x99 0xa9 0x76 # CHECK: xvsrari.d $xr7, $xr14, 38 +0xe5 0xc7 0xeb 0x76 # CHECK: xvinsgr2vr.w $xr5, $r31, 1 +0x45 0xe7 0xeb 0x76 # CHECK: xvinsgr2vr.d $xr5, $r26, 1 +0x92 0xcb 0xef 0x76 # CHECK: xvpickve2gr.w $r18, $xr28, 2 +0x54 0xe5 0xef 0x76 # CHECK: xvpickve2gr.d $r20, $xr10, 1 +0x89 0xd9 0xf3 0x76 # CHECK: xvpickve2gr.wu $r9, $xr12, 6 +0xa9 0xe9 0xf3 0x76 # CHECK: xvpickve2gr.du $r9, $xr13, 2 +0xc1 0x97 0xf7 0x76 # CHECK: xvrepl128vei.b $xr1, $xr30, 5 +0xad 0xdd 0xf7 0x76 # CHECK: xvrepl128vei.h $xr13, $xr13, 7 +0xa7 0xe9 0xf7 0x76 # CHECK: xvrepl128vei.w $xr7, $xr13, 2 +0xe2 0xf7 0xf7 0x76 # CHECK: xvrepl128vei.d $xr2, $xr31, 1 +0xa4 0xcd 0xff 0x76 # CHECK: xvinsve0.w $xr4, $xr13, 3 +0x3b 0xe3 0xff 0x76 # CHECK: xvinsve0.d $xr27, $xr25, 0 +0x7d 0xde 0x03 0x77 # CHECK: xvpickve.w $xr29, $xr19, 7 +0x13 0xee 0x03 0x77 # CHECK: xvpickve.d $xr19, $xr16, 3 +0xa5 0x00 0x07 0x77 # CHECK: xvreplve0.b $xr5, $xr5 +0x0e 0x83 0x07 0x77 # CHECK: xvreplve0.h $xr14, $xr24 +0xaf 0xc1 0x07 0x77 # CHECK: xvreplve0.w $xr15, $xr13 +0x94 0xe2 0x07 0x77 # CHECK: xvreplve0.d $xr20, $xr20 +0x45 0xf1 0x07 0x77 # CHECK: xvreplve0.q $xr5, $xr10 +0x1f 0x2c 0x08 0x77 # CHECK: xvsllwil.h.b $xr31, $xr0, 3 +0x15 0x5f 0x08 0x77 # CHECK: xvsllwil.w.h $xr21, $xr24, 7 +0x1a 0xcb 0x08 0x77 # CHECK: xvsllwil.d.w $xr26, $xr24, 18 +0xc5 0x00 0x09 0x77 # CHECK: xvextl.q.d $xr5, $xr6 +0xed 0x3b 0x0c 0x77 # CHECK: xvsllwil.hu.bu $xr13, $xr31, 6 +0x93 0x62 0x0c 0x77 # CHECK: xvsllwil.wu.hu $xr19, $xr20, 8 +0xae 0x89 0x0c 0x77 # CHECK: xvsllwil.du.wu $xr14, $xr13, 2 +0xea 0x00 0x0d 0x77 # CHECK: xvextl.qu.du $xr10, $xr7 +0xbf 0x36 0x10 0x77 # CHECK: xvbitclri.b $xr31, $xr21, 5 +0x9a 0x48 0x10 0x77 # CHECK: xvbitclri.h $xr26, $xr4, 2 +0x35 0xbf 0x10 0x77 # CHECK: xvbitclri.w $xr21, $xr25, 15 +0x0e 0xfc 0x11 0x77 # CHECK: xvbitclri.d $xr14, $xr0, 63 +0x30 0x34 0x14 0x77 # CHECK: xvbitseti.b $xr16, $xr1, 5 +0xd3 0x4f 0x14 0x77 # CHECK: xvbitseti.h $xr19, $xr30, 3 +0xd2 0xee 0x14 0x77 # CHECK: xvbitseti.w $xr18, $xr22, 27 +0x2f 0xa0 0x15 0x77 # CHECK: xvbitseti.d $xr15, $xr1, 40 +0xb7 0x20 0x18 0x77 # CHECK: 
xvbitrevi.b $xr23, $xr5, 0 +0x45 0x5c 0x18 0x77 # CHECK: xvbitrevi.h $xr5, $xr2, 7 +0xd7 0xb0 0x18 0x77 # CHECK: xvbitrevi.w $xr23, $xr6, 12 +0xd2 0x85 0x19 0x77 # CHECK: xvbitrevi.d $xr18, $xr14, 33 +0x5b 0x33 0x24 0x77 # CHECK: xvsat.b $xr27, $xr26, 4 +0xa4 0x56 0x24 0x77 # CHECK: xvsat.h $xr4, $xr21, 5 +0x7d 0xab 0x24 0x77 # CHECK: xvsat.w $xr29, $xr27, 10 +0x0e 0xf0 0x25 0x77 # CHECK: xvsat.d $xr14, $xr0, 60 +0x3f 0x2f 0x28 0x77 # CHECK: xvsat.bu $xr31, $xr25, 3 +0x91 0x78 0x28 0x77 # CHECK: xvsat.hu $xr17, $xr4, 14 +0x31 0x92 0x28 0x77 # CHECK: xvsat.wu $xr17, $xr17, 4 +0x0b 0xac 0x29 0x77 # CHECK: xvsat.du $xr11, $xr0, 43 +0x18 0x2b 0x2c 0x77 # CHECK: xvslli.b $xr24, $xr24, 2 +0x37 0x5d 0x2c 0x77 # CHECK: xvslli.h $xr23, $xr9, 7 +0x8d 0xc1 0x2c 0x77 # CHECK: xvslli.w $xr13, $xr12, 16 +0xcb 0x46 0x2d 0x77 # CHECK: xvslli.d $xr11, $xr22, 17 +0xc9 0x25 0x30 0x77 # CHECK: xvsrli.b $xr9, $xr14, 1 +0x96 0x7e 0x30 0x77 # CHECK: xvsrli.h $xr22, $xr20, 15 +0xc5 0xd3 0x30 0x77 # CHECK: xvsrli.w $xr5, $xr30, 20 +0x01 0xea 0x31 0x77 # CHECK: xvsrli.d $xr1, $xr16, 58 +0xd2 0x28 0x34 0x77 # CHECK: xvsrai.b $xr18, $xr6, 2 +0x15 0x72 0x34 0x77 # CHECK: xvsrai.h $xr21, $xr16, 12 +0x2d 0xc6 0x34 0x77 # CHECK: xvsrai.w $xr13, $xr17, 17 +0x83 0xcd 0x35 0x77 # CHECK: xvsrai.d $xr3, $xr12, 51 +0xe1 0x50 0x40 0x77 # CHECK: xvsrlni.b.h $xr1, $xr7, 4 +0xb0 0xe6 0x40 0x77 # CHECK: xvsrlni.h.w $xr16, $xr21, 25 +0x4d 0xc1 0x41 0x77 # CHECK: xvsrlni.w.d $xr13, $xr10, 48 +0x91 0xf9 0x43 0x77 # CHECK: xvsrlni.d.q $xr17, $xr12, 126 +0x71 0x7e 0x44 0x77 # CHECK: xvsrlrni.b.h $xr17, $xr19, 15 +0x15 0xbb 0x44 0x77 # CHECK: xvsrlrni.h.w $xr21, $xr24, 14 +0xf4 0x0f 0x45 0x77 # CHECK: xvsrlrni.w.d $xr20, $xr31, 3 +0x1c 0x33 0x47 0x77 # CHECK: xvsrlrni.d.q $xr28, $xr24, 76 +0xfa 0x5c 0x48 0x77 # CHECK: xvssrlni.b.h $xr26, $xr7, 7 +0x9b 0xe7 0x48 0x77 # CHECK: xvssrlni.h.w $xr27, $xr28, 25 +0x04 0x41 0x49 0x77 # CHECK: xvssrlni.w.d $xr4, $xr8, 16 +0x2e 0x52 0x4b 0x77 # CHECK: xvssrlni.d.q $xr14, $xr17, 84 +0xd1 0x48 0x4c 0x77 # CHECK: xvssrlni.bu.h $xr17, $xr6, 2 +0x46 0x8f 0x4c 0x77 # CHECK: xvssrlni.hu.w $xr6, $xr26, 3 +0x4a 0xda 0x4d 0x77 # CHECK: xvssrlni.wu.d $xr10, $xr18, 54 +0x5d 0x1b 0x4f 0x77 # CHECK: xvssrlni.du.q $xr29, $xr26, 70 +0x26 0x59 0x50 0x77 # CHECK: xvssrlrni.b.h $xr6, $xr9, 6 +0x16 0x85 0x50 0x77 # CHECK: xvssrlrni.h.w $xr22, $xr8, 1 +0x3c 0x71 0x51 0x77 # CHECK: xvssrlrni.w.d $xr28, $xr9, 28 +0x74 0xa3 0x53 0x77 # CHECK: xvssrlrni.d.q $xr20, $xr27, 104 +0x99 0x70 0x54 0x77 # CHECK: xvssrlrni.bu.h $xr25, $xr4, 12 +0xb5 0x97 0x54 0x77 # CHECK: xvssrlrni.hu.w $xr21, $xr29, 5 +0x01 0xda 0x55 0x77 # CHECK: xvssrlrni.wu.d $xr1, $xr16, 54 +0xfd 0x64 0x56 0x77 # CHECK: xvssrlrni.du.q $xr29, $xr7, 25 +0x30 0x53 0x58 0x77 # CHECK: xvsrani.b.h $xr16, $xr25, 4 +0x4d 0x99 0x58 0x77 # CHECK: xvsrani.h.w $xr13, $xr10, 6 +0xa7 0xd6 0x59 0x77 # CHECK: xvsrani.w.d $xr7, $xr21, 53 +0x5a 0xde 0x5a 0x77 # CHECK: xvsrani.d.q $xr26, $xr18, 55 +0xb1 0x6e 0x5c 0x77 # CHECK: xvsrarni.b.h $xr17, $xr21, 11 +0xcf 0x8b 0x5c 0x77 # CHECK: xvsrarni.h.w $xr15, $xr30, 2 +0x77 0x7d 0x5d 0x77 # CHECK: xvsrarni.w.d $xr23, $xr11, 31 +0x36 0x43 0x5e 0x77 # CHECK: xvsrarni.d.q $xr22, $xr25, 16 +0x93 0x6a 0x60 0x77 # CHECK: xvssrani.b.h $xr19, $xr20, 10 +0x39 0xd9 0x60 0x77 # CHECK: xvssrani.h.w $xr25, $xr9, 22 +0x57 0x1c 0x61 0x77 # CHECK: xvssrani.w.d $xr23, $xr2, 7 +0x06 0xfd 0x63 0x77 # CHECK: xvssrani.d.q $xr6, $xr8, 127 +0xdb 0x55 0x64 0x77 # CHECK: xvssrani.bu.h $xr27, $xr14, 5 +0x2e 0xd0 0x64 0x77 # CHECK: xvssrani.hu.w $xr14, $xr1, 
20 +0x8a 0xec 0x65 0x77 # CHECK: xvssrani.wu.d $xr10, $xr4, 59 +0x31 0x48 0x67 0x77 # CHECK: xvssrani.du.q $xr17, $xr1, 82 +0x5b 0x7e 0x68 0x77 # CHECK: xvssrarni.b.h $xr27, $xr18, 15 +0x70 0xbc 0x68 0x77 # CHECK: xvssrarni.h.w $xr16, $xr3, 15 +0x3a 0x4b 0x69 0x77 # CHECK: xvssrarni.w.d $xr26, $xr25, 18 +0x3c 0x03 0x6a 0x77 # CHECK: xvssrarni.d.q $xr28, $xr25, 0 +0x81 0x61 0x6c 0x77 # CHECK: xvssrarni.bu.h $xr1, $xr12, 8 +0x63 0xff 0x6c 0x77 # CHECK: xvssrarni.hu.w $xr3, $xr27, 31 +0x78 0xd3 0x6d 0x77 # CHECK: xvssrarni.wu.d $xr24, $xr27, 52 +0x65 0xc0 0x6f 0x77 # CHECK: xvssrarni.du.q $xr5, $xr3, 112 +0x35 0x8f 0x82 0x77 # CHECK: xvextrins.d $xr21, $xr25, 163 +0x33 0x72 0x84 0x77 # CHECK: xvextrins.w $xr19, $xr17, 28 +0xfe 0x3c 0x89 0x77 # CHECK: xvextrins.h $xr30, $xr7, 79 +0xe1 0x4b 0x8f 0x77 # CHECK: xvextrins.b $xr1, $xr31, 210 +0xc3 0x52 0x92 0x77 # CHECK: xvshuf4i.b $xr3, $xr22, 148 +0xc2 0x8a 0x94 0x77 # CHECK: xvshuf4i.h $xr2, $xr22, 34 +0x7f 0x96 0x9a 0x77 # CHECK: xvshuf4i.w $xr31, $xr19, 165 +0x3f 0x3a 0x9c 0x77 # CHECK: xvshuf4i.d $xr31, $xr17, 14 +0x1b 0x40 0xc5 0x77 # CHECK: xvbitseli.b $xr27, $xr0, 80 +0x57 0x64 0xd2 0x77 # CHECK: xvandi.b $xr23, $xr2, 153 +0x9b 0xf3 0xd6 0x77 # CHECK: xvori.b $xr27, $xr28, 188 +0x3c 0xf8 0xdb 0x77 # CHECK: xvxori.b $xr28, $xr1, 254 +0x44 0x90 0xdc 0x77 # CHECK: xvnori.b $xr4, $xr2, 36 +0x1a 0xc2 0xe2 0x77 # CHECK: xvldi $xr26, -2544 +0x16 0xa3 0xe6 0x77 # CHECK: xvpermi.w $xr22, $xr24, 168 +0xee 0x23 0xea 0x77 # CHECK: xvpermi.d $xr14, $xr31, 136 +0xdc 0x4d 0xef 0x77 # CHECK: xvpermi.q $xr28, $xr14, 211 +0xe0 0x7f 0x1e 0x70 # CHECK: vaddwev.h.b $vr0, $vr31, $vr31 +0x83 0xdc 0x1e 0x70 # CHECK: vaddwev.w.h $vr3, $vr4, $vr23 +0x5e 0x2f 0x1f 0x70 # CHECK: vaddwev.d.w $vr30, $vr26, $vr11 +0xb9 0xb7 0x1f 0x70 # CHECK: vaddwev.q.d $vr25, $vr29, $vr13 +0x8b 0x07 0x20 0x70 # CHECK: vsubwev.h.b $vr11, $vr28, $vr1 +0xe9 0x95 0x20 0x70 # CHECK: vsubwev.w.h $vr9, $vr15, $vr5 +0x31 0x29 0x21 0x70 # CHECK: vsubwev.d.w $vr17, $vr9, $vr10 +0x5a 0xae 0x21 0x70 # CHECK: vsubwev.q.d $vr26, $vr18, $vr11 +0x67 0x49 0x22 0x70 # CHECK: vaddwod.h.b $vr7, $vr11, $vr18 +0xe0 0xb0 0x22 0x70 # CHECK: vaddwod.w.h $vr0, $vr7, $vr12 +0x7e 0x43 0x23 0x70 # CHECK: vaddwod.d.w $vr30, $vr27, $vr16 +0x82 0xf6 0x23 0x70 # CHECK: vaddwod.q.d $vr2, $vr20, $vr29 +0xfa 0x4c 0x24 0x70 # CHECK: vsubwod.h.b $vr26, $vr7, $vr19 +0x73 0xac 0x24 0x70 # CHECK: vsubwod.w.h $vr19, $vr3, $vr11 +0x9f 0x33 0x25 0x70 # CHECK: vsubwod.d.w $vr31, $vr28, $vr12 +0x01 0xc3 0x25 0x70 # CHECK: vsubwod.q.d $vr1, $vr24, $vr16 +0xa3 0x77 0x2e 0x70 # CHECK: vaddwev.h.bu $vr3, $vr29, $vr29 +0xea 0xa9 0x2e 0x70 # CHECK: vaddwev.w.hu $vr10, $vr15, $vr10 +0xb8 0x13 0x2f 0x70 # CHECK: vaddwev.d.wu $vr24, $vr29, $vr4 +0xf1 0x82 0x2f 0x70 # CHECK: vaddwev.q.du $vr17, $vr23, $vr0 +0x79 0x51 0x30 0x70 # CHECK: vsubwev.h.bu $vr25, $vr11, $vr20 +0xf1 0xd1 0x30 0x70 # CHECK: vsubwev.w.hu $vr17, $vr15, $vr20 +0x2a 0x17 0x31 0x70 # CHECK: vsubwev.d.wu $vr10, $vr25, $vr5 +0x7d 0xa0 0x31 0x70 # CHECK: vsubwev.q.du $vr29, $vr3, $vr8 +0x0a 0x64 0x32 0x70 # CHECK: vaddwod.h.bu $vr10, $vr0, $vr25 +0x62 0xdf 0x32 0x70 # CHECK: vaddwod.w.hu $vr2, $vr27, $vr23 +0x02 0x58 0x33 0x70 # CHECK: vaddwod.d.wu $vr2, $vr0, $vr22 +0x40 0x8c 0x33 0x70 # CHECK: vaddwod.q.du $vr0, $vr2, $vr3 +0xee 0x0f 0x34 0x70 # CHECK: vsubwod.h.bu $vr14, $vr31, $vr3 +0x55 0x9c 0x34 0x70 # CHECK: vsubwod.w.hu $vr21, $vr2, $vr7 +0x0b 0x49 0x35 0x70 # CHECK: vsubwod.d.wu $vr11, $vr8, $vr18 +0x9e 0x82 0x35 0x70 # CHECK: vsubwod.q.du $vr30, $vr20, $vr0 +0x93 
0x47 0x3e 0x70 # CHECK: vaddwev.h.bu.b $vr19, $vr28, $vr17 +0xee 0xf9 0x3e 0x70 # CHECK: vaddwev.w.hu.h $vr14, $vr15, $vr30 +0xef 0x28 0x3f 0x70 # CHECK: vaddwev.d.wu.w $vr15, $vr7, $vr10 +0xd3 0xf9 0x3f 0x70 # CHECK: vaddwev.q.du.d $vr19, $vr14, $vr30 +0x4f 0x22 0x40 0x70 # CHECK: vaddwod.h.bu.b $vr15, $vr18, $vr8 +0x73 0x9b 0x40 0x70 # CHECK: vaddwod.w.hu.h $vr19, $vr27, $vr6 +0x67 0x3d 0x41 0x70 # CHECK: vaddwod.d.wu.w $vr7, $vr11, $vr15 +0x00 0xe8 0x41 0x70 # CHECK: vaddwod.q.du.d $vr0, $vr0, $vr26 +0x78 0x56 0x90 0x70 # CHECK: vmulwev.h.b $vr24, $vr19, $vr21 +0xcd 0xca 0x90 0x70 # CHECK: vmulwev.w.h $vr13, $vr22, $vr18 +0xd8 0x36 0x91 0x70 # CHECK: vmulwev.d.w $vr24, $vr22, $vr13 +0xc4 0xfa 0x91 0x70 # CHECK: vmulwev.q.d $vr4, $vr22, $vr30 +0x56 0x63 0x92 0x70 # CHECK: vmulwod.h.b $vr22, $vr26, $vr24 +0x91 0x91 0x92 0x70 # CHECK: vmulwod.w.h $vr17, $vr12, $vr4 +0xf0 0x69 0x93 0x70 # CHECK: vmulwod.d.w $vr16, $vr15, $vr26 +0x03 0x96 0x93 0x70 # CHECK: vmulwod.q.d $vr3, $vr16, $vr5 +0x7f 0x4e 0x98 0x70 # CHECK: vmulwev.h.bu $vr31, $vr19, $vr19 +0xf6 0x97 0x98 0x70 # CHECK: vmulwev.w.hu $vr22, $vr31, $vr5 +0x80 0x78 0x99 0x70 # CHECK: vmulwev.d.wu $vr0, $vr4, $vr30 +0x7f 0xd0 0x99 0x70 # CHECK: vmulwev.q.du $vr31, $vr3, $vr20 +0xf9 0x34 0x9a 0x70 # CHECK: vmulwod.h.bu $vr25, $vr7, $vr13 +0x81 0xb1 0x9a 0x70 # CHECK: vmulwod.w.hu $vr1, $vr12, $vr12 +0xef 0x79 0x9b 0x70 # CHECK: vmulwod.d.wu $vr15, $vr15, $vr30 +0x8d 0x9b 0x9b 0x70 # CHECK: vmulwod.q.du $vr13, $vr28, $vr6 +0x48 0x0f 0xa0 0x70 # CHECK: vmulwev.h.bu.b $vr8, $vr26, $vr3 +0x2a 0x87 0xa0 0x70 # CHECK: vmulwev.w.hu.h $vr10, $vr25, $vr1 +0x09 0x4c 0xa1 0x70 # CHECK: vmulwev.d.wu.w $vr9, $vr0, $vr19 +0x0d 0xdf 0xa1 0x70 # CHECK: vmulwev.q.du.d $vr13, $vr24, $vr23 +0x14 0x38 0xa2 0x70 # CHECK: vmulwod.h.bu.b $vr20, $vr0, $vr14 +0x90 0x8e 0xa2 0x70 # CHECK: vmulwod.w.hu.h $vr16, $vr20, $vr3 +0xe5 0x6e 0xa3 0x70 # CHECK: vmulwod.d.wu.w $vr5, $vr23, $vr27 +0xde 0xf7 0xa3 0x70 # CHECK: vmulwod.q.du.d $vr30, $vr30, $vr29 +0x12 0x20 0xac 0x70 # CHECK: vmaddwev.h.b $vr18, $vr0, $vr8 +0xdd 0x9e 0xac 0x70 # CHECK: vmaddwev.w.h $vr29, $vr22, $vr7 +0xbc 0x7d 0xad 0x70 # CHECK: vmaddwev.d.w $vr28, $vr13, $vr31 +0x65 0xb4 0xad 0x70 # CHECK: vmaddwev.q.d $vr5, $vr3, $vr13 +0x24 0x24 0xae 0x70 # CHECK: vmaddwod.h.b $vr4, $vr1, $vr9 +0x3a 0xe1 0xae 0x70 # CHECK: vmaddwod.w.h $vr26, $vr9, $vr24 +0x7e 0x34 0xaf 0x70 # CHECK: vmaddwod.d.w $vr30, $vr3, $vr13 +0xaf 0xf5 0xaf 0x70 # CHECK: vmaddwod.q.d $vr15, $vr13, $vr29 +0x98 0x16 0xb4 0x70 # CHECK: vmaddwev.h.bu $vr24, $vr20, $vr5 +0x83 0xa0 0xb4 0x70 # CHECK: vmaddwev.w.hu $vr3, $vr4, $vr8 +0x7b 0x12 0xb5 0x70 # CHECK: vmaddwev.d.wu $vr27, $vr19, $vr4 +0x7c 0xf7 0xb5 0x70 # CHECK: vmaddwev.q.du $vr28, $vr27, $vr29 +0x85 0x6a 0xb6 0x70 # CHECK: vmaddwod.h.bu $vr5, $vr20, $vr26 +0xd5 0xab 0xb6 0x70 # CHECK: vmaddwod.w.hu $vr21, $vr30, $vr10 +0x67 0x51 0xb7 0x70 # CHECK: vmaddwod.d.wu $vr7, $vr11, $vr20 +0x5e 0xe2 0xb7 0x70 # CHECK: vmaddwod.q.du $vr30, $vr18, $vr24 +0x24 0x10 0xbc 0x70 # CHECK: vmaddwev.h.bu.b $vr4, $vr1, $vr4 +0x79 0xbd 0xbc 0x70 # CHECK: vmaddwev.w.hu.h $vr25, $vr11, $vr15 +0x0a 0x52 0xbd 0x70 # CHECK: vmaddwev.d.wu.w $vr10, $vr16, $vr20 +0x96 0xde 0xbd 0x70 # CHECK: vmaddwev.q.du.d $vr22, $vr20, $vr23 +0x3f 0x6f 0xbe 0x70 # CHECK: vmaddwod.h.bu.b $vr31, $vr25, $vr27 +0x48 0xe2 0xbe 0x70 # CHECK: vmaddwod.w.hu.h $vr8, $vr18, $vr24 +0xb2 0x29 0xbf 0x70 # CHECK: vmaddwod.d.wu.w $vr18, $vr13, $vr10 +0xaa 0xbc 0xbf 0x70 # CHECK: vmaddwod.q.du.d $vr10, $vr5, $vr15 diff --git 
a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s deleted file mode 100644 index a10845d7422a..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s +++ /dev/null @@ -1,99 +0,0 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s - -## Support for the 'D' extension implies support for 'F' -# ASM-AND-OBJ: fadd.s $fs5, $ft7, $fs1 -# ASM: encoding: [0xfd,0xe5,0x00,0x01] -fadd.s $fs5, $ft7, $fs1 - -# ASM-AND-OBJ: fadd.d $fs1, $fa7, $ft5 -# ASM: encoding: [0xf9,0x34,0x01,0x01] -fadd.d $fs1, $fa7, $ft5 - -# ASM-AND-OBJ: fsub.d $fs5, $fa1, $ft10 -# ASM: encoding: [0x3d,0x48,0x03,0x01] -fsub.d $fs5, $fa1, $ft10 - -# ASM-AND-OBJ: fmul.d $fa4, $fs6, $fa7 -# ASM: encoding: [0xc4,0x1f,0x05,0x01] -fmul.d $fa4, $fs6, $fa7 - -# ASM-AND-OBJ: fdiv.d $fa3, $fs1, $fs4 -# ASM: encoding: [0x23,0x73,0x07,0x01] -fdiv.d $fa3, $fs1, $fs4 - -# ASM-AND-OBJ: fmadd.d $ft13, $fs0, $fs4, $fs0 -# ASM: encoding: [0x15,0x73,0x2c,0x08] -fmadd.d $ft13, $fs0, $fs4, $fs0 - -# ASM-AND-OBJ: fmsub.d $fa6, $ft10, $ft12, $fs3 -# ASM: encoding: [0x46,0xd2,0x6d,0x08] -fmsub.d $fa6, $ft10, $ft12, $fs3 - -# ASM-AND-OBJ: fnmadd.d $fs1, $ft5, $ft11, $fs6 -# ASM: encoding: [0xb9,0x4d,0xaf,0x08] -fnmadd.d $fs1, $ft5, $ft11, $fs6 - -# ASM-AND-OBJ: fnmsub.d $fs6, $fs2, $fa7, $fs0 -# ASM: encoding: [0x5e,0x1f,0xec,0x08] -fnmsub.d $fs6, $fs2, $fa7, $fs0 - -# ASM-AND-OBJ: fmax.d $ft3, $fs2, $ft5 -# ASM: encoding: [0x4b,0x37,0x09,0x01] -fmax.d $ft3, $fs2, $ft5 - -# ASM-AND-OBJ: fmin.d $fa1, $ft5, $fs3 -# ASM: encoding: [0xa1,0x6d,0x0b,0x01] -fmin.d $fa1, $ft5, $fs3 - -# ASM-AND-OBJ: fmaxa.d $fs0, $ft5, $fa4 -# ASM: encoding: [0xb8,0x11,0x0d,0x01] -fmaxa.d $fs0, $ft5, $fa4 - -# ASM-AND-OBJ: fmina.d $ft10, $ft2, $fa0 -# ASM: encoding: [0x52,0x01,0x0f,0x01] -fmina.d $ft10, $ft2, $fa0 - -# ASM-AND-OBJ: fabs.d $ft15, $fa3 -# ASM: encoding: [0x77,0x08,0x14,0x01] -fabs.d $ft15, $fa3 - -# ASM-AND-OBJ: fneg.d $ft3, $fs2 -# ASM: encoding: [0x4b,0x1b,0x14,0x01] -fneg.d $ft3, $fs2 - -# ASM-AND-OBJ: fsqrt.d $fa2, $ft3 -# ASM: encoding: [0x62,0x49,0x14,0x01] -fsqrt.d $fa2, $ft3 - -# ASM-AND-OBJ: frecip.d $fs3, $fs3 -# ASM: encoding: [0x7b,0x5b,0x14,0x01] -frecip.d $fs3, $fs3 - -# ASM-AND-OBJ: frsqrt.d $ft14, $fa3 -# ASM: encoding: [0x76,0x68,0x14,0x01] -frsqrt.d $ft14, $fa3 - -# ASM-AND-OBJ: fscaleb.d $ft4, $ft6, $fs2 -# ASM: encoding: [0xcc,0x69,0x11,0x01] -fscaleb.d $ft4, $ft6, $fs2 - -# ASM-AND-OBJ: flogb.d $ft13, $fs5 -# ASM: encoding: [0xb5,0x2b,0x14,0x01] -flogb.d $ft13, $fs5 - -# ASM-AND-OBJ: fcopysign.d $ft8, $fs2, $fa6 -# ASM: encoding: [0x50,0x1b,0x13,0x01] -fcopysign.d $ft8, $fs2, $fa6 - -# ASM-AND-OBJ: fclass.d $ft11, $fa2 -# ASM: encoding: [0x53,0x38,0x14,0x01] -fclass.d $ft11, $fa2 diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s b/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s deleted file mode 100644 index 1d6b489f33b0..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s +++ /dev/null @@ -1,31 +0,0 @@ -# RUN: llvm-mc %s --triple=loongarch32 
--mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s - -## Support for the 'D' extension implies support for 'F' -# ASM-AND-OBJ: fldgt.s $fa3, $s4, $t1 -# ASM: encoding: [0x63,0x37,0x74,0x38] -fldgt.s $fa3, $s4, $t1 - -# ASM-AND-OBJ: fldgt.d $fs2, $a1, $s8 -# ASM: encoding: [0xba,0xfc,0x74,0x38] -fldgt.d $fs2, $a1, $s8 - -# ASM-AND-OBJ: fldle.d $fa3, $t3, $fp -# ASM: encoding: [0xe3,0xd9,0x75,0x38] -fldle.d $fa3, $t3, $fp - -# ASM-AND-OBJ: fstgt.d $ft5, $a7, $s3 -# ASM: encoding: [0x6d,0xe9,0x76,0x38] -fstgt.d $ft5, $a7, $s3 - -# ASM-AND-OBJ: fstle.d $ft10, $a5, $t1 -# ASM: encoding: [0x32,0xb5,0x77,0x38] -fstle.d $ft10, $a5, $t1 diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-branch.s b/llvm/test/MC/LoongArch/Basic/Float/d-branch.s deleted file mode 100644 index 838b7e9330d7..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/d-branch.s +++ /dev/null @@ -1,15 +0,0 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s - -## Support for the 'D' extension implies support for 'F' -# ASM-AND-OBJ: bceqz $fcc6, 12 -# ASM: encoding: [0xc0,0x0c,0x00,0x48] -bceqz $fcc6, 12 diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-comp.s b/llvm/test/MC/LoongArch/Basic/Float/d-comp.s deleted file mode 100644 index 3ddae6d0567f..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/d-comp.s +++ /dev/null @@ -1,103 +0,0 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s - -## Support for the 'D' extension implies support for 'F' -# ASM-AND-OBJ: fcmp.caf.s $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x10,0x0c] -fcmp.caf.s $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.caf.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x20,0x0c] -fcmp.caf.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.cun.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x24,0x0c] -fcmp.cun.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.ceq.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x22,0x0c] -fcmp.ceq.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.cueq.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x26,0x0c] -fcmp.cueq.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: 
fcmp.clt.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x21,0x0c] -fcmp.clt.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.cult.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x25,0x0c] -fcmp.cult.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.cle.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x23,0x0c] -fcmp.cle.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.cule.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x27,0x0c] -fcmp.cule.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.cne.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x28,0x0c] -fcmp.cne.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.cor.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x2a,0x0c] -fcmp.cor.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.cune.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x04,0x2c,0x0c] -fcmp.cune.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.saf.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x20,0x0c] -fcmp.saf.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.sun.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x24,0x0c] -fcmp.sun.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.seq.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x22,0x0c] -fcmp.seq.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.sueq.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x26,0x0c] -fcmp.sueq.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.slt.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x21,0x0c] -fcmp.slt.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.sult.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x25,0x0c] -fcmp.sult.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.sle.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x23,0x0c] -fcmp.sle.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.sule.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x27,0x0c] -fcmp.sule.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.sne.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x28,0x0c] -fcmp.sne.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.sor.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x2a,0x0c] -fcmp.sor.d $fcc0, $fa0, $fa1 - -# ASM-AND-OBJ: fcmp.sune.d $fcc0, $fa0, $fa1 -# ASM: encoding: [0x00,0x84,0x2c,0x0c] -fcmp.sune.d $fcc0, $fa0, $fa1 diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-conv.s b/llvm/test/MC/LoongArch/Basic/Float/d-conv.s deleted file mode 100644 index fa5a5088e6a7..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/d-conv.s +++ /dev/null @@ -1,99 +0,0 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s - -## Support for the 'D' extension implies support for 'F' -# ASM-AND-OBJ: frint.s $fa5, $ft9 -# ASM: encoding: [0x25,0x46,0x1e,0x01] -frint.s $fa5, $ft9 - -# ASM-AND-OBJ: fcvt.s.d $ft4, $ft11 -# ASM: encoding: [0x6c,0x1a,0x19,0x01] -fcvt.s.d $ft4, $ft11 - -# ASM-AND-OBJ: fcvt.d.s $ft2, $fa6 -# ASM: encoding: [0xca,0x24,0x19,0x01] -fcvt.d.s $ft2, $fa6 - -# ASM-AND-OBJ: ffint.s.l $fa6, $fa5 -# ASM: encoding: [0xa6,0x18,0x1d,0x01] -ffint.s.l $fa6, $fa5 - -# ASM-AND-OBJ: ffint.d.w $fs0, $ft10 -# ASM: encoding: [0x58,0x22,0x1d,0x01] -ffint.d.w $fs0, $ft10 - -# ASM-AND-OBJ: ffint.d.l $ft15, $fs2 -# ASM: encoding: [0x57,0x2b,0x1d,0x01] 
-ffint.d.l $ft15, $fs2
-
-# ASM-AND-OBJ: ftint.w.d $fa3, $ft6
-# ASM: encoding: [0xc3,0x09,0x1b,0x01]
-ftint.w.d $fa3, $ft6
-
-# ASM-AND-OBJ: ftint.l.s $fs7, $fs0
-# ASM: encoding: [0x1f,0x27,0x1b,0x01]
-ftint.l.s $fs7, $fs0
-
-# ASM-AND-OBJ: ftint.l.d $ft8, $fs0
-# ASM: encoding: [0x10,0x2b,0x1b,0x01]
-ftint.l.d $ft8, $fs0
-
-# ASM-AND-OBJ: ftintrm.w.d $fa7, $ft0
-# ASM: encoding: [0x07,0x09,0x1a,0x01]
-ftintrm.w.d $fa7, $ft0
-
-# ASM-AND-OBJ: ftintrm.l.s $fs0, $ft2
-# ASM: encoding: [0x58,0x25,0x1a,0x01]
-ftintrm.l.s $fs0, $ft2
-
-# ASM-AND-OBJ: ftintrm.l.d $ft1, $ft1
-# ASM: encoding: [0x29,0x29,0x1a,0x01]
-ftintrm.l.d $ft1, $ft1
-
-# ASM-AND-OBJ: ftintrp.w.d $ft4, $fa3
-# ASM: encoding: [0x6c,0x48,0x1a,0x01]
-ftintrp.w.d $ft4, $fa3
-
-# ASM-AND-OBJ: ftintrp.l.s $fa0, $ft8
-# ASM: encoding: [0x00,0x66,0x1a,0x01]
-ftintrp.l.s $fa0, $ft8
-
-# ASM-AND-OBJ: ftintrp.l.d $fa4, $fs5
-# ASM: encoding: [0xa4,0x6b,0x1a,0x01]
-ftintrp.l.d $fa4, $fs5
-
-# ASM-AND-OBJ: ftintrz.w.d $fs1, $fs0
-# ASM: encoding: [0x19,0x8b,0x1a,0x01]
-ftintrz.w.d $fs1, $fs0
-
-# ASM-AND-OBJ: ftintrz.l.s $ft15, $fa5
-# ASM: encoding: [0xb7,0xa4,0x1a,0x01]
-ftintrz.l.s $ft15, $fa5
-
-# ASM-AND-OBJ: ftintrz.l.d $fa3, $ft2
-# ASM: encoding: [0x43,0xa9,0x1a,0x01]
-ftintrz.l.d $fa3, $ft2
-
-# ASM-AND-OBJ: ftintrne.w.d $fs7, $ft4
-# ASM: encoding: [0x9f,0xc9,0x1a,0x01]
-ftintrne.w.d $fs7, $ft4
-
-# ASM-AND-OBJ: ftintrne.l.s $ft14, $fs3
-# ASM: encoding: [0x76,0xe7,0x1a,0x01]
-ftintrne.l.s $ft14, $fs3
-
-# ASM-AND-OBJ: ftintrne.l.d $fs4, $fa6
-# ASM: encoding: [0xdc,0xe8,0x1a,0x01]
-ftintrne.l.d $fs4, $fa6
-
-# ASM-AND-OBJ: frint.d $fs5, $fa2
-# ASM: encoding: [0x5d,0x48,0x1e,0x01]
-frint.d $fs5, $fa2
diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s b/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s
deleted file mode 100644
index b38a4461afcb..000000000000
--- a/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s
+++ /dev/null
@@ -1,7 +0,0 @@
-# RUN: not llvm-mc --triple=loongarch32 -mattr=+d %s 2>&1 | FileCheck %s
-
-# CHECK: :[[#@LINE+1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set
-movgr2fr.d $fa0, $a0
-
-# CHECK: :[[#@LINE+1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set
-movfr2gr.d $a0, $fa0
diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-memory.s b/llvm/test/MC/LoongArch/Basic/Float/d-memory.s
deleted file mode 100644
index a8f04cefe059..000000000000
--- a/llvm/test/MC/LoongArch/Basic/Float/d-memory.s
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc %s --triple=loongarch32
[0xbb,0x7d,0x34,0x38] -fldx.d $fs3, $t1, $s8 - -# ASM-AND-OBJ: fstx.d $fa6, $t3, $t5 -# ASM: encoding: [0xe6,0x45,0x3c,0x38] -fstx.d $fa6, $t3, $t5 diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-move.s b/llvm/test/MC/LoongArch/Basic/Float/d-move.s deleted file mode 100644 index c3008add6284..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/d-move.s +++ /dev/null @@ -1,39 +0,0 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding --defsym=LA64=1 \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM,ASM-AND-OBJ64,ASM64 %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj --defsym=LA64=1 \ -# RUN: | llvm-objdump -d --mattr=+d - \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM-AND-OBJ64 %s - -## Support for the 'D' extension implies support for 'F' -# ASM-AND-OBJ: fmov.s $ft5, $ft15 -# ASM: encoding: [0xed,0x96,0x14,0x01] -fmov.s $ft5, $ft15 - -# ASM-AND-OBJ: fmov.d $fs6, $ft1 -# ASM: encoding: [0x3e,0x99,0x14,0x01] -fmov.d $fs6, $ft1 - -# ASM-AND-OBJ: fsel $ft10, $ft12, $ft13, $fcc4 -# ASM: encoding: [0x92,0x56,0x02,0x0d] -fsel $ft10, $ft12, $ft13, $fcc4 - -# ASM-AND-OBJ64: movgr2frh.w $ft15, $s3 -# ASM64: encoding: [0x57,0xaf,0x14,0x01] -movgr2frh.w $ft15, $s3 - -.ifdef LA64 - -# ASM-AND-OBJ64: movgr2fr.d $fs6, $a7 -# ASM64: encoding: [0x7e,0xa9,0x14,0x01] -movgr2fr.d $fs6, $a7 - -# ASM-AND-OBJ64: movfr2gr.d $s3, $ft9 -# ASM64: encoding: [0x3a,0xba,0x14,0x01] -movfr2gr.d $s3, $ft9 - -.endif diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s deleted file mode 100644 index a5873a54511a..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s +++ /dev/null @@ -1,94 +0,0 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s - -# ASM-AND-OBJ: fadd.s $fs5, $ft7, $fs1 -# ASM: encoding: [0xfd,0xe5,0x00,0x01] -fadd.s $fs5, $ft7, $fs1 - -# ASM-AND-OBJ: fsub.s $ft6, $fa6, $fs7 -# ASM: encoding: [0xce,0xfc,0x02,0x01] -fsub.s $ft6, $fa6, $fs7 - -# ASM-AND-OBJ: fmul.s $fa0, $fa7, $ft9 -# ASM: encoding: [0xe0,0xc4,0x04,0x01] -fmul.s $fa0, $fa7, $ft9 - -# ASM-AND-OBJ: fdiv.s $ft12, $fs0, $ft11 -# ASM: encoding: [0x14,0xcf,0x06,0x01] -fdiv.s $ft12, $fs0, $ft11 - -# ASM-AND-OBJ: fmadd.s $fa3, $ft8, $fa3, $ft7 -# ASM: encoding: [0x03,0x8e,0x17,0x08] -fmadd.s $fa3, $ft8, $fa3, $ft7 - -# ASM-AND-OBJ: fmsub.s $ft15, $ft3, $ft13, $fa4 -# ASM: encoding: [0x77,0x55,0x52,0x08] -fmsub.s $ft15, $ft3, $ft13, $fa4 - -# ASM-AND-OBJ: fnmadd.s $fs5, $fa1, $fs0, $ft12 -# ASM: encoding: [0x3d,0x60,0x9a,0x08] -fnmadd.s $fs5, $fa1, $fs0, $ft12 - -# ASM-AND-OBJ: fnmsub.s $ft0, $fa4, $fs0, $fs1 -# ASM: encoding: [0x88,0xe0,0xdc,0x08] -fnmsub.s $ft0, $fa4, $fs0, $fs1 - -# ASM-AND-OBJ: fmax.s $ft14, $fa6, $fs3 -# ASM: encoding: 
[0xd6,0xec,0x08,0x01] -fmax.s $ft14, $fa6, $fs3 - -# ASM-AND-OBJ: fmin.s $ft6, $ft2, $ft11 -# ASM: encoding: [0x4e,0xcd,0x0a,0x01] -fmin.s $ft6, $ft2, $ft11 - -# ASM-AND-OBJ: fmaxa.s $ft1, $fs3, $fs7 -# ASM: encoding: [0x69,0xff,0x0c,0x01] -fmaxa.s $ft1, $fs3, $fs7 - -# ASM-AND-OBJ: fmina.s $ft7, $ft10, $fa1 -# ASM: encoding: [0x4f,0x86,0x0e,0x01] -fmina.s $ft7, $ft10, $fa1 - -# ASM-AND-OBJ: fabs.s $fs4, $ft4 -# ASM: encoding: [0x9c,0x05,0x14,0x01] -fabs.s $fs4, $ft4 - -# ASM-AND-OBJ: fneg.s $ft13, $fs0 -# ASM: encoding: [0x15,0x17,0x14,0x01] -fneg.s $ft13, $fs0 - -# ASM-AND-OBJ: fsqrt.s $fs3, $ft10 -# ASM: encoding: [0x5b,0x46,0x14,0x01] -fsqrt.s $fs3, $ft10 - -# ASM-AND-OBJ: frecip.s $ft9, $fs3 -# ASM: encoding: [0x71,0x57,0x14,0x01] -frecip.s $ft9, $fs3 - -# ASM-AND-OBJ: frsqrt.s $fs1, $ft4 -# ASM: encoding: [0x99,0x65,0x14,0x01] -frsqrt.s $fs1, $ft4 - -# ASM-AND-OBJ: fscaleb.s $ft13, $ft15, $fa6 -# ASM: encoding: [0xf5,0x9a,0x10,0x01] -fscaleb.s $ft13, $ft15, $fa6 - -# ASM-AND-OBJ: flogb.s $fs7, $ft15 -# ASM: encoding: [0xff,0x26,0x14,0x01] -flogb.s $fs7, $ft15 - -# ASM-AND-OBJ: fcopysign.s $ft5, $fs0, $ft15 -# ASM: encoding: [0x0d,0xdf,0x12,0x01] -fcopysign.s $ft5, $fs0, $ft15 - -# ASM-AND-OBJ: fclass.s $ft12, $ft1 -# ASM: encoding: [0x34,0x35,0x14,0x01] -fclass.s $ft12, $ft1 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s b/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s deleted file mode 100644 index bfff92ff8a06..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s +++ /dev/null @@ -1,26 +0,0 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s - -# ASM-AND-OBJ: fldgt.s $fa3, $s4, $t1 -# ASM: encoding: [0x63,0x37,0x74,0x38] -fldgt.s $fa3, $s4, $t1 - -# ASM-AND-OBJ: fldle.s $fs0, $s6, $t5 -# ASM: encoding: [0xb8,0x47,0x75,0x38] -fldle.s $fs0, $s6, $t5 - -# ASM-AND-OBJ: fstgt.s $fs7, $t1, $s7 -# ASM: encoding: [0xbf,0x79,0x76,0x38] -fstgt.s $fs7, $t1, $s7 - -# ASM-AND-OBJ: fstle.s $ft5, $t1, $a3 -# ASM: encoding: [0xad,0x1d,0x77,0x38] -fstle.s $ft5, $t1, $a3 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-branch.s b/llvm/test/MC/LoongArch/Basic/Float/f-branch.s deleted file mode 100644 index 583008b5a4f6..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/f-branch.s +++ /dev/null @@ -1,18 +0,0 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ -# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s - -# ASM-AND-OBJ: bceqz $fcc6, 12 -# ASM: encoding: [0xc0,0x0c,0x00,0x48] -bceqz $fcc6, 12 - -# ASM-AND-OBJ: bcnez $fcc6, 72 -# ASM: encoding: [0xc0,0x49,0x00,0x48] -bcnez $fcc6, 72 
diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-comp.s b/llvm/test/MC/LoongArch/Basic/Float/f-comp.s
deleted file mode 100644
index cc4e1470d525..000000000000
--- a/llvm/test/MC/LoongArch/Basic/Float/f-comp.s
+++ /dev/null
@@ -1,98 +0,0 @@
-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \
-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s
-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \
-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s
-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \
-# RUN: | llvm-objdump -d --mattr=+f - \
-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s
-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \
-# RUN: | llvm-objdump -d --mattr=+f - \
-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s
-
-# ASM-AND-OBJ: fcmp.caf.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x10,0x0c]
-fcmp.caf.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.cun.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x14,0x0c]
-fcmp.cun.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.ceq.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x12,0x0c]
-fcmp.ceq.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.cueq.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x16,0x0c]
-fcmp.cueq.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.clt.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x11,0x0c]
-fcmp.clt.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.cult.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x15,0x0c]
-fcmp.cult.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.cle.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x13,0x0c]
-fcmp.cle.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.cule.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x17,0x0c]
-fcmp.cule.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.cne.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x18,0x0c]
-fcmp.cne.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.cor.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x1a,0x0c]
-fcmp.cor.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.cune.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x04,0x1c,0x0c]
-fcmp.cune.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.saf.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x10,0x0c]
-fcmp.saf.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.sun.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x14,0x0c]
-fcmp.sun.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.seq.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x12,0x0c]
-fcmp.seq.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.sueq.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x16,0x0c]
-fcmp.sueq.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.slt.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x11,0x0c]
-fcmp.slt.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.sult.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x15,0x0c]
-fcmp.sult.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.sle.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x13,0x0c]
-fcmp.sle.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.sule.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x17,0x0c]
-fcmp.sule.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.sne.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x18,0x0c]
-fcmp.sne.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.sor.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x1a,0x0c]
-fcmp.sor.s $fcc0, $fa0, $fa1
-
-# ASM-AND-OBJ: fcmp.sune.s $fcc0, $fa0, $fa1
-# ASM: encoding: [0x00,0x84,0x1c,0x0c]
-fcmp.sune.s $fcc0, $fa0, $fa1
diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-conv.s b/llvm/test/MC/LoongArch/Basic/Float/f-conv.s
deleted file mode 100644
index db44077dfc38..000000000000
--- a/llvm/test/MC/LoongArch/Basic/Float/f-conv.s
+++ /dev/null
@@ -1,38 +0,0 @@
-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \
-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s
-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \
-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s
-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \
-# RUN: | llvm-objdump -d --mattr=+f - \
-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s
-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \
-# RUN: | llvm-objdump -d --mattr=+f - \
-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s
-
-# ASM-AND-OBJ: ffint.s.w $fs6, $fa5
-# ASM: encoding: [0xbe,0x10,0x1d,0x01]
-ffint.s.w $fs6, $fa5
-
-# ASM-AND-OBJ: ftint.w.s $ft13, $ft5
-# ASM: encoding: [0xb5,0x05,0x1b,0x01]
-ftint.w.s $ft13, $ft5
-
-# ASM-AND-OBJ: ftintrm.w.s $ft8, $ft8
-# ASM: encoding: [0x10,0x06,0x1a,0x01]
-ftintrm.w.s $ft8, $ft8
-
-# ASM-AND-OBJ: ftintrp.w.s $ft6, $fs7
-# ASM: encoding: [0xee,0x47,0x1a,0x01]
-ftintrp.w.s $ft6, $fs7
-
-# ASM-AND-OBJ: ftintrz.w.s $fa4, $fs5
-# ASM: encoding: [0xa4,0x87,0x1a,0x01]
-ftintrz.w.s $fa4, $fs5
-
-# ASM-AND-OBJ: ftintrne.w.s $fa4, $ft9
-# ASM: encoding: [0x24,0xc6,0x1a,0x01]
-ftintrne.w.s $fa4, $ft9
-
-# ASM-AND-OBJ: frint.s $fa5, $ft9
-# ASM: encoding: [0x25,0x46,0x1e,0x01]
-frint.s $fa5, $ft9
diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-invalid.s b/llvm/test/MC/LoongArch/Basic/Float/f-invalid.s
deleted file mode 100644
index 2ab91b3f1a77..000000000000
--- a/llvm/test/MC/LoongArch/Basic/Float/f-invalid.s
+++ /dev/null
@@ -1,4 +0,0 @@
-# RUN: not llvm-mc --triple=loongarch32 -mattr=+f %s 2>&1 | FileCheck %s
-
-# CHECK: :[[#@LINE+1]]:1: error: instruction requires the following: 'D' (Double-Precision Floating-Point)
-fadd.d $fa0, $fa0, $fa0
diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-memory.s b/llvm/test/MC/LoongArch/Basic/Float/f-memory.s
deleted file mode 100644
index b5fbd9abd2ba..000000000000
--- a/llvm/test/MC/LoongArch/Basic/Float/f-memory.s
+++ /dev/null
@@ -1,26 +0,0 @@
-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \
-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s
-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \
-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s
-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \
-# RUN: | llvm-objdump -d --mattr=+f - \
-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s
-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \
-# RUN: | llvm-objdump -d --mattr=+f - \
-# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s
-
-# ASM-AND-OBJ: fld.s $ft15, $t3, 250
-# ASM: encoding: [0xf7,0xe9,0x03,0x2b]
-fld.s $ft15, $t3, 250
-
-# ASM-AND-OBJ: fst.s $fs6, $t7, 230
-# ASM: encoding: [0x7e,0x9a,0x43,0x2b]
-fst.s $fs6, $t7, 230
-
-# ASM-AND-OBJ: fldx.s $fa1, $t3, $t7
-# ASM: encoding: [0xe1,0x4d,0x30,0x38]
-fldx.s $fa1, $t3, $t7
-
-# ASM-AND-OBJ: fstx.s $fs2, $sp, $fp
-# ASM: encoding: [0x7a,0x58,0x38,0x38]
-fstx.s $fs2, $sp, $fp
diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-move.s b/llvm/test/MC/LoongArch/Basic/Float/f-move.s
deleted file mode 100644
index da9107686d35..000000000000
--- a/llvm/test/MC/LoongArch/Basic/Float/f-move.s
+++ /dev/null
@@ -1,74 +0,0 @@
-# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \
-# RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s
-# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \
-# RUN: | FileCheck
--check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ -# RUN: | llvm-objdump -d --mattr=+f - \ -# RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s - -# ASM-AND-OBJ: fmov.s $ft5, $ft15 -# ASM: encoding: [0xed,0x96,0x14,0x01] -fmov.s $ft5, $ft15 - -# ASM-AND-OBJ: fsel $ft10, $ft12, $ft13, $fcc4 -# ASM: encoding: [0x92,0x56,0x02,0x0d] -fsel $ft10, $ft12, $ft13, $fcc4 - -# ASM-AND-OBJ: movgr2fr.w $fa6, $tp -# ASM: encoding: [0x46,0xa4,0x14,0x01] -movgr2fr.w $fa6, $tp - -# ASM-AND-OBJ: movfr2gr.s $a6, $ft14 -# ASM: encoding: [0xca,0xb6,0x14,0x01] -movfr2gr.s $a6, $ft14 - -# ASM-AND-OBJ: movgr2fcsr $fcsr0, $a0 -# ASM: encoding: [0x80,0xc0,0x14,0x01] -movgr2fcsr $fcsr0, $a0 - -# ASM-AND-OBJ: movfcsr2gr $a0, $fcsr0 -# ASM: encoding: [0x04,0xc8,0x14,0x01] -movfcsr2gr $a0, $fcsr0 - -# ASM-AND-OBJ: movgr2fcsr $fcsr1, $a0 -# ASM: encoding: [0x81,0xc0,0x14,0x01] -movgr2fcsr $fcsr1, $a0 - -# ASM-AND-OBJ: movfcsr2gr $a0, $fcsr1 -# ASM: encoding: [0x24,0xc8,0x14,0x01] -movfcsr2gr $a0, $fcsr1 - -# ASM-AND-OBJ: movgr2fcsr $fcsr2, $a0 -# ASM: encoding: [0x82,0xc0,0x14,0x01] -movgr2fcsr $fcsr2, $a0 - -# ASM-AND-OBJ: movfcsr2gr $a0, $fcsr2 -# ASM: encoding: [0x44,0xc8,0x14,0x01] -movfcsr2gr $a0, $fcsr2 - -# ASM-AND-OBJ: movgr2fcsr $fcsr3, $a0 -# ASM: encoding: [0x83,0xc0,0x14,0x01] -movgr2fcsr $fcsr3, $a0 - -# ASM-AND-OBJ: movfcsr2gr $a0, $fcsr3 -# ASM: encoding: [0x64,0xc8,0x14,0x01] -movfcsr2gr $a0, $fcsr3 - -# ASM-AND-OBJ: movfr2cf $fcc4, $ft3 -# ASM: encoding: [0x64,0xd1,0x14,0x01] -movfr2cf $fcc4, $ft3 - -# ASM-AND-OBJ: movcf2fr $ft8, $fcc0 -# ASM: encoding: [0x10,0xd4,0x14,0x01] -movcf2fr $ft8, $fcc0 - -# ASM-AND-OBJ: movgr2cf $fcc5, $ra -# ASM: encoding: [0x25,0xd8,0x14,0x01] -movgr2cf $fcc5, $ra - -# ASM-AND-OBJ: movcf2gr $r21, $fcc7 -# ASM: encoding: [0xf5,0xdc,0x14,0x01] -movcf2gr $r21, $fcc7 diff --git a/llvm/test/MC/LoongArch/Basic/Integer/arith.s b/llvm/test/MC/LoongArch/Basic/Integer/arith.s deleted file mode 100644 index bfb3a4c11eb2..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/arith.s +++ /dev/null @@ -1,212 +0,0 @@ -## Test valid arithmetic operation instructions - -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s - -############################################################# -## Instructions for both loongarch32 and loongarch64 -############################################################# - -# CHECK-ASM-AND-OBJ: add.w $a5, $ra, $s8 -# CHECK-ASM: encoding: [0x29,0x7c,0x10,0x00] -add.w $a5, $ra, $s8 - -# CHECK-ASM-AND-OBJ: sub.w $r21, $s2, $t7 -# CHECK-ASM: encoding: [0x35,0x4f,0x11,0x00] -sub.w $r21, $s2, $t7 - -# CHECK-ASM-AND-OBJ: addi.w $a1, $a3, 246 -# CHECK-ASM: encoding: [0xe5,0xd8,0x83,0x02] -addi.w $a1, $a3, 246 - -# CHECK-ASM-AND-OBJ: alsl.w $tp, $t5, $tp, 4 -# CHECK-ASM: encoding: [0x22,0x8a,0x05,0x00] -alsl.w $tp, $t5, 
$tp, 4 - -# CHECK-ASM-AND-OBJ: lu12i.w $t4, 49 -# CHECK-ASM: encoding: [0x30,0x06,0x00,0x14] -lu12i.w $t4, 49 - -# CHECK-ASM-AND-OBJ: lu12i.w $a0, -1 -# CHECK-ASM: encoding: [0xe4,0xff,0xff,0x15] -lu12i.w $a0, -1 - -# CHECK-ASM-AND-OBJ: slt $s6, $s3, $tp -# CHECK-ASM: encoding: [0x5d,0x0b,0x12,0x00] -slt $s6, $s3, $tp - -# CHECK-ASM-AND-OBJ: sltu $a7, $r21, $s6 -# CHECK-ASM: encoding: [0xab,0xf6,0x12,0x00] -sltu $a7, $r21, $s6 - -# CHECK-ASM-AND-OBJ: slti $s4, $ra, 235 -# CHECK-ASM: encoding: [0x3b,0xac,0x03,0x02] -slti $s4, $ra, 235 - -# CHECK-ASM-AND-OBJ: sltui $zero, $a4, 162 -# CHECK-ASM: encoding: [0x00,0x89,0x42,0x02] -sltui $zero, $a4, 162 - -# CHECK-ASM-AND-OBJ: pcaddi $a5, 187 -# CHECK-ASM: encoding: [0x69,0x17,0x00,0x18] -pcaddi $a5, 187 - -# CHECK-ASM-AND-OBJ: pcaddu12i $zero, 37 -# CHECK-ASM: encoding: [0xa0,0x04,0x00,0x1c] -pcaddu12i $zero, 37 - -# CHECK-ASM-AND-OBJ: pcalau12i $a6, 89 -# CHECK-ASM: encoding: [0x2a,0x0b,0x00,0x1a] -pcalau12i $a6, 89 - -# CHECK-ASM-AND-OBJ: and $t7, $s8, $ra -# CHECK-ASM: encoding: [0xf3,0x87,0x14,0x00] -and $t7, $s8, $ra - -# CHECK-ASM-AND-OBJ: or $t5, $t4, $s7 -# CHECK-ASM: encoding: [0x11,0x7a,0x15,0x00] -or $t5, $t4, $s7 - -# CHECK-ASM-AND-OBJ: nor $a1, $t6, $a1 -# CHECK-ASM: encoding: [0x45,0x16,0x14,0x00] -nor $a1, $t6, $a1 - -# CHECK-ASM-AND-OBJ: xor $t3, $t7, $a4 -# CHECK-ASM: encoding: [0x6f,0xa2,0x15,0x00] -xor $t3, $t7, $a4 - -# CHECK-ASM-AND-OBJ: andn $s5, $s2, $a1 -# CHECK-ASM: encoding: [0x3c,0x97,0x16,0x00] -andn $s5, $s2, $a1 - -# CHECK-ASM-AND-OBJ: orn $tp, $sp, $s2 -# CHECK-ASM: encoding: [0x62,0x64,0x16,0x00] -orn $tp, $sp, $s2 - -# CHECK-ASM-AND-OBJ: andi $s2, $zero, 106 -# CHECK-ASM: encoding: [0x19,0xa8,0x41,0x03] -andi $s2, $zero, 106 - -# CHECK-ASM-AND-OBJ: ori $t5, $a1, 47 -# CHECK-ASM: encoding: [0xb1,0xbc,0x80,0x03] -ori $t5, $a1, 47 - -# CHECK-ASM-AND-OBJ: xori $t6, $s0, 99 -# CHECK-ASM: encoding: [0xf2,0x8e,0xc1,0x03] -xori $t6, $s0, 99 - -# CHECK-ASM-AND-OBJ: mul.w $a0, $t6, $sp -# CHECK-ASM: encoding: [0x44,0x0e,0x1c,0x00] -mul.w $a0, $t6, $sp - -# CHECK-ASM-AND-OBJ: mulh.w $s4, $s0, $zero -# CHECK-ASM: encoding: [0xfb,0x82,0x1c,0x00] -mulh.w $s4, $s0, $zero - -# CHECK-ASM-AND-OBJ: mulh.wu $a6, $t5, $s1 -# CHECK-ASM: encoding: [0x2a,0x62,0x1d,0x00] -mulh.wu $a6, $t5, $s1 - -# CHECK-ASM-AND-OBJ: div.w $s7, $t1, $s2 -# CHECK-ASM: encoding: [0xbe,0x65,0x20,0x00] -div.w $s7, $t1, $s2 - -# CHECK-ASM-AND-OBJ: mod.w $ra, $s3, $a6 -# CHECK-ASM: encoding: [0x41,0xab,0x20,0x00] -mod.w $ra, $s3, $a6 - -# CHECK-ASM-AND-OBJ: div.wu $t7, $s0, $zero -# CHECK-ASM: encoding: [0xf3,0x02,0x21,0x00] -div.wu $t7, $s0, $zero - -# CHECK-ASM-AND-OBJ: mod.wu $s4, $a5, $t5 -# CHECK-ASM: encoding: [0x3b,0xc5,0x21,0x00] -mod.wu $s4, $a5, $t5 - - -############################################################# -## Instructions only for loongarch64 -############################################################# - -.ifdef LA64 - -# CHECK64-ASM-AND-OBJ: add.d $tp, $t6, $s4 -# CHECK64-ASM: encoding: [0x42,0xee,0x10,0x00] -add.d $tp, $t6, $s4 - -# CHECK64-ASM-AND-OBJ: sub.d $a3, $t0, $a3 -# CHECK64-ASM: encoding: [0x87,0x9d,0x11,0x00] -sub.d $a3, $t0, $a3 - -# CHECK64-ASM-AND-OBJ: addi.d $s5, $a2, 75 -# CHECK64-ASM: encoding: [0xdc,0x2c,0xc1,0x02] -addi.d $s5, $a2, 75 - -# CHECK64-ASM-AND-OBJ: addu16i.d $a5, $s0, 23 -# CHECK64-ASM: encoding: [0xe9,0x5e,0x00,0x10] -addu16i.d $a5, $s0, 23 - -# CHECK64-ASM-AND-OBJ: alsl.wu $t7, $a4, $s2, 1 -# CHECK64-ASM: encoding: [0x13,0x65,0x06,0x00] -alsl.wu $t7, $a4, $s2, 1 - -# CHECK64-ASM-AND-OBJ: alsl.d $t5, $a7, 
$a1, 3 -# CHECK64-ASM: encoding: [0x71,0x15,0x2d,0x00] -alsl.d $t5, $a7, $a1, 3 - -# CHECK64-ASM-AND-OBJ: lu32i.d $sp, 196 -# CHECK64-ASM: encoding: [0x83,0x18,0x00,0x16] -lu32i.d $sp, 196 - -# CHECK64-ASM-AND-OBJ: lu52i.d $t1, $a0, 195 -# CHECK64-ASM: encoding: [0x8d,0x0c,0x03,0x03] -lu52i.d $t1, $a0, 195 - -# CHECK64-ASM-AND-OBJ: pcaddu18i $t0, 26 -# CHECK64-ASM: encoding: [0x4c,0x03,0x00,0x1e] -pcaddu18i $t0, 26 - -# CHECK64-ASM-AND-OBJ: mul.d $ra, $t2, $s1 -# CHECK64-ASM: encoding: [0xc1,0xe1,0x1d,0x00] -mul.d $ra, $t2, $s1 - -# CHECK64-ASM-AND-OBJ: mulh.d $s5, $ra, $s4 -# CHECK64-ASM: encoding: [0x3c,0x6c,0x1e,0x00] -mulh.d $s5, $ra, $s4 - -# CHECK64-ASM-AND-OBJ: mulh.du $t1, $s4, $s6 -# CHECK64-ASM: encoding: [0x6d,0xf7,0x1e,0x00] -mulh.du $t1, $s4, $s6 - -# CHECK64-ASM-AND-OBJ: mulw.d.w $s4, $a2, $t5 -# CHECK64-ASM: encoding: [0xdb,0x44,0x1f,0x00] -mulw.d.w $s4, $a2, $t5 - -# CHECK64-ASM-AND-OBJ: mulw.d.wu $t5, $fp, $s7 -# CHECK64-ASM: encoding: [0xd1,0xfa,0x1f,0x00] -mulw.d.wu $t5, $fp, $s7 - -# CHECK64-ASM-AND-OBJ: div.d $s0, $a2, $r21 -# CHECK64-ASM: encoding: [0xd7,0x54,0x22,0x00] -div.d $s0, $a2, $r21 - -# CHECK64-ASM-AND-OBJ: mod.d $t4, $sp, $t3 -# CHECK64-ASM: encoding: [0x70,0xbc,0x22,0x00] -mod.d $t4, $sp, $t3 - -# CHECK64-ASM-AND-OBJ: div.du $s8, $s1, $t2 -# CHECK64-ASM: encoding: [0x1f,0x3b,0x23,0x00] -div.du $s8, $s1, $t2 - -# CHECK64-ASM-AND-OBJ: mod.du $s2, $s0, $s1 -# CHECK64-ASM: encoding: [0xf9,0xe2,0x23,0x00] -mod.du $s2, $s0, $s1 - -.endif - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s b/llvm/test/MC/LoongArch/Basic/Integer/atomic.s deleted file mode 100644 index 64274018081c..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/atomic.s +++ /dev/null @@ -1,185 +0,0 @@ -## Test valid atomic memory access instructions. 
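## For context: ll.w/sc.w form a load-linked/store-conditional pair, and
## sc.w writes 1 (success) or 0 (failure) back into its rd operand. They are
## normally used in a retry loop; a minimal sketch of an atomic word add
## (register choices here are illustrative, not part of the original test):
##
##   1:
##     ll.w   $t0, $a0, 0    # load-linked from [$a0]
##     add.w  $t1, $t0, $a1  # compute the new value
##     sc.w   $t1, $a0, 0    # conditional store; $t1 becomes the success flag
##     beqz   $t1, 1b        # reservation was lost, retry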
- -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s - -############################################################# -## Instructions for both loongarch32 and loongarch64 -############################################################# - -# CHECK-ASM-AND-OBJ: ll.w $tp, $s4, 220 -# CHECK-ASM: encoding: [0x62,0xdf,0x00,0x20] -ll.w $tp, $s4, 220 - -# CHECK-ASM-AND-OBJ: sc.w $t7, $t2, 56 -# CHECK-ASM: encoding: [0xd3,0x39,0x00,0x21] -sc.w $t7, $t2, 56 - - - -############################################################# -## Instructions only for loongarch64 -############################################################# - -.ifdef LA64 - -# CHECK64-ASM-AND-OBJ: amswap.w $a2, $t0, $s1 -# CHECK64-ASM: encoding: [0x06,0x33,0x60,0x38] -amswap.w $a2, $t0, $s1 - -# CHECK64-ASM-AND-OBJ: amswap.d $tp, $t2, $fp -# CHECK64-ASM: encoding: [0xc2,0xba,0x60,0x38] -amswap.d $tp, $t2, $fp - -# CHECK64-ASM-AND-OBJ: amadd.w $a4, $t0, $r21 -# CHECK64-ASM: encoding: [0xa8,0x32,0x61,0x38] -amadd.w $a4, $t0, $r21 - -# CHECK64-ASM-AND-OBJ: amadd.d $a1, $t5, $s6 -# CHECK64-ASM: encoding: [0xa5,0xc7,0x61,0x38] -amadd.d $a1, $t5, $s6 - -# CHECK64-ASM-AND-OBJ: amand.w $a0, $t7, $fp -# CHECK64-ASM: encoding: [0xc4,0x4e,0x62,0x38] -amand.w $a0, $t7, $fp - -# CHECK64-ASM-AND-OBJ: amand.d $a6, $t6, $s6 -# CHECK64-ASM: encoding: [0xaa,0xcb,0x62,0x38] -amand.d $a6, $t6, $s6 - -# CHECK64-ASM-AND-OBJ: amor.w $a2, $t4, $s0 -# CHECK64-ASM: encoding: [0xe6,0x42,0x63,0x38] -amor.w $a2, $t4, $s0 - -# CHECK64-ASM-AND-OBJ: amor.d $sp, $t4, $s1 -# CHECK64-ASM: encoding: [0x03,0xc3,0x63,0x38] -amor.d $sp, $t4, $s1 - -# CHECK64-ASM-AND-OBJ: amxor.w $tp, $t3, $s0 -# CHECK64-ASM: encoding: [0xe2,0x3e,0x64,0x38] -amxor.w $tp, $t3, $s0 - -# CHECK64-ASM-AND-OBJ: amxor.d $a4, $t8, $s5 -# CHECK64-ASM: encoding: [0x88,0xd3,0x64,0x38] -amxor.d $a4, $t8, $s5 - -# CHECK64-ASM-AND-OBJ: ammax.w $ra, $a7, $s0 -# CHECK64-ASM: encoding: [0xe1,0x2e,0x65,0x38] -ammax.w $ra, $a7, $s0 - -# CHECK64-ASM-AND-OBJ: ammax.d $a5, $t8, $s4 -# CHECK64-ASM: encoding: [0x69,0xd3,0x65,0x38] -ammax.d $a5, $t8, $s4 - -# CHECK64-ASM-AND-OBJ: ammin.w $a5, $t2, $s0 -# CHECK64-ASM: encoding: [0xe9,0x3a,0x66,0x38] -ammin.w $a5, $t2, $s0 - -# CHECK64-ASM-AND-OBJ: ammin.d $a5, $t1, $fp -# CHECK64-ASM: encoding: [0xc9,0xb6,0x66,0x38] -ammin.d $a5, $t1, $fp - -# CHECK64-ASM-AND-OBJ: ammax.wu $a5, $a7, $fp -# CHECK64-ASM: encoding: [0xc9,0x2e,0x67,0x38] -ammax.wu $a5, $a7, $fp - -# CHECK64-ASM-AND-OBJ: ammax.du $a2, $t4, $s2 -# CHECK64-ASM: encoding: [0x26,0xc3,0x67,0x38] -ammax.du $a2, $t4, $s2 - -# CHECK64-ASM-AND-OBJ: ammin.wu $a4, $t6, $s7 -# CHECK64-ASM: encoding: [0xc8,0x4b,0x68,0x38] -ammin.wu $a4, $t6, $s7 - -# CHECK64-ASM-AND-OBJ: ammin.du $a3, $t4, $s2 -# CHECK64-ASM: encoding: [0x27,0xc3,0x68,0x38] -ammin.du $a3, $t4, $s2 - -# CHECK64-ASM-AND-OBJ: amswap_db.w $a2, $t0, $s1 -# CHECK64-ASM: encoding: [0x06,0x33,0x69,0x38] -amswap_db.w $a2, $t0, $s1 - -# CHECK64-ASM-AND-OBJ: amswap_db.d $tp, $t2, $fp -# CHECK64-ASM: encoding: 
[0xc2,0xba,0x69,0x38] -amswap_db.d $tp, $t2, $fp - -# CHECK64-ASM-AND-OBJ: amadd_db.w $a4, $t0, $r21 -# CHECK64-ASM: encoding: [0xa8,0x32,0x6a,0x38] -amadd_db.w $a4, $t0, $r21 - -# CHECK64-ASM-AND-OBJ: amadd_db.d $a1, $t5, $s6 -# CHECK64-ASM: encoding: [0xa5,0xc7,0x6a,0x38] -amadd_db.d $a1, $t5, $s6 - -# CHECK64-ASM-AND-OBJ: amand_db.w $a0, $t7, $fp -# CHECK64-ASM: encoding: [0xc4,0x4e,0x6b,0x38] -amand_db.w $a0, $t7, $fp - -# CHECK64-ASM-AND-OBJ: amand_db.d $a6, $t6, $s6 -# CHECK64-ASM: encoding: [0xaa,0xcb,0x6b,0x38] -amand_db.d $a6, $t6, $s6 - -# CHECK64-ASM-AND-OBJ: amor_db.w $a2, $t4, $s0 -# CHECK64-ASM: encoding: [0xe6,0x42,0x6c,0x38] -amor_db.w $a2, $t4, $s0 - -# CHECK64-ASM-AND-OBJ: amor_db.d $sp, $t4, $s1 -# CHECK64-ASM: encoding: [0x03,0xc3,0x6c,0x38] -amor_db.d $sp, $t4, $s1 - -# CHECK64-ASM-AND-OBJ: amxor_db.w $tp, $t3, $s0 -# CHECK64-ASM: encoding: [0xe2,0x3e,0x6d,0x38] -amxor_db.w $tp, $t3, $s0 - -# CHECK64-ASM-AND-OBJ: amxor_db.d $a4, $t8, $s5 -# CHECK64-ASM: encoding: [0x88,0xd3,0x6d,0x38] -amxor_db.d $a4, $t8, $s5 - -# CHECK64-ASM-AND-OBJ: ammax_db.w $ra, $a7, $s0 -# CHECK64-ASM: encoding: [0xe1,0x2e,0x6e,0x38] -ammax_db.w $ra, $a7, $s0 - -# CHECK64-ASM-AND-OBJ: ammax_db.d $a5, $t8, $s4 -# CHECK64-ASM: encoding: [0x69,0xd3,0x6e,0x38] -ammax_db.d $a5, $t8, $s4 - -# CHECK64-ASM-AND-OBJ: ammin_db.w $a5, $t2, $s0 -# CHECK64-ASM: encoding: [0xe9,0x3a,0x6f,0x38] -ammin_db.w $a5, $t2, $s0 - -# CHECK64-ASM-AND-OBJ: ammin_db.d $a5, $t1, $fp -# CHECK64-ASM: encoding: [0xc9,0xb6,0x6f,0x38] -ammin_db.d $a5, $t1, $fp - -# CHECK64-ASM-AND-OBJ: ammax_db.wu $a5, $a7, $fp -# CHECK64-ASM: encoding: [0xc9,0x2e,0x70,0x38] -ammax_db.wu $a5, $a7, $fp - -# CHECK64-ASM-AND-OBJ: ammax_db.du $a2, $t4, $s2 -# CHECK64-ASM: encoding: [0x26,0xc3,0x70,0x38] -ammax_db.du $a2, $t4, $s2 - -# CHECK64-ASM-AND-OBJ: ammin_db.wu $a4, $t6, $s7 -# CHECK64-ASM: encoding: [0xc8,0x4b,0x71,0x38] -ammin_db.wu $a4, $t6, $s7 - -# CHECK64-ASM-AND-OBJ: ammin_db.du $a3, $t4, $s2 -# CHECK64-ASM: encoding: [0x27,0xc3,0x71,0x38] -ammin_db.du $a3, $t4, $s2 - -# CHECK64-ASM-AND-OBJ: ll.d $s2, $s4, 16 -# CHECK64-ASM: encoding: [0x79,0x13,0x00,0x22] -ll.d $s2, $s4, 16 - -# CHECK64-ASM-AND-OBJ: sc.d $t5, $t5, 244 -# CHECK64-ASM: encoding: [0x31,0xf6,0x00,0x23] -sc.d $t5, $t5, 244 - -.endif - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/barrier.s b/llvm/test/MC/LoongArch/Basic/Integer/barrier.s deleted file mode 100644 index a9462fc380f6..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/barrier.s +++ /dev/null @@ -1,19 +0,0 @@ -## Test valid barrier instructions. 
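## For context: the barrier hint occupies the low 15 bits of the encoding
## (note that ibar 0 below differs from dbar 0 only in bit 15), and hint 0 is
## the conservative choice: dbar 0 orders all earlier memory accesses before
## all later ones, while ibar 0 additionally resynchronizes instruction
## fetch, e.g. after storing freshly generated code.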
- -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s - -# CHECK-ASM-AND-OBJ: dbar 0 -# CHECK-ASM: encoding: [0x00,0x00,0x72,0x38] -dbar 0 - -# CHECK-ASM-AND-OBJ: ibar 0 -# CHECK-ASM: encoding: [0x00,0x80,0x72,0x38] -ibar 0 - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/bit-manipu.s b/llvm/test/MC/LoongArch/Basic/Integer/bit-manipu.s deleted file mode 100644 index 3cbe90611f27..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/bit-manipu.s +++ /dev/null @@ -1,136 +0,0 @@ -## Test valid bit manipulation instructions. - -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s - -############################################################# -## Instructions for both loongarch32 and loongarch64 -############################################################# - -# CHECK-ASM: ext.w.b $t8, $t6 -# CHECK-ASM: encoding: [0x54,0x5e,0x00,0x00] -ext.w.b $t8, $t6 - -# CHECK-ASM: ext.w.h $s0, $s0 -# CHECK-ASM: encoding: [0xf7,0x5a,0x00,0x00] -ext.w.h $s0, $s0 - -# CHECK-ASM-AND-OBJ: clo.w $ra, $sp -# CHECK-ASM: encoding: [0x61,0x10,0x00,0x00] -clo.w $ra, $sp - -# CHECK-ASM-AND-OBJ: clz.w $a3, $a6 -# CHECK-ASM: encoding: [0x47,0x15,0x00,0x00] -clz.w $a3, $a6 - -# CHECK-ASM-AND-OBJ: cto.w $tp, $a2 -# CHECK-ASM: encoding: [0xc2,0x18,0x00,0x00] -cto.w $tp, $a2 - -# CHECK-ASM-AND-OBJ: ctz.w $a1, $fp -# CHECK-ASM: encoding: [0xc5,0x1e,0x00,0x00] -ctz.w $a1, $fp - -# CHECK-ASM-AND-OBJ: bytepick.w $s6, $zero, $t4, 0 -# CHECK-ASM: encoding: [0x1d,0x40,0x08,0x00] -bytepick.w $s6, $zero, $t4, 0 - -# CHECK-ASM-AND-OBJ: revb.2h $t8, $a7 -# CHECK-ASM: encoding: [0x74,0x31,0x00,0x00] -revb.2h $t8, $a7 - -# CHECK-ASM-AND-OBJ: bitrev.4b $r21, $s4 -# CHECK-ASM: encoding: [0x75,0x4b,0x00,0x00] -bitrev.4b $r21, $s4 - -# CHECK-ASM-AND-OBJ: bitrev.w $s2, $a1 -# CHECK-ASM: encoding: [0xb9,0x50,0x00,0x00] -bitrev.w $s2, $a1 - -# CHECK-ASM-AND-OBJ: bstrins.w $a4, $a7, 7, 2 -# CHECK-ASM: encoding: [0x68,0x09,0x67,0x00] -bstrins.w $a4, $a7, 7, 2 - -# CHECK-ASM-AND-OBJ: bstrpick.w $ra, $a5, 10, 4 -# CHECK-ASM: encoding: [0x21,0x91,0x6a,0x00] -bstrpick.w $ra, $a5, 10, 4 - -# CHECK-ASM-AND-OBJ: maskeqz $t8, $a7, $t6 -# CHECK-ASM: encoding: [0x74,0x49,0x13,0x00] -maskeqz $t8, $a7, $t6 - -# CHECK-ASM-AND-OBJ: masknez $t8, $t1, $s3 -# CHECK-ASM: encoding: [0xb4,0xe9,0x13,0x00] -masknez $t8, $t1, $s3 - - -############################################################# -## Instructions only for loongarch64 -############################################################# - -.ifdef LA64 - -# CHECK64-ASM-AND-OBJ: clo.d $s6, $ra -# 
CHECK64-ASM: encoding: [0x3d,0x20,0x00,0x00] -clo.d $s6, $ra - -# CHECK64-ASM-AND-OBJ: clz.d $s3, $s3 -# CHECK64-ASM: encoding: [0x5a,0x27,0x00,0x00] -clz.d $s3, $s3 - -# CHECK64-ASM-AND-OBJ: cto.d $t6, $t8 -# CHECK64-ASM: encoding: [0x92,0x2a,0x00,0x00] -cto.d $t6, $t8 - -# CHECK64-ASM-AND-OBJ: ctz.d $t5, $a6 -# CHECK64-ASM: encoding: [0x51,0x2d,0x00,0x00] -ctz.d $t5, $a6 - -# CHECK64-ASM-AND-OBJ: bytepick.d $t3, $t5, $t8, 4 -# CHECK64-ASM: encoding: [0x2f,0x52,0x0e,0x00] -bytepick.d $t3, $t5, $t8, 4 - -# CHECK64-ASM-AND-OBJ: revb.4h $t1, $t7 -# CHECK64-ASM: encoding: [0x6d,0x36,0x00,0x00] -revb.4h $t1, $t7 - -# CHECK64-ASM-AND-OBJ: revb.2w $s5, $s4 -# CHECK64-ASM: encoding: [0x7c,0x3b,0x00,0x00] -revb.2w $s5, $s4 - -# CHECK64-ASM-AND-OBJ: revb.d $zero, $s0 -# CHECK64-ASM: encoding: [0xe0,0x3e,0x00,0x00] -revb.d $zero, $s0 - -# CHECK64-ASM-AND-OBJ: revh.2w $s5, $a6 -# CHECK64-ASM: encoding: [0x5c,0x41,0x00,0x00] -revh.2w $s5, $a6 - -# CHECK64-ASM-AND-OBJ: revh.d $a5, $a3 -# CHECK64-ASM: encoding: [0xe9,0x44,0x00,0x00] -revh.d $a5, $a3 - -# CHECK64-ASM-AND-OBJ: bitrev.8b $t1, $s2 -# CHECK64-ASM: encoding: [0x2d,0x4f,0x00,0x00] -bitrev.8b $t1, $s2 - -# CHECK64-ASM-AND-OBJ: bitrev.d $t7, $s0 -# CHECK64-ASM: encoding: [0xf3,0x56,0x00,0x00] -bitrev.d $t7, $s0 - -# CHECK64-ASM-AND-OBJ: bstrins.d $a4, $a7, 7, 2 -# CHECK64-ASM: encoding: [0x68,0x09,0x87,0x00] -bstrins.d $a4, $a7, 7, 2 - -# CHECK64-ASM-AND-OBJ: bstrpick.d $s8, $s4, 39, 22 -# CHECK64-ASM: encoding: [0x7f,0x5b,0xe7,0x00] -bstrpick.d $s8, $s4, 39, 22 - -.endif - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/bit-shift.s b/llvm/test/MC/LoongArch/Basic/Integer/bit-shift.s deleted file mode 100644 index 4b8f00a709fb..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/bit-shift.s +++ /dev/null @@ -1,88 +0,0 @@ -## Test valid bit shift instructions. 
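## A note on the harness used by these multi-target files: --defsym=LA64=1
## predefines the assembler symbol LA64, so the ".ifdef LA64" block near the
## end of the file is only assembled on the loongarch64 RUN lines, letting a
## single source cover both the 32- and 64-bit instruction sets.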
- -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s - -############################################################# -## Instructions for both loongarch32 and loongarch64 -############################################################# - -# CHECK-ASM-AND-OBJ: sll.w $s1, $s4, $s0 -# CHECK-ASM: encoding: [0x78,0x5f,0x17,0x00] -sll.w $s1, $s4, $s0 - -# CHECK-ASM-AND-OBJ: srl.w $s8, $t5, $a3 -# CHECK-ASM: encoding: [0x3f,0x9e,0x17,0x00] -srl.w $s8, $t5, $a3 - -# CHECK-ASM-AND-OBJ: sra.w $t0, $s5, $a6 -# CHECK-ASM: encoding: [0x8c,0x2b,0x18,0x00] -sra.w $t0, $s5, $a6 - -# CHECK-ASM-AND-OBJ: rotr.w $ra, $s3, $t6 -# CHECK-ASM: encoding: [0x41,0x4b,0x1b,0x00] -rotr.w $ra, $s3, $t6 - -# CHECK-ASM-AND-OBJ: slli.w $s3, $t6, 0 -# CHECK-ASM: encoding: [0x5a,0x82,0x40,0x00] -slli.w $s3, $t6, 0 - -# CHECK-ASM-AND-OBJ: srli.w $a6, $t2, 30 -# CHECK-ASM: encoding: [0xca,0xf9,0x44,0x00] -srli.w $a6, $t2, 30 - -# CHECK-ASM-AND-OBJ: srai.w $a4, $t5, 24 -# CHECK-ASM: encoding: [0x28,0xe2,0x48,0x00] -srai.w $a4, $t5, 24 - -# CHECK-ASM-AND-OBJ: rotri.w $s0, $t8, 23 -# CHECK-ASM: encoding: [0x97,0xde,0x4c,0x00] -rotri.w $s0, $t8, 23 - - -############################################################# -## Instructions only for loongarch64 -############################################################# - -.ifdef LA64 - -# CHECK64-ASM-AND-OBJ: sll.d $t8, $t3, $sp -# CHECK64-ASM: encoding: [0xf4,0x8d,0x18,0x00] -sll.d $t8, $t3, $sp - -# CHECK64-ASM-AND-OBJ: srl.d $t2, $s2, $zero -# CHECK64-ASM: encoding: [0x2e,0x03,0x19,0x00] -srl.d $t2, $s2, $zero - -# CHECK64-ASM-AND-OBJ: sra.d $a3, $fp, $s8 -# CHECK64-ASM: encoding: [0xc7,0xfe,0x19,0x00] -sra.d $a3, $fp, $s8 - -# CHECK64-ASM-AND-OBJ: rotr.d $s8, $sp, $ra -# CHECK64-ASM: encoding: [0x7f,0x84,0x1b,0x00] -rotr.d $s8, $sp, $ra - -# CHECK64-ASM-AND-OBJ: slli.d $a6, $s8, 39 -# CHECK64-ASM: encoding: [0xea,0x9f,0x41,0x00] -slli.d $a6, $s8, 39 - -# CHECK64-ASM-AND-OBJ: srli.d $s8, $fp, 38 -# CHECK64-ASM: encoding: [0xdf,0x9a,0x45,0x00] -srli.d $s8, $fp, 38 - -# CHECK64-ASM-AND-OBJ: srai.d $a5, $r21, 27 -# CHECK64-ASM: encoding: [0xa9,0x6e,0x49,0x00] -srai.d $a5, $r21, 27 - -# CHECK64-ASM-AND-OBJ: rotri.d $s6, $zero, 7 -# CHECK64-ASM: encoding: [0x1d,0x1c,0x4d,0x00] -rotri.d $s6, $zero, 7 - -.endif - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/bound-check.s b/llvm/test/MC/LoongArch/Basic/Integer/bound-check.s deleted file mode 100644 index cfb7e4ba88de..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/bound-check.s +++ /dev/null @@ -1,71 +0,0 @@ -## Test valid boundary check memory access instructions. 
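## For context: these bound-check accesses only touch memory when the check
## passes (the gt forms require rj > rk, the le forms require rj <= rk);
## otherwise they raise a bound-check exception instead of performing the
## load or store.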
- -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s - -# CHECK-ASM-AND-OBJ: ldgt.b $a2, $a2, $s6 -# CHECK-ASM: encoding: [0xc6,0x74,0x78,0x38] -ldgt.b $a2, $a2, $s6 - -# CHECK-ASM-AND-OBJ: ldgt.h $a1, $s8, $ra -# CHECK-ASM: encoding: [0xe5,0x87,0x78,0x38] -ldgt.h $a1, $s8, $ra - -# CHECK-ASM-AND-OBJ: ldgt.w $t3, $s3, $a4 -# CHECK-ASM: encoding: [0x4f,0x23,0x79,0x38] -ldgt.w $t3, $s3, $a4 - -# CHECK-ASM-AND-OBJ: ldgt.d $s0, $s2, $s8 -# CHECK-ASM: encoding: [0x37,0xff,0x79,0x38] -ldgt.d $s0, $s2, $s8 - -# CHECK-ASM-AND-OBJ: ldle.b $a5, $t0, $t3 -# CHECK-ASM: encoding: [0x89,0x3d,0x7a,0x38] -ldle.b $a5, $t0, $t3 - -# CHECK-ASM-AND-OBJ: ldle.h $a7, $a7, $s0 -# CHECK-ASM: encoding: [0x6b,0xdd,0x7a,0x38] -ldle.h $a7, $a7, $s0 - -# CHECK-ASM-AND-OBJ: ldle.w $s1, $tp, $tp -# CHECK-ASM: encoding: [0x58,0x08,0x7b,0x38] -ldle.w $s1, $tp, $tp - -# CHECK-ASM-AND-OBJ: ldle.d $t8, $t3, $t4 -# CHECK-ASM: encoding: [0xf4,0xc1,0x7b,0x38] -ldle.d $t8, $t3, $t4 - -# CHECK-ASM-AND-OBJ: stgt.b $s4, $t7, $t8 -# CHECK-ASM: encoding: [0x7b,0x52,0x7c,0x38] -stgt.b $s4, $t7, $t8 - -# CHECK-ASM-AND-OBJ: stgt.h $t4, $a0, $a2 -# CHECK-ASM: encoding: [0x90,0x98,0x7c,0x38] -stgt.h $t4, $a0, $a2 - -# CHECK-ASM-AND-OBJ: stgt.w $s8, $s5, $t2 -# CHECK-ASM: encoding: [0x9f,0x3b,0x7d,0x38] -stgt.w $s8, $s5, $t2 - -# CHECK-ASM-AND-OBJ: stgt.d $s7, $r21, $s1 -# CHECK-ASM: encoding: [0xbe,0xe2,0x7d,0x38] -stgt.d $s7, $r21, $s1 - -# CHECK-ASM-AND-OBJ: stle.b $a6, $a0, $t4 -# CHECK-ASM: encoding: [0x8a,0x40,0x7e,0x38] -stle.b $a6, $a0, $t4 - -# CHECK-ASM-AND-OBJ: stle.h $t5, $t5, $r21 -# CHECK-ASM: encoding: [0x31,0xd6,0x7e,0x38] -stle.h $t5, $t5, $r21 - -# CHECK-ASM-AND-OBJ: stle.w $s0, $s5, $s6 -# CHECK-ASM: encoding: [0x97,0x77,0x7f,0x38] -stle.w $s0, $s5, $s6 - -# CHECK-ASM-AND-OBJ: stle.d $s2, $s1, $s6 -# CHECK-ASM: encoding: [0x19,0xf7,0x7f,0x38] -stle.d $s2, $s1, $s6 - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/branch.s b/llvm/test/MC/LoongArch/Basic/Integer/branch.s deleted file mode 100644 index c4e8edf815cf..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/branch.s +++ /dev/null @@ -1,55 +0,0 @@ -## Test valid branch instructions. 
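## For reference, branch offsets are byte offsets, must be multiples of 4,
## and are encoded shifted right by 2. A worked example for the first check:
##   beq $a6, $a3, 176
##     = 0x58 << 24 | (176 >> 2) << 10 | 10 << 5 | 7   ($a6 = $r10, $a3 = $r7)
##     = 0x5800b147, i.e. the little-endian bytes [0x47,0xb1,0x00,0x58].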
- -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s - -# CHECK-ASM-AND-OBJ: beq $a6, $a3, 176 -# CHECK-ASM: encoding: [0x47,0xb1,0x00,0x58] -beq $a6, $a3, 176 - -# CHECK-ASM-AND-OBJ: bne $s2, $ra, 136 -# CHECK-ASM: encoding: [0x21,0x8b,0x00,0x5c] -bne $s2, $ra, 136 - -# CHECK-ASM-AND-OBJ: blt $t3, $s7, 168 -# CHECK-ASM: encoding: [0xfe,0xa9,0x00,0x60] -blt $t3, $s7, 168 - -# CHECK-ASM-AND-OBJ: bge $t0, $t3, 148 -# CHECK-ASM: encoding: [0x8f,0x95,0x00,0x64] -bge $t0, $t3, 148 - -# CHECK-ASM-AND-OBJ: bltu $t5, $a1, 4 -# CHECK-ASM: encoding: [0x25,0x06,0x00,0x68] -bltu $t5, $a1, 4 - -# CHECK-ASM-AND-OBJ: bgeu $a2, $s0, 140 -# CHECK-ASM: encoding: [0xd7,0x8c,0x00,0x6c] -bgeu $a2, $s0, 140 - -# CHECK-ASM-AND-OBJ: beqz $a5, 96 -# CHECK-ASM: encoding: [0x20,0x61,0x00,0x40] -beqz $a5, 96 - -# CHECK-ASM-AND-OBJ: bnez $sp, 212 -# CHECK-ASM: encoding: [0x60,0xd4,0x00,0x44] -bnez $sp, 212 - -# CHECK-ASM-AND-OBJ: b 248 -# CHECK-ASM: encoding: [0x00,0xf8,0x00,0x50] -b 248 - -# CHECK-ASM-AND-OBJ: bl 236 -# CHECK-ASM: encoding: [0x00,0xec,0x00,0x54] -bl 236 - -# CHECK-ASM-AND-OBJ: jirl $ra, $a0, 4 -# CHECK-ASM: encoding: [0x81,0x04,0x00,0x4c] -jirl $ra, $a0, 4 - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/crc.s b/llvm/test/MC/LoongArch/Basic/Integer/crc.s deleted file mode 100644 index e57134d60247..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/crc.s +++ /dev/null @@ -1,39 +0,0 @@ -## Test valid CRC check instructions. - -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s - -# CHECK-ASM-AND-OBJ: crc.w.b.w $s1, $a3, $tp -# CHECK-ASM: encoding: [0xf8,0x08,0x24,0x00] -crc.w.b.w $s1, $a3, $tp - -# CHECK-ASM-AND-OBJ: crc.w.h.w $s8, $a6, $t6 -# CHECK-ASM: encoding: [0x5f,0xc9,0x24,0x00] -crc.w.h.w $s8, $a6, $t6 - -# CHECK-ASM-AND-OBJ: crc.w.w.w $s5, $a2, $a6 -# CHECK-ASM: encoding: [0xdc,0x28,0x25,0x00] -crc.w.w.w $s5, $a2, $a6 - -# CHECK-ASM-AND-OBJ: crc.w.d.w $s5, $a7, $s8 -# CHECK-ASM: encoding: [0x7c,0xfd,0x25,0x00] -crc.w.d.w $s5, $a7, $s8 - -# CHECK-ASM-AND-OBJ: crcc.w.b.w $t3, $t6, $sp -# CHECK-ASM: encoding: [0x4f,0x0e,0x26,0x00] -crcc.w.b.w $t3, $t6, $sp - -# CHECK-ASM-AND-OBJ: crcc.w.h.w $r21, $s6, $t6 -# CHECK-ASM: encoding: [0xb5,0xcb,0x26,0x00] -crcc.w.h.w $r21, $s6, $t6 - -# CHECK-ASM-AND-OBJ: crcc.w.w.w $t5, $t2, $t1 -# CHECK-ASM: encoding: [0xd1,0x35,0x27,0x00] -crcc.w.w.w $t5, $t2, $t1 - -# CHECK-ASM-AND-OBJ: crcc.w.d.w $s7, $r21, $s4 -# CHECK-ASM: encoding: [0xbe,0xee,0x27,0x00] -crcc.w.d.w $s7, $r21, $s4 - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid-dis.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid-dis.s deleted file mode 100644 index 5aa79ca80ce9..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/invalid-dis.s +++ /dev/null @@ -1,10 +0,0 @@ -# Test that disassembler rejects data smaller than 4 bytes. 
- -# RUN: llvm-mc --filetype=obj --triple=loongarch32 < %s \ -# RUN: | llvm-objdump -d - | FileCheck %s -# RUN: llvm-mc --filetype=obj --triple=loongarch64 < %s \ -# RUN: | llvm-objdump -d - | FileCheck %s - -# CHECK: 11 -# CHECK: 22 -.2byte 0x2211 diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid.s deleted file mode 100644 index 94b3976f5bfd..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s +++ /dev/null @@ -1,191 +0,0 @@ -## Test invalid instructions on both loongarch32 and loongarch64 target. - -# RUN: not llvm-mc --triple=loongarch32 --mattr=-f %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK64 -# RUN: not llvm-mc --triple=loongarch64 --mattr=-f %s 2>&1 --defsym=LA64=1 | FileCheck %s - -## Out of range immediates -## uimm2 -bytepick.w $a0, $a0, $a0, -1 -# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] -bytepick.w $a0, $a0, $a0, 4 -# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3] - -## uimm2_plus1 -alsl.w $a0, $a0, $a0, 0 -# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [1, 4] -alsl.w $a0, $a0, $a0, 5 -# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [1, 4] - -## uimm5 -slli.w $a0, $a0, -1 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 31] -srli.w $a0, $a0, -1 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 31] -srai.w $a0, $a0, 32 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 31] -rotri.w $a0, $a0, 32 -# CHECK: :[[#@LINE-1]]:19: error: immediate must be an integer in the range [0, 31] -bstrins.w $a0, $a0, 31, -1 -# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] -bstrpick.w $a0, $a0, 32, 0 -# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] -preld -1, $a0, 0 -# CHECK: :[[#@LINE-1]]:7: error: immediate must be an integer in the range [0, 31] -preld 32, $a0, 0 -# CHECK: :[[#@LINE-1]]:7: error: immediate must be an integer in the range [0, 31] - -## uimm12 -andi $a0, $a0, -1 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [0, 4095] -ori $a0, $a0, 4096 -# CHECK: :[[#@LINE-1]]:15: error: immediate must be an integer in the range [0, 4095] -xori $a0, $a0, 4096 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [0, 4095] - -## simm12 -addi.w $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [-2048, 2047] -slti $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -sltui $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -preld 0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:15: error: immediate must be an integer in the range [-2048, 2047] -ld.b $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -ld.h $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -ld.w $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -ld.bu $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -ld.hu $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -st.b $a0, $a0, 2048 -# CHECK: 
:[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -st.h $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -st.w $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] - -## simm14_lsl2 -ll.w $a0, $a0, -32772 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] -ll.w $a0, $a0, -32769 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] -sc.w $a0, $a0, 32767 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] -sc.w $a0, $a0, 32768 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] - -## simm16_lsl2 -beq $a0, $a0, -0x20004 -# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] -bne $a0, $a0, -0x20004 -# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] -blt $a0, $a0, -0x1FFFF -# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] -bge $a0, $a0, -0x1FFFF -# CHECK: :[[#@LINE-1]]:15: error: immediate must be a multiple of 4 in the range [-131072, 131068] -bltu $a0, $a0, 0x1FFFF -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-131072, 131068] -bgeu $a0, $a0, 0x1FFFF -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-131072, 131068] -jirl $a0, $a0, 0x20000 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-131072, 131068] - -## simm20 -lu12i.w $a0, -0x80001 -# CHECK: :[[#@LINE-1]]:14: error: immediate must be an integer in the range [-524288, 524287] -pcaddi $a0, -0x80001 -# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-524288, 524287] -pcaddu12i $a0, 0x80000 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] -pcalau12i $a0, 0x80000 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] - -## simm21_lsl2 -beqz $a0, -0x400001 -# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] -bnez $a0, -0x3FFFFF -# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] -beqz $a0, 0x3FFFFF -# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] -bnez $a0, 0x400000 -# CHECK: :[[#@LINE-1]]:11: error: immediate must be a multiple of 4 in the range [-4194304, 4194300] - -## simm26_lsl2 -b -0x8000001 -# CHECK: :[[#@LINE-1]]:3: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] -b 0x1 -# CHECK: :[[#@LINE-1]]:3: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] -bl 0x7FFFFFF -# CHECK: :[[#@LINE-1]]:4: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] -bl 0x8000000 -# CHECK: :[[#@LINE-1]]:4: error: immediate must be a multiple of 4 in the range [-134217728, 134217724] - -## Invalid mnemonics -nori $a0, $a0, 0 -# CHECK: :[[#@LINE-1]]:1: error: unrecognized instruction mnemonic -andni $a0, $a0, 0 -# CHECK: :[[#@LINE-1]]:1: error: unrecognized instruction mnemonic -orni $a0, $a0, 0 -# CHECK: :[[#@LINE-1]]:1: error: unrecognized instruction mnemonic - -## Invalid register names -add.w $foo, $a0, $a0 -# CHECK: :[[#@LINE-1]]:8: error: invalid 
operand for instruction -sub.w $a8, $a0, $a0 -# CHECK: :[[#@LINE-1]]:8: error: invalid operand for instruction -addi.w $x0, $a0, 0 -# CHECK: :[[#@LINE-1]]:9: error: invalid operand for instruction -alsl.w $t9, $a0, $a0, 1 -# CHECK: :[[#@LINE-1]]:9: error: invalid operand for instruction -lu12i.w $s10, 0 -# CHECK: :[[#@LINE-1]]:10: error: invalid operand for instruction - -.ifndef LA64 -## LoongArch64 mnemonics -add.d $a0, $a0, $a0 -# CHECK64: :[[#@LINE-1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set -addi.d $a0, $a0, 0 -# CHECK64: :[[#@LINE-1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set -.endif - -## Invalid operand types -slt $a0, $a0, 0 -# CHECK: :[[#@LINE-1]]:15: error: invalid operand for instruction -slti $a0, 0, 0 -# CHECK: :[[#@LINE-1]]:11: error: invalid operand for instruction - -## Too many operands -andi $a0, $a0, 0, 0 -# CHECK: :[[#@LINE-1]]:19: error: invalid operand for instruction - -## Too few operands -and $a0, $a0 -# CHECK: :[[#@LINE-1]]:1: error: too few operands for instruction -andi $a0, $a0 -# CHECK: :[[#@LINE-1]]:1: error: too few operands for instruction - -## Instructions outside the base integer ISA -## TODO: Test instructions in LSX/LASX/LBT/LVZ after their introduction. - -## Floating-Point mnemonics -fadd.s $fa0, $fa0, $fa0 -# CHECK: :[[#@LINE-1]]:1: error: instruction requires the following: 'F' (Single-Precision Floating-Point) -fadd.d $fa0, $fa0, $fa0 -# CHECK: :[[#@LINE-1]]:1: error: instruction requires the following: 'D' (Double-Precision Floating-Point) - -## Using floating point registers when integer registers are expected -sll.w $a0, $a0, $fa0 -# CHECK: :[[#@LINE-1]]:18: error: invalid operand for instruction - -## msbw < lsbw -# CHECK: :[[#@LINE+1]]:21: error: msb is less than lsb -bstrins.w $a0, $a0, 1, 2 -# CHECK: ^~~~ - -# CHECK: :[[#@LINE+1]]:22: error: msb is less than lsb -bstrpick.w $a0, $a0, 30, 31 -# CHECK: ^~~~~~ diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s deleted file mode 100644 index a8b175a886cc..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s +++ /dev/null @@ -1,77 +0,0 @@ -## Test invalid instructions on loongarch64 target. 
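## The "multiple of 4" ranges below come from scaled immediate fields: a
## simm14 scaled by 4 spans [-2^13 * 4, (2^13 - 1) * 4] = [-32768, 32764].
## Unscaled fields keep their natural range, e.g. the simm16 of addu16i.d
## spans [-32768, 32767].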
- -# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s - -## Out of range immediates -## uimm2_plus1 -alsl.wu $a0, $a0, $a0, 0 -# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [1, 4] -alsl.d $a0, $a0, $a0, 5 -# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [1, 4] - -## uimm3 -bytepick.d $a0, $a0, $a0, -1 -# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] -bytepick.d $a0, $a0, $a0, 8 -# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] - -## uimm6 -slli.d $a0, $a0, -1 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 63] -srli.d $a0, $a0, -1 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 63] -srai.d $a0, $a0, 64 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [0, 63] -rotri.d $a0, $a0, 64 -# CHECK: :[[#@LINE-1]]:19: error: immediate must be an integer in the range [0, 63] -bstrins.d $a0, $a0, 63, -1 -# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] -bstrpick.d $a0, $a0, 64, 0 -# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] - -## simm12 -addi.d $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:18: error: immediate must be an integer in the range [-2048, 2047] -lu52i.d $a0, $a0, -2049 -# CHECK: :[[#@LINE-1]]:19: error: immediate must be an integer in the range [-2048, 2047] -ld.wu $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047] -ld.d $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] -st.d $a0, $a0, 2048 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] - -## simm14_lsl2 -ldptr.w $a0, $a0, -32772 -# CHECK: :[[#@LINE-1]]:19: error: immediate must be a multiple of 4 in the range [-32768, 32764] -ldptr.d $a0, $a0, -32772 -# CHECK: :[[#@LINE-1]]:19: error: immediate must be a multiple of 4 in the range [-32768, 32764] -stptr.w $a0, $a0, -32769 -# CHECK: :[[#@LINE-1]]:19: error: immediate must be a multiple of 4 in the range [-32768, 32764] -stptr.d $a0, $a0, -32769 -# CHECK: :[[#@LINE-1]]:19: error: immediate must be a multiple of 4 in the range [-32768, 32764] -ll.w $a0, $a0, 32767 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] -sc.w $a0, $a0, 32768 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be a multiple of 4 in the range [-32768, 32764] - -## simm16 -addu16i.d $a0, $a0, -32769 -# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-32768, 32767] -addu16i.d $a0, $a0, 32768 -# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-32768, 32767] - -## simm20 -lu32i.d $a0, -0x80001 -# CHECK: :[[#@LINE-1]]:14: error: immediate must be an integer in the range [-524288, 524287] -pcaddu18i $a0, 0x80000 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] - -## msbd < lsbd -# CHECK: :[[#@LINE+1]]:21: error: msb is less than lsb -bstrins.d $a0, $a0, 1, 2 -# CHECK: ^~~~ - -# CHECK: :[[#@LINE+1]]:22: error: msb is less than lsb -bstrpick.d $a0, $a0, 32, 63 -# CHECK: ^~~~~~ diff --git a/llvm/test/MC/LoongArch/Basic/Integer/memory.s b/llvm/test/MC/LoongArch/Basic/Integer/memory.s deleted file mode 100644 index 1d363d44d7e4..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/memory.s +++ /dev/null @@ -1,132 +0,0 @@ -## Test valid 
memory access instructions. - -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s - -############################################################# -## Instructions for both loongarch32 and loongarch64 -############################################################# - -# CHECK-ASM-AND-OBJ: ld.b $s1, $a4, 21 -# CHECK-ASM: encoding: [0x18,0x55,0x00,0x28] -ld.b $s1, $a4, 21 - -# CHECK-ASM-AND-OBJ: ld.h $a3, $t6, 80 -# CHECK-ASM: encoding: [0x47,0x42,0x41,0x28] -ld.h $a3, $t6, 80 - -# CHECK-ASM-AND-OBJ: ld.w $t6, $s3, 92 -# CHECK-ASM: encoding: [0x52,0x73,0x81,0x28] -ld.w $t6, $s3, 92 - -# CHECK-ASM-AND-OBJ: ld.bu $t1, $t1, 150 -# CHECK-ASM: encoding: [0xad,0x59,0x02,0x2a] -ld.bu $t1, $t1, 150 - -# CHECK-ASM-AND-OBJ: ld.hu $t6, $s6, 198 -# CHECK-ASM: encoding: [0xb2,0x1b,0x43,0x2a] -ld.hu $t6, $s6, 198 - -# CHECK-ASM-AND-OBJ: st.b $sp, $a3, 95 -# CHECK-ASM: encoding: [0xe3,0x7c,0x01,0x29] -st.b $sp, $a3, 95 - -# CHECK-ASM-AND-OBJ: st.h $s2, $t4, 122 -# CHECK-ASM: encoding: [0x19,0xea,0x41,0x29] -st.h $s2, $t4, 122 - -# CHECK-ASM-AND-OBJ: st.w $t1, $t1, 175 -# CHECK-ASM: encoding: [0xad,0xbd,0x82,0x29] -st.w $t1, $t1, 175 - -# CHECK-ASM-AND-OBJ: preld 10, $zero, 23 -# CHECK-ASM: encoding: [0x0a,0x5c,0xc0,0x2a] -preld 10, $zero, 23 - - -############################################################# -## Instructions only for loongarch64 -############################################################# - -.ifdef LA64 - -# CHECK64-ASM-AND-OBJ: ld.wu $t2, $t7, 31 -# CHECK64-ASM: encoding: [0x6e,0x7e,0x80,0x2a] -ld.wu $t2, $t7, 31 - -# CHECK: ld.d $t6, $t8, 159 -# CHECK: encoding: [0x92,0x7e,0xc2,0x28] -ld.d $t6, $t8, 159 - -# CHECK64-ASM-AND-OBJ: st.d $s7, $s7, 60 -# CHECK64-ASM: encoding: [0xde,0xf3,0xc0,0x29] -st.d $s7, $s7, 60 - -# CHECK64-ASM-AND-OBJ: ldx.b $s1, $ra, $tp -# CHECK64-ASM: encoding: [0x38,0x08,0x00,0x38] -ldx.b $s1, $ra, $tp - -# CHECK64-ASM-AND-OBJ: ldx.h $fp, $fp, $t5 -# CHECK64-ASM: encoding: [0xd6,0x46,0x04,0x38] -ldx.h $fp, $fp, $t5 - -# CHECK64-ASM-AND-OBJ: ldx.w $s2, $a7, $s0 -# CHECK64-ASM: encoding: [0x79,0x5d,0x08,0x38] -ldx.w $s2, $a7, $s0 - -# CHECK64-ASM-AND-OBJ: ldx.d $t6, $s0, $t8 -# CHECK64-ASM: encoding: [0xf2,0x52,0x0c,0x38] -ldx.d $t6, $s0, $t8 - -# CHECK64-ASM-AND-OBJ: ldx.bu $a7, $a5, $a5 -# CHECK64-ASM: encoding: [0x2b,0x25,0x20,0x38] -ldx.bu $a7, $a5, $a5 - -# CHECK64-ASM-AND-OBJ: ldx.hu $fp, $s0, $s4 -# CHECK64-ASM: encoding: [0xf6,0x6e,0x24,0x38] -ldx.hu $fp, $s0, $s4 - -# CHECK64-ASM-AND-OBJ: ldx.wu $a4, $s1, $s5 -# CHECK64-ASM: encoding: [0x08,0x73,0x28,0x38] -ldx.wu $a4, $s1, $s5 - -# CHECK64-ASM-AND-OBJ: stx.b $t7, $ra, $sp -# CHECK64-ASM: encoding: [0x33,0x0c,0x10,0x38] -stx.b $t7, $ra, $sp - -# CHECK64-ASM-AND-OBJ: stx.h $zero, $s5, $s3 -# CHECK64-ASM: encoding: [0x80,0x6b,0x14,0x38] -stx.h $zero, $s5, $s3 - -# CHECK64-ASM-AND-OBJ: stx.w $a3, $a0, $s8 -# CHECK64-ASM: encoding: [0x87,0x7c,0x18,0x38] -stx.w $a3, $a0, $s8 - -# CHECK64-ASM-AND-OBJ: stx.d $a3, $s8, $a6 -# CHECK64-ASM: encoding: [0xe7,0x2b,0x1c,0x38] 
-stx.d $a3, $s8, $a6 - -# CHECK64-ASM-AND-OBJ: ldptr.w $s3, $a2, 60 -# CHECK64-ASM: encoding: [0xda,0x3c,0x00,0x24] -ldptr.w $s3, $a2, 60 - -# CHECK64-ASM-AND-OBJ: ldptr.d $a1, $s6, 244 -# CHECK64-ASM: encoding: [0xa5,0xf7,0x00,0x26] -ldptr.d $a1, $s6, 244 - -# CHECK64-ASM-AND-OBJ: stptr.w $s5, $a1, 216 -# CHECK64-ASM: encoding: [0xbc,0xd8,0x00,0x25] -stptr.w $s5, $a1, 216 - -# CHECK64-ASM-AND-OBJ: stptr.d $t2, $s1, 196 -# CHECK64-ASM: encoding: [0x0e,0xc7,0x00,0x27] -stptr.d $t2, $s1, 196 - -.endif - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/misc.s b/llvm/test/MC/LoongArch/Basic/Integer/misc.s deleted file mode 100644 index 182d1da9b237..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/misc.s +++ /dev/null @@ -1,56 +0,0 @@ -## Test valid misc instructions. - -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s - -############################################################# -## Instructions for both loongarch32 and loongarch64 -############################################################# - -# CHECK-ASM-AND-OBJ: syscall 100 -# CHECK-ASM: encoding: [0x64,0x00,0x2b,0x00] -syscall 100 - -# CHECK-ASM-AND-OBJ: break 199 -# CHECK-ASM: encoding: [0xc7,0x00,0x2a,0x00] -break 199 - -# CHECK-ASM-AND-OBJ: rdtimel.w $s1, $a0 -# CHECK-ASM: encoding: [0x98,0x60,0x00,0x00] -rdtimel.w $s1, $a0 - -# CHECK-ASM-AND-OBJ: rdtimeh.w $a7, $a1 -# CHECK-ASM: encoding: [0xab,0x64,0x00,0x00] -rdtimeh.w $a7, $a1 - -# CHECK-ASM-AND-OBJ: cpucfg $sp, $a4 -# CHECK-ASM: encoding: [0x03,0x6d,0x00,0x00] -cpucfg $sp, $a4 - - -############################################################# -## Instructions only for loongarch64 -############################################################# - -.ifdef LA64 - -# CHECK64-ASM-AND-OBJ: asrtle.d $t0, $t5 -# CHECK64-ASM: encoding: [0x80,0x45,0x01,0x00] -asrtle.d $t0, $t5 - -# CHECK64-ASM-AND-OBJ: asrtgt.d $t8, $t8 -# CHECK64-ASM: encoding: [0x80,0xd2,0x01,0x00] -asrtgt.d $t8, $t8 - -# CHECK64-ASM-AND-OBJ: rdtime.d $tp, $t3 -# CHECK64-ASM: encoding: [0xe2,0x69,0x00,0x00] -rdtime.d $tp, $t3 - -.endif - diff --git a/llvm/test/MC/LoongArch/Basic/Integer/pseudos.s b/llvm/test/MC/LoongArch/Basic/Integer/pseudos.s deleted file mode 100644 index e718982f3e2c..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Integer/pseudos.s +++ /dev/null @@ -1,18 +0,0 @@ -## Test valid pseudo instructions - -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s - -# CHECK-ASM-AND-OBJ: nop -# CHECK-ASM: encoding: [0x00,0x00,0x40,0x03] -nop - -# CHECK-ASM-AND-OBJ: move $a4, $a5 -# CHECK-ASM: encoding: 
[0x28,0x01,0x15,0x00] -move $a4, $a5 diff --git a/llvm/test/MC/LoongArch/Basic/Privilege/invalid.s b/llvm/test/MC/LoongArch/Basic/Privilege/invalid.s deleted file mode 100644 index 380b848a6af9..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Privilege/invalid.s +++ /dev/null @@ -1,14 +0,0 @@ -# RUN: not llvm-mc --triple=loongarch32 %s 2>&1 | FileCheck %s --check-prefixes=ERR,ERR32 -# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s --check-prefix=ERR - -## csrxchg: rj != 0,1 -csrxchg $a0, $zero, 0 -# ERR: :[[#@LINE-1]]:15: error: must not be $r0 or $r1 -csrxchg $a0, $ra, 0 -# ERR: :[[#@LINE-1]]:15: error: must not be $r0 or $r1 - -## LoongArch64 mnemonics -iocsrrd.d $a0, $a1 -# ERR32: :[[#@LINE-1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set -iocsrwr.d $a0, $a1 -# ERR32: :[[#@LINE-1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set diff --git a/llvm/test/MC/LoongArch/Basic/Privilege/valid.s b/llvm/test/MC/LoongArch/Basic/Privilege/valid.s deleted file mode 100644 index 1d5ca68664d4..000000000000 --- a/llvm/test/MC/LoongArch/Basic/Privilege/valid.s +++ /dev/null @@ -1,118 +0,0 @@ -## Test valid privilege instructions - -# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ,CHECK64-ASM,CHECK64-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 | llvm-objdump -d - \ -# RUN: | FileCheck --check-prefixes=CHECK-ASM-AND-OBJ,CHECK64-ASM-AND-OBJ %s - -############################################################# -## Instructions for both loongarch32 and loongarch64 -############################################################# - -# CHECK-ASM-AND-OBJ: csrrd $s3, 30 -# CHECK-ASM: encoding: [0x1a,0x78,0x00,0x04] -csrrd $s3, 30 - -# CHECK-ASM-AND-OBJ: csrwr $s1, 194 -# CHECK-ASM: encoding: [0x38,0x08,0x03,0x04] -csrwr $s1, 194 - -# CHECK-ASM-AND-OBJ: csrxchg $a2, $s4, 214 -# CHECK-ASM: encoding: [0x66,0x5b,0x03,0x04] -csrxchg $a2, $s4, 214 - -# CHECK-ASM-AND-OBJ: iocsrrd.b $s3, $s1 -# CHECK-ASM: encoding: [0x1a,0x03,0x48,0x06] -iocsrrd.b $s3, $s1 - -# CHECK-ASM-AND-OBJ: iocsrrd.h $a1, $s4 -# CHECK-ASM: encoding: [0x65,0x07,0x48,0x06] -iocsrrd.h $a1, $s4 - -# CHECK-ASM-AND-OBJ: iocsrrd.w $a6, $t8 -# CHECK-ASM: encoding: [0x8a,0x0a,0x48,0x06] -iocsrrd.w $a6, $t8 - -# CHECK-ASM-AND-OBJ: iocsrwr.b $a0, $s0 -# CHECK-ASM: encoding: [0xe4,0x12,0x48,0x06] -iocsrwr.b $a0, $s0 - -# CHECK-ASM-AND-OBJ: iocsrwr.h $a7, $zero -# CHECK-ASM: encoding: [0x0b,0x14,0x48,0x06] -iocsrwr.h $a7, $zero - -# CHECK-ASM-AND-OBJ: iocsrwr.w $t8, $s3 -# CHECK-ASM: encoding: [0x54,0x1b,0x48,0x06] -iocsrwr.w $t8, $s3 - -# CHECK-ASM-AND-OBJ: cacop 0, $a6, 27 -# CHECK-ASM: encoding: [0x40,0x6d,0x00,0x06] -cacop 0, $a6, 27 - -# CHECK-ASM-AND-OBJ: tlbclr -# CHECK-ASM: encoding: [0x00,0x20,0x48,0x06] -tlbclr - -# CHECK-ASM-AND-OBJ: tlbflush -# CHECK-ASM: encoding: [0x00,0x24,0x48,0x06] -tlbflush - -# CHECK-ASM-AND-OBJ: tlbsrch -# CHECK-ASM: encoding: [0x00,0x28,0x48,0x06] -tlbsrch - -# CHECK-ASM-AND-OBJ: tlbrd -# CHECK-ASM: encoding: [0x00,0x2c,0x48,0x06] -tlbrd - -# CHECK-ASM-AND-OBJ: tlbwr -# CHECK-ASM: encoding: [0x00,0x30,0x48,0x06] -tlbwr - -# 
CHECK-ASM-AND-OBJ: tlbfill -# CHECK-ASM: encoding: [0x00,0x34,0x48,0x06] -tlbfill - -# CHECK-ASM-AND-OBJ: invtlb 16, $s6, $s2 -# CHECK-ASM: encoding: [0xb0,0xe7,0x49,0x06] -invtlb 16, $s6, $s2 - -# CHECK-ASM-AND-OBJ: lddir $t0, $s7, 92 -# CHECK-ASM: encoding: [0xcc,0x73,0x41,0x06] -lddir $t0, $s7, 92 - -# CHECK-ASM-AND-OBJ: ldpte $t6, 200 -# CHECK-ASM: encoding: [0x40,0x22,0x47,0x06] -ldpte $t6, 200 - -# CHECK-ASM-AND-OBJ: ertn -# CHECK-ASM: encoding: [0x00,0x38,0x48,0x06] -ertn - -# CHECK-ASM-AND-OBJ: dbcl 201 -# CHECK-ASM: encoding: [0xc9,0x80,0x2a,0x00] -dbcl 201 - -# CHECK-ASM-AND-OBJ: idle 204 -# CHECK-ASM: encoding: [0xcc,0x80,0x48,0x06] -idle 204 - -############################################################# -## Instructions only for loongarch64 -############################################################# - -.ifdef LA64 - -# CHECK64-ASM-AND-OBJ: iocsrrd.d $t5, $s2 -# CHECK64-ASM: encoding: [0x31,0x0f,0x48,0x06] -iocsrrd.d $t5, $s2 - -# CHECK64-ASM-AND-OBJ: iocsrwr.d $t8, $a3 -# CHECK64-ASM: encoding: [0xf4,0x1c,0x48,0x06] -iocsrwr.d $t8, $a3 - -.endif diff --git a/llvm/test/MC/LoongArch/Directives/cfi.s b/llvm/test/MC/LoongArch/Directives/cfi.s deleted file mode 100644 index 7101fc907290..000000000000 --- a/llvm/test/MC/LoongArch/Directives/cfi.s +++ /dev/null @@ -1,34 +0,0 @@ -## Test cfi directives. - -# RUN: llvm-mc %s --triple=loongarch32 | FileCheck %s -# RUN: llvm-mc %s --triple=loongarch64 | FileCheck %s -# RUN: not llvm-mc --triple=loongarch32 --defsym=ERR=1 < %s 2>&1 \ -# RUN: | FileCheck %s --check-prefix=CHECK-ERR -# RUN: not llvm-mc --triple=loongarch64 --defsym=ERR=1 < %s 2>&1 \ -# RUN: | FileCheck %s --check-prefix=CHECK-ERR - -# CHECK: .cfi_startproc -.cfi_startproc -# CHECK-NEXT: .cfi_offset 0, 0 -.cfi_offset 0, 0 -# CHECK-NEXT: .cfi_offset 9, 8 -.cfi_offset 9, 8 -# CHECK-NEXT: .cfi_offset 31, 16 -.cfi_offset 31, 16 -# CHECK-NEXT: .cfi_endproc -.cfi_endproc - -.ifdef ERR -.cfi_startproc -# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number -.cfi_offset -22, -8 -# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number -.cfi_offset fp, -8 -# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number -.cfi_offset $22, -8 -# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number -.cfi_offset $r22, -8 -# CHECK-ERR: :[[#@LINE+1]]:13: error: invalid register number -.cfi_offset $fp, -8 -.cfi_endproc -.endif diff --git a/llvm/test/MC/LoongArch/Directives/data.s b/llvm/test/MC/LoongArch/Directives/data.s deleted file mode 100644 index e3c66d10b18a..000000000000 --- a/llvm/test/MC/LoongArch/Directives/data.s +++ /dev/null @@ -1,102 +0,0 @@ -## Test data directives. 
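## Directive size reference: .half, .2byte, .short and .hword emit 2 bytes;
## .word, .long and .4byte emit 4 bytes; .dword and .8byte emit 8 bytes. The
## section dumps below also confirm the little-endian layout, e.g. .hword
## 0x1234 appears in the dump as "3412".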
-# RUN: llvm-mc --triple=loongarch32 < %s \ -# RUN: | FileCheck --check-prefix=CHECK-ASM %s -# RUN: llvm-mc --triple=loongarch64 < %s \ -# RUN: | FileCheck --check-prefix=CHECK-ASM %s -# RUN: llvm-mc --triple=loongarch32 --filetype=obj < %s | llvm-objdump -s - \ -# RUN: | FileCheck --check-prefix=CHECK-DATA %s -# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | llvm-objdump -s - \ -# RUN: | FileCheck --check-prefix=CHECK-DATA %s -# RUN: not llvm-mc --triple=loongarch32 --defsym=ERR=1 < %s 2>&1 \ -# RUN: | FileCheck %s --check-prefix=CHECK-ERR -# RUN: not llvm-mc --triple=loongarch64 --defsym=ERR=1 < %s 2>&1 \ -# RUN: | FileCheck %s --check-prefix=CHECK-ERR - -.data - -# CHECK-ASM: .byte 0 -# CHECK-ASM-NEXT: .byte 1 -# CHECK-ASM-NEXT: .byte 171 -# CHECK-ASM-NEXT: .byte 255 -# CHECK-DATA: Contents of section .data: -# CHECK-DATA-NEXT: 0000 0001abff 0100ffff 0100ffff 0100ffff -.byte 0 -.byte 1 -.byte 0xab -.byte 0xff - -# CHECK-ASM: .half 1 -# CHECK-ASM-NEXT: .half 65535 -.half 0x1 -.half 0xffff - -# CHECK-ASM: .half 1 -# CHECK-ASM-NEXT: .half 65535 -.2byte 0x1 -.2byte 0xffff - -# CHECK-ASM: .half 1 -# CHECK-ASM-NEXT: .half 65535 -.short 0x1 -.short 0xffff - -# CHECK-ASM: .half 0 -# CHECK-ASM-NEXT: .half 1 -# CHECK-ASM-NEXT: .half 4660 -# CHECK-ASM-NEXT: .half 65535 -# CHECK-DATA-NEXT: 0010 00000100 3412ffff 01000000 ffffffff -.hword 0 -.hword 0x1 -.hword 0x1234 -.hword 0xffff - -# CHECK-ASM: .word 1 -# CHECK-ASM-NEXT: .word 4294967295 -.word 0x1 -.word 0xffffffff - -# CHECK-ASM: .word 1 -# CHECK-ASM-NEXT: .word 4294967295 -# CHECK-DATA-NEXT: 0020 01000000 ffffffff 01000000 ffffffff -.long 0x1 -.long 0xffffffff - -# CHECK-ASM: .word 1 -# CHECK-ASM-NEXT: .word 4294967295 -.4byte 0x1 -.4byte 0xffffffff - -# CHECK-ASM: .dword 1 -# CHECK-ASM-NEXT: .dword 1234605616436508552 -# CHECK-DATA-NEXT: 0030 01000000 00000000 88776655 44332211 -.dword 0x1 -.dword 0x1122334455667788 - -# CHECK-ASM: .dword 1 -# CHECK-ASM-NEXT: .dword 1234605616436508552 -# CHECK-DATA-NEXT: 0040 01000000 00000000 88776655 44332211 -.8byte 0x1 -.8byte 0x1122334455667788 - -.ifdef ERR -# CHECK-ERR: :[[#@LINE+1]]:7: error: out of range literal value -.byte 0xffa -# CHECK-ERR: :[[#@LINE+1]]:7: error: out of range literal value -.half 0xffffa -# CHECK-ERR: :[[#@LINE+1]]:8: error: out of range literal value -.short 0xffffa -# CHECK-ERR: :[[#@LINE+1]]:8: error: out of range literal value -.hword 0xffffa -# CHECK-ERR: :[[#@LINE+1]]:8: error: out of range literal value -.2byte 0xffffa -# CHECK-ERR: :[[#@LINE+1]]:7: error: out of range literal value -.word 0xffffffffa -# CHECK-ERR: :[[#@LINE+1]]:7: error: out of range literal value -.long 0xffffffffa -# CHECK-ERR: :[[#@LINE+1]]:8: error: out of range literal value -.4byte 0xffffffffa -# CHECK-ERR: :[[#@LINE+1]]:8: error: literal value out of range for directive -.dword 0xffffffffffffffffa -# CHECK-ERR: :[[#@LINE+1]]:8: error: literal value out of range for directive -.8byte 0xffffffffffffffffa -.endif diff --git a/llvm/test/MC/LoongArch/Misc/aligned-nops.s b/llvm/test/MC/LoongArch/Misc/aligned-nops.s deleted file mode 100644 index 8554b4998223..000000000000 --- a/llvm/test/MC/LoongArch/Misc/aligned-nops.s +++ /dev/null @@ -1,15 +0,0 @@ -# RUN: llvm-mc --filetype=obj --triple=loongarch64 < %s \ -# RUN: | llvm-objdump -d - | FileCheck %s - -# func1 and func2 are 8 byte alignment but the func1's size is 4. -# So assembler will insert a nop to make sure 8 byte alignment. 
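# (The padding nop is the canonical andi $zero, $zero, 0, encoded 0x03400000,
# the same [0x00,0x00,0x40,0x03] bytes shown in pseudos.s above, so the
# 4-byte func1 plus one nop brings func2 back to an 8-byte boundary.)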
- -.text - -.p2align 3 -func1: - addi.d $sp, $sp, -16 -# CHECK: addi.d $sp, $sp, -16 -# CHECK-NEXT: nop -.p2align 3 -func2: diff --git a/llvm/test/MC/LoongArch/aligned-nops.s b/llvm/test/MC/LoongArch/aligned-nops.s new file mode 100644 index 000000000000..2ef26ac4b5ed --- /dev/null +++ b/llvm/test/MC/LoongArch/aligned-nops.s @@ -0,0 +1,25 @@ +# RUN: llvm-mc -filetype=obj -triple loongarch64 < %s \ +# RUN: | llvm-objdump -d - | FileCheck -check-prefix=CHECK-INST %s + +# alpha and main are both aligned to 8 bytes, +# but the alpha function is only 4 bytes long, +# so the assembler will insert a nop to keep main 8-byte aligned. + + .text + .p2align 3 + .type alpha,@function +alpha: +# BB#0: + addi.d $sp, $sp, -16 +# CHECK-INST: nop +.Lfunc_end0: + .size alpha, .Lfunc_end0-alpha + # -- End function + .globl main + .p2align 3 + .type main,@function +main: # @main +# BB#0: +.Lfunc_end1: + .size main, .Lfunc_end1-main + # -- End function diff --git a/llvm/test/MC/LoongArch/atomic-error.s b/llvm/test/MC/LoongArch/atomic-error.s new file mode 100644 index 000000000000..7e61a5ba5d65 --- /dev/null +++ b/llvm/test/MC/LoongArch/atomic-error.s @@ -0,0 +1,7 @@ +# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + +# CHECK: error: expected memory with constant 0 offset +amadd_db.d $a1, $t5, $s6, 1 + +# CHECK: error: unexpected token in argument list +amadd_db.d $a1, $t5, $s6, a diff --git a/llvm/test/MC/LoongArch/atomic.s b/llvm/test/MC/LoongArch/atomic.s new file mode 100644 index 000000000000..10a406550be6 --- /dev/null +++ b/llvm/test/MC/LoongArch/atomic.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding | \ +# RUN: FileCheck --check-prefixes=ASM,ASM-AND-OBJ %s +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj | llvm-objdump -d - | \ +# RUN: FileCheck --check-prefixes=ASM-AND-OBJ %s + +# ASM-AND-OBJ: amadd_db.d $r5, $r17, $r29, 0 +# ASM: encoding: [0xa5,0xc7,0x6a,0x38] +amadd_db.d $a1, $t5, $s6, 0 + +# ASM-AND-OBJ: amadd_db.d $r5, $r17, $r29, 0 +# ASM: encoding: [0xa5,0xc7,0x6a,0x38] +amadd_db.d $a1, $t5, $s6 diff --git a/llvm/test/MC/LoongArch/cgprofile.ll b/llvm/test/MC/LoongArch/cgprofile.ll new file mode 100644 index 000000000000..686dd6a0a771 --- /dev/null +++ b/llvm/test/MC/LoongArch/cgprofile.ll @@ -0,0 +1,65 @@ +;; Copied from llvm/test/MC/ELF/cgprofile.ll but uses a different triple.
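+;; The !"CG Profile" module flag below records weighted caller/callee edges; +;; llc lowers it to the .cg_profile directives and relocations checked here.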
+ +; RUN: llc -filetype=asm %s -o - --mtriple=loongarch64-linux-gnu | FileCheck %s +; RUN: llc -filetype=obj %s -o %t --mtriple=loongarch64-linux-gnu +; RUN: llvm-readobj -r --cg-profile %t | FileCheck %s --check-prefix=OBJ + +declare void @b() + +define void @a() { + call void @b() + ret void +} + +define void @freq(i1 %cond) { + br i1 %cond, label %A, label %B +A: + call void @a(); + ret void +B: + call void @b(); + ret void +} + +!llvm.module.flags = !{!0} + +!0 = !{i32 5, !"CG Profile", !1} +!1 = !{!2, !3, !4, !5} +!2 = !{void ()* @a, void ()* @b, i64 32} +!3 = !{void (i1)* @freq, void ()* @a, i64 11} +!4 = !{void (i1)* @freq, void ()* @b, i64 20} +!5 = !{void (i1)* @freq, null, i64 20} + +; CHECK: .cg_profile a, b, 32 +; CHECK: .cg_profile freq, a, 11 +; CHECK: .cg_profile freq, b, 20 + +; OBJ: Relocations [ +; OBJ: Section ({{.*}}) .rel.llvm.call-graph-profile { +; OBJ-NEXT: 0x0 R_LARCH_NONE a +; OBJ-NEXT: 0x0 R_LARCH_NONE b +; OBJ-NEXT: 0x8 R_LARCH_NONE freq +; OBJ-NEXT: 0x8 R_LARCH_NONE a +; OBJ-NEXT: 0x10 R_LARCH_NONE freq +; OBJ-NEXT: 0x10 R_LARCH_NONE b +; OBJ-NEXT: } + +; OBJ: CGProfile [ +; OBJ: CGProfileEntry { +; OBJ: From: a +; OBJ: To: b +; OBJ: Weight: 32 +; OBJ: } +; OBJ: CGProfileEntry { +; OBJ: From: freq +; OBJ: To: a +; OBJ: Weight: 11 +; OBJ: } +; OBJ: CGProfileEntry { +; OBJ: From: freq +; OBJ: To: b +; OBJ: Weight: 20 +; OBJ: } +; OBJ: ] diff --git a/llvm/test/MC/LoongArch/cgprofile.s b/llvm/test/MC/LoongArch/cgprofile.s new file mode 100644 index 000000000000..53f59e5d3a68 --- /dev/null +++ b/llvm/test/MC/LoongArch/cgprofile.s @@ -0,0 +1,30 @@ +## Copied from llvm/test/MC/ELF/cgprofile.s but uses a different triple. + +# RUN: llvm-mc --filetype=obj --triple=loongarch64-linux-gnu %s -o - | llvm-readobj -r -S --symbols --sd --cg-profile - | FileCheck %s + + .section .test,"aw",@progbits +a: .word b + + .cg_profile a, b, 32 + .cg_profile freq, a, 11 + .cg_profile late, late2, 20 + .cg_profile .L.local, b, 42 + + .globl late +late: +late2: .word 0 +late3: +.L.local: + +# CHECK: Relocations [ +# CHECK: Section ({{.*}}) .rel.llvm.call-graph-profile { +# CHECK-NEXT: 0x0 R_LARCH_NONE a +# CHECK-NEXT: 0x0 R_LARCH_NONE b +# CHECK-NEXT: 0x8 R_LARCH_NONE freq +# CHECK-NEXT: 0x8 R_LARCH_NONE a +# CHECK-NEXT: 0x10 R_LARCH_NONE late +# CHECK-NEXT: 0x10 R_LARCH_NONE late2 +# CHECK-NEXT: 0x18 R_LARCH_NONE .test +# CHECK-NEXT: 0x18 R_LARCH_NONE b +# CHECK-NEXT: } +# CHECK-NEXT: ] diff --git a/llvm/test/MC/LoongArch/data_half.s b/llvm/test/MC/LoongArch/data_half.s new file mode 100644 index 000000000000..a8efeaacec39 --- /dev/null +++ b/llvm/test/MC/LoongArch/data_half.s @@ -0,0 +1,13 @@ +# RUN: llvm-mc --triple=loongarch64 < %s | FileCheck %s + +.data + +# CHECK: .half 1 +# CHECK-NEXT: .half 65535 +.half 0x1 +.half 0xffff + +# CHECK: .half 1 +# CHECK-NEXT: .half 65535 +.2byte 0x1 +.2byte 0xffff diff --git a/llvm/test/MC/LoongArch/fixups-expr.s b/llvm/test/MC/LoongArch/fixups-expr.s new file mode 100644 index 000000000000..d35fe7e77f6e --- /dev/null +++ b/llvm/test/MC/LoongArch/fixups-expr.s @@ -0,0 +1,40 @@ +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s \ +# RUN: | llvm-readobj -r - | FileCheck %s + +# Check that subtraction expressions are emitted as paired ADD/SUB relocations. + +.globl G1 +.globl G2 +.L1: +G1: +nop +.L2: +G2: + +.data +.8byte .L2-.L1 # CHECK: 0x0 R_LARCH_ADD64 .L2 0x0 + # CHECK: 0x0 R_LARCH_SUB64 .L1 0x0 +.8byte G2-G1 # CHECK: 0x8 R_LARCH_ADD64 G2 0x0 + # CHECK: 0x8 R_LARCH_SUB64 G1 0x0 +.4byte .L2-.L1 # CHECK: 0x10 R_LARCH_ADD32 .L2 0x0 + # CHECK: 0x10
R_LARCH_SUB32 .L1 0x0 +.4byte G2-G1 # CHECK: 0x14 R_LARCH_ADD32 G2 0x0 + # CHECK: 0x14 R_LARCH_SUB32 G1 0x0 +.2byte .L2-.L1 # CHECK: 0x18 R_LARCH_ADD16 .L2 0x0 + # CHECK: 0x18 R_LARCH_SUB16 .L1 0x0 +.2byte G2-G1 # CHECK: 0x1A R_LARCH_ADD16 G2 0x0 + # CHECK: 0x1A R_LARCH_SUB16 G1 0x0 +.byte .L2-.L1 # CHECK: 0x1C R_LARCH_ADD8 .L2 0x0 + # CHECK: 0x1C R_LARCH_SUB8 .L1 0x0 +.byte G2-G1 # CHECK: 0x1D R_LARCH_ADD8 G2 0x0 + # CHECK: 0x1D R_LARCH_SUB8 G1 0x0 + +.section .rodata.str.1 +.L.str: +.asciz "string" + +.rodata +.Lreltable: +.word .L.str-.Lreltable # CHECK: 0x0 R_LARCH_ADD32 .L.str 0x0 + # CHECK: 0x0 R_LARCH_SUB32 .Lreltable 0x0 + diff --git a/llvm/test/MC/LoongArch/invalid.s b/llvm/test/MC/LoongArch/invalid.s new file mode 100644 index 000000000000..e0fc7ce4b202 --- /dev/null +++ b/llvm/test/MC/LoongArch/invalid.s @@ -0,0 +1,50 @@ +# RUN: not llvm-mc %s -triple=loongarch64-unknown-linux-gnu 2>&1 | FileCheck %s +.text +csrxchg $r6, $r0, 214 # CHECK: :[[@LINE]]:1: error: invalid operand ($zero) for instruction +csrxchg $r6, $r1, 214 # CHECK: :[[@LINE]]:1: error: invalid operand ($r1) for instruction + +## out-of-bound immediate +### simm16 << 2 +beq $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +beq $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bne $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bne $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +blt $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +blt $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bge $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bge $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bltu $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bltu $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bgeu $r10, $r7, -0x20000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bgeu $r10, $r7, 0x1FFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +### simm21 << 2 +beqz $r9, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +beqz $r9, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bnez $r9, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bnez $r9, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bceqz $fcc6, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bceqz $fcc6, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bcnez $fcc6, -0x400000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bcnez $fcc6, 0x3FFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +### simm26 << 2 +b -0x8000000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +b 0x7FFFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bl -0x8000000-4 # CHECK: :[[@LINE]]:1: error: branch target out of range +bl 0x7FFFFFC+4 # CHECK: :[[@LINE]]:1: error: branch target out of range + +## unaligned immediate +### simm16 << 2 +beq $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bne $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +blt $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bge $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bltu $r10, $r7, 0x1FFFC+1 # 
CHECK: :[[@LINE]]:1: error: branch to misaligned address +bgeu $r10, $r7, 0x1FFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +### simm21 << 2 +beqz $r9, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bnez $r9, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bceqz $fcc6, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bcnez $fcc6, 0x3FFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +### simm26 << 2 +b 0x7FFFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address +bl 0x7FFFFFC+1 # CHECK: :[[@LINE]]:1: error: branch to misaligned address diff --git a/llvm/test/MC/LoongArch/lit.local.cfg b/llvm/test/MC/LoongArch/lit.local.cfg index 2b5a4893e686..6223fc691edc 100644 --- a/llvm/test/MC/LoongArch/lit.local.cfg +++ b/llvm/test/MC/LoongArch/lit.local.cfg @@ -1,2 +1,3 @@ if not 'LoongArch' in config.root.targets: config.unsupported = True + diff --git a/llvm/test/MC/LoongArch/macro-la.s b/llvm/test/MC/LoongArch/macro-la.s new file mode 100644 index 000000000000..eca76ac69d53 --- /dev/null +++ b/llvm/test/MC/LoongArch/macro-la.s @@ -0,0 +1,168 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: la.pcrel $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] +# CHECK: # fixup A - offset: 0, value: (symbol)+2048, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 +# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x02] +# CHECK: # fixup A - offset: 0, value: (symbol)+4, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: (symbol)+2052, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup C - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup E - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL +# CHECK: # fixup G - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB +# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 +la.local $a0, symbol + +# CHECK: la.got $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 +# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x28] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup G - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup H - offset: 0, 
value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL +# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB +# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 +la.global $a0, symbol + +# CHECK: la.pcrel $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] +# CHECK: # fixup A - offset: 0, value: (symbol)+2048, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 +# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x02] +# CHECK: # fixup A - offset: 0, value: (symbol)+4, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: (symbol)+2052, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup C - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup E - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL +# CHECK: # fixup G - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB +# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 +la.pcrel $a0, symbol + +# CHECK: la.got $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 +# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x28] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_GPREL +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup G - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL +# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB +# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 +la.got $a0, symbol + +# CHECK: la.tls.le $r4, symbol # encoding: [0x04,0x00,0x00,0x14] +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_TPREL +# CHECK: # fixup B - offset: 0, value: 32, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL +# CHECK: # fixup D - offset: 0, value: 44, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 +# CHECK: # la 
expanded slot # encoding: [0x84,0x00,0x80,0x03] +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_TPREL +# CHECK: # fixup B - offset: 0, value: 4095, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_AND +# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_U_10_12 +# CHECK: # la expanded slot # encoding: [0x04,0x00,0x00,0x16] +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_TPREL +# CHECK: # fixup B - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL +# CHECK: # fixup D - offset: 0, value: 44, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 +# CHECK: # la expanded slot # encoding: [0x84,0x00,0x00,0x03] +# CHECK: # fixup A - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_TPREL +# CHECK: # fixup B - offset: 0, value: 52, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup D - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 +la.tls.le $a0, symbol + +# CHECK: la.tls.ie $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GOT +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 +# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x28] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GOT +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GOT +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup G - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL +# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB +# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 +la.tls.ie $a0, symbol + +# CHECK: la.tls.gd $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 +# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x02] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: 
fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup G - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL +# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB +# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 +la.tls.ld $a0, symbol + +# CHECK: la.tls.gd $r4, symbol # encoding: [0x04,0x00,0x00,0x1c] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2048, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup E - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_5_20 +# CHECK: # la expanded slot # encoding: [0x84,0x00,0xc0,0x02] +# CHECK: # fixup A - offset: 0, value: _GLOBAL_OFFSET_TABLE_+4, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup B - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD +# CHECK: # fixup C - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup D - offset: 0, value: _GLOBAL_OFFSET_TABLE_+2052, kind: fixup_LARCH_SOP_PUSH_PCREL +# CHECK: # fixup E - offset: 0, value: symbol, kind: fixup_LARCH_SOP_PUSH_TLS_GD +# CHECK: # fixup F - offset: 0, value: 0, kind: fixup_LARCH_SOP_ADD +# CHECK: # fixup G - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup H - offset: 0, value: 0, kind: fixup_LARCH_SOP_SR +# CHECK: # fixup I - offset: 0, value: 12, kind: fixup_LARCH_SOP_PUSH_ABSOLUTE +# CHECK: # fixup J - offset: 0, value: 0, kind: fixup_LARCH_SOP_SL +# CHECK: # fixup K - offset: 0, value: 0, kind: fixup_LARCH_SOP_SUB +# CHECK: # fixup L - offset: 0, value: 0, kind: fixup_LARCH_SOP_POP_32_S_10_12 +la.tls.gd $a0, symbol diff --git a/llvm/test/MC/LoongArch/macro-li.s b/llvm/test/MC/LoongArch/macro-li.s new file mode 100644 index 000000000000..b1a7c58ba4d8 --- /dev/null +++ b/llvm/test/MC/LoongArch/macro-li.s @@ -0,0 +1,773 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu | FileCheck %s +li.w $a0, 0x00000000 # CHECK: ori $r4, $zero, 0 +li.w $a0, 0x000007ff # CHECK: ori $r4, $zero, 2047 +li.w $a0, 0x00000800 # CHECK: ori $r4, $zero, 2048 +li.w $a0, 0x00000fff # CHECK: ori $r4, $zero, 4095 +li.w $a0, 0x7ffff000 # CHECK: lu12i.w $r4, 524287 +li.w $a0, 0x7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 +li.w $a0, 0x7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 +li.w $a0, 0x7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 +li.w $a0, 0x80000000 # CHECK: lu12i.w $r4, -524288 +li.w $a0, 0x800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 +li.w $a0, 0x80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 +li.w $a0, 0x80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 
+li.w $a0, 0xfffff000 # CHECK: lu12i.w $r4, -1 +li.w $a0, 0xfffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 +li.w $a0, 0xfffff800 # CHECK: addi.w $r4, $zero, -2048 +li.w $a0, 0xffffffff # CHECK: addi.w $r4, $zero, -1 +li.d $a0, 0x0000000000000000 # CHECK: addi.d $r4, $zero, 0 +li.d $a0, 0x00000000000007ff # CHECK: addi.d $r4, $zero, 2047 +li.d $a0, 0x0000000000000800 # CHECK: ori $r4, $zero, 2048 +li.d $a0, 0x0000000000000fff # CHECK: ori $r4, $zero, 4095 +li.d $a0, 0x000000007ffff000 # CHECK: lu12i.w $r4, 524287 +li.d $a0, 0x000000007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 +li.d $a0, 0x000000007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 +li.d $a0, 0x000000007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 +li.d $a0, 0x0000000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x0000000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x0000000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x00000000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 0 +li.d $a0, 0x0007ffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007fffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007fffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007fffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0007ffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 524287 +li.d $a0, 0x0008000000000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000000000800 # CHECK: ori $r4, 
$zero, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000800007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000800007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000800007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000800007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x0008000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x00080000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000ffffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000ffffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, 
$r4, 0 +li.d $a0, 0x000ffffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x000fffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu52i.d $r4, $r4, 0 +li.d $a0, 0x7ff0000000000000 # CHECK: lu52i.d $r4, $zero, 2047 +li.d $a0, 0x7ff00000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff000007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff000007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff000007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff000007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff00000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff0000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff00000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff00000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff00000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff00000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 
524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7fffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7fffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7fffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff7ffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000000000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff800007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff800007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff800007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff800007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff8000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ff80000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 
0x7fffffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ffffffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ffffffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7ffffffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x7fffffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu52i.d $r4, $r4, 2047 +li.d $a0, 0x8000000000000000 # CHECK: lu52i.d $r4, $zero, -2048 +li.d $a0, 0x80000000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800000007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800000007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800000007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800000007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8000000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80000000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, 524287 + # 
CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007fffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007fffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007fffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8007ffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000000000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800800007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800800007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800800007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800800007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x8008000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d 
$r4, $r4, -2048 +li.d $a0, 0x80080000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x80080000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800ffffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800ffffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800ffffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0x800fffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu52i.d $r4, $r4, -2048 +li.d $a0, 0xfff0000000000000 # CHECK: lu52i.d $r4, $zero, -1 +li.d $a0, 0xfff00000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff0000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff0000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff000007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff000007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff000007ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff000007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff0000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 
0xfff0000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff0000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff00000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 0 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff80000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7fffffffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7fffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7fffffffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff7ffffffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, 524287 + # CHECK: lu52i.d $r4, $r4, -1 +li.d $a0, 0xfff8000000000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000000000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000000000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff800007ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff800007ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff800007ffff800 
# CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff800007fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000080000000 # CHECK: lu12i.w $r4, -524288 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000080000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff8000080000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000fffff000 # CHECK: lu12i.w $r4, -1 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000fffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000fffff800 # CHECK: addi.w $r4, $zero, -2048 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xfff80000ffffffff # CHECK: addi.w $r4, $zero, -1 + # CHECK: lu32i.d $r4, -524288 +li.d $a0, 0xffffffff00000000 # CHECK: ori $r4, $zero, 0 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff000007ff # CHECK: ori $r4, $zero, 2047 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff00000800 # CHECK: ori $r4, $zero, 2048 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff00000fff # CHECK: ori $r4, $zero, 4095 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff7ffff000 # CHECK: lu12i.w $r4, 524287 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff7ffff7ff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2047 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff7ffff800 # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 2048 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff7fffffff # CHECK: lu12i.w $r4, 524287 + # CHECK: ori $r4, $r4, 4095 + # CHECK: lu32i.d $r4, -1 +li.d $a0, 0xffffffff80000000 # CHECK: lu12i.w $r4, -524288 +li.d $a0, 0xffffffff800007ff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2047 +li.d $a0, 0xffffffff80000800 # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 2048 +li.d $a0, 0xffffffff80000fff # CHECK: lu12i.w $r4, -524288 + # CHECK: ori $r4, $r4, 4095 +li.d $a0, 0xfffffffffffff000 # CHECK: lu12i.w $r4, -1 +li.d $a0, 0xfffffffffffff7ff # CHECK: lu12i.w $r4, -1 + # CHECK: ori $r4, $r4, 2047 +li.d $a0, 0xfffffffffffff800 # CHECK: addi.d $r4, $zero, -2048 +li.d $a0, 0xffffffffffffffff # CHECK: addi.d $r4, $zero, -1 diff --git a/llvm/test/MC/LoongArch/reloc-directive-err.s b/llvm/test/MC/LoongArch/reloc-directive-err.s new file mode 100644 index 000000000000..60fd145564ae --- /dev/null +++ b/llvm/test/MC/LoongArch/reloc-directive-err.s @@ -0,0 +1,7 @@ +# RUN: llvm-mc --triple=loongarch64 %s | FileCheck --check-prefix=PRINT %s +# RUN: not llvm-mc --filetype=obj --triple=loongarch64 %s -o /dev/null 2>&1 \ +# RUN: | FileCheck %s + +# PRINT: .reloc 0, R_INVALID, 0 +# CHECK: {{.*}}.s:[[# @LINE+1]]:11: error: unknown relocation name +.reloc 0, R_INVALID, 0 diff --git a/llvm/test/MC/LoongArch/reloc-directive.s b/llvm/test/MC/LoongArch/reloc-directive.s new file mode 100644 index 000000000000..282da7f287eb --- /dev/null +++ b/llvm/test/MC/LoongArch/reloc-directive.s @@ -0,0 +1,177 @@ +# RUN: llvm-mc --triple=loongarch64 %s | FileCheck --check-prefix=PRINT %s +# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s \ +# RUN: | llvm-readobj -r - | FileCheck %s + +# PRINT: .reloc 0, R_LARCH_NONE, 0 +# PRINT-NEXT: .reloc 1, R_LARCH_32, 1 +# PRINT-NEXT: .reloc 2, R_LARCH_64, 2 +# PRINT-NEXT: .reloc 3, R_LARCH_RELATIVE, 3 +# 
PRINT-NEXT: .reloc 4, R_LARCH_COPY, 4
+# PRINT-NEXT: .reloc 5, R_LARCH_JUMP_SLOT, 5
+# PRINT-NEXT: .reloc 6, R_LARCH_TLS_DTPMOD32, 6
+# PRINT-NEXT: .reloc 7, R_LARCH_TLS_DTPMOD64, 7
+# PRINT-NEXT: .reloc 8, R_LARCH_TLS_DTPREL32, 8
+# PRINT-NEXT: .reloc 9, R_LARCH_TLS_DTPREL64, 9
+# PRINT-NEXT: .reloc 10, R_LARCH_TLS_TPREL32, 10
+# PRINT-NEXT: .reloc 11, R_LARCH_TLS_TPREL64, 11
+# PRINT-NEXT: .reloc 12, R_LARCH_IRELATIVE, 12
+# PRINT-NEXT: .reloc 13, BFD_RELOC_NONE, 13
+# PRINT-NEXT: .reloc 14, BFD_RELOC_32, 14
+# PRINT-NEXT: .reloc 15, BFD_RELOC_64, 15
+# PRINT-NEXT: .reloc 20, R_LARCH_MARK_LA, 20
+# PRINT-NEXT: .reloc 21, R_LARCH_MARK_PCREL, 21
+# PRINT-NEXT: .reloc 22, R_LARCH_SOP_PUSH_PCREL, 22
+# PRINT-NEXT: .reloc 23, R_LARCH_SOP_PUSH_ABSOLUTE, 23
+# PRINT-NEXT: .reloc 24, R_LARCH_SOP_PUSH_DUP, 24
+# PRINT-NEXT: .reloc 25, R_LARCH_SOP_PUSH_GPREL, 25
+# PRINT-NEXT: .reloc 26, R_LARCH_SOP_PUSH_TLS_TPREL, 26
+# PRINT-NEXT: .reloc 27, R_LARCH_SOP_PUSH_TLS_GOT, 27
+# PRINT-NEXT: .reloc 28, R_LARCH_SOP_PUSH_TLS_GD, 28
+# PRINT-NEXT: .reloc 29, R_LARCH_SOP_PUSH_PLT_PCREL, 29
+# PRINT-NEXT: .reloc 30, R_LARCH_SOP_ASSERT, 30
+# PRINT-NEXT: .reloc 31, R_LARCH_SOP_NOT, 31
+# PRINT-NEXT: .reloc 32, R_LARCH_SOP_SUB, 32
+# PRINT-NEXT: .reloc 33, R_LARCH_SOP_SL, 33
+# PRINT-NEXT: .reloc 34, R_LARCH_SOP_SR, 34
+# PRINT-NEXT: .reloc 35, R_LARCH_SOP_ADD, 35
+# PRINT-NEXT: .reloc 36, R_LARCH_SOP_AND, 36
+# PRINT-NEXT: .reloc 37, R_LARCH_SOP_IF_ELSE, 37
+# PRINT-NEXT: .reloc 38, R_LARCH_SOP_POP_32_S_10_5, 38
+# PRINT-NEXT: .reloc 39, R_LARCH_SOP_POP_32_U_10_12, 39
+# PRINT-NEXT: .reloc 40, R_LARCH_SOP_POP_32_S_10_12, 40
+# PRINT-NEXT: .reloc 41, R_LARCH_SOP_POP_32_S_10_16, 41
+# PRINT-NEXT: .reloc 42, R_LARCH_SOP_POP_32_S_10_16_S2, 42
+# PRINT-NEXT: .reloc 43, R_LARCH_SOP_POP_32_S_5_20, 43
+# PRINT-NEXT: .reloc 44, R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44
+# PRINT-NEXT: .reloc 45, R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45
+# PRINT-NEXT: .reloc 46, R_LARCH_SOP_POP_32_U, 46
+# PRINT-NEXT: .reloc 47, R_LARCH_ADD8, 47
+# PRINT-NEXT: .reloc 48, R_LARCH_ADD16, 48
+# PRINT-NEXT: .reloc 49, R_LARCH_ADD24, 49
+# PRINT-NEXT: .reloc 50, R_LARCH_ADD32, 50
+# PRINT-NEXT: .reloc 51, R_LARCH_ADD64, 51
+# PRINT-NEXT: .reloc 52, R_LARCH_SUB8, 52
+# PRINT-NEXT: .reloc 53, R_LARCH_SUB16, 53
+# PRINT-NEXT: .reloc 54, R_LARCH_SUB24, 54
+# PRINT-NEXT: .reloc 55, R_LARCH_SUB32, 55
+# PRINT-NEXT: .reloc 56, R_LARCH_SUB64, 56
+# PRINT-NEXT: .reloc 57, R_LARCH_GNU_VTINHERIT, 57
+# PRINT-NEXT: .reloc 58, R_LARCH_GNU_VTENTRY, 58
+
+.text
+ .fill 59, 1, 0x0
+ .reloc 0, R_LARCH_NONE, 0
+ .reloc 1, R_LARCH_32, 1
+ .reloc 2, R_LARCH_64, 2
+ .reloc 3, R_LARCH_RELATIVE, 3
+ .reloc 4, R_LARCH_COPY, 4
+ .reloc 5, R_LARCH_JUMP_SLOT, 5
+ .reloc 6, R_LARCH_TLS_DTPMOD32, 6
+ .reloc 7, R_LARCH_TLS_DTPMOD64, 7
+ .reloc 8, R_LARCH_TLS_DTPREL32, 8
+ .reloc 9, R_LARCH_TLS_DTPREL64, 9
+ .reloc 10, R_LARCH_TLS_TPREL32, 10
+ .reloc 11, R_LARCH_TLS_TPREL64, 11
+ .reloc 12, R_LARCH_IRELATIVE, 12
+ .reloc 13, BFD_RELOC_NONE, 13
+ .reloc 14, BFD_RELOC_32, 14
+ .reloc 15, BFD_RELOC_64, 15
+ .reloc 20, R_LARCH_MARK_LA, 20
+ .reloc 21, R_LARCH_MARK_PCREL, 21
+ .reloc 22, R_LARCH_SOP_PUSH_PCREL, 22
+ .reloc 23, R_LARCH_SOP_PUSH_ABSOLUTE, 23
+ .reloc 24, R_LARCH_SOP_PUSH_DUP, 24
+ .reloc 25, R_LARCH_SOP_PUSH_GPREL, 25
+ .reloc 26, R_LARCH_SOP_PUSH_TLS_TPREL, 26
+ .reloc 27, R_LARCH_SOP_PUSH_TLS_GOT, 27
+ .reloc 28, R_LARCH_SOP_PUSH_TLS_GD, 28
+ .reloc 29, R_LARCH_SOP_PUSH_PLT_PCREL, 29
+ .reloc 30, R_LARCH_SOP_ASSERT, 30
+ .reloc 31, R_LARCH_SOP_NOT, 31
+ .reloc 32, R_LARCH_SOP_SUB, 32
+ .reloc 33, R_LARCH_SOP_SL, 33
+ .reloc 34, R_LARCH_SOP_SR, 34
+ .reloc 35, R_LARCH_SOP_ADD, 35
+ .reloc 36, R_LARCH_SOP_AND, 36
+ .reloc 37, R_LARCH_SOP_IF_ELSE, 37
+ .reloc 38, R_LARCH_SOP_POP_32_S_10_5, 38
+ .reloc 39, R_LARCH_SOP_POP_32_U_10_12, 39
+ .reloc 40, R_LARCH_SOP_POP_32_S_10_12, 40
+ .reloc 41, R_LARCH_SOP_POP_32_S_10_16, 41
+ .reloc 42, R_LARCH_SOP_POP_32_S_10_16_S2, 42
+ .reloc 43, R_LARCH_SOP_POP_32_S_5_20, 43
+ .reloc 44, R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44
+ .reloc 45, R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45
+ .reloc 46, R_LARCH_SOP_POP_32_U, 46
+ .reloc 47, R_LARCH_ADD8, 47
+ .reloc 48, R_LARCH_ADD16, 48
+ .reloc 49, R_LARCH_ADD24, 49
+ .reloc 50, R_LARCH_ADD32, 50
+ .reloc 51, R_LARCH_ADD64, 51
+ .reloc 52, R_LARCH_SUB8, 52
+ .reloc 53, R_LARCH_SUB16, 53
+ .reloc 54, R_LARCH_SUB24, 54
+ .reloc 55, R_LARCH_SUB32, 55
+ .reloc 56, R_LARCH_SUB64, 56
+ .reloc 57, R_LARCH_GNU_VTINHERIT, 57
+ .reloc 58, R_LARCH_GNU_VTENTRY, 58
+
+# CHECK: Relocations [
+# CHECK-NEXT: Section ({{.*}}) .rela.text {
+# CHECK-NEXT: 0x0 R_LARCH_NONE - 0x0
+# CHECK-NEXT: 0x1 R_LARCH_32 - 0x1
+# CHECK-NEXT: 0x2 R_LARCH_64 - 0x2
+# CHECK-NEXT: 0x3 R_LARCH_RELATIVE - 0x3
+# CHECK-NEXT: 0x4 R_LARCH_COPY - 0x4
+# CHECK-NEXT: 0x5 R_LARCH_JUMP_SLOT - 0x5
+# CHECK-NEXT: 0x6 R_LARCH_TLS_DTPMOD32 - 0x6
+# CHECK-NEXT: 0x7 R_LARCH_TLS_DTPMOD64 - 0x7
+# CHECK-NEXT: 0x8 R_LARCH_TLS_DTPREL32 - 0x8
+# CHECK-NEXT: 0x9 R_LARCH_TLS_DTPREL64 - 0x9
+# CHECK-NEXT: 0xA R_LARCH_TLS_TPREL32 - 0xA
+# CHECK-NEXT: 0xB R_LARCH_TLS_TPREL64 - 0xB
+# CHECK-NEXT: 0xC R_LARCH_IRELATIVE - 0xC
+# CHECK-NEXT: 0xD R_LARCH_NONE - 0xD
+# CHECK-NEXT: 0xE R_LARCH_32 - 0xE
+# CHECK-NEXT: 0xF R_LARCH_64 - 0xF
+# CHECK-NEXT: 0x14 R_LARCH_MARK_LA - 0x14
+# CHECK-NEXT: 0x15 R_LARCH_MARK_PCREL - 0x15
+# CHECK-NEXT: 0x16 R_LARCH_SOP_PUSH_PCREL - 0x16
+# CHECK-NEXT: 0x17 R_LARCH_SOP_PUSH_ABSOLUTE - 0x17
+# CHECK-NEXT: 0x18 R_LARCH_SOP_PUSH_DUP - 0x18
+# CHECK-NEXT: 0x19 R_LARCH_SOP_PUSH_GPREL - 0x19
+# CHECK-NEXT: 0x1A R_LARCH_SOP_PUSH_TLS_TPREL - 0x1A
+# CHECK-NEXT: 0x1B R_LARCH_SOP_PUSH_TLS_GOT - 0x1B
+# CHECK-NEXT: 0x1C R_LARCH_SOP_PUSH_TLS_GD - 0x1C
+# CHECK-NEXT: 0x1D R_LARCH_SOP_PUSH_PLT_PCREL - 0x1D
+# CHECK-NEXT: 0x1E R_LARCH_SOP_ASSERT - 0x1E
+# CHECK-NEXT: 0x1F R_LARCH_SOP_NOT - 0x1F
+# CHECK-NEXT: 0x20 R_LARCH_SOP_SUB - 0x20
+# CHECK-NEXT: 0x21 R_LARCH_SOP_SL - 0x21
+# CHECK-NEXT: 0x22 R_LARCH_SOP_SR - 0x22
+# CHECK-NEXT: 0x23 R_LARCH_SOP_ADD - 0x23
+# CHECK-NEXT: 0x24 R_LARCH_SOP_AND - 0x24
+# CHECK-NEXT: 0x25 R_LARCH_SOP_IF_ELSE - 0x25
+# CHECK-NEXT: 0x26 R_LARCH_SOP_POP_32_S_10_5 - 0x26
+# CHECK-NEXT: 0x27 R_LARCH_SOP_POP_32_U_10_12 - 0x27
+# CHECK-NEXT: 0x28 R_LARCH_SOP_POP_32_S_10_12 - 0x28
+# CHECK-NEXT: 0x29 R_LARCH_SOP_POP_32_S_10_16 - 0x29
+# CHECK-NEXT: 0x2A R_LARCH_SOP_POP_32_S_10_16_S2 - 0x2A
+# CHECK-NEXT: 0x2B R_LARCH_SOP_POP_32_S_5_20 - 0x2B
+# CHECK-NEXT: 0x2C R_LARCH_SOP_POP_32_S_0_5_10_16_S2 - 0x2C
+# CHECK-NEXT: 0x2D R_LARCH_SOP_POP_32_S_0_10_10_16_S2 - 0x2D
+# CHECK-NEXT: 0x2E R_LARCH_SOP_POP_32_U - 0x2E
+# CHECK-NEXT: 0x2F R_LARCH_ADD8 - 0x2F
+# CHECK-NEXT: 0x30 R_LARCH_ADD16 - 0x30
+# CHECK-NEXT: 0x31 R_LARCH_ADD24 - 0x31
+# CHECK-NEXT: 0x32 R_LARCH_ADD32 - 0x32
+# CHECK-NEXT: 0x33 R_LARCH_ADD64 - 0x33
+# CHECK-NEXT: 0x34 R_LARCH_SUB8 - 0x34
+# CHECK-NEXT: 0x35 R_LARCH_SUB16 - 0x35
+# CHECK-NEXT: 0x36 R_LARCH_SUB24 - 0x36
+# CHECK-NEXT: 0x37 R_LARCH_SUB32 - 0x37
+# CHECK-NEXT: 0x38 R_LARCH_SUB64 - 0x38
+# CHECK-NEXT: 0x39 R_LARCH_GNU_VTINHERIT - 0x39
+# CHECK-NEXT: 0x3A R_LARCH_GNU_VTENTRY - 0x3A
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
diff --git a/llvm/test/MC/LoongArch/Misc/unaligned-nops.s b/llvm/test/MC/LoongArch/unaligned-nops.s
similarity index 54%
rename from llvm/test/MC/LoongArch/Misc/unaligned-nops.s
rename to llvm/test/MC/LoongArch/unaligned-nops.s
index 5952540b46d0..453e2cdcaaff 100644
--- a/llvm/test/MC/LoongArch/Misc/unaligned-nops.s
+++ b/llvm/test/MC/LoongArch/unaligned-nops.s
@@ -1,4 +1,4 @@
-# RUN: not --crash llvm-mc --filetype=obj --triple=loongarch64 %s -o %t
+# RUN: not --crash llvm-mc -filetype=obj -triple=loongarch64 %s -o %t
 .byte 1
 # CHECK: LLVM ERROR: unable to write nop sequence of 3 bytes
 .p2align 2
diff --git a/llvm/test/MC/LoongArch/valid_12imm.s b/llvm/test/MC/LoongArch/valid_12imm.s
new file mode 100644
index 000000000000..ed44180bf7b7
--- /dev/null
+++ b/llvm/test/MC/LoongArch/valid_12imm.s
@@ -0,0 +1,33 @@
+# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s
+# CHECK: slti $r27, $ra, 235
+# CHECK: encoding: [0x3b,0xac,0x03,0x02]
+slti $r27, $ra, 235
+
+# CHECK: sltui $zero, $r8, 162
+# CHECK: encoding: [0x00,0x89,0x42,0x02]
+sltui $zero, $r8, 162
+
+# CHECK: addi.w $r5, $r7, 246
+# CHECK: encoding: [0xe5,0xd8,0x83,0x02]
+addi.w $r5, $r7, 246
+
+# CHECK: addi.d $r28, $r6, 75
+# CHECK: encoding: [0xdc,0x2c,0xc1,0x02]
+addi.d $r28, $r6, 75
+
+# CHECK: lu52i.d $r13, $r4, 195
+# CHECK: encoding: [0x8d,0x0c,0x03,0x03]
+lu52i.d $r13, $r4, 195
+
+# CHECK: andi $r25, $zero, 106
+# CHECK: encoding: [0x19,0xa8,0x41,0x03]
+andi $r25, $zero, 106
+
+# CHECK: ori $r17, $r5, 47
+# CHECK: encoding: [0xb1,0xbc,0x80,0x03]
+ori $r17, $r5, 47
+
+# CHECK: xori $r18, $r23, 99
+# CHECK: encoding: [0xf2,0x8e,0xc1,0x03]
+xori $r18, $r23, 99
+
diff --git a/llvm/test/MC/LoongArch/valid_4operands.s b/llvm/test/MC/LoongArch/valid_4operands.s
new file mode 100644
index 000000000000..1418bb67709e
--- /dev/null
+++ b/llvm/test/MC/LoongArch/valid_4operands.s
@@ -0,0 +1,53 @@
+# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s
+# CHECK: fmadd.s $f3, $f16, $f3, $f15
+# CHECK: encoding: [0x03,0x8e,0x17,0x08]
+fmadd.s $f3, $f16, $f3, $f15
+
+# CHECK: fmadd.d $f21, $f24, $f28, $f24
+# CHECK: encoding: [0x15,0x73,0x2c,0x08]
+fmadd.d $f21, $f24, $f28, $f24
+
+# CHECK: fmsub.s $f23, $f11, $f21, $f4
+# CHECK: encoding: [0x77,0x55,0x52,0x08]
+fmsub.s $f23, $f11, $f21, $f4
+
+# CHECK: fmsub.d $f6, $f18, $f20, $f27
+# CHECK: encoding: [0x46,0xd2,0x6d,0x08]
+fmsub.d $f6, $f18, $f20, $f27
+
+# CHECK: fnmadd.s $f29, $f1, $f24, $f20
+# CHECK: encoding: [0x3d,0x60,0x9a,0x08]
+fnmadd.s $f29, $f1, $f24, $f20
+
+# CHECK: fnmadd.d $f25, $f13, $f19, $f30
+# CHECK: encoding: [0xb9,0x4d,0xaf,0x08]
+fnmadd.d $f25, $f13, $f19, $f30
+
+# CHECK: fnmsub.s $f8, $f4, $f24, $f25
+# CHECK: encoding: [0x88,0xe0,0xdc,0x08]
+fnmsub.s $f8, $f4, $f24, $f25
+
+# CHECK: fnmsub.d $f30, $f26, $f7, $f24
+# CHECK: encoding: [0x5e,0x1f,0xec,0x08]
+fnmsub.d $f30, $f26, $f7, $f24
+
+# CHECK: fcmp.ceq.s $fcc7, $f17, $f29
+# CHECK: encoding: [0x27,0x76,0x12,0x0c]
+fcmp.ceq.s $fcc7, $f17, $f29
+
+# CHECK: fcmp.ceq.d $fcc4, $f12, $f9
+# CHECK: encoding: [0x84,0x25,0x22,0x0c]
+fcmp.ceq.d $fcc4, $f12, $f9
+
+# CHECK: fcmp.cult.s $fcc0, $f0, $f1
+# CHECK: encoding: [0x00,0x04,0x15,0x0c]
+fcmp.cult.s $fcc0, $f0, $f1
+
+# CHECK: fcmp.cult.d $fcc2, $f3, $f4
+# CHECK: encoding: [0x62,0x10,0x25,0x0c]
+fcmp.cult.d $fcc2, $f3, $f4
+
+# CHECK: fsel $f18, $f20, $f21, $fcc4
+# CHECK: encoding: [0x92,0x56,0x02,0x0d]
+fsel $f18, $f20, $f21, $fcc4
+
diff --git a/llvm/test/MC/LoongArch/valid_bigimm.s b/llvm/test/MC/LoongArch/valid_bigimm.s
new file mode 100644
index 000000000000..d7b3bbb7dba0
--- /dev/null
+++ b/llvm/test/MC/LoongArch/valid_bigimm.s
@@ -0,0 +1,33 @@
+# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s
+# CHECK: addu16i.d $r9, $r23, 23
+# CHECK: encoding: [0xe9,0x5e,0x00,0x10]
+addu16i.d $r9, $r23, 23
+
+# CHECK: lu12i.w $r16, 49
+# CHECK: encoding: [0x30,0x06,0x00,0x14]
+lu12i.w $r16, 49
+
+# CHECK: lu12i.w $r4, -1
+# CHECK: encoding: [0xe4,0xff,0xff,0x15]
+lu12i.w $r4, -1
+
+# CHECK: lu32i.d $sp, 196
+# CHECK: encoding: [0x83,0x18,0x00,0x16]
+lu32i.d $sp, 196
+
+# CHECK: pcaddi $r9, 187
+# CHECK: encoding: [0x69,0x17,0x00,0x18]
+pcaddi $r9, 187
+
+# CHECK: pcalau12i $r10, 89
+# CHECK: encoding: [0x2a,0x0b,0x00,0x1a]
+pcalau12i $r10, 89
+
+# CHECK: pcaddu12i $zero, 37
+# CHECK: encoding: [0xa0,0x04,0x00,0x1c]
+pcaddu12i $zero, 37
+
+# CHECK: pcaddu18i $r12, 26
+# CHECK: encoding: [0x4c,0x03,0x00,0x1e]
+pcaddu18i $r12, 26
+
diff --git a/llvm/test/MC/LoongArch/valid_branch.s b/llvm/test/MC/LoongArch/valid_branch.s
new file mode 100644
index 000000000000..256e70b6dff1
--- /dev/null
+++ b/llvm/test/MC/LoongArch/valid_branch.s
@@ -0,0 +1,155 @@
+# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding \
+# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ,CHECK-ASM %s
+# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -filetype=obj \
+# RUN: | llvm-objdump -d - | FileCheck -check-prefix=CHECK-ASM-AND-OBJ %s
+
+## random operands
+# CHECK-ASM-AND-OBJ: beqz $r9, 96
+# CHECK-ASM: encoding: [0x20,0x61,0x00,0x40]
+beqz $r9, 96
+
+# CHECK-ASM-AND-OBJ: bnez $sp, 212
+# CHECK-ASM: encoding: [0x60,0xd4,0x00,0x44]
+bnez $sp, 212
+
+# CHECK-ASM-AND-OBJ: bceqz $fcc6, 12
+# CHECK-ASM: encoding: [0xc0,0x0c,0x00,0x48]
+bceqz $fcc6, 12
+
+# CHECK-ASM-AND-OBJ: bcnez $fcc6, 72
+# CHECK-ASM: encoding: [0xc0,0x49,0x00,0x48]
+bcnez $fcc6, 72
+
+# CHECK-ASM-AND-OBJ: b 248
+# CHECK-ASM: encoding: [0x00,0xf8,0x00,0x50]
+b 248
+
+# CHECK-ASM-AND-OBJ: bl 236
+# CHECK-ASM: encoding: [0x00,0xec,0x00,0x54]
+bl 236
+
+# CHECK-ASM-AND-OBJ: beq $r10, $r7, 176
+# CHECK-ASM: encoding: [0x47,0xb1,0x00,0x58]
+beq $r10, $r7, 176
+
+# CHECK-ASM-AND-OBJ: bne $r25, $ra, 136
+# CHECK-ASM: encoding: [0x21,0x8b,0x00,0x5c]
+bne $r25, $ra, 136
+
+# CHECK-ASM-AND-OBJ: blt $r15, $r30, 168
+# CHECK-ASM: encoding: [0xfe,0xa9,0x00,0x60]
+blt $r15, $r30, 168
+
+# CHECK-ASM-AND-OBJ: bge $r12, $r15, 148
+# CHECK-ASM: encoding: [0x8f,0x95,0x00,0x64]
+bge $r12, $r15, 148
+
+# CHECK-ASM-AND-OBJ: bltu $r17, $r5, 4
+# CHECK-ASM: encoding: [0x25,0x06,0x00,0x68]
+bltu $r17, $r5, 4
+
+# CHECK-ASM-AND-OBJ: bgeu $r6, $r23, 140
+# CHECK-ASM: encoding: [0xd7,0x8c,0x00,0x6c]
+bgeu $r6, $r23, 140
+
+
+## immediate lower/upper boundary
+### simm16 << 2
+# CHECK-ASM-AND-OBJ: beq $r10, $r7, -131072
+# CHECK-ASM: encoding: [0x47,0x01,0x00,0x5a]
+beq $r10, $r7, -0x20000
+
+# CHECK-ASM-AND-OBJ: beq $r10, $r7, 131068
+# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x59]
+beq $r10, $r7, 0x1FFFC
+
+# CHECK-ASM-AND-OBJ: bne $r10, $r7, -131072
+# CHECK-ASM: encoding: [0x47,0x01,0x00,0x5e]
+bne $r10, $r7, -0x20000
+
+# CHECK-ASM-AND-OBJ: bne $r10, $r7, 131068
+# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x5d]
+bne $r10, $r7, 0x1FFFC
+
+# CHECK-ASM-AND-OBJ: blt $r10, $r7, -131072
+# CHECK-ASM: encoding: [0x47,0x01,0x00,0x62]
+blt $r10, $r7, -0x20000
+
+# CHECK-ASM-AND-OBJ: blt $r10, $r7, 131068
+# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x61]
+blt $r10, $r7, 0x1FFFC
+
+# CHECK-ASM-AND-OBJ: bge $r10, $r7, -131072
+# CHECK-ASM: encoding: [0x47,0x01,0x00,0x66]
+bge $r10, $r7, -0x20000
+
+# CHECK-ASM-AND-OBJ: bge $r10, $r7, 131068
+# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x65]
+bge $r10, $r7, 0x1FFFC
+
+# CHECK-ASM-AND-OBJ: bltu $r10, $r7, -131072
+# CHECK-ASM: encoding: [0x47,0x01,0x00,0x6a]
+bltu $r10, $r7, -0x20000
+
+# CHECK-ASM-AND-OBJ: bltu $r10, $r7, 131068
+# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x69]
+bltu $r10, $r7, 0x1FFFC
+
+# CHECK-ASM-AND-OBJ: bgeu $r10, $r7, -131072
+# CHECK-ASM: encoding: [0x47,0x01,0x00,0x6e]
+bgeu $r10, $r7, -0x20000
+
+# CHECK-ASM-AND-OBJ: bgeu $r10, $r7, 131068
+# CHECK-ASM: encoding: [0x47,0xfd,0xff,0x6d]
+bgeu $r10, $r7, 0x1FFFC
+
+### simm21 << 2
+# CHECK-ASM-AND-OBJ: beqz $r9, -4194304
+# CHECK-ASM: encoding: [0x30,0x01,0x00,0x40]
+beqz $r9, -0x400000
+
+# CHECK-ASM-AND-OBJ: beqz $r9, 4194300
+# CHECK-ASM: encoding: [0x2f,0xfd,0xff,0x43]
+beqz $r9, 0x3FFFFC
+
+# CHECK-ASM-AND-OBJ: bnez $r9, -4194304
+# CHECK-ASM: encoding: [0x30,0x01,0x00,0x44]
+bnez $r9, -0x400000
+
+# CHECK-ASM-AND-OBJ: bnez $r9, 4194300
+# CHECK-ASM: encoding: [0x2f,0xfd,0xff,0x47]
+bnez $r9, 0x3FFFFC
+
+# CHECK-ASM-AND-OBJ: bceqz $fcc6, -4194304
+# CHECK-ASM: encoding: [0xd0,0x00,0x00,0x48]
+bceqz $fcc6, -0x400000
+
+# CHECK-ASM-AND-OBJ: bceqz $fcc6, 4194300
+# CHECK-ASM: encoding: [0xcf,0xfc,0xff,0x4b]
+bceqz $fcc6, 0x3FFFFC
+
+# CHECK-ASM-AND-OBJ: bcnez $fcc6, -4194304
+# CHECK-ASM: encoding: [0xd0,0x01,0x00,0x48]
+bcnez $fcc6, -0x400000
+
+# CHECK-ASM-AND-OBJ: bcnez $fcc6, 4194300
+# CHECK-ASM: encoding: [0xcf,0xfd,0xff,0x4b]
+bcnez $fcc6, 0x3FFFFC
+
+### simm26 << 2
+# CHECK-ASM-AND-OBJ: b -134217728
+# CHECK-ASM: encoding: [0x00,0x02,0x00,0x50]
+b -0x8000000
+
+# CHECK-ASM-AND-OBJ: b 134217724
+# CHECK-ASM: encoding: [0xff,0xfd,0xff,0x53]
+b 0x7FFFFFC
+
+# CHECK-ASM-AND-OBJ: bl -134217728
+# CHECK-ASM: encoding: [0x00,0x02,0x00,0x54]
+bl -0x8000000
+
+# CHECK-ASM-AND-OBJ: bl 134217724
+# CHECK-ASM: encoding: [0xff,0xfd,0xff,0x57]
+bl 0x7FFFFFC
+
diff --git a/llvm/test/MC/LoongArch/valid_float.s b/llvm/test/MC/LoongArch/valid_float.s
new file mode 100644
index 000000000000..05ecefdc10d1
--- /dev/null
+++ b/llvm/test/MC/LoongArch/valid_float.s
@@ -0,0 +1,297 @@
+# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s
+# CHECK: fadd.s $f29, $f15, $f25
+# CHECK: encoding: [0xfd,0xe5,0x00,0x01]
+fadd.s $f29, $f15, $f25
+
+# CHECK: fadd.d $f25, $f7, $f13
+# CHECK: encoding: [0xf9,0x34,0x01,0x01]
+fadd.d $f25, $f7, $f13
+
+# CHECK: fsub.s $f14, $f6, $f31
+# CHECK: encoding: [0xce,0xfc,0x02,0x01]
+fsub.s $f14, $f6, $f31
+
+# CHECK: fsub.d $f29, $f1, $f18
+# CHECK: encoding: [0x3d,0x48,0x03,0x01]
+fsub.d $f29, $f1, $f18
+
+# CHECK: fmul.s $f0, $f7, $f17
+# CHECK: encoding: [0xe0,0xc4,0x04,0x01]
+fmul.s $f0, $f7, $f17
+
+# CHECK: fmul.d $f4, $f30, $f7
+# CHECK: encoding: [0xc4,0x1f,0x05,0x01]
+fmul.d $f4, $f30, $f7
+
+# CHECK: fdiv.s $f20, $f24, $f19
+# CHECK: encoding: [0x14,0xcf,0x06,0x01]
+fdiv.s $f20, $f24, $f19
+
+# CHECK: fdiv.d $f3, $f25, $f28
+# CHECK: encoding: [0x23,0x73,0x07,0x01]
+fdiv.d $f3, $f25, $f28
+
+# CHECK: fmax.s $f22, $f6, $f27
+# CHECK: encoding: [0xd6,0xec,0x08,0x01]
+fmax.s $f22, $f6, $f27
+
+# CHECK: fmax.d $f11, $f26, $f13
+# CHECK: encoding: [0x4b,0x37,0x09,0x01]
+fmax.d $f11, $f26, $f13
+
+# CHECK: fmin.s $f14, $f10, $f19
+# CHECK: encoding: [0x4e,0xcd,0x0a,0x01]
+fmin.s $f14, $f10, $f19
+
+# CHECK: fmin.d $f1, $f13, $f27
+# CHECK: encoding: [0xa1,0x6d,0x0b,0x01]
+fmin.d $f1, $f13, $f27
+
+# 
CHECK: fmaxa.s $f9, $f27, $f31 +# CHECK: encoding: [0x69,0xff,0x0c,0x01] +fmaxa.s $f9, $f27, $f31 + +# CHECK: fmaxa.d $f24, $f13, $f4 +# CHECK: encoding: [0xb8,0x11,0x0d,0x01] +fmaxa.d $f24, $f13, $f4 + +# CHECK: fmina.s $f15, $f18, $f1 +# CHECK: encoding: [0x4f,0x86,0x0e,0x01] +fmina.s $f15, $f18, $f1 + +# CHECK: fmina.d $f18, $f10, $f0 +# CHECK: encoding: [0x52,0x01,0x0f,0x01] +fmina.d $f18, $f10, $f0 + +# CHECK: fscaleb.s $f21, $f23, $f6 +# CHECK: encoding: [0xf5,0x9a,0x10,0x01] +fscaleb.s $f21, $f23, $f6 + +# CHECK: fscaleb.d $f12, $f14, $f26 +# CHECK: encoding: [0xcc,0x69,0x11,0x01] +fscaleb.d $f12, $f14, $f26 + +# CHECK: fcopysign.s $f13, $f24, $f23 +# CHECK: encoding: [0x0d,0xdf,0x12,0x01] +fcopysign.s $f13, $f24, $f23 + +# CHECK: fcopysign.d $f16, $f26, $f6 +# CHECK: encoding: [0x50,0x1b,0x13,0x01] +fcopysign.d $f16, $f26, $f6 + +# CHECK: fabs.s $f28, $f12 +# CHECK: encoding: [0x9c,0x05,0x14,0x01] +fabs.s $f28, $f12 + +# CHECK: fabs.d $f23, $f3 +# CHECK: encoding: [0x77,0x08,0x14,0x01] +fabs.d $f23, $f3 + +# CHECK: fneg.s $f21, $f24 +# CHECK: encoding: [0x15,0x17,0x14,0x01] +fneg.s $f21, $f24 + +# CHECK: fneg.d $f11, $f26 +# CHECK: encoding: [0x4b,0x1b,0x14,0x01] +fneg.d $f11, $f26 + +# CHECK: flogb.s $f31, $f23 +# CHECK: encoding: [0xff,0x26,0x14,0x01] +flogb.s $f31, $f23 + +# CHECK: flogb.d $f21, $f29 +# CHECK: encoding: [0xb5,0x2b,0x14,0x01] +flogb.d $f21, $f29 + +# CHECK: fclass.s $f20, $f9 +# CHECK: encoding: [0x34,0x35,0x14,0x01] +fclass.s $f20, $f9 + +# CHECK: fclass.d $f19, $f2 +# CHECK: encoding: [0x53,0x38,0x14,0x01] +fclass.d $f19, $f2 + +# CHECK: fsqrt.s $f27, $f18 +# CHECK: encoding: [0x5b,0x46,0x14,0x01] +fsqrt.s $f27, $f18 + +# CHECK: fsqrt.d $f2, $f11 +# CHECK: encoding: [0x62,0x49,0x14,0x01] +fsqrt.d $f2, $f11 + +# CHECK: frecip.s $f17, $f27 +# CHECK: encoding: [0x71,0x57,0x14,0x01] +frecip.s $f17, $f27 + +# CHECK: frecip.d $f27, $f27 +# CHECK: encoding: [0x7b,0x5b,0x14,0x01] +frecip.d $f27, $f27 + +# CHECK: frsqrt.s $f25, $f12 +# CHECK: encoding: [0x99,0x65,0x14,0x01] +frsqrt.s $f25, $f12 + +# CHECK: frsqrt.d $f22, $f3 +# CHECK: encoding: [0x76,0x68,0x14,0x01] +frsqrt.d $f22, $f3 + +# CHECK: fmov.s $f13, $f23 +# CHECK: encoding: [0xed,0x96,0x14,0x01] +fmov.s $f13, $f23 + +# CHECK: fmov.d $f30, $f9 +# CHECK: encoding: [0x3e,0x99,0x14,0x01] +fmov.d $f30, $f9 + +# CHECK: movgr2fr.w $f6, $tp +# CHECK: encoding: [0x46,0xa4,0x14,0x01] +movgr2fr.w $f6, $tp + +# CHECK: movgr2fr.d $f30, $r11 +# CHECK: encoding: [0x7e,0xa9,0x14,0x01] +movgr2fr.d $f30, $r11 + +# CHECK: movgr2frh.w $f23, $r26 +# CHECK: encoding: [0x57,0xaf,0x14,0x01] +movgr2frh.w $f23, $r26 + +# CHECK: movfr2gr.s $r10, $f22 +# CHECK: encoding: [0xca,0xb6,0x14,0x01] +movfr2gr.s $r10, $f22 + +# CHECK: movfr2gr.d $r26, $f17 +# CHECK: encoding: [0x3a,0xba,0x14,0x01] +movfr2gr.d $r26, $f17 + +# CHECK: movfrh2gr.s $sp, $f26 +# CHECK: encoding: [0x43,0xbf,0x14,0x01] +movfrh2gr.s $sp, $f26 + +# CHECK: movfr2cf $fcc4, $f11 +# CHECK: encoding: [0x64,0xd1,0x14,0x01] +movfr2cf $fcc4, $f11 + +# CHECK: movcf2fr $f16, $fcc0 +# CHECK: encoding: [0x10,0xd4,0x14,0x01] +movcf2fr $f16, $fcc0 + +# CHECK: movgr2cf $fcc5, $ra +# CHECK: encoding: [0x25,0xd8,0x14,0x01] +movgr2cf $fcc5, $ra + +# CHECK: movcf2gr $r21, $fcc7 +# CHECK: encoding: [0xf5,0xdc,0x14,0x01] +movcf2gr $r21, $fcc7 + +# CHECK: fcvt.s.d $f12, $f19 +# CHECK: encoding: [0x6c,0x1a,0x19,0x01] +fcvt.s.d $f12, $f19 + +# CHECK: fcvt.d.s $f10, $f6 +# CHECK: encoding: [0xca,0x24,0x19,0x01] +fcvt.d.s $f10, $f6 + +# CHECK: ftintrm.w.s $f16, $f16 +# CHECK: encoding: 
[0x10,0x06,0x1a,0x01] +ftintrm.w.s $f16, $f16 + +# CHECK: ftintrm.w.d $f7, $f8 +# CHECK: encoding: [0x07,0x09,0x1a,0x01] +ftintrm.w.d $f7, $f8 + +# CHECK: ftintrm.l.s $f24, $f10 +# CHECK: encoding: [0x58,0x25,0x1a,0x01] +ftintrm.l.s $f24, $f10 + +# CHECK: ftintrm.l.d $f9, $f9 +# CHECK: encoding: [0x29,0x29,0x1a,0x01] +ftintrm.l.d $f9, $f9 + +# CHECK: ftintrp.w.s $f14, $f31 +# CHECK: encoding: [0xee,0x47,0x1a,0x01] +ftintrp.w.s $f14, $f31 + +# CHECK: ftintrp.w.d $f12, $f3 +# CHECK: encoding: [0x6c,0x48,0x1a,0x01] +ftintrp.w.d $f12, $f3 + +# CHECK: ftintrp.l.s $f0, $f16 +# CHECK: encoding: [0x00,0x66,0x1a,0x01] +ftintrp.l.s $f0, $f16 + +# CHECK: ftintrp.l.d $f4, $f29 +# CHECK: encoding: [0xa4,0x6b,0x1a,0x01] +ftintrp.l.d $f4, $f29 + +# CHECK: ftintrz.w.s $f4, $f29 +# CHECK: encoding: [0xa4,0x87,0x1a,0x01] +ftintrz.w.s $f4, $f29 + +# CHECK: ftintrz.w.d $f25, $f24 +# CHECK: encoding: [0x19,0x8b,0x1a,0x01] +ftintrz.w.d $f25, $f24 + +# CHECK: ftintrz.l.s $f23, $f5 +# CHECK: encoding: [0xb7,0xa4,0x1a,0x01] +ftintrz.l.s $f23, $f5 + +# CHECK: ftintrz.l.d $f3, $f10 +# CHECK: encoding: [0x43,0xa9,0x1a,0x01] +ftintrz.l.d $f3, $f10 + +# CHECK: ftintrne.w.s $f4, $f17 +# CHECK: encoding: [0x24,0xc6,0x1a,0x01] +ftintrne.w.s $f4, $f17 + +# CHECK: ftintrne.w.d $f31, $f12 +# CHECK: encoding: [0x9f,0xc9,0x1a,0x01] +ftintrne.w.d $f31, $f12 + +# CHECK: ftintrne.l.s $f22, $f27 +# CHECK: encoding: [0x76,0xe7,0x1a,0x01] +ftintrne.l.s $f22, $f27 + +# CHECK: ftintrne.l.d $f28, $f6 +# CHECK: encoding: [0xdc,0xe8,0x1a,0x01] +ftintrne.l.d $f28, $f6 + +# CHECK: ftint.w.s $f21, $f13 +# CHECK: encoding: [0xb5,0x05,0x1b,0x01] +ftint.w.s $f21, $f13 + +# CHECK: ftint.w.d $f3, $f14 +# CHECK: encoding: [0xc3,0x09,0x1b,0x01] +ftint.w.d $f3, $f14 + +# CHECK: ftint.l.s $f31, $f24 +# CHECK: encoding: [0x1f,0x27,0x1b,0x01] +ftint.l.s $f31, $f24 + +# CHECK: ftint.l.d $f16, $f24 +# CHECK: encoding: [0x10,0x2b,0x1b,0x01] +ftint.l.d $f16, $f24 + +# CHECK: ffint.s.w $f30, $f5 +# CHECK: encoding: [0xbe,0x10,0x1d,0x01] +ffint.s.w $f30, $f5 + +# CHECK: ffint.s.l $f6, $f5 +# CHECK: encoding: [0xa6,0x18,0x1d,0x01] +ffint.s.l $f6, $f5 + +# CHECK: ffint.d.w $f24, $f18 +# CHECK: encoding: [0x58,0x22,0x1d,0x01] +ffint.d.w $f24, $f18 + +# CHECK: ffint.d.l $f23, $f26 +# CHECK: encoding: [0x57,0x2b,0x1d,0x01] +ffint.d.l $f23, $f26 + +# CHECK: frint.s $f5, $f17 +# CHECK: encoding: [0x25,0x46,0x1e,0x01] +frint.s $f5, $f17 + +# CHECK: frint.d $f29, $f2 +# CHECK: encoding: [0x5d,0x48,0x1e,0x01] +frint.d $f29, $f2 + diff --git a/llvm/test/MC/LoongArch/valid_integer.s b/llvm/test/MC/LoongArch/valid_integer.s new file mode 100644 index 000000000000..cc78662d53b1 --- /dev/null +++ b/llvm/test/MC/LoongArch/valid_integer.s @@ -0,0 +1,369 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: clo.w $ra, $sp +# CHECK: encoding: [0x61,0x10,0x00,0x00] +clo.w $ra, $sp + +# CHECK: clz.w $r7, $r10 +# CHECK: encoding: [0x47,0x15,0x00,0x00] +clz.w $r7, $r10 + +# CHECK: cto.w $tp, $r6 +# CHECK: encoding: [0xc2,0x18,0x00,0x00] +cto.w $tp, $r6 + +# CHECK: ctz.w $r5, $r22 +# CHECK: encoding: [0xc5,0x1e,0x00,0x00] +ctz.w $r5, $r22 + +# CHECK: clo.d $r29, $ra +# CHECK: encoding: [0x3d,0x20,0x00,0x00] +clo.d $r29, $ra + +# CHECK: clz.d $r26, $r26 +# CHECK: encoding: [0x5a,0x27,0x00,0x00] +clz.d $r26, $r26 + +# CHECK: cto.d $r18, $r20 +# CHECK: encoding: [0x92,0x2a,0x00,0x00] +cto.d $r18, $r20 + +# CHECK: ctz.d $r17, $r10 +# CHECK: encoding: [0x51,0x2d,0x00,0x00] +ctz.d $r17, $r10 + +# CHECK: revb.2h $r20, $r11 +# CHECK: encoding: 
[0x74,0x31,0x00,0x00] +revb.2h $r20, $r11 + +# CHECK: revb.4h $r13, $r19 +# CHECK: encoding: [0x6d,0x36,0x00,0x00] +revb.4h $r13, $r19 + +# CHECK: revb.2w $r28, $r27 +# CHECK: encoding: [0x7c,0x3b,0x00,0x00] +revb.2w $r28, $r27 + +# CHECK: revb.d $zero, $r23 +# CHECK: encoding: [0xe0,0x3e,0x00,0x00] +revb.d $zero, $r23 + +# CHECK: revh.2w $r28, $r10 +# CHECK: encoding: [0x5c,0x41,0x00,0x00] +revh.2w $r28, $r10 + +# CHECK: revh.d $r9, $r7 +# CHECK: encoding: [0xe9,0x44,0x00,0x00] +revh.d $r9, $r7 + +# CHECK: bitrev.4b $r21, $r27 +# CHECK: encoding: [0x75,0x4b,0x00,0x00] +bitrev.4b $r21, $r27 + +# CHECK: bitrev.8b $r13, $r25 +# CHECK: encoding: [0x2d,0x4f,0x00,0x00] +bitrev.8b $r13, $r25 + +# CHECK: bitrev.w $r25, $r5 +# CHECK: encoding: [0xb9,0x50,0x00,0x00] +bitrev.w $r25, $r5 + +# CHECK: bitrev.d $r19, $r23 +# CHECK: encoding: [0xf3,0x56,0x00,0x00] +bitrev.d $r19, $r23 + +# CHECK: ext.w.h $r23, $r23 +# CHECK: encoding: [0xf7,0x5a,0x00,0x00] +ext.w.h $r23, $r23 + +# CHECK: ext.w.b $r20, $r18 +# CHECK: encoding: [0x54,0x5e,0x00,0x00] +ext.w.b $r20, $r18 + +# CHECK: rdtimel.w $r24, $r4 +# CHECK: encoding: [0x98,0x60,0x00,0x00] +rdtimel.w $r24, $r4 + +# CHECK: rdtimeh.w $r11, $r5 +# CHECK: encoding: [0xab,0x64,0x00,0x00] +rdtimeh.w $r11, $r5 + +# CHECK: rdtime.d $tp, $ra +# CHECK: encoding: [0x22,0x68,0x00,0x00] +rdtime.d $tp, $ra + +# CHECK: cpucfg $sp, $ra +# CHECK: encoding: [0x23,0x6c,0x00,0x00] +cpucfg $sp, $ra + +# CHECK: asrtle.d $r21, $r19 +# CHECK: encoding: [0xa0,0x4e,0x01,0x00] +asrtle.d $r21, $r19 + +# CHECK: asrtgt.d $ra, $r19 +# CHECK: encoding: [0x20,0xcc,0x01,0x00] +asrtgt.d $ra, $r19 + +# CHECK: alsl.w $tp, $r17, $tp, 4 +# CHECK: encoding: [0x22,0x8a,0x05,0x00] +alsl.w $tp, $r17, $tp, 4 + +# CHECK: bytepick.w $r29, $zero, $r16, 0 +# CHECK: encoding: [0x1d,0x40,0x08,0x00] +bytepick.w $r29, $zero, $r16, 0 + +# CHECK: bytepick.d $r15, $r17, $r20, 4 +# CHECK: encoding: [0x2f,0x52,0x0e,0x00] +bytepick.d $r15, $r17, $r20, 4 + +# CHECK: add.w $r9, $ra, $r31 +# CHECK: encoding: [0x29,0x7c,0x10,0x00] +add.w $r9, $ra, $r31 + +# CHECK: add.d $tp, $r18, $r27 +# CHECK: encoding: [0x42,0xee,0x10,0x00] +add.d $tp, $r18, $r27 + +# CHECK: sub.w $r21, $r25, $r19 +# CHECK: encoding: [0x35,0x4f,0x11,0x00] +sub.w $r21, $r25, $r19 + +# CHECK: sub.d $r7, $r12, $r7 +# CHECK: encoding: [0x87,0x9d,0x11,0x00] +sub.d $r7, $r12, $r7 + +# CHECK: slt $r29, $r26, $tp +# CHECK: encoding: [0x5d,0x0b,0x12,0x00] +slt $r29, $r26, $tp + +# CHECK: sltu $r11, $r21, $r29 +# CHECK: encoding: [0xab,0xf6,0x12,0x00] +sltu $r11, $r21, $r29 + +# CHECK: maskeqz $r20, $r11, $r18 +# CHECK: encoding: [0x74,0x49,0x13,0x00] +maskeqz $r20, $r11, $r18 + +# CHECK: masknez $r20, $r13, $r26 +# CHECK: encoding: [0xb4,0xe9,0x13,0x00] +masknez $r20, $r13, $r26 + +# CHECK: nor $r5, $r18, $r5 +# CHECK: encoding: [0x45,0x16,0x14,0x00] +nor $r5, $r18, $r5 + +# CHECK: and $r19, $r31, $ra +# CHECK: encoding: [0xf3,0x87,0x14,0x00] +and $r19, $r31, $ra + +# CHECK: or $r17, $r16, $r30 +# CHECK: encoding: [0x11,0x7a,0x15,0x00] +or $r17, $r16, $r30 + +# CHECK: xor $r15, $r19, $r8 +# CHECK: encoding: [0x6f,0xa2,0x15,0x00] +xor $r15, $r19, $r8 + +# CHECK: orn $tp, $sp, $r25 +# CHECK: encoding: [0x62,0x64,0x16,0x00] +orn $tp, $sp, $r25 + +# CHECK: andn $r28, $r25, $r5 +# CHECK: encoding: [0x3c,0x97,0x16,0x00] +andn $r28, $r25, $r5 + +# CHECK: sll.w $r24, $r27, $r23 +# CHECK: encoding: [0x78,0x5f,0x17,0x00] +sll.w $r24, $r27, $r23 + +# CHECK: srl.w $r31, $r17, $r7 +# CHECK: encoding: [0x3f,0x9e,0x17,0x00] +srl.w $r31, $r17, $r7 + +# CHECK: sra.w 
$r12, $r28, $r10 +# CHECK: encoding: [0x8c,0x2b,0x18,0x00] +sra.w $r12, $r28, $r10 + +# CHECK: sll.d $r20, $r15, $sp +# CHECK: encoding: [0xf4,0x8d,0x18,0x00] +sll.d $r20, $r15, $sp + +# CHECK: srl.d $r14, $r25, $zero +# CHECK: encoding: [0x2e,0x03,0x19,0x00] +srl.d $r14, $r25, $zero + +# CHECK: sra.d $r7, $r22, $r31 +# CHECK: encoding: [0xc7,0xfe,0x19,0x00] +sra.d $r7, $r22, $r31 + +# CHECK: rotr.w $ra, $r26, $r18 +# CHECK: encoding: [0x41,0x4b,0x1b,0x00] +rotr.w $ra, $r26, $r18 + +# CHECK: rotr.d $r31, $sp, $ra +# CHECK: encoding: [0x7f,0x84,0x1b,0x00] +rotr.d $r31, $sp, $ra + +# CHECK: mul.w $r4, $r18, $sp +# CHECK: encoding: [0x44,0x0e,0x1c,0x00] +mul.w $r4, $r18, $sp + +# CHECK: mulh.w $r27, $r23, $zero +# CHECK: encoding: [0xfb,0x82,0x1c,0x00] +mulh.w $r27, $r23, $zero + +# CHECK: mulh.wu $r10, $r17, $r24 +# CHECK: encoding: [0x2a,0x62,0x1d,0x00] +mulh.wu $r10, $r17, $r24 + +# CHECK: mul.d $ra, $r14, $r24 +# CHECK: encoding: [0xc1,0xe1,0x1d,0x00] +mul.d $ra, $r14, $r24 + +# CHECK: mulh.d $r28, $ra, $r27 +# CHECK: encoding: [0x3c,0x6c,0x1e,0x00] +mulh.d $r28, $ra, $r27 + +# CHECK: mulh.du $r13, $r27, $r29 +# CHECK: encoding: [0x6d,0xf7,0x1e,0x00] +mulh.du $r13, $r27, $r29 + +# CHECK: mulw.d.w $r27, $r6, $r17 +# CHECK: encoding: [0xdb,0x44,0x1f,0x00] +mulw.d.w $r27, $r6, $r17 + +# CHECK: mulw.d.wu $r17, $r22, $r30 +# CHECK: encoding: [0xd1,0xfa,0x1f,0x00] +mulw.d.wu $r17, $r22, $r30 + +# CHECK: div.w $r30, $r13, $r25 +# CHECK: encoding: [0xbe,0x65,0x20,0x00] +div.w $r30, $r13, $r25 + +# CHECK: mod.w $ra, $r26, $r10 +# CHECK: encoding: [0x41,0xab,0x20,0x00] +mod.w $ra, $r26, $r10 + +# CHECK: div.wu $r19, $r23, $zero +# CHECK: encoding: [0xf3,0x02,0x21,0x00] +div.wu $r19, $r23, $zero + +# CHECK: mod.wu $r27, $r9, $r17 +# CHECK: encoding: [0x3b,0xc5,0x21,0x00] +mod.wu $r27, $r9, $r17 + +# CHECK: div.d $r23, $r6, $r21 +# CHECK: encoding: [0xd7,0x54,0x22,0x00] +div.d $r23, $r6, $r21 + +# CHECK: mod.d $r16, $sp, $r15 +# CHECK: encoding: [0x70,0xbc,0x22,0x00] +mod.d $r16, $sp, $r15 + +# CHECK: div.du $r31, $r24, $r14 +# CHECK: encoding: [0x1f,0x3b,0x23,0x00] +div.du $r31, $r24, $r14 + +# CHECK: mod.du $r25, $r23, $r24 +# CHECK: encoding: [0xf9,0xe2,0x23,0x00] +mod.du $r25, $r23, $r24 + +# CHECK: crc.w.b.w $r24, $r7, $tp +# CHECK: encoding: [0xf8,0x08,0x24,0x00] +crc.w.b.w $r24, $r7, $tp + +# CHECK: crc.w.h.w $r31, $r10, $r18 +# CHECK: encoding: [0x5f,0xc9,0x24,0x00] +crc.w.h.w $r31, $r10, $r18 + +# CHECK: crc.w.w.w $r28, $r6, $r10 +# CHECK: encoding: [0xdc,0x28,0x25,0x00] +crc.w.w.w $r28, $r6, $r10 + +# CHECK: crc.w.d.w $r28, $r11, $r31 +# CHECK: encoding: [0x7c,0xfd,0x25,0x00] +crc.w.d.w $r28, $r11, $r31 + +# CHECK: crcc.w.b.w $r15, $r18, $sp +# CHECK: encoding: [0x4f,0x0e,0x26,0x00] +crcc.w.b.w $r15, $r18, $sp + +# CHECK: crcc.w.h.w $r21, $r29, $r18 +# CHECK: encoding: [0xb5,0xcb,0x26,0x00] +crcc.w.h.w $r21, $r29, $r18 + +# CHECK: crcc.w.w.w $r17, $r14, $r13 +# CHECK: encoding: [0xd1,0x35,0x27,0x00] +crcc.w.w.w $r17, $r14, $r13 + +# CHECK: crcc.w.d.w $r30, $r21, $r27 +# CHECK: encoding: [0xbe,0xee,0x27,0x00] +crcc.w.d.w $r30, $r21, $r27 + +# CHECK: break 23 +# CHECK: encoding: [0x17,0x00,0x2a,0x00] +break 23 + +# CHECK: syscall 2 +# CHECK: encoding: [0x02,0x00,0x2b,0x00] +syscall 2 + +# CHECK: alsl.d $r17, $r11, $r5, 3 +# CHECK: encoding: [0x71,0x15,0x2d,0x00] +alsl.d $r17, $r11, $r5, 3 + +# CHECK: slli.w $r26, $r18, 0 +# CHECK: encoding: [0x5a,0x82,0x40,0x00] +slli.w $r26, $r18, 0 + +# CHECK: slli.d $r10, $r31, 39 +# CHECK: encoding: [0xea,0x9f,0x41,0x00] +slli.d $r10, $r31, 39 + +# CHECK: 
srli.w $r10, $r14, 30 +# CHECK: encoding: [0xca,0xf9,0x44,0x00] +srli.w $r10, $r14, 30 + +# CHECK: srli.d $r31, $r22, 38 +# CHECK: encoding: [0xdf,0x9a,0x45,0x00] +srli.d $r31, $r22, 38 + +# CHECK: srai.w $r8, $r17, 24 +# CHECK: encoding: [0x28,0xe2,0x48,0x00] +srai.w $r8, $r17, 24 + +# CHECK: srai.d $r9, $r21, 27 +# CHECK: encoding: [0xa9,0x6e,0x49,0x00] +srai.d $r9, $r21, 27 + +# CHECK: rotri.w $r23, $r20, 23 +# CHECK: encoding: [0x97,0xde,0x4c,0x00] +rotri.w $r23, $r20, 23 + +# CHECK: rotri.d $r29, $zero, 7 +# CHECK: encoding: [0x1d,0x1c,0x4d,0x00] +rotri.d $r29, $zero, 7 + +# CHECK: bstrins.w $r8, $r11, 7, 2 +# CHECK: encoding: [0x68,0x09,0x67,0x00] +bstrins.w $r8, $r11, 7, 2 + +# CHECK: bstrins.d $r8, $r11, 7, 2 +# CHECK: encoding: [0x68,0x09,0x87,0x00] +bstrins.d $r8, $r11, 7, 2 + +# CHECK: bstrpick.w $ra, $r9, 10, 4 +# CHECK: encoding: [0x21,0x91,0x6a,0x00] +bstrpick.w $ra, $r9, 10, 4 + +# CHECK: bstrpick.d $r31, $r27, 39, 22 +# CHECK: encoding: [0x7f,0x5b,0xe7,0x00] +bstrpick.d $r31, $r27, 39, 22 + +# CHECK: cpucfg $sp, $r8 +# CHECK: encoding: [0x03,0x6d,0x00,0x00] +cpucfg $sp, $r8 + +# CHECK: alsl.wu $r19, $r8, $r25, 1 +# CHECK: encoding: [0x13,0x65,0x06,0x00] +alsl.wu $r19, $r8, $r25, 1 + diff --git a/llvm/test/MC/LoongArch/valid_memory.s b/llvm/test/MC/LoongArch/valid_memory.s new file mode 100644 index 000000000000..30ea88c9992b --- /dev/null +++ b/llvm/test/MC/LoongArch/valid_memory.s @@ -0,0 +1,405 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: dbar 0 +# CHECK: encoding: [0x00,0x00,0x72,0x38] +dbar 0 + +# CHECK: ibar 0 +# CHECK: encoding: [0x00,0x80,0x72,0x38] +ibar 0 + +# CHECK: ll.w $tp, $r27, 220 +# CHECK: encoding: [0x62,0xdf,0x00,0x20] +ll.w $tp, $r27, 220 + +# CHECK: sc.w $r19, $r14, 56 +# CHECK: encoding: [0xd3,0x39,0x00,0x21] +sc.w $r19, $r14, 56 + +# CHECK: ll.d $r25, $r27, 16 +# CHECK: encoding: [0x79,0x13,0x00,0x22] +ll.d $r25, $r27, 16 + +# CHECK: sc.d $r17, $r17, 244 +# CHECK: encoding: [0x31,0xf6,0x00,0x23] +sc.d $r17, $r17, 244 + +# CHECK: ldptr.w $r26, $r6, 60 +# CHECK: encoding: [0xda,0x3c,0x00,0x24] +ldptr.w $r26, $r6, 60 + +# CHECK: stptr.w $r28, $r5, 216 +# CHECK: encoding: [0xbc,0xd8,0x00,0x25] +stptr.w $r28, $r5, 216 + +# CHECK: ldptr.d $r5, $r29, 244 +# CHECK: encoding: [0xa5,0xf7,0x00,0x26] +ldptr.d $r5, $r29, 244 + +# CHECK: stptr.d $r14, $r24, 196 +# CHECK: encoding: [0x0e,0xc7,0x00,0x27] +stptr.d $r14, $r24, 196 + +# CHECK: ld.b $r24, $r8, 21 +# CHECK: encoding: [0x18,0x55,0x00,0x28] +ld.b $r24, $r8, 21 + +# CHECK: ld.h $r7, $r18, 80 +# CHECK: encoding: [0x47,0x42,0x41,0x28] +ld.h $r7, $r18, 80 + +# CHECK: ld.w $r18, $r26, 92 +# CHECK: encoding: [0x52,0x73,0x81,0x28] +ld.w $r18, $r26, 92 + +# CHECK: ld.d $r18, $r20, 159 +# CHECK: encoding: [0x92,0x7e,0xc2,0x28] +ld.d $r18, $r20, 159 + +# CHECK: st.b $sp, $r7, 95 +# CHECK: encoding: [0xe3,0x7c,0x01,0x29] +st.b $sp, $r7, 95 + +# CHECK: st.h $r25, $r16, 122 +# CHECK: encoding: [0x19,0xea,0x41,0x29] +st.h $r25, $r16, 122 + +# CHECK: st.w $r13, $r13, 175 +# CHECK: encoding: [0xad,0xbd,0x82,0x29] +st.w $r13, $r13, 175 + +# CHECK: st.d $r30, $r30, 60 +# CHECK: encoding: [0xde,0xf3,0xc0,0x29] +st.d $r30, $r30, 60 + +# CHECK: ld.bu $r13, $r13, 150 +# CHECK: encoding: [0xad,0x59,0x02,0x2a] +ld.bu $r13, $r13, 150 + +# CHECK: ld.hu $r18, $r29, 198 +# CHECK: encoding: [0xb2,0x1b,0x43,0x2a] +ld.hu $r18, $r29, 198 + +# CHECK: ld.wu $r14, $r19, 31 +# CHECK: encoding: [0x6e,0x7e,0x80,0x2a] +ld.wu $r14, $r19, 31 + +# CHECK: fld.s $f23, $r15, 250 +# CHECK: encoding: 
[0xf7,0xe9,0x03,0x2b] +fld.s $f23, $r15, 250 + +# CHECK: fst.s $f30, $r19, 230 +# CHECK: encoding: [0x7e,0x9a,0x43,0x2b] +fst.s $f30, $r19, 230 + +# CHECK: fld.d $f22, $r17, 114 +# CHECK: encoding: [0x36,0xca,0x81,0x2b] +fld.d $f22, $r17, 114 + +# CHECK: fst.d $f28, $r7, 198 +# CHECK: encoding: [0xfc,0x18,0xc3,0x2b] +fst.d $f28, $r7, 198 + +# CHECK: ldx.b $r24, $ra, $tp +# CHECK: encoding: [0x38,0x08,0x00,0x38] +ldx.b $r24, $ra, $tp + +# CHECK: ldx.h $r22, $r22, $r17 +# CHECK: encoding: [0xd6,0x46,0x04,0x38] +ldx.h $r22, $r22, $r17 + +# CHECK: ldx.w $r25, $r11, $r23 +# CHECK: encoding: [0x79,0x5d,0x08,0x38] +ldx.w $r25, $r11, $r23 + +# CHECK: ldx.d $r18, $r23, $r20 +# CHECK: encoding: [0xf2,0x52,0x0c,0x38] +ldx.d $r18, $r23, $r20 + +# CHECK: stx.b $r19, $ra, $sp +# CHECK: encoding: [0x33,0x0c,0x10,0x38] +stx.b $r19, $ra, $sp + +# CHECK: stx.h $zero, $r28, $r26 +# CHECK: encoding: [0x80,0x6b,0x14,0x38] +stx.h $zero, $r28, $r26 + +# CHECK: stx.w $r7, $r4, $r31 +# CHECK: encoding: [0x87,0x7c,0x18,0x38] +stx.w $r7, $r4, $r31 + +# CHECK: stx.d $r7, $r31, $r10 +# CHECK: encoding: [0xe7,0x2b,0x1c,0x38] +stx.d $r7, $r31, $r10 + +# CHECK: ldx.bu $r11, $r9, $r9 +# CHECK: encoding: [0x2b,0x25,0x20,0x38] +ldx.bu $r11, $r9, $r9 + +# CHECK: ldx.hu $r22, $r23, $r27 +# CHECK: encoding: [0xf6,0x6e,0x24,0x38] +ldx.hu $r22, $r23, $r27 + +# CHECK: ldx.wu $r8, $r24, $r28 +# CHECK: encoding: [0x08,0x73,0x28,0x38] +ldx.wu $r8, $r24, $r28 + +# CHECK: fldx.s $f1, $r15, $r19 +# CHECK: encoding: [0xe1,0x4d,0x30,0x38] +fldx.s $f1, $r15, $r19 + +# CHECK: fldx.d $f27, $r13, $r31 +# CHECK: encoding: [0xbb,0x7d,0x34,0x38] +fldx.d $f27, $r13, $r31 + +# CHECK: fstx.s $f26, $sp, $r22 +# CHECK: encoding: [0x7a,0x58,0x38,0x38] +fstx.s $f26, $sp, $r22 + +# CHECK: fstx.d $f6, $r15, $r17 +# CHECK: encoding: [0xe6,0x45,0x3c,0x38] +fstx.d $f6, $r15, $r17 + +# CHECK: amswap_db.w $r6, $r12, $r24, 0 +# CHECK: encoding: [0x06,0x33,0x69,0x38] +amswap_db.w $r6, $r12, $r24, 0 + +# CHECK: amswap_db.d $tp, $r14, $r22, 0 +# CHECK: encoding: [0xc2,0xba,0x69,0x38] +amswap_db.d $tp, $r14, $r22, 0 + +# CHECK: amadd_db.w $r8, $r12, $r21, 0 +# CHECK: encoding: [0xa8,0x32,0x6a,0x38] +amadd_db.w $r8, $r12, $r21, 0 + +# CHECK: amadd_db.d $r5, $r17, $r29, 0 +# CHECK: encoding: [0xa5,0xc7,0x6a,0x38] +amadd_db.d $r5, $r17, $r29, 0 + +# CHECK: amand_db.w $r4, $r19, $r22, 0 +# CHECK: encoding: [0xc4,0x4e,0x6b,0x38] +amand_db.w $r4, $r19, $r22, 0 + +# CHECK: amand_db.d $r10, $r18, $r29, 0 +# CHECK: encoding: [0xaa,0xcb,0x6b,0x38] +amand_db.d $r10, $r18, $r29, 0 + +# CHECK: amor_db.w $r6, $r16, $r23, 0 +# CHECK: encoding: [0xe6,0x42,0x6c,0x38] +amor_db.w $r6, $r16, $r23, 0 + +# CHECK: amor_db.d $sp, $r16, $r24, 0 +# CHECK: encoding: [0x03,0xc3,0x6c,0x38] +amor_db.d $sp, $r16, $r24, 0 + +# CHECK: amxor_db.w $tp, $r15, $r23, 0 +# CHECK: encoding: [0xe2,0x3e,0x6d,0x38] +amxor_db.w $tp, $r15, $r23, 0 + +# CHECK: amxor_db.d $r8, $r20, $r28, 0 +# CHECK: encoding: [0x88,0xd3,0x6d,0x38] +amxor_db.d $r8, $r20, $r28, 0 + +# CHECK: ammax_db.w $ra, $r11, $r23, 0 +# CHECK: encoding: [0xe1,0x2e,0x6e,0x38] +ammax_db.w $ra, $r11, $r23, 0 + +# CHECK: ammax_db.d $r9, $r20, $r27, 0 +# CHECK: encoding: [0x69,0xd3,0x6e,0x38] +ammax_db.d $r9, $r20, $r27, 0 + +# CHECK: ammin_db.w $r9, $r14, $r23, 0 +# CHECK: encoding: [0xe9,0x3a,0x6f,0x38] +ammin_db.w $r9, $r14, $r23, 0 + +# CHECK: ammin_db.d $r9, $r13, $r22, 0 +# CHECK: encoding: [0xc9,0xb6,0x6f,0x38] +ammin_db.d $r9, $r13, $r22, 0 + +# CHECK: ammax_db.wu $r9, $r11, $r22, 0 +# CHECK: encoding: [0xc9,0x2e,0x70,0x38] +ammax_db.wu 
$r9, $r11, $r22, 0 + +# CHECK: ammax_db.du $r6, $r16, $r25, 0 +# CHECK: encoding: [0x26,0xc3,0x70,0x38] +ammax_db.du $r6, $r16, $r25, 0 + +# CHECK: ammin_db.wu $r8, $r18, $r30, 0 +# CHECK: encoding: [0xc8,0x4b,0x71,0x38] +ammin_db.wu $r8, $r18, $r30, 0 + +# CHECK: ammin_db.du $r7, $r16, $r25, 0 +# CHECK: encoding: [0x27,0xc3,0x71,0x38] +ammin_db.du $r7, $r16, $r25, 0 + +# CHECK: amswap.w $r6, $r12, $r24, 0 +# CHECK: encoding: [0x06,0x33,0x60,0x38] +amswap.w $r6, $r12, $r24, 0 + +# CHECK: amswap.d $tp, $r14, $r22, 0 +# CHECK: encoding: [0xc2,0xba,0x60,0x38] +amswap.d $tp, $r14, $r22, 0 + +# CHECK: amadd.w $r8, $r12, $r21, 0 +# CHECK: encoding: [0xa8,0x32,0x61,0x38] +amadd.w $r8, $r12, $r21, 0 + +# CHECK: amadd.d $r5, $r17, $r29, 0 +# CHECK: encoding: [0xa5,0xc7,0x61,0x38] +amadd.d $r5, $r17, $r29, 0 + +# CHECK: amand.w $r4, $r19, $r22, 0 +# CHECK: encoding: [0xc4,0x4e,0x62,0x38] +amand.w $r4, $r19, $r22, 0 + +# CHECK: amand.d $r10, $r18, $r29, 0 +# CHECK: encoding: [0xaa,0xcb,0x62,0x38] +amand.d $r10, $r18, $r29, 0 + +# CHECK: amor.w $r6, $r16, $r23, 0 +# CHECK: encoding: [0xe6,0x42,0x63,0x38] +amor.w $r6, $r16, $r23, 0 + +# CHECK: amor.d $sp, $r16, $r24, 0 +# CHECK: encoding: [0x03,0xc3,0x63,0x38] +amor.d $sp, $r16, $r24, 0 + +# CHECK: amxor.w $tp, $r15, $r23, 0 +# CHECK: encoding: [0xe2,0x3e,0x64,0x38] +amxor.w $tp, $r15, $r23, 0 + +# CHECK: amxor.d $r8, $r20, $r28, 0 +# CHECK: encoding: [0x88,0xd3,0x64,0x38] +amxor.d $r8, $r20, $r28, 0 + +# CHECK: ammax.w $ra, $r11, $r23, 0 +# CHECK: encoding: [0xe1,0x2e,0x65,0x38] +ammax.w $ra, $r11, $r23, 0 + +# CHECK: ammax.d $r9, $r20, $r27, 0 +# CHECK: encoding: [0x69,0xd3,0x65,0x38] +ammax.d $r9, $r20, $r27, 0 + +# CHECK: ammin.w $r9, $r14, $r23, 0 +# CHECK: encoding: [0xe9,0x3a,0x66,0x38] +ammin.w $r9, $r14, $r23, 0 + +# CHECK: ammin.d $r9, $r13, $r22, 0 +# CHECK: encoding: [0xc9,0xb6,0x66,0x38] +ammin.d $r9, $r13, $r22, 0 + +# CHECK: ammax.wu $r9, $r11, $r22, 0 +# CHECK: encoding: [0xc9,0x2e,0x67,0x38] +ammax.wu $r9, $r11, $r22, 0 + +# CHECK: ammax.du $r6, $r16, $r25, 0 +# CHECK: encoding: [0x26,0xc3,0x67,0x38] +ammax.du $r6, $r16, $r25, 0 + +# CHECK: ammin.wu $r8, $r18, $r30, 0 +# CHECK: encoding: [0xc8,0x4b,0x68,0x38] +ammin.wu $r8, $r18, $r30, 0 + +# CHECK: ammin.du $r7, $r16, $r25, 0 +# CHECK: encoding: [0x27,0xc3,0x68,0x38] +ammin.du $r7, $r16, $r25, 0 + +# CHECK: fldgt.s $f3, $r27, $r13 +# CHECK: encoding: [0x63,0x37,0x74,0x38] +fldgt.s $f3, $r27, $r13 + +# CHECK: fldgt.d $f26, $r5, $r31 +# CHECK: encoding: [0xba,0xfc,0x74,0x38] +fldgt.d $f26, $r5, $r31 + +# CHECK: fldle.s $f24, $r29, $r17 +# CHECK: encoding: [0xb8,0x47,0x75,0x38] +fldle.s $f24, $r29, $r17 + +# CHECK: fldle.d $f3, $r15, $r22 +# CHECK: encoding: [0xe3,0xd9,0x75,0x38] +fldle.d $f3, $r15, $r22 + +# CHECK: fstgt.s $f31, $r13, $r30 +# CHECK: encoding: [0xbf,0x79,0x76,0x38] +fstgt.s $f31, $r13, $r30 + +# CHECK: fstgt.d $f13, $r11, $r26 +# CHECK: encoding: [0x6d,0xe9,0x76,0x38] +fstgt.d $f13, $r11, $r26 + +# CHECK: fstle.s $f13, $r13, $r7 +# CHECK: encoding: [0xad,0x1d,0x77,0x38] +fstle.s $f13, $r13, $r7 + +# CHECK: fstle.d $f18, $r9, $r13 +# CHECK: encoding: [0x32,0xb5,0x77,0x38] +fstle.d $f18, $r9, $r13 + +# CHECK: preld 10, $zero, 23 +# CHECK: encoding: [0x0a,0x5c,0xc0,0x2a] +preld 10, $zero, 23 + +# CHECK: ldgt.b $r6, $r6, $r29 +# CHECK: encoding: [0xc6,0x74,0x78,0x38] +ldgt.b $r6, $r6, $r29 + +# CHECK: ldgt.h $r5, $r31, $ra +# CHECK: encoding: [0xe5,0x87,0x78,0x38] +ldgt.h $r5, $r31, $ra + +# CHECK: ldgt.w $r15, $r26, $r8 +# CHECK: encoding: [0x4f,0x23,0x79,0x38] +ldgt.w 
$r15, $r26, $r8 + +# CHECK: ldgt.d $r23, $r25, $r31 +# CHECK: encoding: [0x37,0xff,0x79,0x38] +ldgt.d $r23, $r25, $r31 + +# CHECK: ldle.b $r9, $r12, $r15 +# CHECK: encoding: [0x89,0x3d,0x7a,0x38] +ldle.b $r9, $r12, $r15 + +# CHECK: ldle.h $r11, $r11, $r23 +# CHECK: encoding: [0x6b,0xdd,0x7a,0x38] +ldle.h $r11, $r11, $r23 + +# CHECK: ldle.w $r24, $tp, $tp +# CHECK: encoding: [0x58,0x08,0x7b,0x38] +ldle.w $r24, $tp, $tp + +# CHECK: ldle.d $r20, $r15, $r16 +# CHECK: encoding: [0xf4,0xc1,0x7b,0x38] +ldle.d $r20, $r15, $r16 + +# CHECK: stgt.b $r27, $r19, $r20 +# CHECK: encoding: [0x7b,0x52,0x7c,0x38] +stgt.b $r27, $r19, $r20 + +# CHECK: stgt.h $r16, $r4, $r6 +# CHECK: encoding: [0x90,0x98,0x7c,0x38] +stgt.h $r16, $r4, $r6 + +# CHECK: stgt.w $r31, $r28, $r14 +# CHECK: encoding: [0x9f,0x3b,0x7d,0x38] +stgt.w $r31, $r28, $r14 + +# CHECK: stgt.d $r30, $r21, $r24 +# CHECK: encoding: [0xbe,0xe2,0x7d,0x38] +stgt.d $r30, $r21, $r24 + +# CHECK: stle.b $r10, $r4, $r16 +# CHECK: encoding: [0x8a,0x40,0x7e,0x38] +stle.b $r10, $r4, $r16 + +# CHECK: stle.h $r17, $r17, $r21 +# CHECK: encoding: [0x31,0xd6,0x7e,0x38] +stle.h $r17, $r17, $r21 + +# CHECK: stle.w $r23, $r28, $r29 +# CHECK: encoding: [0x97,0x77,0x7f,0x38] +stle.w $r23, $r28, $r29 + +# CHECK: stle.d $r25, $r24, $r29 +# CHECK: encoding: [0x19,0xf7,0x7f,0x38] +stle.d $r25, $r24, $r29 + diff --git a/llvm/test/MC/LoongArch/valid_priv.s b/llvm/test/MC/LoongArch/valid_priv.s new file mode 100644 index 000000000000..57a252a8df35 --- /dev/null +++ b/llvm/test/MC/LoongArch/valid_priv.s @@ -0,0 +1,125 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: csrrd $r26, 30 +# CHECK: encoding: [0x1a,0x78,0x00,0x04] +csrrd $r26, 30 + +# CHECK: csrwr $r24, 194 +# CHECK: encoding: [0x38,0x08,0x03,0x04] +csrwr $r24, 194 + +# CHECK: csrxchg $r6, $r27, 214 +# CHECK: encoding: [0x66,0x5b,0x03,0x04] +csrxchg $r6, $r27, 214 + +# CHECK: cacop 0, $r10, 27 +# CHECK: encoding: [0x40,0x6d,0x00,0x06] +cacop 0, $r10, 27 + +# CHECK: lddir $r12, $r30, 92 +# CHECK: encoding: [0xcc,0x73,0x41,0x06] +lddir $r12, $r30, 92 + +# CHECK: ldpte $r18, 200 +# CHECK: encoding: [0x40,0x22,0x47,0x06] +ldpte $r18, 200 + +# CHECK: iocsrrd.b $r26, $r24 +# CHECK: encoding: [0x1a,0x03,0x48,0x06] +iocsrrd.b $r26, $r24 + +# CHECK: iocsrrd.h $r5, $r27 +# CHECK: encoding: [0x65,0x07,0x48,0x06] +iocsrrd.h $r5, $r27 + +# CHECK: iocsrrd.w $r10, $r20 +# CHECK: encoding: [0x8a,0x0a,0x48,0x06] +iocsrrd.w $r10, $r20 + +# CHECK: iocsrrd.d $r17, $r25 +# CHECK: encoding: [0x31,0x0f,0x48,0x06] +iocsrrd.d $r17, $r25 + +# CHECK: iocsrwr.b $r4, $r23 +# CHECK: encoding: [0xe4,0x12,0x48,0x06] +iocsrwr.b $r4, $r23 + +# CHECK: iocsrwr.h $r11, $zero +# CHECK: encoding: [0x0b,0x14,0x48,0x06] +iocsrwr.h $r11, $zero + +# CHECK: iocsrwr.w $r20, $r26 +# CHECK: encoding: [0x54,0x1b,0x48,0x06] +iocsrwr.w $r20, $r26 + +# CHECK: iocsrwr.d $r20, $r7 +# CHECK: encoding: [0xf4,0x1c,0x48,0x06] +iocsrwr.d $r20, $r7 + +# CHECK: tlbclr +# CHECK: encoding: [0x00,0x20,0x48,0x06] +tlbclr + +# CHECK: tlbflush +# CHECK: encoding: [0x00,0x24,0x48,0x06] +tlbflush + +# CHECK: tlbsrch +# CHECK: encoding: [0x00,0x28,0x48,0x06] +tlbsrch + +# CHECK: tlbrd +# CHECK: encoding: [0x00,0x2c,0x48,0x06] +tlbrd + +# CHECK: tlbwr +# CHECK: encoding: [0x00,0x30,0x48,0x06] +tlbwr + +# CHECK: tlbfill +# CHECK: encoding: [0x00,0x34,0x48,0x06] +tlbfill + +# CHECK: ertn +# CHECK: encoding: [0x00,0x38,0x48,0x06] +ertn + +# CHECK: idle 204 +# CHECK: encoding: [0xcc,0x80,0x48,0x06] +idle 204 + +# CHECK: invtlb 16, $r29, $r25 
+# CHECK: encoding: [0xb0,0xe7,0x49,0x06] +invtlb 16, $r29, $r25 + +# CHECK: rdtimel.w $r30, $r19 +# CHECK: encoding: [0x7e,0x62,0x00,0x00] +rdtimel.w $r30, $r19 + +# CHECK: rdtimeh.w $r19, $r14 +# CHECK: encoding: [0xd3,0x65,0x00,0x00] +rdtimeh.w $r19, $r14 + +# CHECK: rdtime.d $tp, $r15 +# CHECK: encoding: [0xe2,0x69,0x00,0x00] +rdtime.d $tp, $r15 + +# CHECK: asrtle.d $r12, $r17 +# CHECK: encoding: [0x80,0x45,0x01,0x00] +asrtle.d $r12, $r17 + +# CHECK: asrtgt.d $r20, $r20 +# CHECK: encoding: [0x80,0xd2,0x01,0x00] +asrtgt.d $r20, $r20 + +# CHECK: break 199 +# CHECK: encoding: [0xc7,0x00,0x2a,0x00] +break 199 + +# CHECK: dbcl 201 +# CHECK: encoding: [0xc9,0x80,0x2a,0x00] +dbcl 201 + +# CHECK: syscall 100 +# CHECK: encoding: [0x64,0x00,0x2b,0x00] +syscall 100 + diff --git a/llvm/test/MC/LoongArch/valid_simd.s b/llvm/test/MC/LoongArch/valid_simd.s new file mode 100644 index 000000000000..7db00e8bbda6 --- /dev/null +++ b/llvm/test/MC/LoongArch/valid_simd.s @@ -0,0 +1,5437 @@ +# RUN: llvm-mc %s -triple=loongarch64-unknown-linux-gnu -show-encoding | FileCheck %s +# CHECK: vfmadd.s $vr15, $vr22, $vr10, $vr18 +# CHECK: encoding: [0xcf,0x2a,0x19,0x09] +vfmadd.s $vr15, $vr22, $vr10, $vr18 + +# CHECK: vfmadd.d $vr1, $vr0, $vr12, $vr10 +# CHECK: encoding: [0x01,0x30,0x25,0x09] +vfmadd.d $vr1, $vr0, $vr12, $vr10 + +# CHECK: vfmsub.s $vr16, $vr18, $vr13, $vr8 +# CHECK: encoding: [0x50,0x36,0x54,0x09] +vfmsub.s $vr16, $vr18, $vr13, $vr8 + +# CHECK: vfmsub.d $vr25, $vr13, $vr1, $vr20 +# CHECK: encoding: [0xb9,0x05,0x6a,0x09] +vfmsub.d $vr25, $vr13, $vr1, $vr20 + +# CHECK: vfnmadd.s $vr22, $vr2, $vr17, $vr22 +# CHECK: encoding: [0x56,0x44,0x9b,0x09] +vfnmadd.s $vr22, $vr2, $vr17, $vr22 + +# CHECK: vfnmadd.d $vr28, $vr29, $vr2, $vr14 +# CHECK: encoding: [0xbc,0x0b,0xa7,0x09] +vfnmadd.d $vr28, $vr29, $vr2, $vr14 + +# CHECK: vfnmsub.s $vr19, $vr4, $vr17, $vr24 +# CHECK: encoding: [0x93,0x44,0xdc,0x09] +vfnmsub.s $vr19, $vr4, $vr17, $vr24 + +# CHECK: vfnmsub.d $vr24, $vr22, $vr28, $vr30 +# CHECK: encoding: [0xd8,0x72,0xef,0x09] +vfnmsub.d $vr24, $vr22, $vr28, $vr30 + +# CHECK: xvfmadd.s $xr15, $xr28, $xr9, $xr15 +# CHECK: encoding: [0x8f,0xa7,0x17,0x0a] +xvfmadd.s $xr15, $xr28, $xr9, $xr15 + +# CHECK: xvfmadd.d $xr5, $xr24, $xr12, $xr10 +# CHECK: encoding: [0x05,0x33,0x25,0x0a] +xvfmadd.d $xr5, $xr24, $xr12, $xr10 + +# CHECK: xvfmsub.s $xr20, $xr0, $xr27, $xr26 +# CHECK: encoding: [0x14,0x6c,0x5d,0x0a] +xvfmsub.s $xr20, $xr0, $xr27, $xr26 + +# CHECK: xvfmsub.d $xr13, $xr8, $xr25, $xr26 +# CHECK: encoding: [0x0d,0x65,0x6d,0x0a] +xvfmsub.d $xr13, $xr8, $xr25, $xr26 + +# CHECK: xvfnmadd.s $xr14, $xr14, $xr22, $xr8 +# CHECK: encoding: [0xce,0x59,0x94,0x0a] +xvfnmadd.s $xr14, $xr14, $xr22, $xr8 + +# CHECK: xvfnmadd.d $xr25, $xr17, $xr0, $xr4 +# CHECK: encoding: [0x39,0x02,0xa2,0x0a] +xvfnmadd.d $xr25, $xr17, $xr0, $xr4 + +# CHECK: xvfnmsub.s $xr11, $xr3, $xr0, $xr11 +# CHECK: encoding: [0x6b,0x80,0xd5,0x0a] +xvfnmsub.s $xr11, $xr3, $xr0, $xr11 + +# CHECK: xvfnmsub.d $xr2, $xr3, $xr24, $xr22 +# CHECK: encoding: [0x62,0x60,0xeb,0x0a] +xvfnmsub.d $xr2, $xr3, $xr24, $xr22 + +# CHECK: vfcmp.ceq.s $vr26, $vr15, $vr27 +# CHECK: encoding: [0xfa,0x6d,0x52,0x0c] +vfcmp.ceq.s $vr26, $vr15, $vr27 + +# CHECK: vfcmp.ceq.d $vr21, $vr21, $vr1 +# CHECK: encoding: [0xb5,0x06,0x62,0x0c] +vfcmp.ceq.d $vr21, $vr21, $vr1 + +# CHECK: xvfcmp.ceq.s $xr8, $xr9, $xr19 +# CHECK: encoding: [0x28,0x4d,0x92,0x0c] +xvfcmp.ceq.s $xr8, $xr9, $xr19 + +# CHECK: xvfcmp.ceq.d $xr25, $xr16, $xr28 +# CHECK: encoding: [0x19,0x72,0xa2,0x0c] +xvfcmp.ceq.d 
$xr25, $xr16, $xr28 + +# CHECK: vbitsel.v $vr20, $vr23, $vr29, $vr9 +# CHECK: encoding: [0xf4,0xf6,0x14,0x0d] +vbitsel.v $vr20, $vr23, $vr29, $vr9 + +# CHECK: xvbitsel.v $xr7, $xr26, $xr28, $xr23 +# CHECK: encoding: [0x47,0xf3,0x2b,0x0d] +xvbitsel.v $xr7, $xr26, $xr28, $xr23 + +# CHECK: vshuf.b $vr11, $vr4, $vr7, $vr9 +# CHECK: encoding: [0x8b,0x9c,0x54,0x0d] +vshuf.b $vr11, $vr4, $vr7, $vr9 + +# CHECK: xvshuf.b $xr16, $xr21, $xr10, $xr12 +# CHECK: encoding: [0xb0,0x2a,0x66,0x0d] +xvshuf.b $xr16, $xr21, $xr10, $xr12 + +# CHECK: vld $vr28, $r25, -510 +# CHECK: encoding: [0x3c,0x0b,0x38,0x2c] +vld $vr28, $r25, -510 + +# CHECK: vst $vr28, $r14, 527 +# CHECK: encoding: [0xdc,0x3d,0x48,0x2c] +vst $vr28, $r14, 527 + +# CHECK: xvld $xr11, $r6, 512 +# CHECK: encoding: [0xcb,0x00,0x88,0x2c] +xvld $xr11, $r6, 512 + +# CHECK: xvst $xr13, $r7, 1215 +# CHECK: encoding: [0xed,0xfc,0xd2,0x2c] +xvst $xr13, $r7, 1215 + +# CHECK: vldrepl.d $vr8, $r9, -1544 +# CHECK: encoding: [0x28,0xfd,0x14,0x30] +vldrepl.d $vr8, $r9, -1544 + +# CHECK: vldrepl.w $vr2, $r9, -296 +# CHECK: encoding: [0x22,0xd9,0x2e,0x30] +vldrepl.w $vr2, $r9, -296 + +# CHECK: vldrepl.h $vr28, $r23, 252 +# CHECK: encoding: [0xfc,0xfa,0x41,0x30] +vldrepl.h $vr28, $r23, 252 + +# CHECK: vldrepl.b $vr5, $r9, -725 +# CHECK: encoding: [0x25,0xad,0xb4,0x30] +vldrepl.b $vr5, $r9, -725 + +# CHECK: vstelm.d $vr23, $r26, 680, 1 +# CHECK: encoding: [0x57,0x57,0x15,0x31] +vstelm.d $vr23, $r26, 680, 1 + +# CHECK: vstelm.w $vr30, $r23, -372, 1 +# CHECK: encoding: [0xfe,0x8e,0x26,0x31] +vstelm.w $vr30, $r23, -372, 1 + +# CHECK: vstelm.h $vr11, $r6, 30, 7 +# CHECK: encoding: [0xcb,0x3c,0x5c,0x31] +vstelm.h $vr11, $r6, 30, 7 + +# CHECK: vstelm.b $vr3, $r15, 44, 14 +# CHECK: encoding: [0xe3,0xb1,0xb8,0x31] +vstelm.b $vr3, $r15, 44, 14 + +# CHECK: xvldrepl.d $xr24, $r8, 840 +# CHECK: encoding: [0x18,0xa5,0x11,0x32] +xvldrepl.d $xr24, $r8, 840 + +# CHECK: xvldrepl.w $xr14, $r24, 492 +# CHECK: encoding: [0x0e,0xef,0x21,0x32] +xvldrepl.w $xr14, $r24, 492 + +# CHECK: xvldrepl.h $xr18, $r9, 804 +# CHECK: encoding: [0x32,0x49,0x46,0x32] +xvldrepl.h $xr18, $r9, 804 + +# CHECK: xvldrepl.b $xr6, $r29, 811 +# CHECK: encoding: [0xa6,0xaf,0x8c,0x32] +xvldrepl.b $xr6, $r29, 811 + +# CHECK: xvstelm.d $xr21, $sp, -216, 0 +# CHECK: encoding: [0x75,0x94,0x13,0x33] +xvstelm.d $xr21, $sp, -216, 0 + +# CHECK: xvstelm.w $xr31, $r29, 424, 0 +# CHECK: encoding: [0xbf,0xab,0x21,0x33] +xvstelm.w $xr31, $r29, 424, 0 + +# CHECK: xvstelm.h $xr14, $r7, 90, 4 +# CHECK: encoding: [0xee,0xb4,0x50,0x33] +xvstelm.h $xr14, $r7, 90, 4 + +# CHECK: xvstelm.b $xr21, $r24, -5, 8 +# CHECK: encoding: [0x15,0xef,0xa3,0x33] +xvstelm.b $xr21, $r24, -5, 8 + +# CHECK: vldx $vr29, $r4, $r30 +# CHECK: encoding: [0x9d,0x78,0x40,0x38] +vldx $vr29, $r4, $r30 + +# CHECK: vstx $vr31, $r28, $r29 +# CHECK: encoding: [0x9f,0x77,0x44,0x38] +vstx $vr31, $r28, $r29 + +# CHECK: xvldx $xr8, $r30, $r24 +# CHECK: encoding: [0xc8,0x63,0x48,0x38] +xvldx $xr8, $r30, $r24 + +# CHECK: xvstx $xr2, $r9, $r29 +# CHECK: encoding: [0x22,0x75,0x4c,0x38] +xvstx $xr2, $r9, $r29 + +# CHECK: vseq.b $vr28, $vr26, $vr23 +# CHECK: encoding: [0x5c,0x5f,0x00,0x70] +vseq.b $vr28, $vr26, $vr23 + +# CHECK: vseq.h $vr10, $vr1, $vr5 +# CHECK: encoding: [0x2a,0x94,0x00,0x70] +vseq.h $vr10, $vr1, $vr5 + +# CHECK: vseq.w $vr3, $vr27, $vr17 +# CHECK: encoding: [0x63,0x47,0x01,0x70] +vseq.w $vr3, $vr27, $vr17 + +# CHECK: vseq.d $vr5, $vr3, $vr3 +# CHECK: encoding: [0x65,0x8c,0x01,0x70] +vseq.d $vr5, $vr3, $vr3 + +# CHECK: vsle.b $vr29, $vr9, $vr7 +# 
CHECK: encoding: [0x3d,0x1d,0x02,0x70] +vsle.b $vr29, $vr9, $vr7 + +# CHECK: vsle.h $vr5, $vr24, $vr9 +# CHECK: encoding: [0x05,0xa7,0x02,0x70] +vsle.h $vr5, $vr24, $vr9 + +# CHECK: vsle.w $vr17, $vr30, $vr20 +# CHECK: encoding: [0xd1,0x53,0x03,0x70] +vsle.w $vr17, $vr30, $vr20 + +# CHECK: vsle.d $vr27, $vr6, $vr13 +# CHECK: encoding: [0xdb,0xb4,0x03,0x70] +vsle.d $vr27, $vr6, $vr13 + +# CHECK: vsle.bu $vr30, $vr11, $vr10 +# CHECK: encoding: [0x7e,0x29,0x04,0x70] +vsle.bu $vr30, $vr11, $vr10 + +# CHECK: vsle.hu $vr19, $vr29, $vr31 +# CHECK: encoding: [0xb3,0xff,0x04,0x70] +vsle.hu $vr19, $vr29, $vr31 + +# CHECK: vsle.wu $vr16, $vr18, $vr20 +# CHECK: encoding: [0x50,0x52,0x05,0x70] +vsle.wu $vr16, $vr18, $vr20 + +# CHECK: vsle.du $vr31, $vr17, $vr8 +# CHECK: encoding: [0x3f,0xa2,0x05,0x70] +vsle.du $vr31, $vr17, $vr8 + +# CHECK: vslt.b $vr26, $vr7, $vr5 +# CHECK: encoding: [0xfa,0x14,0x06,0x70] +vslt.b $vr26, $vr7, $vr5 + +# CHECK: vslt.h $vr14, $vr2, $vr20 +# CHECK: encoding: [0x4e,0xd0,0x06,0x70] +vslt.h $vr14, $vr2, $vr20 + +# CHECK: vslt.w $vr14, $vr5, $vr25 +# CHECK: encoding: [0xae,0x64,0x07,0x70] +vslt.w $vr14, $vr5, $vr25 + +# CHECK: vslt.d $vr26, $vr9, $vr25 +# CHECK: encoding: [0x3a,0xe5,0x07,0x70] +vslt.d $vr26, $vr9, $vr25 + +# CHECK: vslt.bu $vr31, $vr18, $vr14 +# CHECK: encoding: [0x5f,0x3a,0x08,0x70] +vslt.bu $vr31, $vr18, $vr14 + +# CHECK: vslt.hu $vr5, $vr15, $vr5 +# CHECK: encoding: [0xe5,0x95,0x08,0x70] +vslt.hu $vr5, $vr15, $vr5 + +# CHECK: vslt.wu $vr31, $vr28, $vr13 +# CHECK: encoding: [0x9f,0x37,0x09,0x70] +vslt.wu $vr31, $vr28, $vr13 + +# CHECK: vslt.du $vr11, $vr19, $vr22 +# CHECK: encoding: [0x6b,0xda,0x09,0x70] +vslt.du $vr11, $vr19, $vr22 + +# CHECK: vadd.b $vr26, $vr20, $vr31 +# CHECK: encoding: [0x9a,0x7e,0x0a,0x70] +vadd.b $vr26, $vr20, $vr31 + +# CHECK: vadd.h $vr11, $vr25, $vr29 +# CHECK: encoding: [0x2b,0xf7,0x0a,0x70] +vadd.h $vr11, $vr25, $vr29 + +# CHECK: vadd.w $vr7, $vr25, $vr13 +# CHECK: encoding: [0x27,0x37,0x0b,0x70] +vadd.w $vr7, $vr25, $vr13 + +# CHECK: vadd.d $vr16, $vr13, $vr16 +# CHECK: encoding: [0xb0,0xc1,0x0b,0x70] +vadd.d $vr16, $vr13, $vr16 + +# CHECK: vsub.b $vr12, $vr3, $vr21 +# CHECK: encoding: [0x6c,0x54,0x0c,0x70] +vsub.b $vr12, $vr3, $vr21 + +# CHECK: vsub.h $vr15, $vr13, $vr25 +# CHECK: encoding: [0xaf,0xe5,0x0c,0x70] +vsub.h $vr15, $vr13, $vr25 + +# CHECK: vsub.w $vr20, $vr16, $vr25 +# CHECK: encoding: [0x14,0x66,0x0d,0x70] +vsub.w $vr20, $vr16, $vr25 + +# CHECK: vsub.d $vr19, $vr3, $vr7 +# CHECK: encoding: [0x73,0x9c,0x0d,0x70] +vsub.d $vr19, $vr3, $vr7 + +# CHECK: vsadd.b $vr14, $vr30, $vr5 +# CHECK: encoding: [0xce,0x17,0x46,0x70] +vsadd.b $vr14, $vr30, $vr5 + +# CHECK: vsadd.h $vr10, $vr1, $vr15 +# CHECK: encoding: [0x2a,0xbc,0x46,0x70] +vsadd.h $vr10, $vr1, $vr15 + +# CHECK: vsadd.w $vr19, $vr31, $vr10 +# CHECK: encoding: [0xf3,0x2b,0x47,0x70] +vsadd.w $vr19, $vr31, $vr10 + +# CHECK: vsadd.d $vr26, $vr19, $vr28 +# CHECK: encoding: [0x7a,0xf2,0x47,0x70] +vsadd.d $vr26, $vr19, $vr28 + +# CHECK: vssub.b $vr24, $vr3, $vr7 +# CHECK: encoding: [0x78,0x1c,0x48,0x70] +vssub.b $vr24, $vr3, $vr7 + +# CHECK: vssub.h $vr31, $vr4, $vr24 +# CHECK: encoding: [0x9f,0xe0,0x48,0x70] +vssub.h $vr31, $vr4, $vr24 + +# CHECK: vssub.w $vr29, $vr27, $vr12 +# CHECK: encoding: [0x7d,0x33,0x49,0x70] +vssub.w $vr29, $vr27, $vr12 + +# CHECK: vssub.d $vr23, $vr16, $vr9 +# CHECK: encoding: [0x17,0xa6,0x49,0x70] +vssub.d $vr23, $vr16, $vr9 + +# CHECK: vsadd.bu $vr26, $vr29, $vr4 +# CHECK: encoding: [0xba,0x13,0x4a,0x70] +vsadd.bu $vr26, $vr29, $vr4 + +# 
CHECK: vsadd.hu $vr15, $vr7, $vr9
+# CHECK: encoding: [0xef,0xa4,0x4a,0x70]
+vsadd.hu $vr15, $vr7, $vr9
+
+# CHECK: vsadd.wu $vr13, $vr18, $vr16
+# CHECK: encoding: [0x4d,0x42,0x4b,0x70]
+vsadd.wu $vr13, $vr18, $vr16
+
+# CHECK: vsadd.du $vr4, $vr5, $vr0
+# CHECK: encoding: [0xa4,0x80,0x4b,0x70]
+vsadd.du $vr4, $vr5, $vr0
+
+# CHECK: vssub.bu $vr27, $vr17, $vr13
+# CHECK: encoding: [0x3b,0x36,0x4c,0x70]
+vssub.bu $vr27, $vr17, $vr13
+
+# CHECK: vssub.hu $vr5, $vr8, $vr1
+# CHECK: encoding: [0x05,0x85,0x4c,0x70]
+vssub.hu $vr5, $vr8, $vr1
+
+# CHECK: vssub.wu $vr14, $vr8, $vr22
+# CHECK: encoding: [0x0e,0x59,0x4d,0x70]
+vssub.wu $vr14, $vr8, $vr22
+
+# CHECK: vssub.du $vr17, $vr9, $vr8
+# CHECK: encoding: [0x31,0xa1,0x4d,0x70]
+vssub.du $vr17, $vr9, $vr8
+
+# CHECK: vhaddw.h.b $vr23, $vr19, $vr2
+# CHECK: encoding: [0x77,0x0a,0x54,0x70]
+vhaddw.h.b $vr23, $vr19, $vr2
+
+# CHECK: vhaddw.w.h $vr26, $vr16, $vr26
+# CHECK: encoding: [0x1a,0xea,0x54,0x70]
+vhaddw.w.h $vr26, $vr16, $vr26
+
+# CHECK: vhaddw.d.w $vr0, $vr31, $vr27
+# CHECK: encoding: [0xe0,0x6f,0x55,0x70]
+vhaddw.d.w $vr0, $vr31, $vr27
+
+# CHECK: vhaddw.q.d $vr25, $vr13, $vr25
+# CHECK: encoding: [0xb9,0xe5,0x55,0x70]
+vhaddw.q.d $vr25, $vr13, $vr25
+
+# CHECK: vhsubw.h.b $vr9, $vr23, $vr5
+# CHECK: encoding: [0xe9,0x16,0x56,0x70]
+vhsubw.h.b $vr9, $vr23, $vr5
+
+# CHECK: vhsubw.w.h $vr15, $vr29, $vr26
+# CHECK: encoding: [0xaf,0xeb,0x56,0x70]
+vhsubw.w.h $vr15, $vr29, $vr26
+
+# CHECK: vhsubw.d.w $vr0, $vr28, $vr18
+# CHECK: encoding: [0x80,0x4b,0x57,0x70]
+vhsubw.d.w $vr0, $vr28, $vr18
+
+# CHECK: vhsubw.q.d $vr14, $vr25, $vr8
+# CHECK: encoding: [0x2e,0xa3,0x57,0x70]
+vhsubw.q.d $vr14, $vr25, $vr8
+
+# CHECK: vhaddw.hu.bu $vr1, $vr16, $vr21
+# CHECK: encoding: [0x01,0x56,0x58,0x70]
+vhaddw.hu.bu $vr1, $vr16, $vr21
+
+# CHECK: vhaddw.wu.hu $vr28, $vr21, $vr29
+# CHECK: encoding: [0xbc,0xf6,0x58,0x70]
+vhaddw.wu.hu $vr28, $vr21, $vr29
+
+# CHECK: vhaddw.du.wu $vr29, $vr20, $vr16
+# CHECK: encoding: [0x9d,0x42,0x59,0x70]
+vhaddw.du.wu $vr29, $vr20, $vr16
+
+# CHECK: vhaddw.qu.du $vr2, $vr10, $vr28
+# CHECK: encoding: [0x42,0xf1,0x59,0x70]
+vhaddw.qu.du $vr2, $vr10, $vr28
+
+# CHECK: vhsubw.hu.bu $vr31, $vr3, $vr30
+# CHECK: encoding: [0x7f,0x78,0x5a,0x70]
+vhsubw.hu.bu $vr31, $vr3, $vr30
+
+# CHECK: vhsubw.wu.hu $vr5, $vr9, $vr11
+# CHECK: encoding: [0x25,0xad,0x5a,0x70]
+vhsubw.wu.hu $vr5, $vr9, $vr11
+
+# CHECK: vhsubw.du.wu $vr23, $vr31, $vr22
+# CHECK: encoding: [0xf7,0x5b,0x5b,0x70]
+vhsubw.du.wu $vr23, $vr31, $vr22
+
+# CHECK: vhsubw.qu.du $vr4, $vr28, $vr18
+# CHECK: encoding: [0x84,0xcb,0x5b,0x70]
+vhsubw.qu.du $vr4, $vr28, $vr18
+
+# CHECK: vadda.b $vr18, $vr13, $vr11
+# CHECK: encoding: [0xb2,0x2d,0x5c,0x70]
+vadda.b $vr18, $vr13, $vr11
+
+# CHECK: vadda.h $vr17, $vr14, $vr12
+# CHECK: encoding: [0xd1,0xb1,0x5c,0x70]
+vadda.h $vr17, $vr14, $vr12
+
+# CHECK: vadda.w $vr22, $vr11, $vr3
+# CHECK: encoding: [0x76,0x0d,0x5d,0x70]
+vadda.w $vr22, $vr11, $vr3
+
+# CHECK: vadda.d $vr24, $vr24, $vr15
+# CHECK: encoding: [0x18,0xbf,0x5d,0x70]
+vadda.d $vr24, $vr24, $vr15
+
+# CHECK: vabsd.b $vr23, $vr19, $vr17
+# CHECK: encoding: [0x77,0x46,0x60,0x70]
+vabsd.b $vr23, $vr19, $vr17
+
+# CHECK: vabsd.h $vr14, $vr31, $vr13
+# CHECK: encoding: [0xee,0xb7,0x60,0x70]
+vabsd.h $vr14, $vr31, $vr13
+
+# CHECK: vabsd.w $vr24, $vr1, $vr9
+# CHECK: encoding: [0x38,0x24,0x61,0x70]
+vabsd.w $vr24, $vr1, $vr9
+
+# CHECK: vabsd.d $vr31, $vr20, $vr0
+# CHECK: encoding: [0x9f,0x82,0x61,0x70]
+vabsd.d $vr31, $vr20, $vr0
+
+# CHECK: vabsd.bu $vr23, $vr12, $vr29
+# CHECK: encoding: [0x97,0x75,0x62,0x70]
+vabsd.bu $vr23, $vr12, $vr29
+
+# CHECK: vabsd.hu $vr18, $vr19, $vr1
+# CHECK: encoding: [0x72,0x86,0x62,0x70]
+vabsd.hu $vr18, $vr19, $vr1
+
+# CHECK: vabsd.wu $vr13, $vr21, $vr28
+# CHECK: encoding: [0xad,0x72,0x63,0x70]
+vabsd.wu $vr13, $vr21, $vr28
+
+# CHECK: vabsd.du $vr16, $vr26, $vr11
+# CHECK: encoding: [0x50,0xaf,0x63,0x70]
+vabsd.du $vr16, $vr26, $vr11
+
+# CHECK: vavg.b $vr1, $vr21, $vr27
+# CHECK: encoding: [0xa1,0x6e,0x64,0x70]
+vavg.b $vr1, $vr21, $vr27
+
+# CHECK: vavg.h $vr20, $vr26, $vr15
+# CHECK: encoding: [0x54,0xbf,0x64,0x70]
+vavg.h $vr20, $vr26, $vr15
+
+# CHECK: vavg.w $vr29, $vr18, $vr3
+# CHECK: encoding: [0x5d,0x0e,0x65,0x70]
+vavg.w $vr29, $vr18, $vr3
+
+# CHECK: vavg.d $vr19, $vr15, $vr31
+# CHECK: encoding: [0xf3,0xfd,0x65,0x70]
+vavg.d $vr19, $vr15, $vr31
+
+# CHECK: vavg.bu $vr11, $vr11, $vr17
+# CHECK: encoding: [0x6b,0x45,0x66,0x70]
+vavg.bu $vr11, $vr11, $vr17
+
+# CHECK: vavg.hu $vr30, $vr28, $vr13
+# CHECK: encoding: [0x9e,0xb7,0x66,0x70]
+vavg.hu $vr30, $vr28, $vr13
+
+# CHECK: vavg.wu $vr7, $vr7, $vr10
+# CHECK: encoding: [0xe7,0x28,0x67,0x70]
+vavg.wu $vr7, $vr7, $vr10
+
+# CHECK: vavg.du $vr25, $vr7, $vr12
+# CHECK: encoding: [0xf9,0xb0,0x67,0x70]
+vavg.du $vr25, $vr7, $vr12
+
+# CHECK: vavgr.b $vr29, $vr13, $vr7
+# CHECK: encoding: [0xbd,0x1d,0x68,0x70]
+vavgr.b $vr29, $vr13, $vr7
+
+# CHECK: vavgr.h $vr5, $vr28, $vr19
+# CHECK: encoding: [0x85,0xcf,0x68,0x70]
+vavgr.h $vr5, $vr28, $vr19
+
+# CHECK: vavgr.w $vr19, $vr15, $vr14
+# CHECK: encoding: [0xf3,0x39,0x69,0x70]
+vavgr.w $vr19, $vr15, $vr14
+
+# CHECK: vavgr.d $vr3, $vr0, $vr2
+# CHECK: encoding: [0x03,0x88,0x69,0x70]
+vavgr.d $vr3, $vr0, $vr2
+
+# CHECK: vavgr.bu $vr23, $vr11, $vr31
+# CHECK: encoding: [0x77,0x7d,0x6a,0x70]
+vavgr.bu $vr23, $vr11, $vr31
+
+# CHECK: vavgr.hu $vr25, $vr19, $vr8
+# CHECK: encoding: [0x79,0xa2,0x6a,0x70]
+vavgr.hu $vr25, $vr19, $vr8
+
+# CHECK: vavgr.wu $vr30, $vr25, $vr12
+# CHECK: encoding: [0x3e,0x33,0x6b,0x70]
+vavgr.wu $vr30, $vr25, $vr12
+
+# CHECK: vavgr.du $vr25, $vr20, $vr25
+# CHECK: encoding: [0x99,0xe6,0x6b,0x70]
+vavgr.du $vr25, $vr20, $vr25
+
+# CHECK: vmax.b $vr28, $vr26, $vr26
+# CHECK: encoding: [0x5c,0x6b,0x70,0x70]
+vmax.b $vr28, $vr26, $vr26
+
+# CHECK: vmax.h $vr8, $vr13, $vr11
+# CHECK: encoding: [0xa8,0xad,0x70,0x70]
+vmax.h $vr8, $vr13, $vr11
+
+# CHECK: vmax.w $vr21, $vr28, $vr31
+# CHECK: encoding: [0x95,0x7f,0x71,0x70]
+vmax.w $vr21, $vr28, $vr31
+
+# CHECK: vmax.d $vr1, $vr30, $vr26
+# CHECK: encoding: [0xc1,0xeb,0x71,0x70]
+vmax.d $vr1, $vr30, $vr26
+
+# CHECK: vmin.b $vr10, $vr14, $vr9
+# CHECK: encoding: [0xca,0x25,0x72,0x70]
+vmin.b $vr10, $vr14, $vr9
+
+# CHECK: vmin.h $vr10, $vr11, $vr21
+# CHECK: encoding: [0x6a,0xd5,0x72,0x70]
+vmin.h $vr10, $vr11, $vr21
+
+# CHECK: vmin.w $vr26, $vr0, $vr12
+# CHECK: encoding: [0x1a,0x30,0x73,0x70]
+vmin.w $vr26, $vr0, $vr12
+
+# CHECK: vmin.d $vr19, $vr18, $vr0
+# CHECK: encoding: [0x53,0x82,0x73,0x70]
+vmin.d $vr19, $vr18, $vr0
+
+# CHECK: vmax.bu $vr2, $vr25, $vr28
+# CHECK: encoding: [0x22,0x73,0x74,0x70]
+vmax.bu $vr2, $vr25, $vr28
+
+# CHECK: vmax.hu $vr9, $vr22, $vr30
+# CHECK: encoding: [0xc9,0xfa,0x74,0x70]
+vmax.hu $vr9, $vr22, $vr30
+
+# CHECK: vmax.wu $vr21, $vr25, $vr27
+# CHECK: encoding: [0x35,0x6f,0x75,0x70]
+vmax.wu $vr21, $vr25, $vr27
+
+# CHECK: vmax.du $vr3, $vr14, $vr25
+# CHECK: encoding: [0xc3,0xe5,0x75,0x70]
+vmax.du $vr3, $vr14, $vr25
+
+# CHECK: vmin.bu $vr24, $vr7, $vr27
+# CHECK: encoding: [0xf8,0x6c,0x76,0x70]
+vmin.bu $vr24, $vr7, $vr27
+
+# CHECK: vmin.hu $vr18, $vr28, $vr29
+# CHECK: encoding: [0x92,0xf7,0x76,0x70]
+vmin.hu $vr18, $vr28, $vr29
+
+# CHECK: vmin.wu $vr26, $vr4, $vr2
+# CHECK: encoding: [0x9a,0x08,0x77,0x70]
+vmin.wu $vr26, $vr4, $vr2
+
+# CHECK: vmin.du $vr13, $vr0, $vr4
+# CHECK: encoding: [0x0d,0x90,0x77,0x70]
+vmin.du $vr13, $vr0, $vr4
+
+# CHECK: vmul.b $vr1, $vr21, $vr23
+# CHECK: encoding: [0xa1,0x5e,0x84,0x70]
+vmul.b $vr1, $vr21, $vr23
+
+# CHECK: vmul.h $vr9, $vr21, $vr25
+# CHECK: encoding: [0xa9,0xe6,0x84,0x70]
+vmul.h $vr9, $vr21, $vr25
+
+# CHECK: vmul.w $vr16, $vr8, $vr28
+# CHECK: encoding: [0x10,0x71,0x85,0x70]
+vmul.w $vr16, $vr8, $vr28
+
+# CHECK: vmul.d $vr4, $vr17, $vr11
+# CHECK: encoding: [0x24,0xae,0x85,0x70]
+vmul.d $vr4, $vr17, $vr11
+
+# CHECK: vmuh.b $vr12, $vr24, $vr8
+# CHECK: encoding: [0x0c,0x23,0x86,0x70]
+vmuh.b $vr12, $vr24, $vr8
+
+# CHECK: vmuh.h $vr6, $vr21, $vr24
+# CHECK: encoding: [0xa6,0xe2,0x86,0x70]
+vmuh.h $vr6, $vr21, $vr24
+
+# CHECK: vmuh.w $vr11, $vr29, $vr30
+# CHECK: encoding: [0xab,0x7b,0x87,0x70]
+vmuh.w $vr11, $vr29, $vr30
+
+# CHECK: vmuh.d $vr1, $vr17, $vr25
+# CHECK: encoding: [0x21,0xe6,0x87,0x70]
+vmuh.d $vr1, $vr17, $vr25
+
+# CHECK: vmuh.bu $vr29, $vr29, $vr10
+# CHECK: encoding: [0xbd,0x2b,0x88,0x70]
+vmuh.bu $vr29, $vr29, $vr10
+
+# CHECK: vmuh.hu $vr24, $vr9, $vr21
+# CHECK: encoding: [0x38,0xd5,0x88,0x70]
+vmuh.hu $vr24, $vr9, $vr21
+
+# CHECK: vmuh.wu $vr15, $vr20, $vr19
+# CHECK: encoding: [0x8f,0x4e,0x89,0x70]
+vmuh.wu $vr15, $vr20, $vr19
+
+# CHECK: vmuh.du $vr0, $vr28, $vr1
+# CHECK: encoding: [0x80,0x87,0x89,0x70]
+vmuh.du $vr0, $vr28, $vr1
+
+# CHECK: vmadd.b $vr27, $vr0, $vr4
+# CHECK: encoding: [0x1b,0x10,0xa8,0x70]
+vmadd.b $vr27, $vr0, $vr4
+
+# CHECK: vmadd.h $vr19, $vr20, $vr28
+# CHECK: encoding: [0x93,0xf2,0xa8,0x70]
+vmadd.h $vr19, $vr20, $vr28
+
+# CHECK: vmadd.w $vr15, $vr7, $vr3
+# CHECK: encoding: [0xef,0x0c,0xa9,0x70]
+vmadd.w $vr15, $vr7, $vr3
+
+# CHECK: vmadd.d $vr25, $vr25, $vr30
+# CHECK: encoding: [0x39,0xfb,0xa9,0x70]
+vmadd.d $vr25, $vr25, $vr30
+
+# CHECK: vmsub.b $vr24, $vr25, $vr26
+# CHECK: encoding: [0x38,0x6b,0xaa,0x70]
+vmsub.b $vr24, $vr25, $vr26
+
+# CHECK: vmsub.h $vr12, $vr0, $vr13
+# CHECK: encoding: [0x0c,0xb4,0xaa,0x70]
+vmsub.h $vr12, $vr0, $vr13
+
+# CHECK: vmsub.w $vr26, $vr16, $vr24
+# CHECK: encoding: [0x1a,0x62,0xab,0x70]
+vmsub.w $vr26, $vr16, $vr24
+
+# CHECK: vmsub.d $vr13, $vr10, $vr8
+# CHECK: encoding: [0x4d,0xa1,0xab,0x70]
+vmsub.d $vr13, $vr10, $vr8
+
+# CHECK: vdiv.b $vr18, $vr28, $vr21
+# CHECK: encoding: [0x92,0x57,0xe0,0x70]
+vdiv.b $vr18, $vr28, $vr21
+
+# CHECK: vdiv.h $vr17, $vr24, $vr1
+# CHECK: encoding: [0x11,0x87,0xe0,0x70]
+vdiv.h $vr17, $vr24, $vr1
+
+# CHECK: vdiv.w $vr3, $vr10, $vr22
+# CHECK: encoding: [0x43,0x59,0xe1,0x70]
+vdiv.w $vr3, $vr10, $vr22
+
+# CHECK: vdiv.d $vr15, $vr13, $vr8
+# CHECK: encoding: [0xaf,0xa1,0xe1,0x70]
+vdiv.d $vr15, $vr13, $vr8
+
+# CHECK: vmod.b $vr19, $vr25, $vr20
+# CHECK: encoding: [0x33,0x53,0xe2,0x70]
+vmod.b $vr19, $vr25, $vr20
+
+# CHECK: vmod.h $vr2, $vr24, $vr22
+# CHECK: encoding: [0x02,0xdb,0xe2,0x70]
+vmod.h $vr2, $vr24, $vr22
+
+# CHECK: vmod.w $vr31, $vr18, $vr0
+# CHECK: encoding: [0x5f,0x02,0xe3,0x70]
+vmod.w $vr31, $vr18, $vr0
+
+# CHECK: vmod.d $vr31, $vr0, $vr2
+# CHECK: encoding: [0x1f,0x88,0xe3,0x70]
+vmod.d $vr31, $vr0, $vr2
+
+# CHECK: vdiv.bu $vr15, $vr4, $vr3
+# CHECK: encoding: [0x8f,0x0c,0xe4,0x70]
+vdiv.bu $vr15, $vr4, $vr3
+
+# CHECK: vdiv.hu $vr17, $vr7, $vr29
+# CHECK: encoding: [0xf1,0xf4,0xe4,0x70]
+vdiv.hu $vr17, $vr7, $vr29
+
+# CHECK: vdiv.wu $vr27, $vr10, $vr3
+# CHECK: encoding: [0x5b,0x0d,0xe5,0x70]
+vdiv.wu $vr27, $vr10, $vr3
+
+# CHECK: vdiv.du $vr8, $vr24, $vr26
+# CHECK: encoding: [0x08,0xeb,0xe5,0x70]
+vdiv.du $vr8, $vr24, $vr26
+
+# CHECK: vmod.bu $vr10, $vr22, $vr24
+# CHECK: encoding: [0xca,0x62,0xe6,0x70]
+vmod.bu $vr10, $vr22, $vr24
+
+# CHECK: vmod.hu $vr19, $vr31, $vr24
+# CHECK: encoding: [0xf3,0xe3,0xe6,0x70]
+vmod.hu $vr19, $vr31, $vr24
+
+# CHECK: vmod.wu $vr26, $vr24, $vr13
+# CHECK: encoding: [0x1a,0x37,0xe7,0x70]
+vmod.wu $vr26, $vr24, $vr13
+
+# CHECK: vmod.du $vr20, $vr19, $vr10
+# CHECK: encoding: [0x74,0xaa,0xe7,0x70]
+vmod.du $vr20, $vr19, $vr10
+
+# CHECK: vsll.b $vr28, $vr18, $vr30
+# CHECK: encoding: [0x5c,0x7a,0xe8,0x70]
+vsll.b $vr28, $vr18, $vr30
+
+# CHECK: vsll.h $vr22, $vr4, $vr30
+# CHECK: encoding: [0x96,0xf8,0xe8,0x70]
+vsll.h $vr22, $vr4, $vr30
+
+# CHECK: vsll.w $vr1, $vr25, $vr8
+# CHECK: encoding: [0x21,0x23,0xe9,0x70]
+vsll.w $vr1, $vr25, $vr8
+
+# CHECK: vsll.d $vr31, $vr18, $vr15
+# CHECK: encoding: [0x5f,0xbe,0xe9,0x70]
+vsll.d $vr31, $vr18, $vr15
+
+# CHECK: vsrl.b $vr5, $vr12, $vr16
+# CHECK: encoding: [0x85,0x41,0xea,0x70]
+vsrl.b $vr5, $vr12, $vr16
+
+# CHECK: vsrl.h $vr9, $vr5, $vr28
+# CHECK: encoding: [0xa9,0xf0,0xea,0x70]
+vsrl.h $vr9, $vr5, $vr28
+
+# CHECK: vsrl.w $vr30, $vr16, $vr1
+# CHECK: encoding: [0x1e,0x06,0xeb,0x70]
+vsrl.w $vr30, $vr16, $vr1
+
+# CHECK: vsrl.d $vr28, $vr23, $vr27
+# CHECK: encoding: [0xfc,0xee,0xeb,0x70]
+vsrl.d $vr28, $vr23, $vr27
+
+# CHECK: vsra.b $vr15, $vr17, $vr25
+# CHECK: encoding: [0x2f,0x66,0xec,0x70]
+vsra.b $vr15, $vr17, $vr25
+
+# CHECK: vsra.h $vr0, $vr8, $vr5
+# CHECK: encoding: [0x00,0x95,0xec,0x70]
+vsra.h $vr0, $vr8, $vr5
+
+# CHECK: vsra.w $vr29, $vr9, $vr7
+# CHECK: encoding: [0x3d,0x1d,0xed,0x70]
+vsra.w $vr29, $vr9, $vr7
+
+# CHECK: vsra.d $vr22, $vr3, $vr19
+# CHECK: encoding: [0x76,0xcc,0xed,0x70]
+vsra.d $vr22, $vr3, $vr19
+
+# CHECK: vrotr.b $vr8, $vr16, $vr8
+# CHECK: encoding: [0x08,0x22,0xee,0x70]
+vrotr.b $vr8, $vr16, $vr8
+
+# CHECK: vrotr.h $vr14, $vr5, $vr11
+# CHECK: encoding: [0xae,0xac,0xee,0x70]
+vrotr.h $vr14, $vr5, $vr11
+
+# CHECK: vrotr.w $vr17, $vr28, $vr25
+# CHECK: encoding: [0x91,0x67,0xef,0x70]
+vrotr.w $vr17, $vr28, $vr25
+
+# CHECK: vrotr.d $vr18, $vr28, $vr19
+# CHECK: encoding: [0x92,0xcf,0xef,0x70]
+vrotr.d $vr18, $vr28, $vr19
+
+# CHECK: vsrlr.b $vr1, $vr27, $vr17
+# CHECK: encoding: [0x61,0x47,0xf0,0x70]
+vsrlr.b $vr1, $vr27, $vr17
+
+# CHECK: vsrlr.h $vr26, $vr14, $vr10
+# CHECK: encoding: [0xda,0xa9,0xf0,0x70]
+vsrlr.h $vr26, $vr14, $vr10
+
+# CHECK: vsrlr.w $vr3, $vr29, $vr24
+# CHECK: encoding: [0xa3,0x63,0xf1,0x70]
+vsrlr.w $vr3, $vr29, $vr24
+
+# CHECK: vsrlr.d $vr23, $vr4, $vr10
+# CHECK: encoding: [0x97,0xa8,0xf1,0x70]
+vsrlr.d $vr23, $vr4, $vr10
+
+# CHECK: vsrar.b $vr25, $vr2, $vr21
+# CHECK: encoding: [0x59,0x54,0xf2,0x70]
+vsrar.b $vr25, $vr2, $vr21
+
+# CHECK: vsrar.h $vr4, $vr11, $vr20
+# CHECK: encoding: [0x64,0xd1,0xf2,0x70]
+vsrar.h $vr4, $vr11, $vr20
+
+# CHECK: vsrar.w $vr11, $vr21, $vr29
+# CHECK: encoding: [0xab,0x76,0xf3,0x70]
+vsrar.w $vr11, $vr21, $vr29
+
+# CHECK: vsrar.d $vr29, $vr5, $vr2
+# CHECK: encoding: [0xbd,0x88,0xf3,0x70]
+vsrar.d $vr29, $vr5, $vr2
+
+# CHECK: vsrln.b.h $vr24, $vr14, $vr29
+# CHECK: encoding: [0xd8,0xf5,0xf4,0x70]
+vsrln.b.h $vr24, $vr14, $vr29
+
+# CHECK: vsrln.h.w $vr26, $vr22, $vr16
+# CHECK: encoding: [0xda,0x42,0xf5,0x70]
+vsrln.h.w $vr26, $vr22, $vr16
+
+# CHECK: vsrln.w.d $vr17, $vr31, $vr2
+# CHECK: encoding: [0xf1,0x8b,0xf5,0x70]
+vsrln.w.d $vr17, $vr31, $vr2
+
+# CHECK: vsran.b.h $vr31, $vr0, $vr23
+# CHECK: encoding: [0x1f,0xdc,0xf6,0x70]
+vsran.b.h $vr31, $vr0, $vr23
+
+# CHECK: vsran.h.w $vr20, $vr12, $vr29
+# CHECK: encoding: [0x94,0x75,0xf7,0x70]
+vsran.h.w $vr20, $vr12, $vr29
+
+# CHECK: vsran.w.d $vr2, $vr1, $vr2
+# CHECK: encoding: [0x22,0x88,0xf7,0x70]
+vsran.w.d $vr2, $vr1, $vr2
+
+# CHECK: vsrlrn.b.h $vr19, $vr28, $vr0
+# CHECK: encoding: [0x93,0x83,0xf8,0x70]
+vsrlrn.b.h $vr19, $vr28, $vr0
+
+# CHECK: vsrlrn.h.w $vr23, $vr29, $vr14
+# CHECK: encoding: [0xb7,0x3b,0xf9,0x70]
+vsrlrn.h.w $vr23, $vr29, $vr14
+
+# CHECK: vsrlrn.w.d $vr5, $vr26, $vr5
+# CHECK: encoding: [0x45,0x97,0xf9,0x70]
+vsrlrn.w.d $vr5, $vr26, $vr5
+
+# CHECK: vsrarn.b.h $vr17, $vr15, $vr7
+# CHECK: encoding: [0xf1,0x9d,0xfa,0x70]
+vsrarn.b.h $vr17, $vr15, $vr7
+
+# CHECK: vsrarn.h.w $vr12, $vr10, $vr29
+# CHECK: encoding: [0x4c,0x75,0xfb,0x70]
+vsrarn.h.w $vr12, $vr10, $vr29
+
+# CHECK: vsrarn.w.d $vr24, $vr26, $vr27
+# CHECK: encoding: [0x58,0xef,0xfb,0x70]
+vsrarn.w.d $vr24, $vr26, $vr27
+
+# CHECK: vssrln.b.h $vr1, $vr12, $vr14
+# CHECK: encoding: [0x81,0xb9,0xfc,0x70]
+vssrln.b.h $vr1, $vr12, $vr14
+
+# CHECK: vssrln.h.w $vr11, $vr8, $vr18
+# CHECK: encoding: [0x0b,0x49,0xfd,0x70]
+vssrln.h.w $vr11, $vr8, $vr18
+
+# CHECK: vssrln.w.d $vr31, $vr15, $vr6
+# CHECK: encoding: [0xff,0x99,0xfd,0x70]
+vssrln.w.d $vr31, $vr15, $vr6
+
+# CHECK: vssran.b.h $vr13, $vr5, $vr24
+# CHECK: encoding: [0xad,0xe0,0xfe,0x70]
+vssran.b.h $vr13, $vr5, $vr24
+
+# CHECK: vssran.h.w $vr4, $vr26, $vr7
+# CHECK: encoding: [0x44,0x1f,0xff,0x70]
+vssran.h.w $vr4, $vr26, $vr7
+
+# CHECK: vssran.w.d $vr25, $vr10, $vr6
+# CHECK: encoding: [0x59,0x99,0xff,0x70]
+vssran.w.d $vr25, $vr10, $vr6
+
+# CHECK: vssrlrn.b.h $vr28, $vr28, $vr6
+# CHECK: encoding: [0x9c,0x9b,0x00,0x71]
+vssrlrn.b.h $vr28, $vr28, $vr6
+
+# CHECK: vssrlrn.h.w $vr15, $vr23, $vr17
+# CHECK: encoding: [0xef,0x46,0x01,0x71]
+vssrlrn.h.w $vr15, $vr23, $vr17
+
+# CHECK: vssrlrn.w.d $vr12, $vr9, $vr2
+# CHECK: encoding: [0x2c,0x89,0x01,0x71]
+vssrlrn.w.d $vr12, $vr9, $vr2
+
+# CHECK: vssrarn.b.h $vr1, $vr25, $vr17
+# CHECK: encoding: [0x21,0xc7,0x02,0x71]
+vssrarn.b.h $vr1, $vr25, $vr17
+
+# CHECK: vssrarn.h.w $vr3, $vr9, $vr23
+# CHECK: encoding: [0x23,0x5d,0x03,0x71]
+vssrarn.h.w $vr3, $vr9, $vr23
+
+# CHECK: vssrarn.w.d $vr14, $vr9, $vr27
+# CHECK: encoding: [0x2e,0xed,0x03,0x71]
+vssrarn.w.d $vr14, $vr9, $vr27
+
+# CHECK: vssrln.bu.h $vr16, $vr24, $vr15
+# CHECK: encoding: [0x10,0xbf,0x04,0x71]
+vssrln.bu.h $vr16, $vr24, $vr15
+
+# CHECK: vssrln.hu.w $vr21, $vr23, $vr30
+# CHECK: encoding: [0xf5,0x7a,0x05,0x71]
+vssrln.hu.w $vr21, $vr23, $vr30
+
+# CHECK: vssrln.wu.d $vr12, $vr8, $vr30
+# CHECK: encoding: [0x0c,0xf9,0x05,0x71]
+vssrln.wu.d $vr12, $vr8, $vr30
+
+# CHECK: vssran.bu.h $vr5, $vr18, $vr12
+# CHECK: encoding: [0x45,0xb2,0x06,0x71]
+vssran.bu.h $vr5, $vr18, $vr12
+
+# CHECK: vssran.hu.w $vr0, $vr7, $vr28
+# CHECK: encoding: [0xe0,0x70,0x07,0x71]
+vssran.hu.w $vr0, $vr7, $vr28
+
+# CHECK: vssran.wu.d $vr5, $vr11, $vr8
+# CHECK: encoding: [0x65,0xa1,0x07,0x71]
+vssran.wu.d $vr5, $vr11, $vr8
+
+# CHECK: vssrlrn.bu.h $vr18, $vr25, $vr3
+# CHECK: encoding: [0x32,0x8f,0x08,0x71]
+vssrlrn.bu.h $vr18, $vr25, $vr3
+
+# CHECK: vssrlrn.hu.w $vr19, $vr1, $vr20
+# CHECK: encoding: [0x33,0x50,0x09,0x71]
+vssrlrn.hu.w $vr19, $vr1, $vr20
+
+# CHECK: vssrlrn.wu.d $vr6, $vr30, $vr18
+# CHECK: encoding: [0xc6,0xcb,0x09,0x71]
+vssrlrn.wu.d $vr6, $vr30, $vr18
+
+# CHECK: vssrarn.bu.h $vr12, $vr13, $vr3
+# CHECK: encoding: [0xac,0x8d,0x0a,0x71]
+vssrarn.bu.h $vr12, $vr13, $vr3
+
+# CHECK: vssrarn.hu.w $vr18, $vr5, $vr20
+# CHECK: encoding: [0xb2,0x50,0x0b,0x71]
+vssrarn.hu.w $vr18, $vr5, $vr20
+
+# CHECK: vssrarn.wu.d $vr23, $vr8, $vr21
+# CHECK: encoding: [0x17,0xd5,0x0b,0x71]
+vssrarn.wu.d $vr23, $vr8, $vr21
+
+# CHECK: vbitclr.b $vr14, $vr2, $vr31
+# CHECK: encoding: [0x4e,0x7c,0x0c,0x71]
+vbitclr.b $vr14, $vr2, $vr31
+
+# CHECK: vbitclr.h $vr17, $vr25, $vr8
+# CHECK: encoding: [0x31,0xa3,0x0c,0x71]
+vbitclr.h $vr17, $vr25, $vr8
+
+# CHECK: vbitclr.w $vr18, $vr11, $vr3
+# CHECK: encoding: [0x72,0x0d,0x0d,0x71]
+vbitclr.w $vr18, $vr11, $vr3
+
+# CHECK: vbitclr.d $vr31, $vr15, $vr29
+# CHECK: encoding: [0xff,0xf5,0x0d,0x71]
+vbitclr.d $vr31, $vr15, $vr29
+
+# CHECK: vbitset.b $vr8, $vr29, $vr16
+# CHECK: encoding: [0xa8,0x43,0x0e,0x71]
+vbitset.b $vr8, $vr29, $vr16
+
+# CHECK: vbitset.h $vr5, $vr17, $vr17
+# CHECK: encoding: [0x25,0xc6,0x0e,0x71]
+vbitset.h $vr5, $vr17, $vr17
+
+# CHECK: vbitset.w $vr5, $vr19, $vr5
+# CHECK: encoding: [0x65,0x16,0x0f,0x71]
+vbitset.w $vr5, $vr19, $vr5
+
+# CHECK: vbitset.d $vr5, $vr27, $vr10
+# CHECK: encoding: [0x65,0xab,0x0f,0x71]
+vbitset.d $vr5, $vr27, $vr10
+
+# CHECK: vbitrev.b $vr16, $vr5, $vr8
+# CHECK: encoding: [0xb0,0x20,0x10,0x71]
+vbitrev.b $vr16, $vr5, $vr8
+
+# CHECK: vbitrev.h $vr12, $vr29, $vr12
+# CHECK: encoding: [0xac,0xb3,0x10,0x71]
+vbitrev.h $vr12, $vr29, $vr12
+
+# CHECK: vbitrev.w $vr3, $vr14, $vr14
+# CHECK: encoding: [0xc3,0x39,0x11,0x71]
+vbitrev.w $vr3, $vr14, $vr14
+
+# CHECK: vbitrev.d $vr31, $vr27, $vr14
+# CHECK: encoding: [0x7f,0xbb,0x11,0x71]
+vbitrev.d $vr31, $vr27, $vr14
+
+# CHECK: vpackev.b $vr22, $vr24, $vr19
+# CHECK: encoding: [0x16,0x4f,0x16,0x71]
+vpackev.b $vr22, $vr24, $vr19
+
+# CHECK: vpackev.h $vr28, $vr2, $vr18
+# CHECK: encoding: [0x5c,0xc8,0x16,0x71]
+vpackev.h $vr28, $vr2, $vr18
+
+# CHECK: vpackev.w $vr21, $vr3, $vr4
+# CHECK: encoding: [0x75,0x10,0x17,0x71]
+vpackev.w $vr21, $vr3, $vr4
+
+# CHECK: vpackev.d $vr24, $vr21, $vr11
+# CHECK: encoding: [0xb8,0xae,0x17,0x71]
+vpackev.d $vr24, $vr21, $vr11
+
+# CHECK: vpackod.b $vr12, $vr31, $vr26
+# CHECK: encoding: [0xec,0x6b,0x18,0x71]
+vpackod.b $vr12, $vr31, $vr26
+
+# CHECK: vpackod.h $vr25, $vr3, $vr16
+# CHECK: encoding: [0x79,0xc0,0x18,0x71]
+vpackod.h $vr25, $vr3, $vr16
+
+# CHECK: vpackod.w $vr21, $vr18, $vr15
+# CHECK: encoding: [0x55,0x3e,0x19,0x71]
+vpackod.w $vr21, $vr18, $vr15
+
+# CHECK: vpackod.d $vr2, $vr3, $vr0
+# CHECK: encoding: [0x62,0x80,0x19,0x71]
+vpackod.d $vr2, $vr3, $vr0
+
+# CHECK: vilvl.b $vr8, $vr8, $vr28
+# CHECK: encoding: [0x08,0x71,0x1a,0x71]
+vilvl.b $vr8, $vr8, $vr28
+
+# CHECK: vilvl.h $vr20, $vr0, $vr31
+# CHECK: encoding: [0x14,0xfc,0x1a,0x71]
+vilvl.h $vr20, $vr0, $vr31
+
+# CHECK: vilvl.w $vr11, $vr10, $vr17
+# CHECK: encoding: [0x4b,0x45,0x1b,0x71]
+vilvl.w $vr11, $vr10, $vr17
+
+# CHECK: vilvl.d $vr7, $vr7, $vr1
+# CHECK: encoding: [0xe7,0x84,0x1b,0x71]
+vilvl.d $vr7, $vr7, $vr1
+
+# CHECK: vilvh.b $vr11, $vr11, $vr1
+# CHECK: encoding: [0x6b,0x05,0x1c,0x71]
+vilvh.b $vr11, $vr11, $vr1
+
+# CHECK: vilvh.h $vr0, $vr31, $vr13
+# CHECK: encoding: [0xe0,0xb7,0x1c,0x71]
+vilvh.h $vr0, $vr31, $vr13
+
+# CHECK: vilvh.w $vr28, $vr21, $vr7
+# CHECK: encoding: [0xbc,0x1e,0x1d,0x71]
+vilvh.w $vr28, $vr21, $vr7
+
+# CHECK: vilvh.d $vr23, $vr3, $vr19
+# CHECK: encoding: [0x77,0xcc,0x1d,0x71]
+vilvh.d $vr23, $vr3, $vr19
+
+# CHECK: vpickev.b $vr1, $vr21, $vr8
+# CHECK: encoding: [0xa1,0x22,0x1e,0x71]
+vpickev.b $vr1, $vr21, $vr8
+
+# CHECK: vpickev.h $vr16, $vr1, $vr9
+# CHECK: encoding: [0x30,0xa4,0x1e,0x71]
+vpickev.h $vr16, $vr1, $vr9
+
+# CHECK: vpickev.w $vr13, $vr13, $vr4
+# CHECK: encoding: [0xad,0x11,0x1f,0x71]
+vpickev.w $vr13, $vr13, $vr4
+
+# CHECK: vpickev.d $vr11, $vr30, $vr30
+# CHECK: encoding: [0xcb,0xfb,0x1f,0x71]
+vpickev.d $vr11, $vr30, $vr30
+
+# CHECK: vpickod.b $vr7, $vr11, $vr13
+# CHECK: encoding: [0x67,0x35,0x20,0x71]
+vpickod.b $vr7, $vr11, $vr13
+
+# CHECK: vpickod.h $vr18, $vr3, $vr1
+# CHECK: encoding: [0x72,0x84,0x20,0x71]
+vpickod.h $vr18, $vr3, $vr1
+
+# CHECK: vpickod.w $vr3, $vr16, $vr19
+# CHECK: encoding: [0x03,0x4e,0x21,0x71]
+vpickod.w $vr3, $vr16, $vr19
+
+# CHECK: vpickod.d $vr12, $vr13, $vr21
+# CHECK: encoding: [0xac,0xd5,0x21,0x71]
+vpickod.d $vr12, $vr13, $vr21
+
+# CHECK: vreplve.b $vr15, $vr17, $r19
+# CHECK: encoding: [0x2f,0x4e,0x22,0x71]
+vreplve.b $vr15, $vr17, $r19
+
+# CHECK: vreplve.h $vr14, $vr23, $r4
+# CHECK: encoding: [0xee,0x92,0x22,0x71]
+vreplve.h $vr14, $vr23, $r4
+
+# CHECK: vreplve.w $vr29, $vr19, $r27
+# CHECK: encoding: [0x7d,0x6e,0x23,0x71]
+vreplve.w $vr29, $vr19, $r27
+
+# CHECK: vreplve.d $vr13, $vr20, $r20
+# CHECK: encoding: [0x8d,0xd2,0x23,0x71]
+vreplve.d $vr13, $vr20, $r20
+
+# CHECK: vand.v $vr25, $vr2, $vr21
+# CHECK: encoding: [0x59,0x54,0x26,0x71]
+vand.v $vr25, $vr2, $vr21
+
+# CHECK: vor.v $vr4, $vr27, $vr16
+# CHECK: encoding: [0x64,0xc3,0x26,0x71]
+vor.v $vr4, $vr27, $vr16
+
+# CHECK: vxor.v $vr30, $vr25, $vr4
+# CHECK: encoding: [0x3e,0x13,0x27,0x71]
+vxor.v $vr30, $vr25, $vr4
+
+# CHECK: vnor.v $vr9, $vr2, $vr22
+# CHECK: encoding: [0x49,0xd8,0x27,0x71]
+vnor.v $vr9, $vr2, $vr22
+
+# CHECK: vandn.v $vr20, $vr26, $vr4
+# CHECK: encoding: [0x54,0x13,0x28,0x71]
+vandn.v $vr20, $vr26, $vr4
+
+# CHECK: vorn.v $vr6, $vr21, $vr30
+# CHECK: encoding: [0xa6,0xfa,0x28,0x71]
+vorn.v $vr6, $vr21, $vr30
+
+# CHECK: vfrstp.b $vr11, $vr9, $vr13
+# CHECK: encoding: [0x2b,0x35,0x2b,0x71]
+vfrstp.b $vr11, $vr9, $vr13
+
+# CHECK: vfrstp.h $vr21, $vr26, $vr22
+# CHECK: encoding: [0x55,0xdb,0x2b,0x71]
+vfrstp.h $vr21, $vr26, $vr22
+
+# CHECK: vadd.q $vr9, $vr7, $vr16
+# CHECK: encoding: [0xe9,0x40,0x2d,0x71]
+vadd.q $vr9, $vr7, $vr16
+
+# CHECK: vsub.q $vr2, $vr1, $vr16
+# CHECK: encoding: [0x22,0xc0,0x2d,0x71]
+vsub.q $vr2, $vr1, $vr16
+
+# CHECK: vsigncov.b $vr2, $vr2, $vr14
+# CHECK: encoding: [0x42,0x38,0x2e,0x71]
+vsigncov.b $vr2, $vr2, $vr14
+
+# CHECK: vsigncov.h $vr21, $vr21, $vr13
+# CHECK: encoding: [0xb5,0xb6,0x2e,0x71]
+vsigncov.h $vr21, $vr21, $vr13
+
+# CHECK: vsigncov.w $vr21, $vr7, $vr5
+# CHECK: encoding: [0xf5,0x14,0x2f,0x71]
+vsigncov.w $vr21, $vr7, $vr5
+
+# CHECK: vsigncov.d $vr10, $vr10, $vr3
+# CHECK: encoding: [0x4a,0x8d,0x2f,0x71]
+vsigncov.d $vr10, $vr10, $vr3
+
+# CHECK: vfadd.s $vr10, $vr4, $vr1
+# CHECK: encoding: [0x8a,0x84,0x30,0x71]
+vfadd.s $vr10, $vr4, $vr1
+
+# CHECK: vfadd.d $vr15, $vr27, $vr2
+# CHECK: encoding: [0x6f,0x0b,0x31,0x71]
+vfadd.d $vr15, $vr27, $vr2
+
+# CHECK: vfsub.s $vr14, $vr16, $vr9
+# CHECK: encoding: [0x0e,0xa6,0x32,0x71]
+vfsub.s $vr14, $vr16, $vr9
+
+# CHECK: vfsub.d $vr4, $vr1, $vr8
+# CHECK: encoding: [0x24,0x20,0x33,0x71]
+vfsub.d $vr4, $vr1, $vr8
+
+# CHECK: vfmul.s $vr0, $vr18, $vr6
+# CHECK: encoding: [0x40,0x9a,0x38,0x71]
+vfmul.s $vr0, $vr18, $vr6
+
+# CHECK: vfmul.d $vr27, $vr31, $vr30
+# CHECK: encoding: [0xfb,0x7b,0x39,0x71]
+vfmul.d $vr27, $vr31, $vr30
+
+# CHECK: vfdiv.s $vr3, $vr7, $vr6
+# CHECK: encoding: [0xe3,0x98,0x3a,0x71]
+vfdiv.s $vr3, $vr7, $vr6
+
+# CHECK: vfdiv.d $vr16, $vr6, $vr30
+# CHECK: encoding: [0xd0,0x78,0x3b,0x71]
+vfdiv.d $vr16, $vr6, $vr30
+
+# CHECK: vfmax.s $vr18, $vr30, $vr8
+# CHECK: encoding: [0xd2,0xa3,0x3c,0x71]
+vfmax.s $vr18, $vr30, $vr8
+
+# CHECK: vfmax.d $vr19, $vr8, $vr24
+# CHECK: encoding: [0x13,0x61,0x3d,0x71]
+vfmax.d $vr19, $vr8, $vr24
+
+# CHECK: vfmin.s $vr24, $vr26, $vr6
+# CHECK: encoding: [0x58,0x9b,0x3e,0x71]
+vfmin.s $vr24, $vr26, $vr6
+
+# CHECK: vfmin.d $vr16, $vr25, $vr1
+# CHECK: encoding: [0x30,0x07,0x3f,0x71]
+vfmin.d $vr16, $vr25, $vr1
+
+# CHECK: vfmaxa.s $vr8, $vr7, $vr14
+# CHECK: encoding: [0xe8,0xb8,0x40,0x71]
+vfmaxa.s $vr8, $vr7, $vr14
+
+# CHECK: vfmaxa.d $vr10, $vr8, $vr4
+# CHECK: encoding: [0x0a,0x11,0x41,0x71]
+vfmaxa.d $vr10, $vr8, $vr4
+
+# CHECK: vfmina.s $vr16, $vr6, $vr18
+# CHECK: encoding: [0xd0,0xc8,0x42,0x71]
+vfmina.s $vr16, $vr6, $vr18
+
+# CHECK: vfmina.d $vr26, $vr7, $vr14
+# CHECK: encoding: [0xfa,0x38,0x43,0x71]
+vfmina.d $vr26, $vr7, $vr14
+
+# CHECK: vfcvt.h.s $vr30, $vr4, $vr24
+# CHECK: encoding: [0x9e,0x60,0x46,0x71]
+vfcvt.h.s $vr30, $vr4, $vr24
+
+# CHECK: vfcvt.s.d $vr16, $vr17, $vr4
+# CHECK: encoding: [0x30,0x92,0x46,0x71]
+vfcvt.s.d $vr16, $vr17, $vr4
+
+# CHECK: vffint.s.l $vr25, $vr23, $vr10
+# CHECK: encoding: [0xf9,0x2a,0x48,0x71]
+vffint.s.l $vr25, $vr23, $vr10
+
+# CHECK: vftint.w.d $vr9, $vr22, $vr27
+# CHECK: encoding: [0xc9,0xee,0x49,0x71]
+vftint.w.d $vr9, $vr22, $vr27
+
+# CHECK: vftintrm.w.d $vr31, $vr10, $vr29
+# CHECK: encoding: [0x5f,0x75,0x4a,0x71]
+vftintrm.w.d $vr31, $vr10, $vr29
+
+# CHECK: vftintrp.w.d $vr23, $vr13, $vr15
+# CHECK: encoding: [0xb7,0xbd,0x4a,0x71]
+vftintrp.w.d $vr23, $vr13, $vr15
+
+# CHECK: vftintrz.w.d $vr18, $vr9, $vr6
+# CHECK: encoding: [0x32,0x19,0x4b,0x71]
+vftintrz.w.d $vr18, $vr9, $vr6
+
+# CHECK: vftintrne.w.d $vr21, $vr12, $vr30
+# CHECK: encoding: [0x95,0xf9,0x4b,0x71]
+vftintrne.w.d $vr21, $vr12, $vr30
+
+# CHECK: vshuf.h $vr3, $vr11, $vr2
+# CHECK: encoding: [0x63,0x89,0x7a,0x71]
+vshuf.h $vr3, $vr11, $vr2
+
+# CHECK: vshuf.w $vr21, $vr4, $vr29
+# CHECK: encoding: [0x95,0x74,0x7b,0x71]
+vshuf.w $vr21, $vr4, $vr29
+
+# CHECK: vshuf.d $vr11, $vr23, $vr18
+# CHECK: encoding: [0xeb,0xca,0x7b,0x71]
+vshuf.d $vr11, $vr23, $vr18
+
+# CHECK: vseqi.b $vr27, $vr14, 7
+# CHECK: encoding: [0xdb,0x1d,0x80,0x72]
+vseqi.b $vr27, $vr14, 7
+
+# CHECK: vseqi.h $vr23, $vr27, -6
+# CHECK: encoding: [0x77,0xeb,0x80,0x72]
+vseqi.h $vr23, $vr27, -6
+
+# CHECK: vseqi.w $vr8, $vr8, -16
+# CHECK: encoding: [0x08,0x41,0x81,0x72]
+vseqi.w $vr8, $vr8, -16
+
+# CHECK: vseqi.d $vr11, $vr5, 5
+# CHECK: encoding: [0xab,0x94,0x81,0x72]
+vseqi.d $vr11, $vr5, 5
+
+# CHECK: vslei.b $vr8, $vr27, 7
+# CHECK: encoding: [0x68,0x1f,0x82,0x72]
+vslei.b $vr8, $vr27, 7
+
+# CHECK: vslei.h $vr27, $vr29, -5
+# CHECK: encoding: [0xbb,0xef,0x82,0x72]
+vslei.h $vr27, $vr29, -5
+
+# CHECK: vslei.w $vr23, $vr13, -3
+# CHECK: encoding: [0xb7,0x75,0x83,0x72]
+vslei.w $vr23, $vr13, -3
+
+# CHECK: vslei.d $vr5, $vr15, -8
+# CHECK: encoding: [0xe5,0xe1,0x83,0x72]
+vslei.d $vr5, $vr15, -8
+
+# CHECK: vslei.bu $vr29, $vr10, 9
+# CHECK: encoding: [0x5d,0x25,0x84,0x72]
+vslei.bu $vr29, $vr10, 9
+
+# CHECK: vslei.hu $vr29, $vr18, 11
+# CHECK: encoding: [0x5d,0xae,0x84,0x72]
+vslei.hu $vr29, $vr18, 11
+
+# CHECK: vslei.wu $vr8, $vr1, 2
+# CHECK: encoding: [0x28,0x08,0x85,0x72]
+vslei.wu $vr8, $vr1, 2
+
+# CHECK: vslei.du $vr16, $vr5, 10
+# CHECK: encoding: [0xb0,0xa8,0x85,0x72]
+vslei.du $vr16, $vr5, 10
+
+# CHECK: vslti.b $vr8, $vr4, -2
+# CHECK: encoding: [0x88,0x78,0x86,0x72]
+vslti.b $vr8, $vr4, -2
+
+# CHECK: vslti.h $vr26, $vr7, -14
+# CHECK: encoding: [0xfa,0xc8,0x86,0x72]
+vslti.h $vr26, $vr7, -14
+
+# CHECK: vslti.w $vr28, $vr8, 12
+# CHECK: encoding: [0x1c,0x31,0x87,0x72]
+vslti.w $vr28, $vr8, 12
+
+# CHECK: vslti.d $vr4, $vr27, 9
+# CHECK: encoding: [0x64,0xa7,0x87,0x72]
+vslti.d $vr4, $vr27, 9
+
+# CHECK: vslti.bu $vr10, $vr14, 18
+# CHECK: encoding: [0xca,0x49,0x88,0x72]
+vslti.bu $vr10, $vr14, 18
+
+# CHECK: vslti.hu $vr28, $vr28, 30
+# CHECK: encoding: [0x9c,0xfb,0x88,0x72]
+vslti.hu $vr28, $vr28, 30
+
+# CHECK: vslti.wu $vr15, $vr27, 27
+# CHECK: encoding: [0x6f,0x6f,0x89,0x72]
+vslti.wu $vr15, $vr27, 27
+
+# CHECK: vslti.du $vr30, $vr17, 19
+# CHECK: encoding: [0x3e,0xce,0x89,0x72]
+vslti.du $vr30, $vr17, 19
+
+# CHECK: vaddi.bu $vr6, $vr1, 18
+# CHECK: encoding: [0x26,0x48,0x8a,0x72]
+vaddi.bu $vr6, $vr1, 18
+
+# CHECK: vaddi.hu $vr12, $vr14, 5
+# CHECK: encoding: [0xcc,0x95,0x8a,0x72]
+vaddi.hu $vr12, $vr14, 5
+
+# CHECK: vaddi.wu $vr28, $vr0, 26
+# CHECK: encoding: [0x1c,0x68,0x8b,0x72]
+vaddi.wu $vr28, $vr0, 26
+
+# CHECK: vaddi.du $vr10, $vr5, 2
+# CHECK: encoding: [0xaa,0x88,0x8b,0x72]
+vaddi.du $vr10, $vr5, 2
+
+# CHECK: vsubi.bu $vr22, $vr28, 2
+# CHECK: encoding: [0x96,0x0b,0x8c,0x72]
+vsubi.bu $vr22, $vr28, 2
+
+# CHECK: vsubi.hu $vr0, $vr22, 31
+# CHECK: encoding: [0xc0,0xfe,0x8c,0x72]
+vsubi.hu $vr0, $vr22, 31
+
+# CHECK: vsubi.wu $vr20, $vr6, 5
+# CHECK: encoding: [0xd4,0x14,0x8d,0x72]
+vsubi.wu $vr20, $vr6, 5
+
+# CHECK: vsubi.du $vr18, $vr11, 1
+# CHECK: encoding: [0x72,0x85,0x8d,0x72]
+vsubi.du $vr18, $vr11, 1
+
+# CHECK: vbsll.v $vr4, $vr26, 4
+# CHECK: encoding: [0x44,0x13,0x8e,0x72]
+vbsll.v $vr4, $vr26, 4
+
+# CHECK: vbsrl.v $vr7, $vr31, 15
+# CHECK: encoding: [0xe7,0xbf,0x8e,0x72]
+vbsrl.v $vr7, $vr31, 15
+
+# CHECK: vmaxi.b $vr19, $vr15, 14
+# CHECK: encoding: [0xf3,0x39,0x90,0x72]
+vmaxi.b $vr19, $vr15, 14
+
+# CHECK: vmaxi.h $vr25, $vr3, -12
+# CHECK: encoding: [0x79,0xd0,0x90,0x72]
+vmaxi.h $vr25, $vr3, -12
+
+# CHECK: vmaxi.w $vr20, $vr25, 5
+# CHECK: encoding: [0x34,0x17,0x91,0x72]
+vmaxi.w $vr20, $vr25, 5
+
+# CHECK: vmaxi.d $vr9, $vr10, 12
+# CHECK: encoding: [0x49,0xb1,0x91,0x72]
+vmaxi.d $vr9, $vr10, 12
+
+# CHECK: vmini.b $vr30, $vr21, -4
+# CHECK: encoding: [0xbe,0x72,0x92,0x72]
+vmini.b $vr30, $vr21, -4
+
+# CHECK: vmini.h $vr11, $vr28, -3
+# CHECK: encoding: [0x8b,0xf7,0x92,0x72]
+vmini.h $vr11, $vr28, -3
+
+# CHECK: vmini.w $vr6, $vr25, -9
+# CHECK: encoding: [0x26,0x5f,0x93,0x72]
+vmini.w $vr6, $vr25, -9
+
+# CHECK: vmini.d $vr28, $vr8, 2
+# CHECK: encoding: [0x1c,0x89,0x93,0x72]
+vmini.d $vr28, $vr8, 2
+
+# CHECK: vmaxi.bu $vr13, $vr24, 19
+# CHECK: encoding: [0x0d,0x4f,0x94,0x72]
+vmaxi.bu $vr13, $vr24, 19
+
+# CHECK: vmaxi.hu $vr3, $vr1, 22
+# CHECK: encoding: [0x23,0xd8,0x94,0x72]
+vmaxi.hu $vr3, $vr1, 22
+
+# CHECK: vmaxi.wu $vr1, $vr3, 23
+# CHECK: encoding: [0x61,0x5c,0x95,0x72]
+vmaxi.wu $vr1, $vr3, 23
+
+# CHECK: vmaxi.du $vr6, $vr18, 21
+# CHECK: encoding: [0x46,0xd6,0x95,0x72]
+vmaxi.du $vr6, $vr18, 21
+
+# CHECK: vmini.bu $vr10, $vr2, 20
+# CHECK: encoding: [0x4a,0x50,0x96,0x72]
+vmini.bu $vr10, $vr2, 20
+
+# CHECK: vmini.hu $vr17, $vr17, 15
+# CHECK: encoding: [0x31,0xbe,0x96,0x72]
+vmini.hu $vr17, $vr17, 15
+
+# CHECK: vmini.wu $vr26, $vr27, 23
+# CHECK: encoding: [0x7a,0x5f,0x97,0x72]
+vmini.wu $vr26, $vr27, 23
+
+# CHECK: vmini.du $vr12, $vr27, 8
+# CHECK: encoding: [0x6c,0xa3,0x97,0x72]
+vmini.du $vr12, $vr27, 8
+
+# CHECK: vfrstpi.b $vr26, $vr8, 9
+# CHECK: encoding: [0x1a,0x25,0x9a,0x72]
+vfrstpi.b $vr26, $vr8, 9
+
+# CHECK: vfrstpi.h $vr16, $vr2, 20
+# CHECK: encoding: [0x50,0xd0,0x9a,0x72]
+vfrstpi.h $vr16, $vr2, 20
+
+# CHECK: vclo.b $vr5, $vr17
+# CHECK: encoding: [0x25,0x02,0x9c,0x72]
+vclo.b $vr5, $vr17
+
+# CHECK: vclo.h $vr8, $vr4
+# CHECK: encoding: [0x88,0x04,0x9c,0x72]
+vclo.h $vr8, $vr4
+
+# CHECK: vclo.w $vr1, $vr13
+# CHECK: encoding: [0xa1,0x09,0x9c,0x72]
+vclo.w $vr1, $vr13
+
+# CHECK: vclo.d $vr0, $vr23
+# CHECK: encoding: [0xe0,0x0e,0x9c,0x72]
+vclo.d $vr0, $vr23
+
+# CHECK: vclz.b $vr4, $vr25
+# CHECK: encoding: [0x24,0x13,0x9c,0x72]
+vclz.b $vr4, $vr25
+
+# CHECK: vclz.h $vr1, $vr25
+# CHECK: encoding: [0x21,0x17,0x9c,0x72]
+vclz.h $vr1, $vr25
+
+# CHECK: vclz.w $vr1, $vr5
+# CHECK: encoding: [0xa1,0x18,0x9c,0x72]
+vclz.w $vr1, $vr5
+
+# CHECK: vclz.d $vr16, $vr17
+# CHECK: encoding: [0x30,0x1e,0x9c,0x72]
+vclz.d $vr16, $vr17
+
+# CHECK: vpcnt.b $vr4, $vr3
+# CHECK: encoding: [0x64,0x20,0x9c,0x72]
+vpcnt.b $vr4, $vr3
+
+# CHECK: vpcnt.h $vr15, $vr17
+# CHECK: encoding: [0x2f,0x26,0x9c,0x72]
+vpcnt.h $vr15, $vr17
+
+# CHECK: vpcnt.w $vr13, $vr8
+# CHECK: encoding: [0x0d,0x29,0x9c,0x72]
+vpcnt.w $vr13, $vr8
+
+# CHECK: vpcnt.d $vr0, $vr8
+# CHECK: encoding: [0x00,0x2d,0x9c,0x72]
+vpcnt.d $vr0, $vr8
+
+# CHECK: vneg.b $vr14, $vr24
+# CHECK: encoding: [0x0e,0x33,0x9c,0x72]
+vneg.b $vr14, $vr24
+
+# CHECK: vneg.h $vr24, $vr7
+# CHECK: encoding: [0xf8,0x34,0x9c,0x72]
+vneg.h $vr24, $vr7
+
+# CHECK: vneg.w $vr19, $vr5
+# CHECK: encoding: [0xb3,0x38,0x9c,0x72]
+vneg.w $vr19, $vr5
+
+# CHECK: vneg.d $vr3, $vr28
+# CHECK: encoding: [0x83,0x3f,0x9c,0x72]
+vneg.d $vr3, $vr28
+
+# CHECK: vmskltz.b $vr31, $vr25
+# CHECK: encoding: [0x3f,0x43,0x9c,0x72]
+vmskltz.b $vr31, $vr25
+
+# CHECK: vmskltz.h $vr9, $vr20
+# CHECK: encoding: [0x89,0x46,0x9c,0x72]
+vmskltz.h $vr9, $vr20
+
+# CHECK: vmskltz.w $vr22, $vr26
+# CHECK: encoding: [0x56,0x4b,0x9c,0x72]
+vmskltz.w $vr22, $vr26
+
+# CHECK: vmskltz.d $vr28, $vr10
+# CHECK: encoding: [0x5c,0x4d,0x9c,0x72]
+vmskltz.d $vr28, $vr10
+
+# CHECK: vmskgez.b $vr7, $vr5
+# CHECK: encoding: [0xa7,0x50,0x9c,0x72]
+vmskgez.b $vr7, $vr5
+
+# CHECK: vmsknz.b $vr20, $vr12
+# CHECK: encoding: [0x94,0x61,0x9c,0x72]
+vmsknz.b $vr20, $vr12
+
+# CHECK: vseteqz.v $fcc5, $vr14
+# CHECK: encoding: [0xc5,0x99,0x9c,0x72]
+vseteqz.v $fcc5, $vr14
+
+# CHECK: vsetnez.v $fcc2, $vr8
+# CHECK: encoding: [0x02,0x9d,0x9c,0x72]
+vsetnez.v $fcc2, $vr8
+
+# CHECK: vsetanyeqz.b $fcc0, $vr20
+# CHECK: encoding: [0x80,0xa2,0x9c,0x72]
+vsetanyeqz.b $fcc0, $vr20
+
+# CHECK: vsetanyeqz.h $fcc4, $vr16
+# CHECK: encoding: [0x04,0xa6,0x9c,0x72]
+vsetanyeqz.h $fcc4, $vr16
+
+# CHECK: vsetanyeqz.w $fcc7, $vr2
+# CHECK: encoding: [0x47,0xa8,0x9c,0x72]
+vsetanyeqz.w $fcc7, $vr2
+
+# CHECK: vsetanyeqz.d $fcc4, $vr12
+# CHECK: encoding: [0x84,0xad,0x9c,0x72]
+vsetanyeqz.d $fcc4, $vr12
+
+# CHECK: vsetallnez.b $fcc7, $vr0
+# CHECK: encoding: [0x07,0xb0,0x9c,0x72]
+vsetallnez.b $fcc7, $vr0
+
+# CHECK: vsetallnez.h $fcc2, $vr11
+# CHECK: encoding: [0x62,0xb5,0x9c,0x72]
+vsetallnez.h $fcc2, $vr11
+
+# CHECK: vsetallnez.w $fcc6, $vr25
+# CHECK: encoding: [0x26,0xbb,0x9c,0x72]
+vsetallnez.w $fcc6, $vr25
+
+# CHECK: vsetallnez.d $fcc7, $vr31
+# CHECK: encoding: [0xe7,0xbf,0x9c,0x72]
+vsetallnez.d $fcc7, $vr31
+
+# CHECK: vflogb.s $vr14, $vr28
+# CHECK: encoding: [0x8e,0xc7,0x9c,0x72]
+vflogb.s $vr14, $vr28
+
+# CHECK: vflogb.d $vr29, $vr9
+# CHECK: encoding: [0x3d,0xc9,0x9c,0x72]
+vflogb.d $vr29, $vr9
+
+# CHECK: vfclass.s $vr3, $vr13
+# CHECK: encoding: [0xa3,0xd5,0x9c,0x72]
+vfclass.s $vr3, $vr13
+
+# CHECK: vfclass.d $vr5, $vr15
+# CHECK: encoding: [0xe5,0xd9,0x9c,0x72]
+vfclass.d $vr5, $vr15
+
+# CHECK: vfsqrt.s $vr19, $vr27
+# CHECK: encoding: [0x73,0xe7,0x9c,0x72]
+vfsqrt.s $vr19, $vr27
+
+# CHECK: vfsqrt.d $vr31, $vr3
+# CHECK: encoding: [0x7f,0xe8,0x9c,0x72]
+vfsqrt.d $vr31, $vr3
+
+# CHECK: vfrecip.s $vr24, $vr16
+# CHECK: encoding: [0x18,0xf6,0x9c,0x72]
+vfrecip.s $vr24, $vr16
+
+# CHECK: vfrecip.d $vr23, $vr19
+# CHECK: encoding: [0x77,0xfa,0x9c,0x72]
+vfrecip.d $vr23, $vr19
+
+# CHECK: vfrsqrt.s $vr18, $vr15
+# CHECK: encoding: [0xf2,0x05,0x9d,0x72]
+vfrsqrt.s $vr18, $vr15
+
+# CHECK: vfrsqrt.d $vr18, $vr31
+# CHECK: encoding: [0xf2,0x0b,0x9d,0x72]
+vfrsqrt.d $vr18, $vr31
+
+# CHECK: vfrint.s $vr26, $vr11
+# CHECK: encoding: [0x7a,0x35,0x9d,0x72]
+vfrint.s $vr26, $vr11
+
+# CHECK: vfrint.d $vr24, $vr18
+# CHECK: encoding: [0x58,0x3a,0x9d,0x72]
+vfrint.d $vr24, $vr18
+
+# CHECK: vfrintrm.s $vr5, $vr3
+# CHECK: encoding: [0x65,0x44,0x9d,0x72]
+vfrintrm.s $vr5, $vr3
+
+# CHECK: vfrintrm.d $vr23, $vr10
+# CHECK: encoding: [0x57,0x49,0x9d,0x72]
+vfrintrm.d $vr23, $vr10
+
+# CHECK: vfrintrp.s $vr20, $vr2
+# CHECK: encoding: [0x54,0x54,0x9d,0x72]
+vfrintrp.s $vr20, $vr2
+
+# CHECK: vfrintrp.d $vr30, $vr17
+# CHECK: encoding: [0x3e,0x5a,0x9d,0x72]
+vfrintrp.d $vr30, $vr17
+
+# CHECK: vfrintrz.s $vr19, $vr6
+# CHECK: encoding: [0xd3,0x64,0x9d,0x72]
+vfrintrz.s $vr19, $vr6
+
+# CHECK: vfrintrz.d $vr16, $vr18
+# CHECK: encoding: [0x50,0x6a,0x9d,0x72]
+vfrintrz.d $vr16, $vr18
+
+# CHECK: vfrintrne.s $vr8, $vr24
+# CHECK: encoding: [0x08,0x77,0x9d,0x72]
+vfrintrne.s $vr8, $vr24
+
+# CHECK: vfrintrne.d $vr6, $vr5
+# CHECK: encoding: [0xa6,0x78,0x9d,0x72]
+vfrintrne.d $vr6, $vr5
+
+# CHECK: vfcvtl.s.h $vr4, $vr6
+# CHECK: encoding: [0xc4,0xe8,0x9d,0x72]
+vfcvtl.s.h $vr4, $vr6
+
+# CHECK: vfcvth.s.h $vr16, $vr7
+# CHECK: encoding: [0xf0,0xec,0x9d,0x72]
+vfcvth.s.h $vr16, $vr7
+
+# CHECK: vfcvtl.d.s $vr16, $vr10
+# CHECK: encoding: [0x50,0xf1,0x9d,0x72]
+vfcvtl.d.s $vr16, $vr10
+
+# CHECK: vfcvth.d.s $vr28, $vr25
+# CHECK: encoding: [0x3c,0xf7,0x9d,0x72]
+vfcvth.d.s $vr28, $vr25
+
+# CHECK: vffint.s.w $vr28, $vr16
+# CHECK: encoding: [0x1c,0x02,0x9e,0x72]
+vffint.s.w $vr28, $vr16
+
+# CHECK: vffint.s.wu $vr4, $vr31
+# CHECK: encoding: [0xe4,0x07,0x9e,0x72]
+vffint.s.wu $vr4, $vr31
+
+# CHECK: vffint.d.l $vr18, $vr25
+# CHECK: encoding: [0x32,0x0b,0x9e,0x72]
+vffint.d.l $vr18, $vr25
+
+# CHECK: vffint.d.lu $vr24, $vr17
+# CHECK: encoding: [0x38,0x0e,0x9e,0x72]
+vffint.d.lu $vr24, $vr17
+
+# CHECK: vffintl.d.w $vr2, $vr27
+# CHECK: encoding: [0x62,0x13,0x9e,0x72]
+vffintl.d.w $vr2, $vr27
+
+# CHECK: vffinth.d.w $vr4, $vr16
+# CHECK: encoding: [0x04,0x16,0x9e,0x72]
+vffinth.d.w $vr4, $vr16
+
+# CHECK: vftint.w.s $vr17, $vr0
+# CHECK: encoding: [0x11,0x30,0x9e,0x72]
+vftint.w.s $vr17, $vr0
+
+# CHECK: vftint.l.d $vr23, $vr18
+# CHECK: encoding: [0x57,0x36,0x9e,0x72]
+vftint.l.d $vr23, $vr18
+
+# CHECK: vftintrm.w.s $vr23, $vr4
+# CHECK: encoding: [0x97,0x38,0x9e,0x72]
+vftintrm.w.s $vr23, $vr4
+
+# CHECK: vftintrm.l.d $vr30, $vr14
+# CHECK: encoding: [0xde,0x3d,0x9e,0x72]
+vftintrm.l.d $vr30, $vr14
+
+# CHECK: vftintrp.w.s $vr7, $vr0
+# CHECK: encoding: [0x07,0x40,0x9e,0x72]
+vftintrp.w.s $vr7, $vr0
+
+# CHECK: vftintrp.l.d $vr28, $vr20
+# CHECK: encoding: [0x9c,0x46,0x9e,0x72]
+vftintrp.l.d $vr28, $vr20
+
+# CHECK: vftintrz.w.s $vr28, $vr31
+# CHECK: encoding: [0xfc,0x4b,0x9e,0x72]
+vftintrz.w.s $vr28, $vr31
+
+# CHECK: vftintrz.l.d $vr18, $vr0
+# CHECK: encoding: [0x12,0x4c,0x9e,0x72]
+vftintrz.l.d $vr18, $vr0
+
+# CHECK: vftintrne.w.s $vr14, $vr17
+# CHECK: encoding: [0x2e,0x52,0x9e,0x72]
+vftintrne.w.s $vr14, $vr17
+
+# CHECK: vftintrne.l.d $vr22, $vr18
+# CHECK: encoding: [0x56,0x56,0x9e,0x72]
+vftintrne.l.d $vr22, $vr18
+
+# CHECK: vftint.wu.s $vr26, $vr25
+# CHECK: encoding: [0x3a,0x5b,0x9e,0x72]
+vftint.wu.s $vr26, $vr25
+
+# CHECK: vftint.lu.d $vr9, $vr27
+# CHECK: encoding: [0x69,0x5f,0x9e,0x72]
+vftint.lu.d $vr9, $vr27
+
+# CHECK: vftintrz.wu.s $vr26, $vr22
+# CHECK: encoding: [0xda,0x72,0x9e,0x72]
+vftintrz.wu.s $vr26, $vr22
+
+# CHECK: vftintrz.lu.d $vr29, $vr20
+# CHECK: encoding: [0x9d,0x76,0x9e,0x72]
+vftintrz.lu.d $vr29, $vr20
+
+# CHECK: vftintl.l.s $vr22, $vr1
+# CHECK: encoding: [0x36,0x80,0x9e,0x72]
+vftintl.l.s $vr22, $vr1
+
+# CHECK: vftinth.l.s $vr13, $vr24
+# CHECK: encoding: [0x0d,0x87,0x9e,0x72]
+vftinth.l.s $vr13, $vr24
+
+# CHECK: vftintrml.l.s $vr8, $vr27
+# CHECK: encoding: [0x68,0x8b,0x9e,0x72]
+vftintrml.l.s $vr8, $vr27
+
+# CHECK: vftintrmh.l.s $vr18, $vr28
+# CHECK: encoding: [0x92,0x8f,0x9e,0x72]
+vftintrmh.l.s $vr18, $vr28
+
+# CHECK: vftintrpl.l.s $vr27, $vr28
+# CHECK: encoding: [0x9b,0x93,0x9e,0x72]
+vftintrpl.l.s $vr27, $vr28
+
+# CHECK: vftintrph.l.s $vr20, $vr7
+# CHECK: encoding: [0xf4,0x94,0x9e,0x72]
+vftintrph.l.s $vr20, $vr7
+
+# CHECK: vftintrzl.l.s $vr6, $vr2
+# CHECK: encoding: [0x46,0x98,0x9e,0x72]
+vftintrzl.l.s $vr6, $vr2
+
+# CHECK: vftintrzh.l.s $vr21, $vr6
+# CHECK: encoding: [0xd5,0x9c,0x9e,0x72]
+vftintrzh.l.s $vr21, $vr6
+
+# CHECK: vftintrnel.l.s $vr25, $vr3
+# CHECK: encoding: [0x79,0xa0,0x9e,0x72]
+vftintrnel.l.s $vr25, $vr3
+
+# CHECK: vftintrneh.l.s $vr7, $vr5
+# CHECK: encoding: [0xa7,0xa4,0x9e,0x72]
+vftintrneh.l.s $vr7, $vr5
+
+# CHECK: vexth.h.b $vr9, $vr2
+# CHECK: encoding: [0x49,0xe0,0x9e,0x72]
+vexth.h.b $vr9, $vr2
+
+# CHECK: vexth.w.h $vr4, $vr27
+# CHECK: encoding: [0x64,0xe7,0x9e,0x72]
+vexth.w.h $vr4, $vr27
+
+# CHECK: vexth.d.w $vr23, $vr1
+# CHECK: encoding: [0x37,0xe8,0x9e,0x72]
+vexth.d.w $vr23, $vr1
+
+# CHECK: vexth.q.d $vr15, $vr6
+# CHECK: encoding: [0xcf,0xec,0x9e,0x72]
+vexth.q.d $vr15, $vr6
+
+# CHECK: vexth.hu.bu $vr3, $vr2
+# CHECK: encoding: [0x43,0xf0,0x9e,0x72]
+vexth.hu.bu $vr3, $vr2
+
+# CHECK: vexth.wu.hu $vr31, $vr26
+# CHECK: encoding: [0x5f,0xf7,0x9e,0x72]
+vexth.wu.hu $vr31, $vr26
+
+# CHECK: vexth.du.wu $vr10, $vr31
+# CHECK: encoding: [0xea,0xfb,0x9e,0x72]
+vexth.du.wu $vr10, $vr31
+
+# CHECK: vexth.qu.du $vr28, $vr8
+# CHECK: encoding: [0x1c,0xfd,0x9e,0x72]
+vexth.qu.du $vr28, $vr8
+
+# CHECK: vreplgr2vr.b $vr15, $sp
+# CHECK: encoding: [0x6f,0x00,0x9f,0x72]
+vreplgr2vr.b $vr15, $sp
+
+# CHECK: vreplgr2vr.h $vr10, $r23
+# CHECK: encoding: [0xea,0x06,0x9f,0x72]
+vreplgr2vr.h $vr10, $r23
+
+# CHECK: vreplgr2vr.w $vr25, $r16
+# CHECK: encoding: [0x19,0x0a,0x9f,0x72]
+vreplgr2vr.w $vr25, $r16
+
+# CHECK: vreplgr2vr.d $vr27, $r7
+# CHECK: encoding: [0xfb,0x0c,0x9f,0x72]
+vreplgr2vr.d $vr27, $r7
+
+# CHECK: vrotri.b $vr24, $vr24, 7
+# CHECK: encoding: [0x18,0x3f,0xa0,0x72]
+vrotri.b $vr24, $vr24, 7
+
+# CHECK: vrotri.h $vr1, $vr5, 0
+# CHECK: encoding: [0xa1,0x40,0xa0,0x72]
+vrotri.h $vr1, $vr5, 0
+
+# CHECK: vrotri.w $vr10, $vr8, 12
+# CHECK: encoding: [0x0a,0xb1,0xa0,0x72]
+vrotri.w $vr10, $vr8, 12
+
+# CHECK: vrotri.d $vr30, $vr29, 42
+# CHECK: encoding: [0xbe,0xab,0xa1,0x72]
+vrotri.d $vr30, $vr29, 42
+
+# CHECK: vsrlri.b $vr1, $vr16, 3
+# CHECK: encoding: [0x01,0x2e,0xa4,0x72]
+vsrlri.b $vr1, $vr16, 3
+
+# CHECK: vsrlri.h $vr28, $vr21, 1
+# CHECK: encoding: [0xbc,0x46,0xa4,0x72]
+vsrlri.h $vr28, $vr21, 1
+
+# CHECK: vsrlri.w $vr18, $vr4, 15
+# CHECK: encoding: [0x92,0xbc,0xa4,0x72]
+vsrlri.w $vr18, $vr4, 15
+
+# CHECK: vsrlri.d $vr30, $vr3, 19
+# CHECK: encoding: [0x7e,0x4c,0xa5,0x72]
+vsrlri.d $vr30, $vr3, 19
+
+# CHECK: vsrari.b $vr13, $vr0, 7
+# CHECK: encoding: [0x0d,0x3c,0xa8,0x72]
+vsrari.b $vr13, $vr0, 7
+
+# CHECK: vsrari.h $vr17, $vr9, 6
+# CHECK: encoding: [0x31,0x59,0xa8,0x72]
+vsrari.h $vr17, $vr9, 6
+
+# CHECK: vsrari.w $vr3, $vr28, 6
+# CHECK: encoding: [0x83,0x9b,0xa8,0x72]
+vsrari.w $vr3, $vr28, 6
+
+# CHECK: vsrari.d $vr4, $vr2, 34
+# CHECK: encoding: [0x44,0x88,0xa9,0x72]
+vsrari.d $vr4, $vr2, 34
+
+# CHECK: vinsgr2vr.b $vr8, $r8, 4
+# CHECK: encoding: [0x08,0x91,0xeb,0x72]
+vinsgr2vr.b $vr8, $r8, 4
+
+# CHECK: vinsgr2vr.h $vr13, $r7, 1
+# CHECK: encoding: [0xed,0xc4,0xeb,0x72]
+vinsgr2vr.h $vr13, $r7, 1
+
+# CHECK: vinsgr2vr.w $vr4, $r6, 3
+# CHECK: encoding: [0xc4,0xec,0xeb,0x72]
+vinsgr2vr.w $vr4, $r6, 3
+
+# CHECK: vinsgr2vr.d $vr23, $r31, 0
+# CHECK: encoding: [0xf7,0xf3,0xeb,0x72]
+vinsgr2vr.d $vr23, $r31, 0
+
+# CHECK: vpickve2gr.b $r24, $vr16, 10
+# CHECK: encoding: [0x18,0xaa,0xef,0x72]
+vpickve2gr.b $r24, $vr16, 10
+
+# CHECK: vpickve2gr.h $r17, $vr25, 3
+# CHECK: encoding: [0x31,0xcf,0xef,0x72]
+vpickve2gr.h $r17, $vr25, 3
+
+# CHECK: vpickve2gr.w $r30, $vr28, 2
+# CHECK: encoding: [0x9e,0xeb,0xef,0x72]
+vpickve2gr.w $r30, $vr28, 2
+
+# CHECK: vpickve2gr.d $r25, $vr9, 1
+# CHECK: encoding: [0x39,0xf5,0xef,0x72]
+vpickve2gr.d $r25, $vr9, 1
+
+# CHECK: vpickve2gr.bu $r31, $vr14, 2
+# CHECK: encoding: [0xdf,0x89,0xf3,0x72]
+vpickve2gr.bu $r31, $vr14, 2
+
+# CHECK: vpickve2gr.hu $r12, $vr1, 6
+# CHECK: encoding: [0x2c,0xd8,0xf3,0x72]
+vpickve2gr.hu $r12, $vr1, 6
+
+# CHECK: vpickve2gr.wu $r10, $vr17, 1
+# CHECK: encoding: [0x2a,0xe6,0xf3,0x72]
+vpickve2gr.wu $r10, $vr17, 1
+
+# CHECK: vpickve2gr.du $r26, $vr8, 1
+# CHECK: encoding: [0x1a,0xf5,0xf3,0x72]
+vpickve2gr.du $r26, $vr8, 1
+
+# CHECK: vreplvei.b $vr3, $vr6, 12
+# CHECK: encoding: [0xc3,0xb0,0xf7,0x72]
+vreplvei.b $vr3, $vr6, 12
+
+# CHECK: vreplvei.h $vr22, $vr29, 7
+# CHECK: encoding: [0xb6,0xdf,0xf7,0x72]
+vreplvei.h $vr22, $vr29, 7
+
+# CHECK: vreplvei.w $vr17, $vr26, 1
+# CHECK: encoding: [0x51,0xe7,0xf7,0x72]
+vreplvei.w $vr17, $vr26, 1
+
+# CHECK: vreplvei.d $vr0, $vr17, 1
+# CHECK: encoding: [0x20,0xf6,0xf7,0x72]
+vreplvei.d $vr0, $vr17, 1
+
+# CHECK: vsllwil.h.b $vr25, $vr14, 2
+# CHECK: encoding: [0xd9,0x29,0x08,0x73]
+vsllwil.h.b $vr25, $vr14, 2
+
+# CHECK: vsllwil.w.h $vr24, $vr5, 1
+# CHECK: encoding: [0xb8,0x44,0x08,0x73]
+vsllwil.w.h $vr24, $vr5, 1
+
+# CHECK: vsllwil.d.w $vr25, $vr14, 8
+# CHECK: encoding: [0xd9,0xa1,0x08,0x73]
+vsllwil.d.w $vr25, $vr14, 8
+
+# CHECK: vextl.q.d $vr3, $vr22
+# CHECK: encoding: [0xc3,0x02,0x09,0x73]
+vextl.q.d $vr3, $vr22
+
+# CHECK: vsllwil.hu.bu $vr11, $vr25, 3
+# CHECK: encoding: [0x2b,0x2f,0x0c,0x73]
+vsllwil.hu.bu $vr11, $vr25, 3
+
+# CHECK: vsllwil.wu.hu $vr2, $vr26, 10
+# CHECK: encoding: [0x42,0x6b,0x0c,0x73]
+vsllwil.wu.hu $vr2, $vr26, 10
+
+# CHECK: vsllwil.du.wu $vr18, $vr9, 28
+# CHECK: encoding: [0x32,0xf1,0x0c,0x73]
+vsllwil.du.wu $vr18, $vr9, 28
+
+# CHECK: vextl.qu.du $vr13, $vr25
+# CHECK: encoding: [0x2d,0x03,0x0d,0x73]
+vextl.qu.du $vr13, $vr25
+
+# CHECK: vbitclri.b $vr29, $vr24, 6
+# CHECK: encoding: [0x1d,0x3b,0x10,0x73]
+vbitclri.b $vr29, $vr24, 6
+
+# CHECK: vbitclri.h $vr27, $vr15, 5
+# CHECK: encoding: [0xfb,0x55,0x10,0x73]
+vbitclri.h $vr27, $vr15, 5
+
+# CHECK: vbitclri.w $vr11, $vr10, 8
+# CHECK: encoding: [0x4b,0xa1,0x10,0x73]
+vbitclri.w $vr11, $vr10, 8
+
+# CHECK: vbitclri.d $vr4, $vr7, 15
+# CHECK: encoding: [0xe4,0x3c,0x11,0x73]
+vbitclri.d $vr4, $vr7, 15
+
+# CHECK: vbitseti.b $vr24, $vr20, 3
+# CHECK: encoding: [0x98,0x2e,0x14,0x73]
+vbitseti.b $vr24, $vr20, 3
+
+# CHECK: vbitseti.h $vr6, $vr8, 8
+# CHECK: encoding: [0x06,0x61,0x14,0x73]
+vbitseti.h $vr6, $vr8, 8
+
+# CHECK: vbitseti.w $vr21, $vr9, 24
+# CHECK: encoding: [0x35,0xe1,0x14,0x73]
+vbitseti.w $vr21, $vr9, 24
+
+# CHECK: vbitseti.d $vr28, $vr18, 30
+# CHECK: encoding: [0x5c,0x7a,0x15,0x73]
+vbitseti.d $vr28, $vr18, 30
+
+# CHECK: vbitrevi.b $vr19, $vr31, 0
+# CHECK: encoding: [0xf3,0x23,0x18,0x73]
+vbitrevi.b $vr19, $vr31, 0
+
+# CHECK: vbitrevi.h $vr18, $vr1, 0
+# CHECK: encoding: [0x32,0x40,0x18,0x73]
+vbitrevi.h $vr18, $vr1, 0
+
+# CHECK: vbitrevi.w $vr25, $vr6, 18
+# CHECK: encoding: [0xd9,0xc8,0x18,0x73]
+vbitrevi.w $vr25, $vr6, 18
+
+# CHECK: vbitrevi.d $vr8, $vr27, 22
+# CHECK: encoding: [0x68,0x5b,0x19,0x73]
+vbitrevi.d $vr8, $vr27, 22
+
+# CHECK: vsat.b $vr21, $vr28, 2
+# CHECK: encoding: [0x95,0x2b,0x24,0x73]
+vsat.b $vr21, $vr28, 2
+
+# CHECK: vsat.h $vr6, $vr5, 12
+# CHECK: encoding: [0xa6,0x70,0x24,0x73]
+vsat.h $vr6, $vr5, 12
+
+# CHECK: vsat.w $vr3, $vr30, 16
+# CHECK: encoding: [0xc3,0xc3,0x24,0x73]
+vsat.w $vr3, $vr30, 16
+
+# CHECK: vsat.d $vr0, $vr31, 24
+# CHECK: encoding: [0xe0,0x63,0x25,0x73]
+vsat.d $vr0, $vr31, 24
+
+# CHECK: vsat.bu $vr20, $vr20, 2
+# CHECK: encoding: [0x94,0x2a,0x28,0x73]
+vsat.bu $vr20, $vr20, 2
+
+# CHECK: vsat.hu $vr8, $vr6, 12
+# CHECK: encoding: [0xc8,0x70,0x28,0x73]
+vsat.hu $vr8, $vr6, 12
+
+# CHECK: vsat.wu $vr18, $vr20, 26
+# CHECK: encoding: [0x92,0xea,0x28,0x73]
+vsat.wu $vr18, $vr20, 26
+
+# CHECK: vsat.du $vr10, $vr6, 33
+# CHECK: encoding: [0xca,0x84,0x29,0x73]
+vsat.du $vr10, $vr6, 33
+
+# CHECK: vslli.b $vr4, $vr19, 3
+# CHECK: encoding: [0x64,0x2e,0x2c,0x73]
+vslli.b $vr4, $vr19, 3
+
+# CHECK: vslli.h $vr3, $vr23, 14
+# CHECK: encoding: [0xe3,0x7a,0x2c,0x73]
+vslli.h $vr3, $vr23, 14
+
+# CHECK: vslli.w $vr22, $vr21, 6
+# CHECK: encoding: [0xb6,0x9a,0x2c,0x73]
+vslli.w $vr22, $vr21, 6
+
+# CHECK: vslli.d $vr23, $vr15, 36
+# CHECK: encoding: [0xf7,0x91,0x2d,0x73]
+vslli.d $vr23, $vr15, 36
+
+# CHECK: vsrli.b $vr5, $vr25, 4
+# CHECK: encoding: [0x25,0x33,0x30,0x73]
+vsrli.b $vr5, $vr25, 4
+
+# CHECK: vsrli.h $vr9, $vr14, 9
+# CHECK: encoding: [0xc9,0x65,0x30,0x73]
+vsrli.h $vr9, $vr14, 9
+
+# CHECK: vsrli.w $vr7, $vr24, 12
+# CHECK: encoding: [0x07,0xb3,0x30,0x73]
+vsrli.w $vr7, $vr24, 12
+
+# CHECK: vsrli.d $vr15, $vr18, 63
+# CHECK: encoding: [0x4f,0xfe,0x31,0x73]
+vsrli.d $vr15, $vr18, 63
+
+# CHECK: vsrai.b $vr6, $vr1, 3
+# CHECK: encoding: [0x26,0x2c,0x34,0x73]
+vsrai.b $vr6, $vr1, 3
+
+# CHECK: vsrai.h $vr7, $vr29, 3
+# CHECK: encoding: [0xa7,0x4f,0x34,0x73]
+vsrai.h $vr7, $vr29, 3
+
+# CHECK: vsrai.w $vr31, $vr27, 29
+# CHECK: encoding: [0x7f,0xf7,0x34,0x73]
+vsrai.w $vr31, $vr27, 29
+
+# CHECK: vsrai.d $vr28, $vr30, 56
+# CHECK: encoding: [0xdc,0xe3,0x35,0x73]
+vsrai.d $vr28, $vr30, 56
+
+# CHECK: vsrlni.b.h $vr2, $vr26, 2
+# CHECK: encoding: [0x42,0x4b,0x40,0x73]
+vsrlni.b.h $vr2, $vr26, 2
+
+# CHECK: vsrlni.h.w $vr31, $vr14, 3
+# CHECK: encoding: [0xdf,0x8d,0x40,0x73]
+vsrlni.h.w $vr31, $vr14, 3
+
+# CHECK: vsrlni.w.d $vr19, $vr4, 33
+# CHECK: encoding: [0x93,0x84,0x41,0x73]
+vsrlni.w.d $vr19, $vr4, 33
+
+# CHECK: vsrlni.d.q $vr31, $vr3, 63
+# CHECK: encoding: [0x7f,0xfc,0x42,0x73]
+vsrlni.d.q $vr31, $vr3, 63
+
+# CHECK: vsrlrni.b.h $vr26, $vr18, 0
+# CHECK: encoding: [0x5a,0x42,0x44,0x73]
+vsrlrni.b.h $vr26, $vr18, 0
+
+# CHECK: vsrlrni.h.w $vr18, $vr22, 5
+# CHECK: encoding: [0xd2,0x96,0x44,0x73]
+vsrlrni.h.w $vr18, $vr22, 5
+
+# CHECK: vsrlrni.w.d $vr24, $vr11, 21
+# CHECK: encoding: [0x78,0x55,0x45,0x73]
+vsrlrni.w.d $vr24, $vr11, 21
+
+# CHECK: vsrlrni.d.q $vr6, $vr11, 37
+# CHECK: encoding: [0x66,0x95,0x46,0x73]
+vsrlrni.d.q $vr6, $vr11, 37
+
+# CHECK: vssrlni.b.h $vr3, $vr21, 5
+# CHECK: encoding: [0xa3,0x56,0x48,0x73]
+vssrlni.b.h $vr3, $vr21, 5
+
+# CHECK: vssrlni.h.w $vr6, $vr1, 16
+# CHECK: encoding: [0x26,0xc0,0x48,0x73]
+vssrlni.h.w $vr6, $vr1, 16
+
+# CHECK: vssrlni.w.d $vr4, $vr21, 27
+# CHECK: encoding: [0xa4,0x6e,0x49,0x73]
+vssrlni.w.d $vr4, $vr21, 27
+
+# CHECK: vssrlni.d.q $vr8, $vr18, 94
+# CHECK: encoding: [0x48,0x7a,0x4b,0x73]
+vssrlni.d.q $vr8, $vr18, 94
+
+# CHECK: vssrlni.bu.h $vr6, $vr2, 5
+# CHECK: encoding: [0x46,0x54,0x4c,0x73]
+vssrlni.bu.h $vr6, $vr2, 5
+
+# CHECK: vssrlni.hu.w $vr29, $vr29, 2
+# CHECK: encoding: [0xbd,0x8b,0x4c,0x73]
+vssrlni.hu.w $vr29, $vr29, 2
+
+# CHECK: vssrlni.wu.d $vr28, $vr20, 47
+# CHECK: encoding: [0x9c,0xbe,0x4d,0x73]
+vssrlni.wu.d $vr28, $vr20, 47
+
+# CHECK: vssrlni.du.q $vr22, $vr10, 82
+# CHECK: encoding: [0x56,0x49,0x4f,0x73]
+vssrlni.du.q $vr22, $vr10, 82
+
+# CHECK: vssrlrni.b.h $vr17, $vr25, 10
+# CHECK: encoding: [0x31,0x6b,0x50,0x73]
+vssrlrni.b.h $vr17, $vr25, 10
+
+# CHECK: vssrlrni.h.w $vr21, $vr29, 0
+# CHECK: encoding: [0xb5,0x83,0x50,0x73]
+vssrlrni.h.w $vr21, $vr29, 0
+
+# CHECK: vssrlrni.w.d $vr9, $vr15, 63
+# CHECK: encoding: [0xe9,0xfd,0x51,0x73]
+vssrlrni.w.d $vr9, $vr15, 63
+
+# CHECK: vssrlrni.d.q $vr4, $vr1, 117
+# CHECK: encoding: [0x24,0xd4,0x53,0x73]
+vssrlrni.d.q $vr4, $vr1, 117
+
+# CHECK: vssrlrni.bu.h $vr25, $vr13, 3
+# CHECK: encoding: [0xb9,0x4d,0x54,0x73]
+vssrlrni.bu.h $vr25, $vr13, 3
+
+# CHECK: vssrlrni.hu.w $vr30, $vr28, 7
+# CHECK: encoding: [0x9e,0x9f,0x54,0x73]
+vssrlrni.hu.w $vr30, $vr28, 7
+
+# CHECK: vssrlrni.wu.d $vr16, $vr27, 11
+# CHECK: encoding: [0x70,0x2f,0x55,0x73]
+vssrlrni.wu.d $vr16, $vr27, 11
+
+# CHECK: vssrlrni.du.q $vr20, $vr13, 63
+# CHECK: encoding: [0xb4,0xfd,0x56,0x73]
+vssrlrni.du.q $vr20, $vr13, 63
+
+# CHECK: vsrani.b.h $vr3, $vr25, 4
+# CHECK: encoding: [0x23,0x53,0x58,0x73]
+vsrani.b.h $vr3, $vr25, 4
+
+# CHECK: vsrani.h.w $vr12, $vr13, 17
+# CHECK: encoding: [0xac,0xc5,0x58,0x73]
+vsrani.h.w $vr12, $vr13, 17
+
+# CHECK: vsrani.w.d $vr2, $vr6, 25
+# CHECK: encoding: [0xc2,0x64,0x59,0x73]
+vsrani.w.d $vr2, $vr6, 25
+
+# CHECK: vsrani.d.q $vr12, $vr8, 105
+# CHECK: encoding: [0x0c,0xa5,0x5b,0x73]
+vsrani.d.q $vr12, $vr8, 105
+
+# CHECK: vsrarni.b.h $vr27, $vr21, 2
+# CHECK: encoding: [0xbb,0x4a,0x5c,0x73]
+vsrarni.b.h $vr27, $vr21, 2
+
+# CHECK: vsrarni.h.w $vr13, $vr3, 0
+# CHECK: encoding: [0x6d,0x80,0x5c,0x73]
+vsrarni.h.w $vr13, $vr3, 0
+
+# CHECK: vsrarni.w.d $vr9, $vr31, 42
+# CHECK: encoding: [0xe9,0xab,0x5d,0x73]
+vsrarni.w.d $vr9, $vr31, 42
+
+# CHECK: vsrarni.d.q $vr25, $vr5, 59
+# CHECK: encoding: [0xb9,0xec,0x5e,0x73]
+vsrarni.d.q $vr25, $vr5, 59
+
+# CHECK: vssrani.b.h $vr8, $vr7, 12
+# CHECK: encoding: [0xe8,0x70,0x60,0x73]
+vssrani.b.h $vr8, $vr7, 12
+
+# CHECK: vssrani.h.w $vr21, $vr18, 30
+# CHECK: encoding: [0x55,0xfa,0x60,0x73]
+vssrani.h.w $vr21, $vr18, 30
+
+# CHECK: vssrani.w.d $vr23, $vr7, 51
+# CHECK: encoding: [0xf7,0xcc,0x61,0x73]
+vssrani.w.d $vr23, $vr7, 51
+
+# CHECK: vssrani.d.q $vr12, $vr14, 8
+# CHECK: encoding: [0xcc,0x21,0x62,0x73]
+vssrani.d.q $vr12, $vr14, 8
+
+# CHECK: vssrani.bu.h $vr19, $vr5, 12
+# CHECK: encoding: [0xb3,0x70,0x64,0x73]
+vssrani.bu.h $vr19, $vr5, 12
+
+# CHECK: vssrani.hu.w $vr27, $vr25, 15
+# CHECK: encoding: [0x3b,0xbf,0x64,0x73]
+vssrani.hu.w $vr27, $vr25, 15
+
+# CHECK: vssrani.wu.d $vr24, $vr28, 42
+# CHECK: encoding: [0x98,0xab,0x65,0x73]
+vssrani.wu.d $vr24, $vr28, 42
+
+# CHECK: vssrani.du.q $vr4, $vr23, 63
+# CHECK: encoding: [0xe4,0xfe,0x66,0x73]
+vssrani.du.q $vr4, $vr23, 63
+
+# CHECK: vssrarni.b.h $vr26, $vr8, 0
+# CHECK: encoding: [0x1a,0x41,0x68,0x73]
+vssrarni.b.h $vr26, $vr8, 0
+
+# CHECK: vssrarni.h.w $vr4, $vr3, 25
+# CHECK: encoding: [0x64,0xe4,0x68,0x73]
+vssrarni.h.w $vr4, $vr3, 25
+
+# CHECK: vssrarni.w.d $vr0, $vr25, 19
+# CHECK: encoding: [0x20,0x4f,0x69,0x73]
+vssrarni.w.d $vr0, $vr25, 19
+
+# CHECK: vssrarni.d.q $vr20, $vr11, 106
+# CHECK: encoding: [0x74,0xa9,0x6b,0x73]
+vssrarni.d.q $vr20, $vr11, 106
+
+# CHECK: vssrarni.bu.h $vr25, $vr28, 9
+# CHECK: encoding: [0x99,0x67,0x6c,0x73]
+vssrarni.bu.h $vr25, $vr28, 9
+
+# CHECK: vssrarni.hu.w $vr20, $vr23, 12
+# CHECK: encoding: [0xf4,0xb2,0x6c,0x73]
+vssrarni.hu.w $vr20, $vr23, 12
+
+# CHECK: vssrarni.wu.d $vr28, $vr23, 58
+# CHECK: encoding: [0xfc,0xea,0x6d,0x73]
+vssrarni.wu.d $vr28, $vr23, 58
+
+# CHECK: vssrarni.du.q $vr1, $vr14, 93
+# CHECK: encoding: [0xc1,0x75,0x6f,0x73]
+vssrarni.du.q $vr1, $vr14, 93
+
+# CHECK: vextrins.d $vr15, $vr27, 7
+# CHECK: encoding: [0x6f,0x1f,0x80,0x73]
+vextrins.d $vr15, $vr27, 7
+
+# CHECK: vextrins.w $vr19, $vr0, 147
+# CHECK: encoding: [0x13,0x4c,0x86,0x73]
+vextrins.w $vr19, $vr0, 147
+
+# CHECK: vextrins.h $vr29, $vr9, 69
+# CHECK: encoding: [0x3d,0x15,0x89,0x73]
+vextrins.h $vr29, $vr9, 69
+
+# CHECK: vextrins.b $vr0, $vr21, 23
+# CHECK: encoding: [0xa0,0x5e,0x8c,0x73]
+vextrins.b $vr0, $vr21, 23
+
+# CHECK: vshuf4i.b $vr19, $vr10, 188
+# CHECK: encoding: [0x53,0xf1,0x92,0x73]
+vshuf4i.b $vr19, $vr10, 188
+
+# CHECK: vshuf4i.h $vr15, $vr1, 139
+# CHECK: encoding: [0x2f,0x2c,0x96,0x73]
+vshuf4i.h $vr15, $vr1, 139
+
+# CHECK: vshuf4i.w $vr3, $vr5, 130
+# CHECK: encoding: [0xa3,0x08,0x9a,0x73]
+vshuf4i.w $vr3, $vr5, 130
+
+# CHECK: vshuf4i.d $vr8, $vr29, 131
+# CHECK: encoding: [0xa8,0x0f,0x9e,0x73]
+vshuf4i.d $vr8, $vr29, 131
+
+# CHECK: vbitseli.b $vr16, $vr25, 168
+# CHECK: encoding: [0x30,0xa3,0xc6,0x73]
+vbitseli.b $vr16, $vr25, 168
+
+# CHECK: vandi.b $vr4, $vr23, 121
+# CHECK: encoding: [0xe4,0xe6,0xd1,0x73]
+vandi.b $vr4, $vr23, 121
+
+# CHECK: vori.b $vr7, $vr10, 188
+# CHECK: encoding: [0x47,0xf1,0xd6,0x73]
+vori.b $vr7, $vr10, 188
+
+# CHECK: vxori.b $vr9, $vr26, 216
+# CHECK: encoding: [0x49,0x63,0xdb,0x73]
+vxori.b $vr9, $vr26, 216
+
+# CHECK: vnori.b $vr4, $vr28, 219
+# CHECK: encoding: [0x84,0x6f,0xdf,0x73]
+vnori.b $vr4, $vr28, 219
+
+# CHECK: vldi $vr22, -3742
+# CHECK: encoding: [0x56,0x2c,0xe2,0x73]
+vldi $vr22, -3742
+
+# CHECK: vpermi.w $vr14, $vr29, 16
+# CHECK: encoding: [0xae,0x43,0xe4,0x73]
+vpermi.w $vr14, $vr29, 16
+
+# CHECK: xvseq.b $xr11, $xr23, $xr21
+# CHECK: encoding: [0xeb,0x56,0x00,0x74]
+xvseq.b $xr11, $xr23, $xr21
+
+# CHECK: xvseq.h $xr6, $xr10, $xr27
+# CHECK: encoding: [0x46,0xed,0x00,0x74]
+xvseq.h $xr6, $xr10, $xr27
+
+# CHECK: xvseq.w $xr19, $xr27, $xr21
+# CHECK: encoding: [0x73,0x57,0x01,0x74]
+xvseq.w $xr19, $xr27, $xr21
+
+# CHECK: xvseq.d $xr18, $xr4, $xr2
+# CHECK: encoding: [0x92,0x88,0x01,0x74]
+xvseq.d $xr18, $xr4, $xr2
+
+# CHECK: xvsle.b $xr19, $xr10, $xr5
+# CHECK: encoding: [0x53,0x15,0x02,0x74]
+xvsle.b $xr19, $xr10, $xr5
+
+# CHECK: xvsle.h $xr10, $xr25, $xr14
+# CHECK: encoding: [0x2a,0xbb,0x02,0x74]
+xvsle.h $xr10, $xr25, $xr14
+
+# CHECK: xvsle.w $xr17, $xr23, $xr18
+# CHECK: encoding: [0xf1,0x4a,0x03,0x74]
+xvsle.w $xr17, $xr23, $xr18
+
+# CHECK: xvsle.d $xr15, $xr7, $xr9
+# CHECK: encoding: [0xef,0xa4,0x03,0x74]
+xvsle.d $xr15, $xr7, $xr9
+
+# CHECK: xvsle.bu $xr5, $xr14, $xr15
+# CHECK: encoding: [0xc5,0x3d,0x04,0x74]
+xvsle.bu $xr5, $xr14, $xr15
+
+# CHECK: xvsle.hu $xr9, $xr25, $xr25
+# CHECK: encoding: [0x29,0xe7,0x04,0x74]
+xvsle.hu $xr9, $xr25, $xr25
+
+# CHECK: xvsle.wu $xr28, $xr31, $xr16
+# CHECK: encoding: [0xfc,0x43,0x05,0x74]
+xvsle.wu $xr28, $xr31, $xr16
+
+# CHECK: xvsle.du $xr17, $xr24, $xr24
+# CHECK: encoding: [0x11,0xe3,0x05,0x74]
+xvsle.du $xr17, $xr24, $xr24
+
+# CHECK: xvslt.b $xr18, $xr28, $xr25
+# CHECK: encoding: [0x92,0x67,0x06,0x74]
+xvslt.b $xr18, $xr28, $xr25
+
+# CHECK: xvslt.h $xr29, $xr6, $xr2
+# CHECK: encoding: [0xdd,0x88,0x06,0x74]
+xvslt.h $xr29, $xr6, $xr2
+
+# CHECK: xvslt.w $xr14, $xr10, $xr5
+# CHECK: encoding: [0x4e,0x15,0x07,0x74]
+xvslt.w $xr14, $xr10, $xr5
+
+# CHECK: xvslt.d $xr19, $xr30, $xr15
+# CHECK: encoding: [0xd3,0xbf,0x07,0x74]
+xvslt.d $xr19, $xr30, $xr15
+
+# CHECK: xvslt.bu $xr14, $xr6, $xr27
+# CHECK: encoding: [0xce,0x6c,0x08,0x74]
+xvslt.bu $xr14, $xr6, $xr27
+
+# CHECK: xvslt.hu $xr27, $xr26, $xr5
+# CHECK: encoding: [0x5b,0x97,0x08,0x74]
+xvslt.hu $xr27, $xr26, $xr5
+
+# CHECK: xvslt.wu $xr6, $xr9, $xr10
+# CHECK: encoding: [0x26,0x29,0x09,0x74]
+xvslt.wu $xr6, $xr9, $xr10
+
+# CHECK: xvslt.du $xr13, $xr12, $xr28
+# CHECK: encoding: [0x8d,0xf1,0x09,0x74]
+xvslt.du $xr13, $xr12, $xr28
+
+# CHECK: xvadd.b $xr0, $xr6, $xr3
+# CHECK: encoding: [0xc0,0x0c,0x0a,0x74]
+xvadd.b $xr0, $xr6, $xr3
+
+# CHECK: xvadd.h $xr8, $xr11, $xr10
+# CHECK: encoding: [0x68,0xa9,0x0a,0x74]
+xvadd.h $xr8, $xr11, $xr10
+
+# CHECK: xvadd.w $xr5, $xr6, $xr21
+# CHECK: encoding: [0xc5,0x54,0x0b,0x74]
+xvadd.w $xr5, $xr6, $xr21
+
+# CHECK: xvadd.d $xr4, $xr21, $xr10
+# CHECK: encoding: [0xa4,0xaa,0x0b,0x74]
+xvadd.d $xr4, $xr21, $xr10
+
+# CHECK: xvsub.b $xr16, $xr0, $xr30
+# CHECK: encoding: [0x10,0x78,0x0c,0x74]
+xvsub.b $xr16, $xr0, $xr30
+
+# CHECK: xvsub.h $xr28, $xr11, $xr18
+# CHECK: encoding: [0x7c,0xc9,0x0c,0x74]
+xvsub.h $xr28, $xr11, $xr18
+
+# CHECK: xvsub.w $xr13, $xr2, $xr13
+# CHECK: encoding: [0x4d,0x34,0x0d,0x74]
+xvsub.w $xr13, $xr2, $xr13
+
+# CHECK: xvsub.d $xr0, $xr25, $xr21
+# CHECK: encoding: [0x20,0xd7,0x0d,0x74]
+xvsub.d $xr0, $xr25, $xr21
+
+# CHECK: xvaddwev.h.b $xr8, $xr30, $xr11
+# CHECK: encoding: [0xc8,0x2f,0x1e,0x74]
+xvaddwev.h.b $xr8, $xr30, $xr11
+
+# CHECK: xvaddwev.w.h $xr10, $xr30, $xr5
+# CHECK: encoding: [0xca,0x97,0x1e,0x74]
+xvaddwev.w.h $xr10, $xr30, $xr5
+
+# CHECK: xvaddwev.d.w $xr20, $xr25, $xr1
+# CHECK: encoding: [0x34,0x07,0x1f,0x74]
+xvaddwev.d.w $xr20, $xr25, $xr1
+
+# CHECK: xvaddwev.q.d $xr22, $xr24, $xr24
+# CHECK: encoding: [0x16,0xe3,0x1f,0x74]
+xvaddwev.q.d $xr22, $xr24, $xr24
+
+# CHECK: xvsubwev.h.b $xr1, $xr25, $xr1
+# CHECK: encoding: [0x21,0x07,0x20,0x74]
+xvsubwev.h.b $xr1, $xr25, $xr1
+
+# CHECK: xvsubwev.w.h $xr4, $xr30, $xr11
+# CHECK: encoding: [0xc4,0xaf,0x20,0x74]
+xvsubwev.w.h $xr4, $xr30, $xr11
+
+# CHECK: xvsubwev.d.w $xr6, $xr2, $xr18
+# CHECK: encoding: [0x46,0x48,0x21,0x74]
+xvsubwev.d.w $xr6, $xr2, $xr18
+
+# CHECK: xvsubwev.q.d $xr0, $xr11, $xr31
+# CHECK: encoding: [0x60,0xfd,0x21,0x74]
+xvsubwev.q.d $xr0, $xr11, $xr31
+
+# CHECK: xvaddwod.h.b $xr4, $xr4, $xr25
+# CHECK: encoding: [0x84,0x64,0x22,0x74]
+xvaddwod.h.b $xr4, $xr4, $xr25
+
+# CHECK: xvaddwod.w.h $xr12, $xr25, $xr29
+# CHECK: encoding: [0x2c,0xf7,0x22,0x74]
+xvaddwod.w.h $xr12, $xr25, $xr29
+
+# CHECK: xvaddwod.d.w $xr16, $xr22, $xr19
+# CHECK: encoding: [0xd0,0x4e,0x23,0x74]
+xvaddwod.d.w $xr16, $xr22, $xr19
+
+# CHECK: xvaddwod.q.d $xr23, $xr25, $xr14
+# CHECK: encoding: [0x37,0xbb,0x23,0x74]
+xvaddwod.q.d $xr23, $xr25, $xr14
+
+# CHECK: xvsubwod.h.b $xr1, $xr16, $xr8
+# CHECK: encoding: [0x01,0x22,0x24,0x74]
+xvsubwod.h.b $xr1, $xr16, $xr8
+
+# CHECK: xvsubwod.w.h $xr5, $xr11, $xr8
+# CHECK: encoding: [0x65,0xa1,0x24,0x74]
+xvsubwod.w.h $xr5, $xr11, $xr8
+
+# CHECK: xvsubwod.d.w $xr20, $xr7, $xr0
+# CHECK: encoding: [0xf4,0x00,0x25,0x74]
+xvsubwod.d.w $xr20, $xr7, $xr0
+
+# CHECK: xvsubwod.q.d $xr17, $xr23, $xr20
+# CHECK: encoding: [0xf1,0xd2,0x25,0x74]
+xvsubwod.q.d $xr17, $xr23, $xr20
+
+# CHECK: xvaddwev.h.bu $xr15, $xr10, $xr31
+# CHECK: encoding: [0x4f,0x7d,0x2e,0x74]
+xvaddwev.h.bu $xr15, $xr10, $xr31
+
+# CHECK: xvaddwev.w.hu $xr21, $xr24, $xr28
+# CHECK: encoding: [0x15,0xf3,0x2e,0x74]
+xvaddwev.w.hu $xr21, $xr24, $xr28
+
+# CHECK: xvaddwev.d.wu $xr9, $xr31, $xr14
+# CHECK: encoding: [0xe9,0x3b,0x2f,0x74]
+xvaddwev.d.wu $xr9, $xr31, $xr14
+
+# CHECK: xvaddwev.q.du $xr25, $xr1, $xr8
+# CHECK: encoding: [0x39,0xa0,0x2f,0x74]
+xvaddwev.q.du $xr25, $xr1, $xr8
+
+# CHECK: xvsubwev.h.bu $xr30, $xr31, $xr13
+# CHECK: encoding: [0xfe,0x37,0x30,0x74]
+xvsubwev.h.bu $xr30, $xr31, $xr13
+
+# CHECK: xvsubwev.w.hu $xr1, $xr28, $xr1
+# CHECK: encoding: [0x81,0x87,0x30,0x74]
+xvsubwev.w.hu $xr1, $xr28, $xr1
+
+# CHECK: xvsubwev.d.wu $xr29, $xr23, $xr29
+# CHECK: encoding: [0xfd,0x76,0x31,0x74]
+xvsubwev.d.wu $xr29, $xr23, $xr29
+
+# CHECK: xvsubwev.q.du $xr13, $xr16, $xr27
+# CHECK: encoding: [0x0d,0xee,0x31,0x74]
+xvsubwev.q.du $xr13, $xr16, $xr27
+
+# CHECK: xvaddwod.h.bu $xr13, $xr29, $xr2
+# CHECK: encoding: [0xad,0x0b,0x32,0x74]
+xvaddwod.h.bu $xr13, $xr29, $xr2
+
+# CHECK: xvaddwod.w.hu $xr14, $xr10, $xr13
+# CHECK: encoding: [0x4e,0xb5,0x32,0x74]
+xvaddwod.w.hu $xr14, $xr10, $xr13
+
+# CHECK: xvaddwod.d.wu $xr30, $xr26, $xr10
+# CHECK: encoding: [0x5e,0x2b,0x33,0x74]
+xvaddwod.d.wu $xr30, $xr26, $xr10
+
+# CHECK: xvaddwod.q.du $xr2, $xr13, $xr0
+# CHECK: encoding: [0xa2,0x81,0x33,0x74]
+xvaddwod.q.du $xr2, $xr13, $xr0
+
+# CHECK: xvsubwod.h.bu $xr6, $xr22, $xr5
+# CHECK: encoding: [0xc6,0x16,0x34,0x74]
+xvsubwod.h.bu $xr6, $xr22, $xr5
+
+# CHECK: xvsubwod.w.hu $xr19, $xr21, $xr8
+# CHECK: encoding: [0xb3,0xa2,0x34,0x74]
+xvsubwod.w.hu $xr19, $xr21, $xr8
+
+# CHECK: xvsubwod.d.wu $xr16, $xr11, $xr30
+# CHECK: encoding: [0x70,0x79,0x35,0x74]
+xvsubwod.d.wu $xr16, $xr11, $xr30
+
+# CHECK: xvsubwod.q.du $xr1, $xr26, $xr9
+# CHECK: encoding: [0x41,0xa7,0x35,0x74]
+xvsubwod.q.du $xr1, $xr26, $xr9
+
+# CHECK: xvaddwev.h.bu.b $xr5, $xr13, $xr2
+# CHECK: encoding: [0xa5,0x09,0x3e,0x74]
+xvaddwev.h.bu.b $xr5, $xr13, $xr2
+
+# CHECK: xvaddwev.w.hu.h $xr17, $xr21, $xr20
+# CHECK: encoding: [0xb1,0xd2,0x3e,0x74]
+xvaddwev.w.hu.h $xr17, $xr21, $xr20
+
+# CHECK: xvaddwev.d.wu.w $xr11, $xr27, $xr19
+# CHECK: encoding: [0x6b,0x4f,0x3f,0x74]
+xvaddwev.d.wu.w $xr11, $xr27, $xr19
+
+# CHECK: xvaddwev.q.du.d $xr20, $xr21, $xr29
+# CHECK: encoding: [0xb4,0xf6,0x3f,0x74]
+xvaddwev.q.du.d $xr20, $xr21, $xr29
+
+# CHECK: xvaddwod.h.bu.b $xr1, $xr6, $xr14
+# CHECK: encoding: [0xc1,0x38,0x40,0x74]
+xvaddwod.h.bu.b $xr1, $xr6, $xr14
+
+# CHECK: xvaddwod.w.hu.h $xr7, $xr29, $xr11
+# CHECK: encoding: [0xa7,0xaf,0x40,0x74]
+xvaddwod.w.hu.h $xr7, $xr29, $xr11
+
+# CHECK: xvaddwod.d.wu.w $xr16, $xr10, $xr14
+# CHECK: encoding: [0x50,0x39,0x41,0x74]
+xvaddwod.d.wu.w $xr16, $xr10, $xr14
+
+# CHECK: xvaddwod.q.du.d $xr10, $xr11, $xr23
+# CHECK: encoding: [0x6a,0xdd,0x41,0x74]
+xvaddwod.q.du.d $xr10, $xr11, $xr23
+
+# CHECK: xvsadd.b $xr24, $xr10, $xr28
+# CHECK: encoding: [0x58,0x71,0x46,0x74]
+xvsadd.b $xr24, $xr10, $xr28
+
+# CHECK: xvsadd.h $xr19, $xr18, $xr17
+# CHECK: encoding: [0x53,0xc6,0x46,0x74]
+xvsadd.h $xr19, $xr18, $xr17
+
+# CHECK: xvsadd.w $xr2, $xr6, $xr12
+# CHECK: encoding: [0xc2,0x30,0x47,0x74]
+xvsadd.w $xr2, $xr6, $xr12
+
+# CHECK: xvsadd.d $xr15, $xr18, $xr29
+# CHECK: encoding: [0x4f,0xf6,0x47,0x74]
+xvsadd.d $xr15, $xr18, $xr29
+
+# CHECK: xvssub.b $xr15, $xr29, $xr16
+# CHECK: encoding: [0xaf,0x43,0x48,0x74]
+xvssub.b $xr15, $xr29, $xr16
+
+# CHECK: xvssub.h $xr28, $xr3, $xr9
+# CHECK: encoding: [0x7c,0xa4,0x48,0x74]
+xvssub.h $xr28, $xr3, $xr9
+
+# CHECK: xvssub.w $xr8, $xr20, $xr15
+# CHECK: encoding: [0x88,0x3e,0x49,0x74]
+xvssub.w $xr8, $xr20, $xr15
+
+# CHECK: xvssub.d $xr23, $xr8, $xr19
+# CHECK: encoding: [0x17,0xcd,0x49,0x74]
+xvssub.d $xr23, $xr8, $xr19
+
+# CHECK: xvsadd.bu $xr12, $xr4, $xr16
+# CHECK: encoding: [0x8c,0x40,0x4a,0x74]
+xvsadd.bu $xr12, $xr4, $xr16
+
+# CHECK: xvsadd.hu $xr9, $xr26, $xr20
+# CHECK: encoding: [0x49,0xd3,0x4a,0x74]
+xvsadd.hu $xr9, $xr26, $xr20
+
+# CHECK: xvsadd.wu $xr30, $xr15, $xr28
+# CHECK: encoding: [0xfe,0x71,0x4b,0x74]
+xvsadd.wu $xr30, $xr15, $xr28
+
+# CHECK: xvsadd.du $xr15, $xr13, $xr28
+# CHECK: encoding: [0xaf,0xf1,0x4b,0x74]
+xvsadd.du $xr15, $xr13, $xr28
+
+# CHECK: xvssub.bu $xr10, $xr3, $xr15
+# CHECK: encoding: [0x6a,0x3c,0x4c,0x74]
+xvssub.bu $xr10, $xr3, $xr15
+
+# CHECK: xvssub.hu $xr0, $xr12, $xr2
+# CHECK: encoding: [0x80,0x89,0x4c,0x74]
+xvssub.hu $xr0, $xr12, $xr2
+
+# CHECK: xvssub.wu $xr30, $xr10, $xr23
+# CHECK: encoding: [0x5e,0x5d,0x4d,0x74]
+xvssub.wu $xr30, $xr10, $xr23
+
+# CHECK: xvssub.du $xr9, $xr30, $xr14
+# CHECK: encoding: [0xc9,0xbb,0x4d,0x74]
+xvssub.du $xr9, $xr30, $xr14
+
+# CHECK: xvhaddw.h.b $xr25, $xr5, $xr18
+# CHECK: encoding: [0xb9,0x48,0x54,0x74]
+xvhaddw.h.b $xr25, $xr5, $xr18
+
+# CHECK: xvhaddw.w.h $xr7, $xr20, $xr19
+# CHECK: encoding: [0x87,0xce,0x54,0x74]
+xvhaddw.w.h $xr7, $xr20, $xr19
+
+# CHECK: xvhaddw.d.w $xr23, $xr5, $xr4
+# CHECK: encoding: [0xb7,0x10,0x55,0x74]
+xvhaddw.d.w $xr23, $xr5, $xr4
+
+# CHECK: xvhaddw.q.d $xr17, $xr7, $xr25
+# CHECK: encoding: [0xf1,0xe4,0x55,0x74]
+xvhaddw.q.d $xr17, $xr7, $xr25
+
+# CHECK: xvhsubw.h.b $xr29, $xr18, $xr19
+# CHECK: encoding: [0x5d,0x4e,0x56,0x74]
+xvhsubw.h.b $xr29, $xr18, $xr19
+
+# CHECK: xvhsubw.w.h $xr30, $xr28, $xr3
+# CHECK: encoding: [0x9e,0x8f,0x56,0x74]
+xvhsubw.w.h $xr30, $xr28, $xr3
+
+# CHECK: xvhsubw.d.w $xr5, $xr9, $xr13
+# CHECK: encoding: [0x25,0x35,0x57,0x74]
+xvhsubw.d.w $xr5, $xr9, $xr13
+
+# CHECK: xvhsubw.q.d $xr20, $xr12, $xr29
+# CHECK: encoding: [0x94,0xf5,0x57,0x74]
+xvhsubw.q.d $xr20, $xr12, $xr29
+
+# CHECK: xvhaddw.hu.bu $xr11, $xr10, $xr7
+# CHECK: encoding: [0x4b,0x1d,0x58,0x74]
+xvhaddw.hu.bu $xr11, $xr10, $xr7
+
+# CHECK: xvhaddw.wu.hu $xr16, $xr21, $xr21
+# CHECK: encoding: [0xb0,0xd6,0x58,0x74]
+xvhaddw.wu.hu $xr16, $xr21, $xr21
+
+# CHECK: xvhaddw.du.wu $xr17, $xr31, $xr8
+# CHECK: encoding: [0xf1,0x23,0x59,0x74]
+xvhaddw.du.wu $xr17, $xr31, $xr8
+
+# CHECK: xvhaddw.qu.du $xr2, $xr4, $xr11
+# CHECK: encoding: [0x82,0xac,0x59,0x74]
+xvhaddw.qu.du $xr2, $xr4, $xr11
+
+# CHECK: xvhsubw.hu.bu $xr21, $xr14, $xr8
+# CHECK: encoding: [0xd5,0x21,0x5a,0x74]
+xvhsubw.hu.bu $xr21, $xr14, $xr8
+
+# CHECK: xvhsubw.wu.hu $xr25, $xr0, $xr27
+# CHECK: encoding: [0x19,0xec,0x5a,0x74]
+xvhsubw.wu.hu $xr25, $xr0, $xr27
+
+# CHECK: xvhsubw.du.wu $xr4, $xr16, $xr30
+# CHECK: encoding: [0x04,0x7a,0x5b,0x74]
+xvhsubw.du.wu $xr4, $xr16, $xr30
+
+# CHECK: xvhsubw.qu.du $xr11, $xr9, $xr6
+# CHECK: encoding: [0x2b,0x99,0x5b,0x74]
+xvhsubw.qu.du $xr11, $xr9, $xr6
+
+# CHECK: xvadda.b $xr14, $xr21, $xr26
+# CHECK: encoding: [0xae,0x6a,0x5c,0x74]
+xvadda.b $xr14, $xr21, $xr26
+
+# CHECK: xvadda.h $xr21, $xr30, $xr21
+# CHECK: encoding: [0xd5,0xd7,0x5c,0x74]
+xvadda.h $xr21, $xr30, $xr21
+
+# CHECK: xvadda.w $xr31, $xr19, $xr19
+# CHECK: encoding: [0x7f,0x4e,0x5d,0x74]
+xvadda.w $xr31, $xr19, $xr19
+
+# CHECK: xvadda.d $xr9, $xr4, $xr31
+# CHECK: encoding: [0x89,0xfc,0x5d,0x74]
+xvadda.d $xr9, $xr4, $xr31
+
+# CHECK: xvabsd.b $xr20, $xr19, $xr13
+# CHECK: encoding: [0x74,0x36,0x60,0x74]
+xvabsd.b $xr20, $xr19, $xr13
+
+# CHECK: xvabsd.h $xr20, $xr7, $xr10
+# CHECK: encoding: [0xf4,0xa8,0x60,0x74]
+xvabsd.h $xr20, $xr7, $xr10
+
+# CHECK: xvabsd.w $xr23, $xr31, $xr0
+# CHECK: encoding: [0xf7,0x03,0x61,0x74]
+xvabsd.w $xr23, $xr31, $xr0
+
+# CHECK: xvabsd.d $xr7, $xr17, $xr14
+# CHECK: encoding: [0x27,0xba,0x61,0x74]
+xvabsd.d $xr7, $xr17, $xr14
+
+# CHECK: xvabsd.bu $xr12, $xr23, $xr6
+# CHECK: encoding: [0xec,0x1a,0x62,0x74]
+xvabsd.bu $xr12, $xr23, $xr6
+
+# CHECK: xvabsd.hu $xr16, $xr30, $xr19
+# CHECK: encoding: [0xd0,0xcf,0x62,0x74]
+xvabsd.hu $xr16, $xr30, $xr19
+
+# CHECK: xvabsd.wu $xr19, $xr5, $xr26
+# CHECK: encoding: [0xb3,0x68,0x63,0x74]
+xvabsd.wu $xr19, $xr5, $xr26
+
+# CHECK: xvabsd.du $xr0, $xr12, $xr7
+# CHECK: encoding: [0x80,0x9d,0x63,0x74]
+xvabsd.du $xr0, $xr12, $xr7
+
+# CHECK: xvavg.b $xr23, $xr31, $xr25
+# CHECK: encoding: [0xf7,0x67,0x64,0x74]
+xvavg.b $xr23, $xr31, $xr25
+
+# CHECK: xvavg.h $xr27, $xr2, $xr27
+# CHECK: encoding: [0x5b,0xec,0x64,0x74]
+xvavg.h $xr27, $xr2, $xr27
+
+# CHECK: xvavg.w $xr20, $xr0, $xr16
+# CHECK: encoding: [0x14,0x40,0x65,0x74]
+xvavg.w $xr20, $xr0, $xr16
+
+# CHECK: xvavg.d $xr13, $xr9, $xr10
+# CHECK: encoding: [0x2d,0xa9,0x65,0x74]
+xvavg.d $xr13, $xr9, $xr10
+
+# CHECK: xvavg.bu $xr31, $xr30, $xr4
+# CHECK: encoding: [0xdf,0x13,0x66,0x74]
+xvavg.bu $xr31, $xr30, $xr4
+
+# CHECK: xvavg.hu $xr22, $xr17, $xr5
+# CHECK: encoding: [0x36,0x96,0x66,0x74]
+xvavg.hu $xr22, $xr17, $xr5
+
+# CHECK: xvavg.wu $xr21, $xr29, $xr17
+# CHECK: encoding: [0xb5,0x47,0x67,0x74]
+xvavg.wu $xr21, $xr29, $xr17
+
+# CHECK: xvavg.du $xr11, $xr5, $xr29
+# CHECK: encoding: [0xab,0xf4,0x67,0x74]
+xvavg.du $xr11, $xr5, $xr29
+
+# CHECK: xvavgr.b $xr23, $xr13, $xr13
+# CHECK: encoding: [0xb7,0x35,0x68,0x74]
+xvavgr.b $xr23, $xr13, $xr13
+
+# CHECK: xvavgr.h $xr30, $xr20, $xr31
+# CHECK: encoding: [0x9e,0xfe,0x68,0x74]
+xvavgr.h $xr30, $xr20, $xr31
+
+# CHECK: xvavgr.w $xr29, $xr28, $xr9
+# CHECK: encoding: [0x9d,0x27,0x69,0x74]
+xvavgr.w $xr29, $xr28, $xr9
+
+# CHECK: xvavgr.d $xr21, $xr20, $xr8
+# CHECK: encoding: [0x95,0xa2,0x69,0x74]
+xvavgr.d $xr21, $xr20, $xr8
+
+# CHECK: xvavgr.bu $xr0, $xr9, $xr4
+# CHECK: encoding: [0x20,0x11,0x6a,0x74]
+xvavgr.bu $xr0, $xr9, $xr4
+
+# CHECK: xvavgr.hu $xr3, $xr0, $xr27
+# CHECK: encoding: [0x03,0xec,0x6a,0x74]
+xvavgr.hu $xr3, $xr0, $xr27
+
+# CHECK: xvavgr.wu $xr2, $xr30, $xr21
+# CHECK: encoding: [0xc2,0x57,0x6b,0x74]
+xvavgr.wu $xr2, $xr30, $xr21
+
+# CHECK: xvavgr.du $xr22, $xr21, $xr17
+# CHECK: encoding: [0xb6,0xc6,0x6b,0x74]
+xvavgr.du $xr22, $xr21, $xr17
+
+# CHECK: xvmax.b $xr1, $xr20, $xr19
+# CHECK: encoding: [0x81,0x4e,0x70,0x74]
+xvmax.b $xr1, $xr20, $xr19
+
+# CHECK: xvmax.h $xr0, $xr17, $xr14
+# CHECK: encoding: [0x20,0xba,0x70,0x74]
+xvmax.h $xr0, $xr17, $xr14
+
+# CHECK: xvmax.w $xr0, $xr8, $xr16
+# CHECK: encoding: [0x00,0x41,0x71,0x74]
+xvmax.w $xr0, $xr8, $xr16
+
+# CHECK: xvmax.d $xr16, $xr23, $xr16
+# CHECK: encoding: [0xf0,0xc2,0x71,0x74]
+xvmax.d $xr16, $xr23, $xr16
+
+# CHECK: xvmin.b $xr20, $xr6, $xr14
+# CHECK: encoding: [0xd4,0x38,0x72,0x74]
+xvmin.b $xr20, $xr6, $xr14
+
+# CHECK: xvmin.h $xr4, $xr3, $xr24
+# CHECK: encoding: [0x64,0xe0,0x72,0x74]
+xvmin.h $xr4, $xr3, $xr24
+
+# CHECK: xvmin.w $xr5, $xr2, $xr23
+# CHECK: encoding: [0x45,0x5c,0x73,0x74]
+xvmin.w $xr5, $xr2, $xr23
+
+# CHECK: xvmin.d $xr31, $xr23, $xr26
+# CHECK: encoding: [0xff,0xea,0x73,0x74]
+xvmin.d $xr31, $xr23, $xr26
+
+# CHECK: xvmax.bu $xr14, $xr13, $xr3
+# CHECK: encoding: [0xae,0x0d,0x74,0x74]
+xvmax.bu $xr14, $xr13, $xr3
+
+# CHECK: xvmax.hu $xr22, $xr17, $xr4
+# CHECK: encoding: [0x36,0x92,0x74,0x74]
+xvmax.hu $xr22, $xr17, $xr4
+
+# CHECK: xvmax.wu $xr17, $xr13, $xr29
+# CHECK: encoding: [0xb1,0x75,0x75,0x74]
+xvmax.wu $xr17, $xr13, $xr29
+
+# CHECK: xvmax.du $xr13, $xr2, $xr0
+# CHECK: encoding: [0x4d,0x80,0x75,0x74]
+xvmax.du $xr13, $xr2, $xr0
+
+# CHECK: xvmin.bu $xr18, $xr31, $xr27
+# CHECK: encoding: [0xf2,0x6f,0x76,0x74]
+xvmin.bu $xr18, $xr31, $xr27
+
+# CHECK: xvmin.hu $xr2, $xr10, $xr14
+# CHECK: encoding: [0x42,0xb9,0x76,0x74]
+xvmin.hu $xr2, $xr10, $xr14
+
+# CHECK: xvmin.wu $xr31, $xr8, $xr26
+# CHECK: encoding: [0x1f,0x69,0x77,0x74]
+xvmin.wu $xr31, $xr8, $xr26
+
+# CHECK: xvmin.du $xr12, $xr26, $xr9
+# CHECK: encoding: [0x4c,0xa7,0x77,0x74]
+xvmin.du $xr12, $xr26, $xr9
+
+# CHECK: xvmul.b $xr26, $xr2, $xr3
+# CHECK: encoding: [0x5a,0x0c,0x84,0x74]
+xvmul.b $xr26, $xr2, $xr3
+
+# CHECK: xvmul.h $xr16, $xr29, $xr5
+# CHECK: encoding: [0xb0,0x97,0x84,0x74]
+xvmul.h $xr16, $xr29, $xr5
+
+# CHECK: xvmul.w $xr19, $xr1, $xr3
+# CHECK: encoding: [0x33,0x0c,0x85,0x74]
+xvmul.w $xr19, $xr1, $xr3
+
+# CHECK: xvmul.d $xr15, $xr15, $xr0
+# CHECK: encoding: [0xef,0x81,0x85,0x74]
+xvmul.d $xr15, $xr15, $xr0
+
+# CHECK: xvmuh.b $xr9, $xr12, $xr9
+# CHECK: encoding: [0x89,0x25,0x86,0x74]
+xvmuh.b $xr9, $xr12, $xr9
+
+# CHECK: xvmuh.h $xr8, $xr23, $xr16
+# CHECK: encoding: [0xe8,0xc2,0x86,0x74]
+xvmuh.h $xr8, $xr23, $xr16
+
+# CHECK: xvmuh.w $xr29, $xr6, $xr11
+# CHECK: encoding: [0xdd,0x2c,0x87,0x74]
+xvmuh.w $xr29, $xr6, $xr11
+
+# CHECK: xvmuh.d $xr3, $xr18, $xr7
+# CHECK: encoding: [0x43,0x9e,0x87,0x74]
+xvmuh.d $xr3, $xr18, $xr7
+
+# CHECK: xvmuh.bu $xr3, $xr7, $xr19
+# CHECK: encoding: [0xe3,0x4c,0x88,0x74]
+xvmuh.bu $xr3, $xr7, $xr19
+
+# CHECK: xvmuh.hu $xr13, $xr1, $xr18
+# CHECK: encoding: [0x2d,0xc8,0x88,0x74]
+xvmuh.hu $xr13, $xr1, $xr18
+
+# CHECK: xvmuh.wu $xr15, $xr21, $xr16
+# CHECK: encoding: [0xaf,0x42,0x89,0x74]
+xvmuh.wu $xr15, $xr21, $xr16
+
+# CHECK: xvmuh.du $xr11, $xr10, $xr19
+# CHECK: encoding: [0x4b,0xcd,0x89,0x74]
+xvmuh.du $xr11, $xr10, $xr19
+
+# CHECK: xvmulwev.h.b $xr4, $xr12, $xr9
+# CHECK: encoding: [0x84,0x25,0x90,0x74]
+xvmulwev.h.b $xr4, $xr12, $xr9
+
+# CHECK: xvmulwev.w.h $xr10, $xr3, $xr20
+# CHECK: encoding: [0x6a,0xd0,0x90,0x74]
+xvmulwev.w.h $xr10, $xr3, $xr20
+
+# CHECK: xvmulwev.d.w $xr4, $xr22, $xr18
+# CHECK: encoding: [0xc4,0x4a,0x91,0x74]
+xvmulwev.d.w $xr4, $xr22, $xr18
+
+# CHECK:
xvmulwev.q.d $xr20, $xr21, $xr27 +# CHECK: encoding: [0xb4,0xee,0x91,0x74] +xvmulwev.q.d $xr20, $xr21, $xr27 + +# CHECK: xvmulwod.h.b $xr5, $xr7, $xr0 +# CHECK: encoding: [0xe5,0x00,0x92,0x74] +xvmulwod.h.b $xr5, $xr7, $xr0 + +# CHECK: xvmulwod.w.h $xr19, $xr28, $xr11 +# CHECK: encoding: [0x93,0xaf,0x92,0x74] +xvmulwod.w.h $xr19, $xr28, $xr11 + +# CHECK: xvmulwod.d.w $xr19, $xr7, $xr16 +# CHECK: encoding: [0xf3,0x40,0x93,0x74] +xvmulwod.d.w $xr19, $xr7, $xr16 + +# CHECK: xvmulwod.q.d $xr11, $xr12, $xr13 +# CHECK: encoding: [0x8b,0xb5,0x93,0x74] +xvmulwod.q.d $xr11, $xr12, $xr13 + +# CHECK: xvmulwev.h.bu $xr22, $xr2, $xr1 +# CHECK: encoding: [0x56,0x04,0x98,0x74] +xvmulwev.h.bu $xr22, $xr2, $xr1 + +# CHECK: xvmulwev.w.hu $xr2, $xr3, $xr4 +# CHECK: encoding: [0x62,0x90,0x98,0x74] +xvmulwev.w.hu $xr2, $xr3, $xr4 + +# CHECK: xvmulwev.d.wu $xr2, $xr12, $xr25 +# CHECK: encoding: [0x82,0x65,0x99,0x74] +xvmulwev.d.wu $xr2, $xr12, $xr25 + +# CHECK: xvmulwev.q.du $xr22, $xr29, $xr17 +# CHECK: encoding: [0xb6,0xc7,0x99,0x74] +xvmulwev.q.du $xr22, $xr29, $xr17 + +# CHECK: xvmulwod.h.bu $xr9, $xr9, $xr0 +# CHECK: encoding: [0x29,0x01,0x9a,0x74] +xvmulwod.h.bu $xr9, $xr9, $xr0 + +# CHECK: xvmulwod.w.hu $xr20, $xr2, $xr16 +# CHECK: encoding: [0x54,0xc0,0x9a,0x74] +xvmulwod.w.hu $xr20, $xr2, $xr16 + +# CHECK: xvmulwod.d.wu $xr1, $xr11, $xr24 +# CHECK: encoding: [0x61,0x61,0x9b,0x74] +xvmulwod.d.wu $xr1, $xr11, $xr24 + +# CHECK: xvmulwod.q.du $xr19, $xr2, $xr22 +# CHECK: encoding: [0x53,0xd8,0x9b,0x74] +xvmulwod.q.du $xr19, $xr2, $xr22 + +# CHECK: xvmulwev.h.bu.b $xr22, $xr29, $xr24 +# CHECK: encoding: [0xb6,0x63,0xa0,0x74] +xvmulwev.h.bu.b $xr22, $xr29, $xr24 + +# CHECK: xvmulwev.w.hu.h $xr1, $xr22, $xr11 +# CHECK: encoding: [0xc1,0xae,0xa0,0x74] +xvmulwev.w.hu.h $xr1, $xr22, $xr11 + +# CHECK: xvmulwev.d.wu.w $xr12, $xr12, $xr12 +# CHECK: encoding: [0x8c,0x31,0xa1,0x74] +xvmulwev.d.wu.w $xr12, $xr12, $xr12 + +# CHECK: xvmulwev.q.du.d $xr0, $xr17, $xr23 +# CHECK: encoding: [0x20,0xde,0xa1,0x74] +xvmulwev.q.du.d $xr0, $xr17, $xr23 + +# CHECK: xvmulwod.h.bu.b $xr26, $xr16, $xr23 +# CHECK: encoding: [0x1a,0x5e,0xa2,0x74] +xvmulwod.h.bu.b $xr26, $xr16, $xr23 + +# CHECK: xvmulwod.w.hu.h $xr31, $xr12, $xr9 +# CHECK: encoding: [0x9f,0xa5,0xa2,0x74] +xvmulwod.w.hu.h $xr31, $xr12, $xr9 + +# CHECK: xvmulwod.d.wu.w $xr21, $xr27, $xr19 +# CHECK: encoding: [0x75,0x4f,0xa3,0x74] +xvmulwod.d.wu.w $xr21, $xr27, $xr19 + +# CHECK: xvmulwod.q.du.d $xr7, $xr5, $xr11 +# CHECK: encoding: [0xa7,0xac,0xa3,0x74] +xvmulwod.q.du.d $xr7, $xr5, $xr11 + +# CHECK: xvmadd.b $xr22, $xr11, $xr15 +# CHECK: encoding: [0x76,0x3d,0xa8,0x74] +xvmadd.b $xr22, $xr11, $xr15 + +# CHECK: xvmadd.h $xr3, $xr30, $xr25 +# CHECK: encoding: [0xc3,0xe7,0xa8,0x74] +xvmadd.h $xr3, $xr30, $xr25 + +# CHECK: xvmadd.w $xr1, $xr18, $xr5 +# CHECK: encoding: [0x41,0x16,0xa9,0x74] +xvmadd.w $xr1, $xr18, $xr5 + +# CHECK: xvmadd.d $xr16, $xr21, $xr11 +# CHECK: encoding: [0xb0,0xae,0xa9,0x74] +xvmadd.d $xr16, $xr21, $xr11 + +# CHECK: xvmsub.b $xr11, $xr12, $xr10 +# CHECK: encoding: [0x8b,0x29,0xaa,0x74] +xvmsub.b $xr11, $xr12, $xr10 + +# CHECK: xvmsub.h $xr16, $xr11, $xr1 +# CHECK: encoding: [0x70,0x85,0xaa,0x74] +xvmsub.h $xr16, $xr11, $xr1 + +# CHECK: xvmsub.w $xr15, $xr21, $xr21 +# CHECK: encoding: [0xaf,0x56,0xab,0x74] +xvmsub.w $xr15, $xr21, $xr21 + +# CHECK: xvmsub.d $xr12, $xr11, $xr4 +# CHECK: encoding: [0x6c,0x91,0xab,0x74] +xvmsub.d $xr12, $xr11, $xr4 + +# CHECK: xvmaddwev.h.b $xr21, $xr7, $xr6 +# CHECK: encoding: [0xf5,0x18,0xac,0x74] +xvmaddwev.h.b $xr21, 
$xr7, $xr6 + +# CHECK: xvmaddwev.w.h $xr16, $xr29, $xr13 +# CHECK: encoding: [0xb0,0xb7,0xac,0x74] +xvmaddwev.w.h $xr16, $xr29, $xr13 + +# CHECK: xvmaddwev.d.w $xr7, $xr25, $xr30 +# CHECK: encoding: [0x27,0x7b,0xad,0x74] +xvmaddwev.d.w $xr7, $xr25, $xr30 + +# CHECK: xvmaddwev.q.d $xr19, $xr3, $xr8 +# CHECK: encoding: [0x73,0xa0,0xad,0x74] +xvmaddwev.q.d $xr19, $xr3, $xr8 + +# CHECK: xvmaddwod.h.b $xr20, $xr27, $xr12 +# CHECK: encoding: [0x74,0x33,0xae,0x74] +xvmaddwod.h.b $xr20, $xr27, $xr12 + +# CHECK: xvmaddwod.w.h $xr0, $xr21, $xr13 +# CHECK: encoding: [0xa0,0xb6,0xae,0x74] +xvmaddwod.w.h $xr0, $xr21, $xr13 + +# CHECK: xvmaddwod.d.w $xr25, $xr13, $xr31 +# CHECK: encoding: [0xb9,0x7d,0xaf,0x74] +xvmaddwod.d.w $xr25, $xr13, $xr31 + +# CHECK: xvmaddwod.q.d $xr26, $xr26, $xr16 +# CHECK: encoding: [0x5a,0xc3,0xaf,0x74] +xvmaddwod.q.d $xr26, $xr26, $xr16 + +# CHECK: xvmaddwev.h.bu $xr18, $xr26, $xr21 +# CHECK: encoding: [0x52,0x57,0xb4,0x74] +xvmaddwev.h.bu $xr18, $xr26, $xr21 + +# CHECK: xvmaddwev.w.hu $xr14, $xr16, $xr5 +# CHECK: encoding: [0x0e,0x96,0xb4,0x74] +xvmaddwev.w.hu $xr14, $xr16, $xr5 + +# CHECK: xvmaddwev.d.wu $xr19, $xr29, $xr20 +# CHECK: encoding: [0xb3,0x53,0xb5,0x74] +xvmaddwev.d.wu $xr19, $xr29, $xr20 + +# CHECK: xvmaddwev.q.du $xr15, $xr29, $xr17 +# CHECK: encoding: [0xaf,0xc7,0xb5,0x74] +xvmaddwev.q.du $xr15, $xr29, $xr17 + +# CHECK: xvmaddwod.h.bu $xr13, $xr26, $xr1 +# CHECK: encoding: [0x4d,0x07,0xb6,0x74] +xvmaddwod.h.bu $xr13, $xr26, $xr1 + +# CHECK: xvmaddwod.w.hu $xr15, $xr25, $xr16 +# CHECK: encoding: [0x2f,0xc3,0xb6,0x74] +xvmaddwod.w.hu $xr15, $xr25, $xr16 + +# CHECK: xvmaddwod.d.wu $xr23, $xr4, $xr9 +# CHECK: encoding: [0x97,0x24,0xb7,0x74] +xvmaddwod.d.wu $xr23, $xr4, $xr9 + +# CHECK: xvmaddwod.q.du $xr29, $xr22, $xr17 +# CHECK: encoding: [0xdd,0xc6,0xb7,0x74] +xvmaddwod.q.du $xr29, $xr22, $xr17 + +# CHECK: xvmaddwev.h.bu.b $xr23, $xr1, $xr6 +# CHECK: encoding: [0x37,0x18,0xbc,0x74] +xvmaddwev.h.bu.b $xr23, $xr1, $xr6 + +# CHECK: xvmaddwev.w.hu.h $xr4, $xr27, $xr12 +# CHECK: encoding: [0x64,0xb3,0xbc,0x74] +xvmaddwev.w.hu.h $xr4, $xr27, $xr12 + +# CHECK: xvmaddwev.d.wu.w $xr0, $xr2, $xr5 +# CHECK: encoding: [0x40,0x14,0xbd,0x74] +xvmaddwev.d.wu.w $xr0, $xr2, $xr5 + +# CHECK: xvmaddwev.q.du.d $xr9, $xr31, $xr1 +# CHECK: encoding: [0xe9,0x87,0xbd,0x74] +xvmaddwev.q.du.d $xr9, $xr31, $xr1 + +# CHECK: xvmaddwod.h.bu.b $xr9, $xr19, $xr20 +# CHECK: encoding: [0x69,0x52,0xbe,0x74] +xvmaddwod.h.bu.b $xr9, $xr19, $xr20 + +# CHECK: xvmaddwod.w.hu.h $xr7, $xr5, $xr13 +# CHECK: encoding: [0xa7,0xb4,0xbe,0x74] +xvmaddwod.w.hu.h $xr7, $xr5, $xr13 + +# CHECK: xvmaddwod.d.wu.w $xr10, $xr27, $xr1 +# CHECK: encoding: [0x6a,0x07,0xbf,0x74] +xvmaddwod.d.wu.w $xr10, $xr27, $xr1 + +# CHECK: xvmaddwod.q.du.d $xr25, $xr19, $xr0 +# CHECK: encoding: [0x79,0x82,0xbf,0x74] +xvmaddwod.q.du.d $xr25, $xr19, $xr0 + +# CHECK: xvdiv.b $xr3, $xr31, $xr2 +# CHECK: encoding: [0xe3,0x0b,0xe0,0x74] +xvdiv.b $xr3, $xr31, $xr2 + +# CHECK: xvdiv.h $xr1, $xr12, $xr17 +# CHECK: encoding: [0x81,0xc5,0xe0,0x74] +xvdiv.h $xr1, $xr12, $xr17 + +# CHECK: xvdiv.w $xr13, $xr0, $xr12 +# CHECK: encoding: [0x0d,0x30,0xe1,0x74] +xvdiv.w $xr13, $xr0, $xr12 + +# CHECK: xvdiv.d $xr17, $xr5, $xr11 +# CHECK: encoding: [0xb1,0xac,0xe1,0x74] +xvdiv.d $xr17, $xr5, $xr11 + +# CHECK: xvmod.b $xr22, $xr17, $xr1 +# CHECK: encoding: [0x36,0x06,0xe2,0x74] +xvmod.b $xr22, $xr17, $xr1 + +# CHECK: xvmod.h $xr28, $xr5, $xr12 +# CHECK: encoding: [0xbc,0xb0,0xe2,0x74] +xvmod.h $xr28, $xr5, $xr12 + +# CHECK: xvmod.w $xr29, $xr19, $xr14 
+# CHECK: encoding: [0x7d,0x3a,0xe3,0x74] +xvmod.w $xr29, $xr19, $xr14 + +# CHECK: xvmod.d $xr17, $xr8, $xr6 +# CHECK: encoding: [0x11,0x99,0xe3,0x74] +xvmod.d $xr17, $xr8, $xr6 + +# CHECK: xvdiv.bu $xr23, $xr6, $xr2 +# CHECK: encoding: [0xd7,0x08,0xe4,0x74] +xvdiv.bu $xr23, $xr6, $xr2 + +# CHECK: xvdiv.hu $xr9, $xr31, $xr0 +# CHECK: encoding: [0xe9,0x83,0xe4,0x74] +xvdiv.hu $xr9, $xr31, $xr0 + +# CHECK: xvdiv.wu $xr15, $xr1, $xr4 +# CHECK: encoding: [0x2f,0x10,0xe5,0x74] +xvdiv.wu $xr15, $xr1, $xr4 + +# CHECK: xvdiv.du $xr14, $xr29, $xr11 +# CHECK: encoding: [0xae,0xaf,0xe5,0x74] +xvdiv.du $xr14, $xr29, $xr11 + +# CHECK: xvmod.bu $xr4, $xr12, $xr31 +# CHECK: encoding: [0x84,0x7d,0xe6,0x74] +xvmod.bu $xr4, $xr12, $xr31 + +# CHECK: xvmod.hu $xr22, $xr12, $xr11 +# CHECK: encoding: [0x96,0xad,0xe6,0x74] +xvmod.hu $xr22, $xr12, $xr11 + +# CHECK: xvmod.wu $xr21, $xr23, $xr10 +# CHECK: encoding: [0xf5,0x2a,0xe7,0x74] +xvmod.wu $xr21, $xr23, $xr10 + +# CHECK: xvmod.du $xr21, $xr21, $xr31 +# CHECK: encoding: [0xb5,0xfe,0xe7,0x74] +xvmod.du $xr21, $xr21, $xr31 + +# CHECK: xvsll.b $xr16, $xr10, $xr11 +# CHECK: encoding: [0x50,0x2d,0xe8,0x74] +xvsll.b $xr16, $xr10, $xr11 + +# CHECK: xvsll.h $xr12, $xr10, $xr27 +# CHECK: encoding: [0x4c,0xed,0xe8,0x74] +xvsll.h $xr12, $xr10, $xr27 + +# CHECK: xvsll.w $xr30, $xr2, $xr26 +# CHECK: encoding: [0x5e,0x68,0xe9,0x74] +xvsll.w $xr30, $xr2, $xr26 + +# CHECK: xvsll.d $xr8, $xr21, $xr17 +# CHECK: encoding: [0xa8,0xc6,0xe9,0x74] +xvsll.d $xr8, $xr21, $xr17 + +# CHECK: xvsrl.b $xr27, $xr24, $xr18 +# CHECK: encoding: [0x1b,0x4b,0xea,0x74] +xvsrl.b $xr27, $xr24, $xr18 + +# CHECK: xvsrl.h $xr17, $xr31, $xr24 +# CHECK: encoding: [0xf1,0xe3,0xea,0x74] +xvsrl.h $xr17, $xr31, $xr24 + +# CHECK: xvsrl.w $xr5, $xr3, $xr4 +# CHECK: encoding: [0x65,0x10,0xeb,0x74] +xvsrl.w $xr5, $xr3, $xr4 + +# CHECK: xvsrl.d $xr21, $xr6, $xr8 +# CHECK: encoding: [0xd5,0xa0,0xeb,0x74] +xvsrl.d $xr21, $xr6, $xr8 + +# CHECK: xvsra.b $xr28, $xr28, $xr21 +# CHECK: encoding: [0x9c,0x57,0xec,0x74] +xvsra.b $xr28, $xr28, $xr21 + +# CHECK: xvsra.h $xr19, $xr4, $xr26 +# CHECK: encoding: [0x93,0xe8,0xec,0x74] +xvsra.h $xr19, $xr4, $xr26 + +# CHECK: xvsra.w $xr13, $xr20, $xr1 +# CHECK: encoding: [0x8d,0x06,0xed,0x74] +xvsra.w $xr13, $xr20, $xr1 + +# CHECK: xvsra.d $xr0, $xr8, $xr18 +# CHECK: encoding: [0x00,0xc9,0xed,0x74] +xvsra.d $xr0, $xr8, $xr18 + +# CHECK: xvrotr.b $xr8, $xr30, $xr28 +# CHECK: encoding: [0xc8,0x73,0xee,0x74] +xvrotr.b $xr8, $xr30, $xr28 + +# CHECK: xvrotr.h $xr17, $xr19, $xr0 +# CHECK: encoding: [0x71,0x82,0xee,0x74] +xvrotr.h $xr17, $xr19, $xr0 + +# CHECK: xvrotr.w $xr15, $xr28, $xr23 +# CHECK: encoding: [0x8f,0x5f,0xef,0x74] +xvrotr.w $xr15, $xr28, $xr23 + +# CHECK: xvrotr.d $xr31, $xr2, $xr21 +# CHECK: encoding: [0x5f,0xd4,0xef,0x74] +xvrotr.d $xr31, $xr2, $xr21 + +# CHECK: xvsrlr.b $xr20, $xr26, $xr11 +# CHECK: encoding: [0x54,0x2f,0xf0,0x74] +xvsrlr.b $xr20, $xr26, $xr11 + +# CHECK: xvsrlr.h $xr13, $xr18, $xr7 +# CHECK: encoding: [0x4d,0x9e,0xf0,0x74] +xvsrlr.h $xr13, $xr18, $xr7 + +# CHECK: xvsrlr.w $xr28, $xr1, $xr3 +# CHECK: encoding: [0x3c,0x0c,0xf1,0x74] +xvsrlr.w $xr28, $xr1, $xr3 + +# CHECK: xvsrlr.d $xr6, $xr3, $xr14 +# CHECK: encoding: [0x66,0xb8,0xf1,0x74] +xvsrlr.d $xr6, $xr3, $xr14 + +# CHECK: xvsrar.b $xr10, $xr8, $xr17 +# CHECK: encoding: [0x0a,0x45,0xf2,0x74] +xvsrar.b $xr10, $xr8, $xr17 + +# CHECK: xvsrar.h $xr31, $xr2, $xr11 +# CHECK: encoding: [0x5f,0xac,0xf2,0x74] +xvsrar.h $xr31, $xr2, $xr11 + +# CHECK: xvsrar.w $xr13, $xr8, $xr5 +# CHECK: encoding: 
[0x0d,0x15,0xf3,0x74] +xvsrar.w $xr13, $xr8, $xr5 + +# CHECK: xvsrar.d $xr12, $xr18, $xr0 +# CHECK: encoding: [0x4c,0x82,0xf3,0x74] +xvsrar.d $xr12, $xr18, $xr0 + +# CHECK: xvsrln.b.h $xr15, $xr6, $xr15 +# CHECK: encoding: [0xcf,0xbc,0xf4,0x74] +xvsrln.b.h $xr15, $xr6, $xr15 + +# CHECK: xvsrln.h.w $xr22, $xr19, $xr17 +# CHECK: encoding: [0x76,0x46,0xf5,0x74] +xvsrln.h.w $xr22, $xr19, $xr17 + +# CHECK: xvsrln.w.d $xr4, $xr7, $xr5 +# CHECK: encoding: [0xe4,0x94,0xf5,0x74] +xvsrln.w.d $xr4, $xr7, $xr5 + +# CHECK: xvsran.b.h $xr3, $xr19, $xr23 +# CHECK: encoding: [0x63,0xde,0xf6,0x74] +xvsran.b.h $xr3, $xr19, $xr23 + +# CHECK: xvsran.h.w $xr16, $xr6, $xr1 +# CHECK: encoding: [0xd0,0x04,0xf7,0x74] +xvsran.h.w $xr16, $xr6, $xr1 + +# CHECK: xvsran.w.d $xr27, $xr16, $xr0 +# CHECK: encoding: [0x1b,0x82,0xf7,0x74] +xvsran.w.d $xr27, $xr16, $xr0 + +# CHECK: xvsrlrn.b.h $xr2, $xr9, $xr9 +# CHECK: encoding: [0x22,0xa5,0xf8,0x74] +xvsrlrn.b.h $xr2, $xr9, $xr9 + +# CHECK: xvsrlrn.h.w $xr16, $xr11, $xr19 +# CHECK: encoding: [0x70,0x4d,0xf9,0x74] +xvsrlrn.h.w $xr16, $xr11, $xr19 + +# CHECK: xvsrlrn.w.d $xr29, $xr25, $xr15 +# CHECK: encoding: [0x3d,0xbf,0xf9,0x74] +xvsrlrn.w.d $xr29, $xr25, $xr15 + +# CHECK: xvsrarn.b.h $xr13, $xr20, $xr13 +# CHECK: encoding: [0x8d,0xb6,0xfa,0x74] +xvsrarn.b.h $xr13, $xr20, $xr13 + +# CHECK: xvsrarn.h.w $xr13, $xr22, $xr1 +# CHECK: encoding: [0xcd,0x06,0xfb,0x74] +xvsrarn.h.w $xr13, $xr22, $xr1 + +# CHECK: xvsrarn.w.d $xr13, $xr12, $xr2 +# CHECK: encoding: [0x8d,0x89,0xfb,0x74] +xvsrarn.w.d $xr13, $xr12, $xr2 + +# CHECK: xvssrln.b.h $xr19, $xr19, $xr10 +# CHECK: encoding: [0x73,0xaa,0xfc,0x74] +xvssrln.b.h $xr19, $xr19, $xr10 + +# CHECK: xvssrln.h.w $xr12, $xr24, $xr17 +# CHECK: encoding: [0x0c,0x47,0xfd,0x74] +xvssrln.h.w $xr12, $xr24, $xr17 + +# CHECK: xvssrln.w.d $xr7, $xr30, $xr14 +# CHECK: encoding: [0xc7,0xbb,0xfd,0x74] +xvssrln.w.d $xr7, $xr30, $xr14 + +# CHECK: xvssran.b.h $xr6, $xr9, $xr23 +# CHECK: encoding: [0x26,0xdd,0xfe,0x74] +xvssran.b.h $xr6, $xr9, $xr23 + +# CHECK: xvssran.h.w $xr13, $xr9, $xr2 +# CHECK: encoding: [0x2d,0x09,0xff,0x74] +xvssran.h.w $xr13, $xr9, $xr2 + +# CHECK: xvssran.w.d $xr18, $xr26, $xr1 +# CHECK: encoding: [0x52,0x87,0xff,0x74] +xvssran.w.d $xr18, $xr26, $xr1 + +# CHECK: xvssrlrn.b.h $xr24, $xr17, $xr23 +# CHECK: encoding: [0x38,0xde,0x00,0x75] +xvssrlrn.b.h $xr24, $xr17, $xr23 + +# CHECK: xvssrlrn.h.w $xr10, $xr12, $xr8 +# CHECK: encoding: [0x8a,0x21,0x01,0x75] +xvssrlrn.h.w $xr10, $xr12, $xr8 + +# CHECK: xvssrlrn.w.d $xr30, $xr27, $xr6 +# CHECK: encoding: [0x7e,0x9b,0x01,0x75] +xvssrlrn.w.d $xr30, $xr27, $xr6 + +# CHECK: xvssrarn.b.h $xr20, $xr27, $xr31 +# CHECK: encoding: [0x74,0xff,0x02,0x75] +xvssrarn.b.h $xr20, $xr27, $xr31 + +# CHECK: xvssrarn.h.w $xr24, $xr23, $xr23 +# CHECK: encoding: [0xf8,0x5e,0x03,0x75] +xvssrarn.h.w $xr24, $xr23, $xr23 + +# CHECK: xvssrarn.w.d $xr8, $xr29, $xr25 +# CHECK: encoding: [0xa8,0xe7,0x03,0x75] +xvssrarn.w.d $xr8, $xr29, $xr25 + +# CHECK: xvssrln.bu.h $xr14, $xr4, $xr17 +# CHECK: encoding: [0x8e,0xc4,0x04,0x75] +xvssrln.bu.h $xr14, $xr4, $xr17 + +# CHECK: xvssrln.hu.w $xr28, $xr20, $xr10 +# CHECK: encoding: [0x9c,0x2a,0x05,0x75] +xvssrln.hu.w $xr28, $xr20, $xr10 + +# CHECK: xvssrln.wu.d $xr10, $xr8, $xr20 +# CHECK: encoding: [0x0a,0xd1,0x05,0x75] +xvssrln.wu.d $xr10, $xr8, $xr20 + +# CHECK: xvssran.bu.h $xr18, $xr28, $xr23 +# CHECK: encoding: [0x92,0xdf,0x06,0x75] +xvssran.bu.h $xr18, $xr28, $xr23 + +# CHECK: xvssran.hu.w $xr25, $xr19, $xr24 +# CHECK: encoding: [0x79,0x62,0x07,0x75] +xvssran.hu.w 
$xr25, $xr19, $xr24 + +# CHECK: xvssran.wu.d $xr16, $xr29, $xr18 +# CHECK: encoding: [0xb0,0xcb,0x07,0x75] +xvssran.wu.d $xr16, $xr29, $xr18 + +# CHECK: xvssrlrn.bu.h $xr2, $xr19, $xr14 +# CHECK: encoding: [0x62,0xba,0x08,0x75] +xvssrlrn.bu.h $xr2, $xr19, $xr14 + +# CHECK: xvssrlrn.hu.w $xr6, $xr0, $xr18 +# CHECK: encoding: [0x06,0x48,0x09,0x75] +xvssrlrn.hu.w $xr6, $xr0, $xr18 + +# CHECK: xvssrlrn.wu.d $xr30, $xr4, $xr31 +# CHECK: encoding: [0x9e,0xfc,0x09,0x75] +xvssrlrn.wu.d $xr30, $xr4, $xr31 + +# CHECK: xvssrarn.bu.h $xr16, $xr28, $xr8 +# CHECK: encoding: [0x90,0xa3,0x0a,0x75] +xvssrarn.bu.h $xr16, $xr28, $xr8 + +# CHECK: xvssrarn.hu.w $xr11, $xr2, $xr6 +# CHECK: encoding: [0x4b,0x18,0x0b,0x75] +xvssrarn.hu.w $xr11, $xr2, $xr6 + +# CHECK: xvssrarn.wu.d $xr22, $xr6, $xr12 +# CHECK: encoding: [0xd6,0xb0,0x0b,0x75] +xvssrarn.wu.d $xr22, $xr6, $xr12 + +# CHECK: xvbitclr.b $xr4, $xr16, $xr16 +# CHECK: encoding: [0x04,0x42,0x0c,0x75] +xvbitclr.b $xr4, $xr16, $xr16 + +# CHECK: xvbitclr.h $xr16, $xr31, $xr26 +# CHECK: encoding: [0xf0,0xeb,0x0c,0x75] +xvbitclr.h $xr16, $xr31, $xr26 + +# CHECK: xvbitclr.w $xr24, $xr2, $xr20 +# CHECK: encoding: [0x58,0x50,0x0d,0x75] +xvbitclr.w $xr24, $xr2, $xr20 + +# CHECK: xvbitclr.d $xr18, $xr12, $xr30 +# CHECK: encoding: [0x92,0xf9,0x0d,0x75] +xvbitclr.d $xr18, $xr12, $xr30 + +# CHECK: xvbitset.b $xr26, $xr27, $xr23 +# CHECK: encoding: [0x7a,0x5f,0x0e,0x75] +xvbitset.b $xr26, $xr27, $xr23 + +# CHECK: xvbitset.h $xr19, $xr19, $xr11 +# CHECK: encoding: [0x73,0xae,0x0e,0x75] +xvbitset.h $xr19, $xr19, $xr11 + +# CHECK: xvbitset.w $xr7, $xr9, $xr18 +# CHECK: encoding: [0x27,0x49,0x0f,0x75] +xvbitset.w $xr7, $xr9, $xr18 + +# CHECK: xvbitset.d $xr6, $xr30, $xr3 +# CHECK: encoding: [0xc6,0x8f,0x0f,0x75] +xvbitset.d $xr6, $xr30, $xr3 + +# CHECK: xvbitrev.b $xr30, $xr13, $xr7 +# CHECK: encoding: [0xbe,0x1d,0x10,0x75] +xvbitrev.b $xr30, $xr13, $xr7 + +# CHECK: xvbitrev.h $xr12, $xr3, $xr8 +# CHECK: encoding: [0x6c,0xa0,0x10,0x75] +xvbitrev.h $xr12, $xr3, $xr8 + +# CHECK: xvbitrev.w $xr8, $xr20, $xr20 +# CHECK: encoding: [0x88,0x52,0x11,0x75] +xvbitrev.w $xr8, $xr20, $xr20 + +# CHECK: xvbitrev.d $xr28, $xr7, $xr17 +# CHECK: encoding: [0xfc,0xc4,0x11,0x75] +xvbitrev.d $xr28, $xr7, $xr17 + +# CHECK: xvpackev.b $xr29, $xr18, $xr12 +# CHECK: encoding: [0x5d,0x32,0x16,0x75] +xvpackev.b $xr29, $xr18, $xr12 + +# CHECK: xvpackev.h $xr6, $xr11, $xr17 +# CHECK: encoding: [0x66,0xc5,0x16,0x75] +xvpackev.h $xr6, $xr11, $xr17 + +# CHECK: xvpackev.w $xr2, $xr2, $xr30 +# CHECK: encoding: [0x42,0x78,0x17,0x75] +xvpackev.w $xr2, $xr2, $xr30 + +# CHECK: xvpackev.d $xr26, $xr15, $xr21 +# CHECK: encoding: [0xfa,0xd5,0x17,0x75] +xvpackev.d $xr26, $xr15, $xr21 + +# CHECK: xvpackod.b $xr19, $xr17, $xr17 +# CHECK: encoding: [0x33,0x46,0x18,0x75] +xvpackod.b $xr19, $xr17, $xr17 + +# CHECK: xvpackod.h $xr15, $xr8, $xr3 +# CHECK: encoding: [0x0f,0x8d,0x18,0x75] +xvpackod.h $xr15, $xr8, $xr3 + +# CHECK: xvpackod.w $xr13, $xr15, $xr12 +# CHECK: encoding: [0xed,0x31,0x19,0x75] +xvpackod.w $xr13, $xr15, $xr12 + +# CHECK: xvpackod.d $xr5, $xr3, $xr26 +# CHECK: encoding: [0x65,0xe8,0x19,0x75] +xvpackod.d $xr5, $xr3, $xr26 + +# CHECK: xvilvl.b $xr27, $xr9, $xr1 +# CHECK: encoding: [0x3b,0x05,0x1a,0x75] +xvilvl.b $xr27, $xr9, $xr1 + +# CHECK: xvilvl.h $xr29, $xr8, $xr1 +# CHECK: encoding: [0x1d,0x85,0x1a,0x75] +xvilvl.h $xr29, $xr8, $xr1 + +# CHECK: xvilvl.w $xr9, $xr8, $xr7 +# CHECK: encoding: [0x09,0x1d,0x1b,0x75] +xvilvl.w $xr9, $xr8, $xr7 + +# CHECK: xvilvl.d $xr25, $xr7, $xr18 +# CHECK: encoding: 
[0xf9,0xc8,0x1b,0x75] +xvilvl.d $xr25, $xr7, $xr18 + +# CHECK: xvilvh.b $xr7, $xr24, $xr26 +# CHECK: encoding: [0x07,0x6b,0x1c,0x75] +xvilvh.b $xr7, $xr24, $xr26 + +# CHECK: xvilvh.h $xr6, $xr20, $xr28 +# CHECK: encoding: [0x86,0xf2,0x1c,0x75] +xvilvh.h $xr6, $xr20, $xr28 + +# CHECK: xvilvh.w $xr13, $xr5, $xr12 +# CHECK: encoding: [0xad,0x30,0x1d,0x75] +xvilvh.w $xr13, $xr5, $xr12 + +# CHECK: xvilvh.d $xr1, $xr21, $xr31 +# CHECK: encoding: [0xa1,0xfe,0x1d,0x75] +xvilvh.d $xr1, $xr21, $xr31 + +# CHECK: xvpickev.b $xr17, $xr13, $xr31 +# CHECK: encoding: [0xb1,0x7d,0x1e,0x75] +xvpickev.b $xr17, $xr13, $xr31 + +# CHECK: xvpickev.h $xr4, $xr8, $xr14 +# CHECK: encoding: [0x04,0xb9,0x1e,0x75] +xvpickev.h $xr4, $xr8, $xr14 + +# CHECK: xvpickev.w $xr10, $xr8, $xr11 +# CHECK: encoding: [0x0a,0x2d,0x1f,0x75] +xvpickev.w $xr10, $xr8, $xr11 + +# CHECK: xvpickev.d $xr26, $xr20, $xr8 +# CHECK: encoding: [0x9a,0xa2,0x1f,0x75] +xvpickev.d $xr26, $xr20, $xr8 + +# CHECK: xvpickod.b $xr19, $xr21, $xr27 +# CHECK: encoding: [0xb3,0x6e,0x20,0x75] +xvpickod.b $xr19, $xr21, $xr27 + +# CHECK: xvpickod.h $xr28, $xr5, $xr19 +# CHECK: encoding: [0xbc,0xcc,0x20,0x75] +xvpickod.h $xr28, $xr5, $xr19 + +# CHECK: xvpickod.w $xr21, $xr18, $xr22 +# CHECK: encoding: [0x55,0x5a,0x21,0x75] +xvpickod.w $xr21, $xr18, $xr22 + +# CHECK: xvpickod.d $xr28, $xr7, $xr18 +# CHECK: encoding: [0xfc,0xc8,0x21,0x75] +xvpickod.d $xr28, $xr7, $xr18 + +# CHECK: xvreplve.b $xr6, $xr20, $r25 +# CHECK: encoding: [0x86,0x66,0x22,0x75] +xvreplve.b $xr6, $xr20, $r25 + +# CHECK: xvreplve.h $xr27, $xr7, $r14 +# CHECK: encoding: [0xfb,0xb8,0x22,0x75] +xvreplve.h $xr27, $xr7, $r14 + +# CHECK: xvreplve.w $xr1, $xr4, $r15 +# CHECK: encoding: [0x81,0x3c,0x23,0x75] +xvreplve.w $xr1, $xr4, $r15 + +# CHECK: xvreplve.d $xr12, $xr12, $r16 +# CHECK: encoding: [0x8c,0xc1,0x23,0x75] +xvreplve.d $xr12, $xr12, $r16 + +# CHECK: xvand.v $xr1, $xr3, $xr29 +# CHECK: encoding: [0x61,0x74,0x26,0x75] +xvand.v $xr1, $xr3, $xr29 + +# CHECK: xvor.v $xr23, $xr11, $xr20 +# CHECK: encoding: [0x77,0xd1,0x26,0x75] +xvor.v $xr23, $xr11, $xr20 + +# CHECK: xvxor.v $xr31, $xr1, $xr30 +# CHECK: encoding: [0x3f,0x78,0x27,0x75] +xvxor.v $xr31, $xr1, $xr30 + +# CHECK: xvnor.v $xr29, $xr26, $xr13 +# CHECK: encoding: [0x5d,0xb7,0x27,0x75] +xvnor.v $xr29, $xr26, $xr13 + +# CHECK: xvandn.v $xr9, $xr14, $xr0 +# CHECK: encoding: [0xc9,0x01,0x28,0x75] +xvandn.v $xr9, $xr14, $xr0 + +# CHECK: xvorn.v $xr25, $xr8, $xr12 +# CHECK: encoding: [0x19,0xb1,0x28,0x75] +xvorn.v $xr25, $xr8, $xr12 + +# CHECK: xvfrstp.b $xr21, $xr26, $xr26 +# CHECK: encoding: [0x55,0x6b,0x2b,0x75] +xvfrstp.b $xr21, $xr26, $xr26 + +# CHECK: xvfrstp.h $xr4, $xr17, $xr2 +# CHECK: encoding: [0x24,0x8a,0x2b,0x75] +xvfrstp.h $xr4, $xr17, $xr2 + +# CHECK: xvadd.q $xr29, $xr28, $xr17 +# CHECK: encoding: [0x9d,0x47,0x2d,0x75] +xvadd.q $xr29, $xr28, $xr17 + +# CHECK: xvsub.q $xr29, $xr2, $xr27 +# CHECK: encoding: [0x5d,0xec,0x2d,0x75] +xvsub.q $xr29, $xr2, $xr27 + +# CHECK: xvsigncov.b $xr18, $xr28, $xr7 +# CHECK: encoding: [0x92,0x1f,0x2e,0x75] +xvsigncov.b $xr18, $xr28, $xr7 + +# CHECK: xvsigncov.h $xr18, $xr12, $xr17 +# CHECK: encoding: [0x92,0xc5,0x2e,0x75] +xvsigncov.h $xr18, $xr12, $xr17 + +# CHECK: xvsigncov.w $xr26, $xr1, $xr0 +# CHECK: encoding: [0x3a,0x00,0x2f,0x75] +xvsigncov.w $xr26, $xr1, $xr0 + +# CHECK: xvsigncov.d $xr10, $xr27, $xr14 +# CHECK: encoding: [0x6a,0xbb,0x2f,0x75] +xvsigncov.d $xr10, $xr27, $xr14 + +# CHECK: xvfadd.s $xr15, $xr25, $xr8 +# CHECK: encoding: [0x2f,0xa3,0x30,0x75] +xvfadd.s $xr15, $xr25, $xr8 + 
+# CHECK: xvfadd.d $xr19, $xr6, $xr21 +# CHECK: encoding: [0xd3,0x54,0x31,0x75] +xvfadd.d $xr19, $xr6, $xr21 + +# CHECK: xvfsub.s $xr26, $xr6, $xr6 +# CHECK: encoding: [0xda,0x98,0x32,0x75] +xvfsub.s $xr26, $xr6, $xr6 + +# CHECK: xvfsub.d $xr9, $xr0, $xr21 +# CHECK: encoding: [0x09,0x54,0x33,0x75] +xvfsub.d $xr9, $xr0, $xr21 + +# CHECK: xvfmul.s $xr6, $xr8, $xr14 +# CHECK: encoding: [0x06,0xb9,0x38,0x75] +xvfmul.s $xr6, $xr8, $xr14 + +# CHECK: xvfmul.d $xr11, $xr21, $xr26 +# CHECK: encoding: [0xab,0x6a,0x39,0x75] +xvfmul.d $xr11, $xr21, $xr26 + +# CHECK: xvfdiv.s $xr11, $xr7, $xr6 +# CHECK: encoding: [0xeb,0x98,0x3a,0x75] +xvfdiv.s $xr11, $xr7, $xr6 + +# CHECK: xvfdiv.d $xr0, $xr26, $xr4 +# CHECK: encoding: [0x40,0x13,0x3b,0x75] +xvfdiv.d $xr0, $xr26, $xr4 + +# CHECK: xvfmax.s $xr7, $xr9, $xr4 +# CHECK: encoding: [0x27,0x91,0x3c,0x75] +xvfmax.s $xr7, $xr9, $xr4 + +# CHECK: xvfmax.d $xr0, $xr26, $xr20 +# CHECK: encoding: [0x40,0x53,0x3d,0x75] +xvfmax.d $xr0, $xr26, $xr20 + +# CHECK: xvfmin.s $xr8, $xr10, $xr26 +# CHECK: encoding: [0x48,0xe9,0x3e,0x75] +xvfmin.s $xr8, $xr10, $xr26 + +# CHECK: xvfmin.d $xr2, $xr22, $xr25 +# CHECK: encoding: [0xc2,0x66,0x3f,0x75] +xvfmin.d $xr2, $xr22, $xr25 + +# CHECK: xvfmaxa.s $xr17, $xr4, $xr1 +# CHECK: encoding: [0x91,0x84,0x40,0x75] +xvfmaxa.s $xr17, $xr4, $xr1 + +# CHECK: xvfmaxa.d $xr27, $xr23, $xr9 +# CHECK: encoding: [0xfb,0x26,0x41,0x75] +xvfmaxa.d $xr27, $xr23, $xr9 + +# CHECK: xvfmina.s $xr21, $xr3, $xr27 +# CHECK: encoding: [0x75,0xec,0x42,0x75] +xvfmina.s $xr21, $xr3, $xr27 + +# CHECK: xvfmina.d $xr7, $xr6, $xr4 +# CHECK: encoding: [0xc7,0x10,0x43,0x75] +xvfmina.d $xr7, $xr6, $xr4 + +# CHECK: xvfcvt.h.s $xr9, $xr10, $xr20 +# CHECK: encoding: [0x49,0x51,0x46,0x75] +xvfcvt.h.s $xr9, $xr10, $xr20 + +# CHECK: xvfcvt.s.d $xr5, $xr23, $xr21 +# CHECK: encoding: [0xe5,0xd6,0x46,0x75] +xvfcvt.s.d $xr5, $xr23, $xr21 + +# CHECK: xvffint.s.l $xr28, $xr24, $xr10 +# CHECK: encoding: [0x1c,0x2b,0x48,0x75] +xvffint.s.l $xr28, $xr24, $xr10 + +# CHECK: xvftint.w.d $xr6, $xr24, $xr1 +# CHECK: encoding: [0x06,0x87,0x49,0x75] +xvftint.w.d $xr6, $xr24, $xr1 + +# CHECK: xvftintrm.w.d $xr27, $xr26, $xr30 +# CHECK: encoding: [0x5b,0x7b,0x4a,0x75] +xvftintrm.w.d $xr27, $xr26, $xr30 + +# CHECK: xvftintrp.w.d $xr31, $xr12, $xr1 +# CHECK: encoding: [0x9f,0x85,0x4a,0x75] +xvftintrp.w.d $xr31, $xr12, $xr1 + +# CHECK: xvftintrz.w.d $xr11, $xr21, $xr21 +# CHECK: encoding: [0xab,0x56,0x4b,0x75] +xvftintrz.w.d $xr11, $xr21, $xr21 + +# CHECK: xvftintrne.w.d $xr15, $xr8, $xr28 +# CHECK: encoding: [0x0f,0xf1,0x4b,0x75] +xvftintrne.w.d $xr15, $xr8, $xr28 + +# CHECK: xvshuf.h $xr20, $xr21, $xr3 +# CHECK: encoding: [0xb4,0x8e,0x7a,0x75] +xvshuf.h $xr20, $xr21, $xr3 + +# CHECK: xvshuf.w $xr22, $xr2, $xr31 +# CHECK: encoding: [0x56,0x7c,0x7b,0x75] +xvshuf.w $xr22, $xr2, $xr31 + +# CHECK: xvshuf.d $xr15, $xr3, $xr26 +# CHECK: encoding: [0x6f,0xe8,0x7b,0x75] +xvshuf.d $xr15, $xr3, $xr26 + +# CHECK: xvperm.w $xr21, $xr23, $xr24 +# CHECK: encoding: [0xf5,0x62,0x7d,0x75] +xvperm.w $xr21, $xr23, $xr24 + +# CHECK: xvseqi.b $xr28, $xr5, 1 +# CHECK: encoding: [0xbc,0x04,0x80,0x76] +xvseqi.b $xr28, $xr5, 1 + +# CHECK: xvseqi.h $xr19, $xr9, -5 +# CHECK: encoding: [0x33,0xed,0x80,0x76] +xvseqi.h $xr19, $xr9, -5 + +# CHECK: xvseqi.w $xr8, $xr18, -2 +# CHECK: encoding: [0x48,0x7a,0x81,0x76] +xvseqi.w $xr8, $xr18, -2 + +# CHECK: xvseqi.d $xr2, $xr22, -4 +# CHECK: encoding: [0xc2,0xf2,0x81,0x76] +xvseqi.d $xr2, $xr22, -4 + +# CHECK: xvslei.b $xr4, $xr21, -10 +# CHECK: encoding: [0xa4,0x5a,0x82,0x76] 
+xvslei.b $xr4, $xr21, -10 + +# CHECK: xvslei.h $xr17, $xr20, -12 +# CHECK: encoding: [0x91,0xd2,0x82,0x76] +xvslei.h $xr17, $xr20, -12 + +# CHECK: xvslei.w $xr9, $xr20, -7 +# CHECK: encoding: [0x89,0x66,0x83,0x76] +xvslei.w $xr9, $xr20, -7 + +# CHECK: xvslei.d $xr19, $xr30, 10 +# CHECK: encoding: [0xd3,0xab,0x83,0x76] +xvslei.d $xr19, $xr30, 10 + +# CHECK: xvslei.bu $xr4, $xr26, 1 +# CHECK: encoding: [0x44,0x07,0x84,0x76] +xvslei.bu $xr4, $xr26, 1 + +# CHECK: xvslei.hu $xr11, $xr8, 4 +# CHECK: encoding: [0x0b,0x91,0x84,0x76] +xvslei.hu $xr11, $xr8, 4 + +# CHECK: xvslei.wu $xr18, $xr12, 31 +# CHECK: encoding: [0x92,0x7d,0x85,0x76] +xvslei.wu $xr18, $xr12, 31 + +# CHECK: xvslei.du $xr30, $xr7, 26 +# CHECK: encoding: [0xfe,0xe8,0x85,0x76] +xvslei.du $xr30, $xr7, 26 + +# CHECK: xvslti.b $xr11, $xr29, 2 +# CHECK: encoding: [0xab,0x0b,0x86,0x76] +xvslti.b $xr11, $xr29, 2 + +# CHECK: xvslti.h $xr6, $xr27, 8 +# CHECK: encoding: [0x66,0xa3,0x86,0x76] +xvslti.h $xr6, $xr27, 8 + +# CHECK: xvslti.w $xr21, $xr23, 1 +# CHECK: encoding: [0xf5,0x06,0x87,0x76] +xvslti.w $xr21, $xr23, 1 + +# CHECK: xvslti.d $xr18, $xr31, -5 +# CHECK: encoding: [0xf2,0xef,0x87,0x76] +xvslti.d $xr18, $xr31, -5 + +# CHECK: xvslti.bu $xr27, $xr12, 17 +# CHECK: encoding: [0x9b,0x45,0x88,0x76] +xvslti.bu $xr27, $xr12, 17 + +# CHECK: xvslti.hu $xr18, $xr14, 12 +# CHECK: encoding: [0xd2,0xb1,0x88,0x76] +xvslti.hu $xr18, $xr14, 12 + +# CHECK: xvslti.wu $xr4, $xr12, 14 +# CHECK: encoding: [0x84,0x39,0x89,0x76] +xvslti.wu $xr4, $xr12, 14 + +# CHECK: xvslti.du $xr26, $xr0, 24 +# CHECK: encoding: [0x1a,0xe0,0x89,0x76] +xvslti.du $xr26, $xr0, 24 + +# CHECK: xvaddi.bu $xr30, $xr2, 5 +# CHECK: encoding: [0x5e,0x14,0x8a,0x76] +xvaddi.bu $xr30, $xr2, 5 + +# CHECK: xvaddi.hu $xr22, $xr17, 9 +# CHECK: encoding: [0x36,0xa6,0x8a,0x76] +xvaddi.hu $xr22, $xr17, 9 + +# CHECK: xvaddi.wu $xr3, $xr26, 29 +# CHECK: encoding: [0x43,0x77,0x8b,0x76] +xvaddi.wu $xr3, $xr26, 29 + +# CHECK: xvaddi.du $xr0, $xr20, 30 +# CHECK: encoding: [0x80,0xfa,0x8b,0x76] +xvaddi.du $xr0, $xr20, 30 + +# CHECK: xvsubi.bu $xr0, $xr20, 7 +# CHECK: encoding: [0x80,0x1e,0x8c,0x76] +xvsubi.bu $xr0, $xr20, 7 + +# CHECK: xvsubi.hu $xr4, $xr24, 18 +# CHECK: encoding: [0x04,0xcb,0x8c,0x76] +xvsubi.hu $xr4, $xr24, 18 + +# CHECK: xvsubi.wu $xr1, $xr26, 26 +# CHECK: encoding: [0x41,0x6b,0x8d,0x76] +xvsubi.wu $xr1, $xr26, 26 + +# CHECK: xvsubi.du $xr9, $xr28, 8 +# CHECK: encoding: [0x89,0xa3,0x8d,0x76] +xvsubi.du $xr9, $xr28, 8 + +# CHECK: xvbsll.v $xr0, $xr21, 8 +# CHECK: encoding: [0xa0,0x22,0x8e,0x76] +xvbsll.v $xr0, $xr21, 8 + +# CHECK: xvbsrl.v $xr4, $xr8, 28 +# CHECK: encoding: [0x04,0xf1,0x8e,0x76] +xvbsrl.v $xr4, $xr8, 28 + +# CHECK: xvmaxi.b $xr8, $xr1, -14 +# CHECK: encoding: [0x28,0x48,0x90,0x76] +xvmaxi.b $xr8, $xr1, -14 + +# CHECK: xvmaxi.h $xr19, $xr12, -16 +# CHECK: encoding: [0x93,0xc1,0x90,0x76] +xvmaxi.h $xr19, $xr12, -16 + +# CHECK: xvmaxi.w $xr27, $xr1, 5 +# CHECK: encoding: [0x3b,0x14,0x91,0x76] +xvmaxi.w $xr27, $xr1, 5 + +# CHECK: xvmaxi.d $xr6, $xr7, 3 +# CHECK: encoding: [0xe6,0x8c,0x91,0x76] +xvmaxi.d $xr6, $xr7, 3 + +# CHECK: xvmini.b $xr10, $xr6, 5 +# CHECK: encoding: [0xca,0x14,0x92,0x76] +xvmini.b $xr10, $xr6, 5 + +# CHECK: xvmini.h $xr8, $xr18, -12 +# CHECK: encoding: [0x48,0xd2,0x92,0x76] +xvmini.h $xr8, $xr18, -12 + +# CHECK: xvmini.w $xr31, $xr13, -7 +# CHECK: encoding: [0xbf,0x65,0x93,0x76] +xvmini.w $xr31, $xr13, -7 + +# CHECK: xvmini.d $xr15, $xr27, 9 +# CHECK: encoding: [0x6f,0xa7,0x93,0x76] +xvmini.d $xr15, $xr27, 9 + +# CHECK: xvmaxi.bu $xr5, 
$xr17, 22 +# CHECK: encoding: [0x25,0x5a,0x94,0x76] +xvmaxi.bu $xr5, $xr17, 22 + +# CHECK: xvmaxi.hu $xr6, $xr3, 4 +# CHECK: encoding: [0x66,0x90,0x94,0x76] +xvmaxi.hu $xr6, $xr3, 4 + +# CHECK: xvmaxi.wu $xr26, $xr12, 17 +# CHECK: encoding: [0x9a,0x45,0x95,0x76] +xvmaxi.wu $xr26, $xr12, 17 + +# CHECK: xvmaxi.du $xr30, $xr11, 30 +# CHECK: encoding: [0x7e,0xf9,0x95,0x76] +xvmaxi.du $xr30, $xr11, 30 + +# CHECK: xvmini.bu $xr15, $xr8, 7 +# CHECK: encoding: [0x0f,0x1d,0x96,0x76] +xvmini.bu $xr15, $xr8, 7 + +# CHECK: xvmini.hu $xr18, $xr25, 1 +# CHECK: encoding: [0x32,0x87,0x96,0x76] +xvmini.hu $xr18, $xr25, 1 + +# CHECK: xvmini.wu $xr16, $xr28, 0 +# CHECK: encoding: [0x90,0x03,0x97,0x76] +xvmini.wu $xr16, $xr28, 0 + +# CHECK: xvmini.du $xr10, $xr19, 29 +# CHECK: encoding: [0x6a,0xf6,0x97,0x76] +xvmini.du $xr10, $xr19, 29 + +# CHECK: xvfrstpi.b $xr8, $xr25, 2 +# CHECK: encoding: [0x28,0x0b,0x9a,0x76] +xvfrstpi.b $xr8, $xr25, 2 + +# CHECK: xvfrstpi.h $xr28, $xr19, 26 +# CHECK: encoding: [0x7c,0xea,0x9a,0x76] +xvfrstpi.h $xr28, $xr19, 26 + +# CHECK: xvclo.b $xr2, $xr8 +# CHECK: encoding: [0x02,0x01,0x9c,0x76] +xvclo.b $xr2, $xr8 + +# CHECK: xvclo.h $xr10, $xr9 +# CHECK: encoding: [0x2a,0x05,0x9c,0x76] +xvclo.h $xr10, $xr9 + +# CHECK: xvclo.w $xr2, $xr31 +# CHECK: encoding: [0xe2,0x0b,0x9c,0x76] +xvclo.w $xr2, $xr31 + +# CHECK: xvclo.d $xr21, $xr24 +# CHECK: encoding: [0x15,0x0f,0x9c,0x76] +xvclo.d $xr21, $xr24 + +# CHECK: xvclz.b $xr13, $xr24 +# CHECK: encoding: [0x0d,0x13,0x9c,0x76] +xvclz.b $xr13, $xr24 + +# CHECK: xvclz.h $xr4, $xr31 +# CHECK: encoding: [0xe4,0x17,0x9c,0x76] +xvclz.h $xr4, $xr31 + +# CHECK: xvclz.w $xr7, $xr1 +# CHECK: encoding: [0x27,0x18,0x9c,0x76] +xvclz.w $xr7, $xr1 + +# CHECK: xvclz.d $xr13, $xr22 +# CHECK: encoding: [0xcd,0x1e,0x9c,0x76] +xvclz.d $xr13, $xr22 + +# CHECK: xvpcnt.b $xr9, $xr26 +# CHECK: encoding: [0x49,0x23,0x9c,0x76] +xvpcnt.b $xr9, $xr26 + +# CHECK: xvpcnt.h $xr10, $xr3 +# CHECK: encoding: [0x6a,0x24,0x9c,0x76] +xvpcnt.h $xr10, $xr3 + +# CHECK: xvpcnt.w $xr24, $xr7 +# CHECK: encoding: [0xf8,0x28,0x9c,0x76] +xvpcnt.w $xr24, $xr7 + +# CHECK: xvpcnt.d $xr5, $xr8 +# CHECK: encoding: [0x05,0x2d,0x9c,0x76] +xvpcnt.d $xr5, $xr8 + +# CHECK: xvneg.b $xr19, $xr11 +# CHECK: encoding: [0x73,0x31,0x9c,0x76] +xvneg.b $xr19, $xr11 + +# CHECK: xvneg.h $xr21, $xr21 +# CHECK: encoding: [0xb5,0x36,0x9c,0x76] +xvneg.h $xr21, $xr21 + +# CHECK: xvneg.w $xr19, $xr17 +# CHECK: encoding: [0x33,0x3a,0x9c,0x76] +xvneg.w $xr19, $xr17 + +# CHECK: xvneg.d $xr31, $xr29 +# CHECK: encoding: [0xbf,0x3f,0x9c,0x76] +xvneg.d $xr31, $xr29 + +# CHECK: xvmskltz.b $xr22, $xr27 +# CHECK: encoding: [0x76,0x43,0x9c,0x76] +xvmskltz.b $xr22, $xr27 + +# CHECK: xvmskltz.h $xr5, $xr0 +# CHECK: encoding: [0x05,0x44,0x9c,0x76] +xvmskltz.h $xr5, $xr0 + +# CHECK: xvmskltz.w $xr24, $xr28 +# CHECK: encoding: [0x98,0x4b,0x9c,0x76] +xvmskltz.w $xr24, $xr28 + +# CHECK: xvmskltz.d $xr25, $xr2 +# CHECK: encoding: [0x59,0x4c,0x9c,0x76] +xvmskltz.d $xr25, $xr2 + +# CHECK: xvmskgez.b $xr30, $xr30 +# CHECK: encoding: [0xde,0x53,0x9c,0x76] +xvmskgez.b $xr30, $xr30 + +# CHECK: xvmsknz.b $xr5, $xr20 +# CHECK: encoding: [0x85,0x62,0x9c,0x76] +xvmsknz.b $xr5, $xr20 + +# CHECK: xvseteqz.v $fcc1, $xr25 +# CHECK: encoding: [0x21,0x9b,0x9c,0x76] +xvseteqz.v $fcc1, $xr25 + +# CHECK: xvsetnez.v $fcc5, $xr13 +# CHECK: encoding: [0xa5,0x9d,0x9c,0x76] +xvsetnez.v $fcc5, $xr13 + +# CHECK: xvsetanyeqz.b $fcc0, $xr4 +# CHECK: encoding: [0x80,0xa0,0x9c,0x76] +xvsetanyeqz.b $fcc0, $xr4 + +# CHECK: xvsetanyeqz.h $fcc0, $xr31 +# CHECK: 
encoding: [0xe0,0xa7,0x9c,0x76] +xvsetanyeqz.h $fcc0, $xr31 + +# CHECK: xvsetanyeqz.w $fcc2, $xr30 +# CHECK: encoding: [0xc2,0xab,0x9c,0x76] +xvsetanyeqz.w $fcc2, $xr30 + +# CHECK: xvsetanyeqz.d $fcc3, $xr31 +# CHECK: encoding: [0xe3,0xaf,0x9c,0x76] +xvsetanyeqz.d $fcc3, $xr31 + +# CHECK: xvsetallnez.b $fcc1, $xr21 +# CHECK: encoding: [0xa1,0xb2,0x9c,0x76] +xvsetallnez.b $fcc1, $xr21 + +# CHECK: xvsetallnez.h $fcc0, $xr21 +# CHECK: encoding: [0xa0,0xb6,0x9c,0x76] +xvsetallnez.h $fcc0, $xr21 + +# CHECK: xvsetallnez.w $fcc0, $xr0 +# CHECK: encoding: [0x00,0xb8,0x9c,0x76] +xvsetallnez.w $fcc0, $xr0 + +# CHECK: xvsetallnez.d $fcc1, $xr31 +# CHECK: encoding: [0xe1,0xbf,0x9c,0x76] +xvsetallnez.d $fcc1, $xr31 + +# CHECK: xvflogb.s $xr21, $xr4 +# CHECK: encoding: [0x95,0xc4,0x9c,0x76] +xvflogb.s $xr21, $xr4 + +# CHECK: xvflogb.d $xr8, $xr20 +# CHECK: encoding: [0x88,0xca,0x9c,0x76] +xvflogb.d $xr8, $xr20 + +# CHECK: xvfclass.s $xr15, $xr29 +# CHECK: encoding: [0xaf,0xd7,0x9c,0x76] +xvfclass.s $xr15, $xr29 + +# CHECK: xvfclass.d $xr7, $xr14 +# CHECK: encoding: [0xc7,0xd9,0x9c,0x76] +xvfclass.d $xr7, $xr14 + +# CHECK: xvfsqrt.s $xr28, $xr19 +# CHECK: encoding: [0x7c,0xe6,0x9c,0x76] +xvfsqrt.s $xr28, $xr19 + +# CHECK: xvfsqrt.d $xr11, $xr31 +# CHECK: encoding: [0xeb,0xeb,0x9c,0x76] +xvfsqrt.d $xr11, $xr31 + +# CHECK: xvfrecip.s $xr6, $xr23 +# CHECK: encoding: [0xe6,0xf6,0x9c,0x76] +xvfrecip.s $xr6, $xr23 + +# CHECK: xvfrecip.d $xr0, $xr24 +# CHECK: encoding: [0x00,0xfb,0x9c,0x76] +xvfrecip.d $xr0, $xr24 + +# CHECK: xvfrsqrt.s $xr8, $xr16 +# CHECK: encoding: [0x08,0x06,0x9d,0x76] +xvfrsqrt.s $xr8, $xr16 + +# CHECK: xvfrsqrt.d $xr15, $xr17 +# CHECK: encoding: [0x2f,0x0a,0x9d,0x76] +xvfrsqrt.d $xr15, $xr17 + +# CHECK: xvfrint.s $xr4, $xr25 +# CHECK: encoding: [0x24,0x37,0x9d,0x76] +xvfrint.s $xr4, $xr25 + +# CHECK: xvfrint.d $xr1, $xr20 +# CHECK: encoding: [0x81,0x3a,0x9d,0x76] +xvfrint.d $xr1, $xr20 + +# CHECK: xvfrintrm.s $xr29, $xr16 +# CHECK: encoding: [0x1d,0x46,0x9d,0x76] +xvfrintrm.s $xr29, $xr16 + +# CHECK: xvfrintrm.d $xr4, $xr10 +# CHECK: encoding: [0x44,0x49,0x9d,0x76] +xvfrintrm.d $xr4, $xr10 + +# CHECK: xvfrintrp.s $xr13, $xr31 +# CHECK: encoding: [0xed,0x57,0x9d,0x76] +xvfrintrp.s $xr13, $xr31 + +# CHECK: xvfrintrp.d $xr20, $xr11 +# CHECK: encoding: [0x74,0x59,0x9d,0x76] +xvfrintrp.d $xr20, $xr11 + +# CHECK: xvfrintrz.s $xr27, $xr13 +# CHECK: encoding: [0xbb,0x65,0x9d,0x76] +xvfrintrz.s $xr27, $xr13 + +# CHECK: xvfrintrz.d $xr17, $xr25 +# CHECK: encoding: [0x31,0x6b,0x9d,0x76] +xvfrintrz.d $xr17, $xr25 + +# CHECK: xvfrintrne.s $xr14, $xr8 +# CHECK: encoding: [0x0e,0x75,0x9d,0x76] +xvfrintrne.s $xr14, $xr8 + +# CHECK: xvfrintrne.d $xr23, $xr26 +# CHECK: encoding: [0x57,0x7b,0x9d,0x76] +xvfrintrne.d $xr23, $xr26 + +# CHECK: xvfcvtl.s.h $xr4, $xr23 +# CHECK: encoding: [0xe4,0xea,0x9d,0x76] +xvfcvtl.s.h $xr4, $xr23 + +# CHECK: xvfcvth.s.h $xr14, $xr11 +# CHECK: encoding: [0x6e,0xed,0x9d,0x76] +xvfcvth.s.h $xr14, $xr11 + +# CHECK: xvfcvtl.d.s $xr26, $xr31 +# CHECK: encoding: [0xfa,0xf3,0x9d,0x76] +xvfcvtl.d.s $xr26, $xr31 + +# CHECK: xvfcvth.d.s $xr13, $xr28 +# CHECK: encoding: [0x8d,0xf7,0x9d,0x76] +xvfcvth.d.s $xr13, $xr28 + +# CHECK: xvffint.s.w $xr14, $xr28 +# CHECK: encoding: [0x8e,0x03,0x9e,0x76] +xvffint.s.w $xr14, $xr28 + +# CHECK: xvffint.s.wu $xr0, $xr8 +# CHECK: encoding: [0x00,0x05,0x9e,0x76] +xvffint.s.wu $xr0, $xr8 + +# CHECK: xvffint.d.l $xr5, $xr27 +# CHECK: encoding: [0x65,0x0b,0x9e,0x76] +xvffint.d.l $xr5, $xr27 + +# CHECK: xvffint.d.lu $xr29, $xr18 +# CHECK: encoding: 
[0x5d,0x0e,0x9e,0x76] +xvffint.d.lu $xr29, $xr18 + +# CHECK: xvffintl.d.w $xr9, $xr20 +# CHECK: encoding: [0x89,0x12,0x9e,0x76] +xvffintl.d.w $xr9, $xr20 + +# CHECK: xvffinth.d.w $xr11, $xr13 +# CHECK: encoding: [0xab,0x15,0x9e,0x76] +xvffinth.d.w $xr11, $xr13 + +# CHECK: xvftint.w.s $xr6, $xr4 +# CHECK: encoding: [0x86,0x30,0x9e,0x76] +xvftint.w.s $xr6, $xr4 + +# CHECK: xvftint.l.d $xr11, $xr22 +# CHECK: encoding: [0xcb,0x36,0x9e,0x76] +xvftint.l.d $xr11, $xr22 + +# CHECK: xvftintrm.w.s $xr20, $xr21 +# CHECK: encoding: [0xb4,0x3a,0x9e,0x76] +xvftintrm.w.s $xr20, $xr21 + +# CHECK: xvftintrm.l.d $xr28, $xr27 +# CHECK: encoding: [0x7c,0x3f,0x9e,0x76] +xvftintrm.l.d $xr28, $xr27 + +# CHECK: xvftintrp.w.s $xr14, $xr16 +# CHECK: encoding: [0x0e,0x42,0x9e,0x76] +xvftintrp.w.s $xr14, $xr16 + +# CHECK: xvftintrp.l.d $xr14, $xr25 +# CHECK: encoding: [0x2e,0x47,0x9e,0x76] +xvftintrp.l.d $xr14, $xr25 + +# CHECK: xvftintrz.w.s $xr5, $xr30 +# CHECK: encoding: [0xc5,0x4b,0x9e,0x76] +xvftintrz.w.s $xr5, $xr30 + +# CHECK: xvftintrz.l.d $xr11, $xr19 +# CHECK: encoding: [0x6b,0x4e,0x9e,0x76] +xvftintrz.l.d $xr11, $xr19 + +# CHECK: xvftintrne.w.s $xr27, $xr23 +# CHECK: encoding: [0xfb,0x52,0x9e,0x76] +xvftintrne.w.s $xr27, $xr23 + +# CHECK: xvftintrne.l.d $xr27, $xr13 +# CHECK: encoding: [0xbb,0x55,0x9e,0x76] +xvftintrne.l.d $xr27, $xr13 + +# CHECK: xvftint.wu.s $xr28, $xr2 +# CHECK: encoding: [0x5c,0x58,0x9e,0x76] +xvftint.wu.s $xr28, $xr2 + +# CHECK: xvftint.lu.d $xr27, $xr12 +# CHECK: encoding: [0x9b,0x5d,0x9e,0x76] +xvftint.lu.d $xr27, $xr12 + +# CHECK: xvftintrz.wu.s $xr21, $xr29 +# CHECK: encoding: [0xb5,0x73,0x9e,0x76] +xvftintrz.wu.s $xr21, $xr29 + +# CHECK: xvftintrz.lu.d $xr19, $xr2 +# CHECK: encoding: [0x53,0x74,0x9e,0x76] +xvftintrz.lu.d $xr19, $xr2 + +# CHECK: xvftintl.l.s $xr2, $xr18 +# CHECK: encoding: [0x42,0x82,0x9e,0x76] +xvftintl.l.s $xr2, $xr18 + +# CHECK: xvftinth.l.s $xr8, $xr30 +# CHECK: encoding: [0xc8,0x87,0x9e,0x76] +xvftinth.l.s $xr8, $xr30 + +# CHECK: xvftintrml.l.s $xr13, $xr17 +# CHECK: encoding: [0x2d,0x8a,0x9e,0x76] +xvftintrml.l.s $xr13, $xr17 + +# CHECK: xvftintrmh.l.s $xr30, $xr26 +# CHECK: encoding: [0x5e,0x8f,0x9e,0x76] +xvftintrmh.l.s $xr30, $xr26 + +# CHECK: xvftintrpl.l.s $xr11, $xr26 +# CHECK: encoding: [0x4b,0x93,0x9e,0x76] +xvftintrpl.l.s $xr11, $xr26 + +# CHECK: xvftintrph.l.s $xr30, $xr11 +# CHECK: encoding: [0x7e,0x95,0x9e,0x76] +xvftintrph.l.s $xr30, $xr11 + +# CHECK: xvftintrzl.l.s $xr25, $xr7 +# CHECK: encoding: [0xf9,0x98,0x9e,0x76] +xvftintrzl.l.s $xr25, $xr7 + +# CHECK: xvftintrzh.l.s $xr12, $xr5 +# CHECK: encoding: [0xac,0x9c,0x9e,0x76] +xvftintrzh.l.s $xr12, $xr5 + +# CHECK: xvftintrnel.l.s $xr8, $xr24 +# CHECK: encoding: [0x08,0xa3,0x9e,0x76] +xvftintrnel.l.s $xr8, $xr24 + +# CHECK: xvftintrneh.l.s $xr25, $xr24 +# CHECK: encoding: [0x19,0xa7,0x9e,0x76] +xvftintrneh.l.s $xr25, $xr24 + +# CHECK: xvexth.h.b $xr23, $xr5 +# CHECK: encoding: [0xb7,0xe0,0x9e,0x76] +xvexth.h.b $xr23, $xr5 + +# CHECK: xvexth.w.h $xr25, $xr6 +# CHECK: encoding: [0xd9,0xe4,0x9e,0x76] +xvexth.w.h $xr25, $xr6 + +# CHECK: xvexth.d.w $xr7, $xr27 +# CHECK: encoding: [0x67,0xeb,0x9e,0x76] +xvexth.d.w $xr7, $xr27 + +# CHECK: xvexth.q.d $xr14, $xr10 +# CHECK: encoding: [0x4e,0xed,0x9e,0x76] +xvexth.q.d $xr14, $xr10 + +# CHECK: xvexth.hu.bu $xr0, $xr21 +# CHECK: encoding: [0xa0,0xf2,0x9e,0x76] +xvexth.hu.bu $xr0, $xr21 + +# CHECK: xvexth.wu.hu $xr15, $xr22 +# CHECK: encoding: [0xcf,0xf6,0x9e,0x76] +xvexth.wu.hu $xr15, $xr22 + +# CHECK: xvexth.du.wu $xr24, $xr15 +# CHECK: encoding: 
[0xf8,0xf9,0x9e,0x76] +xvexth.du.wu $xr24, $xr15 + +# CHECK: xvexth.qu.du $xr4, $xr2 +# CHECK: encoding: [0x44,0xfc,0x9e,0x76] +xvexth.qu.du $xr4, $xr2 + +# CHECK: xvreplgr2vr.b $xr21, $r6 +# CHECK: encoding: [0xd5,0x00,0x9f,0x76] +xvreplgr2vr.b $xr21, $r6 + +# CHECK: xvreplgr2vr.h $xr11, $ra +# CHECK: encoding: [0x2b,0x04,0x9f,0x76] +xvreplgr2vr.h $xr11, $ra + +# CHECK: xvreplgr2vr.w $xr13, $r22 +# CHECK: encoding: [0xcd,0x0a,0x9f,0x76] +xvreplgr2vr.w $xr13, $r22 + +# CHECK: xvreplgr2vr.d $xr9, $r17 +# CHECK: encoding: [0x29,0x0e,0x9f,0x76] +xvreplgr2vr.d $xr9, $r17 + +# CHECK: vext2xv.h.b $xr18, $xr16 +# CHECK: encoding: [0x12,0x12,0x9f,0x76] +vext2xv.h.b $xr18, $xr16 + +# CHECK: vext2xv.w.b $xr3, $xr23 +# CHECK: encoding: [0xe3,0x16,0x9f,0x76] +vext2xv.w.b $xr3, $xr23 + +# CHECK: vext2xv.d.b $xr30, $xr16 +# CHECK: encoding: [0x1e,0x1a,0x9f,0x76] +vext2xv.d.b $xr30, $xr16 + +# CHECK: vext2xv.w.h $xr28, $xr23 +# CHECK: encoding: [0xfc,0x1e,0x9f,0x76] +vext2xv.w.h $xr28, $xr23 + +# CHECK: vext2xv.d.h $xr4, $xr1 +# CHECK: encoding: [0x24,0x20,0x9f,0x76] +vext2xv.d.h $xr4, $xr1 + +# CHECK: vext2xv.d.w $xr23, $xr12 +# CHECK: encoding: [0x97,0x25,0x9f,0x76] +vext2xv.d.w $xr23, $xr12 + +# CHECK: vext2xv.hu.bu $xr0, $xr5 +# CHECK: encoding: [0xa0,0x28,0x9f,0x76] +vext2xv.hu.bu $xr0, $xr5 + +# CHECK: vext2xv.wu.bu $xr1, $xr4 +# CHECK: encoding: [0x81,0x2c,0x9f,0x76] +vext2xv.wu.bu $xr1, $xr4 + +# CHECK: vext2xv.du.bu $xr17, $xr11 +# CHECK: encoding: [0x71,0x31,0x9f,0x76] +vext2xv.du.bu $xr17, $xr11 + +# CHECK: vext2xv.wu.hu $xr28, $xr0 +# CHECK: encoding: [0x1c,0x34,0x9f,0x76] +vext2xv.wu.hu $xr28, $xr0 + +# CHECK: vext2xv.du.hu $xr26, $xr25 +# CHECK: encoding: [0x3a,0x3b,0x9f,0x76] +vext2xv.du.hu $xr26, $xr25 + +# CHECK: vext2xv.du.wu $xr29, $xr14 +# CHECK: encoding: [0xdd,0x3d,0x9f,0x76] +vext2xv.du.wu $xr29, $xr14 + +# CHECK: xvhseli.d $xr3, $xr22, 13 +# CHECK: encoding: [0xc3,0xb6,0x9f,0x76] +xvhseli.d $xr3, $xr22, 13 + +# CHECK: xvrotri.b $xr0, $xr14, 2 +# CHECK: encoding: [0xc0,0x29,0xa0,0x76] +xvrotri.b $xr0, $xr14, 2 + +# CHECK: xvrotri.h $xr0, $xr7, 11 +# CHECK: encoding: [0xe0,0x6c,0xa0,0x76] +xvrotri.h $xr0, $xr7, 11 + +# CHECK: xvrotri.w $xr24, $xr1, 3 +# CHECK: encoding: [0x38,0x8c,0xa0,0x76] +xvrotri.w $xr24, $xr1, 3 + +# CHECK: xvrotri.d $xr31, $xr7, 16 +# CHECK: encoding: [0xff,0x40,0xa1,0x76] +xvrotri.d $xr31, $xr7, 16 + +# CHECK: xvsrlri.b $xr20, $xr19, 1 +# CHECK: encoding: [0x74,0x26,0xa4,0x76] +xvsrlri.b $xr20, $xr19, 1 + +# CHECK: xvsrlri.h $xr28, $xr1, 11 +# CHECK: encoding: [0x3c,0x6c,0xa4,0x76] +xvsrlri.h $xr28, $xr1, 11 + +# CHECK: xvsrlri.w $xr25, $xr2, 27 +# CHECK: encoding: [0x59,0xec,0xa4,0x76] +xvsrlri.w $xr25, $xr2, 27 + +# CHECK: xvsrlri.d $xr29, $xr9, 6 +# CHECK: encoding: [0x3d,0x19,0xa5,0x76] +xvsrlri.d $xr29, $xr9, 6 + +# CHECK: xvsrari.b $xr7, $xr5, 2 +# CHECK: encoding: [0xa7,0x28,0xa8,0x76] +xvsrari.b $xr7, $xr5, 2 + +# CHECK: xvsrari.h $xr0, $xr10, 9 +# CHECK: encoding: [0x40,0x65,0xa8,0x76] +xvsrari.h $xr0, $xr10, 9 + +# CHECK: xvsrari.w $xr17, $xr24, 10 +# CHECK: encoding: [0x11,0xab,0xa8,0x76] +xvsrari.w $xr17, $xr24, 10 + +# CHECK: xvsrari.d $xr7, $xr14, 38 +# CHECK: encoding: [0xc7,0x99,0xa9,0x76] +xvsrari.d $xr7, $xr14, 38 + +# CHECK: xvinsgr2vr.w $xr5, $r31, 1 +# CHECK: encoding: [0xe5,0xc7,0xeb,0x76] +xvinsgr2vr.w $xr5, $r31, 1 + +# CHECK: xvinsgr2vr.d $xr5, $r26, 1 +# CHECK: encoding: [0x45,0xe7,0xeb,0x76] +xvinsgr2vr.d $xr5, $r26, 1 + +# CHECK: xvpickve2gr.w $r18, $xr28, 2 +# CHECK: encoding: [0x92,0xcb,0xef,0x76] +xvpickve2gr.w $r18, $xr28, 2 + 
+# CHECK: xvpickve2gr.d $r20, $xr10, 1 +# CHECK: encoding: [0x54,0xe5,0xef,0x76] +xvpickve2gr.d $r20, $xr10, 1 + +# CHECK: xvpickve2gr.wu $r9, $xr12, 6 +# CHECK: encoding: [0x89,0xd9,0xf3,0x76] +xvpickve2gr.wu $r9, $xr12, 6 + +# CHECK: xvpickve2gr.du $r9, $xr13, 2 +# CHECK: encoding: [0xa9,0xe9,0xf3,0x76] +xvpickve2gr.du $r9, $xr13, 2 + +# CHECK: xvrepl128vei.b $xr1, $xr30, 5 +# CHECK: encoding: [0xc1,0x97,0xf7,0x76] +xvrepl128vei.b $xr1, $xr30, 5 + +# CHECK: xvrepl128vei.h $xr13, $xr13, 7 +# CHECK: encoding: [0xad,0xdd,0xf7,0x76] +xvrepl128vei.h $xr13, $xr13, 7 + +# CHECK: xvrepl128vei.w $xr7, $xr13, 2 +# CHECK: encoding: [0xa7,0xe9,0xf7,0x76] +xvrepl128vei.w $xr7, $xr13, 2 + +# CHECK: xvrepl128vei.d $xr2, $xr31, 1 +# CHECK: encoding: [0xe2,0xf7,0xf7,0x76] +xvrepl128vei.d $xr2, $xr31, 1 + +# CHECK: xvinsve0.w $xr4, $xr13, 3 +# CHECK: encoding: [0xa4,0xcd,0xff,0x76] +xvinsve0.w $xr4, $xr13, 3 + +# CHECK: xvinsve0.d $xr27, $xr25, 0 +# CHECK: encoding: [0x3b,0xe3,0xff,0x76] +xvinsve0.d $xr27, $xr25, 0 + +# CHECK: xvpickve.w $xr29, $xr19, 7 +# CHECK: encoding: [0x7d,0xde,0x03,0x77] +xvpickve.w $xr29, $xr19, 7 + +# CHECK: xvpickve.d $xr19, $xr16, 3 +# CHECK: encoding: [0x13,0xee,0x03,0x77] +xvpickve.d $xr19, $xr16, 3 + +# CHECK: xvreplve0.b $xr5, $xr5 +# CHECK: encoding: [0xa5,0x00,0x07,0x77] +xvreplve0.b $xr5, $xr5 + +# CHECK: xvreplve0.h $xr14, $xr24 +# CHECK: encoding: [0x0e,0x83,0x07,0x77] +xvreplve0.h $xr14, $xr24 + +# CHECK: xvreplve0.w $xr15, $xr13 +# CHECK: encoding: [0xaf,0xc1,0x07,0x77] +xvreplve0.w $xr15, $xr13 + +# CHECK: xvreplve0.d $xr20, $xr20 +# CHECK: encoding: [0x94,0xe2,0x07,0x77] +xvreplve0.d $xr20, $xr20 + +# CHECK: xvreplve0.q $xr5, $xr10 +# CHECK: encoding: [0x45,0xf1,0x07,0x77] +xvreplve0.q $xr5, $xr10 + +# CHECK: xvsllwil.h.b $xr31, $xr0, 3 +# CHECK: encoding: [0x1f,0x2c,0x08,0x77] +xvsllwil.h.b $xr31, $xr0, 3 + +# CHECK: xvsllwil.w.h $xr21, $xr24, 7 +# CHECK: encoding: [0x15,0x5f,0x08,0x77] +xvsllwil.w.h $xr21, $xr24, 7 + +# CHECK: xvsllwil.d.w $xr26, $xr24, 18 +# CHECK: encoding: [0x1a,0xcb,0x08,0x77] +xvsllwil.d.w $xr26, $xr24, 18 + +# CHECK: xvextl.q.d $xr5, $xr6 +# CHECK: encoding: [0xc5,0x00,0x09,0x77] +xvextl.q.d $xr5, $xr6 + +# CHECK: xvsllwil.hu.bu $xr13, $xr31, 6 +# CHECK: encoding: [0xed,0x3b,0x0c,0x77] +xvsllwil.hu.bu $xr13, $xr31, 6 + +# CHECK: xvsllwil.wu.hu $xr19, $xr20, 8 +# CHECK: encoding: [0x93,0x62,0x0c,0x77] +xvsllwil.wu.hu $xr19, $xr20, 8 + +# CHECK: xvsllwil.du.wu $xr14, $xr13, 2 +# CHECK: encoding: [0xae,0x89,0x0c,0x77] +xvsllwil.du.wu $xr14, $xr13, 2 + +# CHECK: xvextl.qu.du $xr10, $xr7 +# CHECK: encoding: [0xea,0x00,0x0d,0x77] +xvextl.qu.du $xr10, $xr7 + +# CHECK: xvbitclri.b $xr31, $xr21, 5 +# CHECK: encoding: [0xbf,0x36,0x10,0x77] +xvbitclri.b $xr31, $xr21, 5 + +# CHECK: xvbitclri.h $xr26, $xr4, 2 +# CHECK: encoding: [0x9a,0x48,0x10,0x77] +xvbitclri.h $xr26, $xr4, 2 + +# CHECK: xvbitclri.w $xr21, $xr25, 15 +# CHECK: encoding: [0x35,0xbf,0x10,0x77] +xvbitclri.w $xr21, $xr25, 15 + +# CHECK: xvbitclri.d $xr14, $xr0, 63 +# CHECK: encoding: [0x0e,0xfc,0x11,0x77] +xvbitclri.d $xr14, $xr0, 63 + +# CHECK: xvbitseti.b $xr16, $xr1, 5 +# CHECK: encoding: [0x30,0x34,0x14,0x77] +xvbitseti.b $xr16, $xr1, 5 + +# CHECK: xvbitseti.h $xr19, $xr30, 3 +# CHECK: encoding: [0xd3,0x4f,0x14,0x77] +xvbitseti.h $xr19, $xr30, 3 + +# CHECK: xvbitseti.w $xr18, $xr22, 27 +# CHECK: encoding: [0xd2,0xee,0x14,0x77] +xvbitseti.w $xr18, $xr22, 27 + +# CHECK: xvbitseti.d $xr15, $xr1, 40 +# CHECK: encoding: [0x2f,0xa0,0x15,0x77] +xvbitseti.d $xr15, $xr1, 40 + +# CHECK: 
xvbitrevi.b $xr23, $xr5, 0 +# CHECK: encoding: [0xb7,0x20,0x18,0x77] +xvbitrevi.b $xr23, $xr5, 0 + +# CHECK: xvbitrevi.h $xr5, $xr2, 7 +# CHECK: encoding: [0x45,0x5c,0x18,0x77] +xvbitrevi.h $xr5, $xr2, 7 + +# CHECK: xvbitrevi.w $xr23, $xr6, 12 +# CHECK: encoding: [0xd7,0xb0,0x18,0x77] +xvbitrevi.w $xr23, $xr6, 12 + +# CHECK: xvbitrevi.d $xr18, $xr14, 33 +# CHECK: encoding: [0xd2,0x85,0x19,0x77] +xvbitrevi.d $xr18, $xr14, 33 + +# CHECK: xvsat.b $xr27, $xr26, 4 +# CHECK: encoding: [0x5b,0x33,0x24,0x77] +xvsat.b $xr27, $xr26, 4 + +# CHECK: xvsat.h $xr4, $xr21, 5 +# CHECK: encoding: [0xa4,0x56,0x24,0x77] +xvsat.h $xr4, $xr21, 5 + +# CHECK: xvsat.w $xr29, $xr27, 10 +# CHECK: encoding: [0x7d,0xab,0x24,0x77] +xvsat.w $xr29, $xr27, 10 + +# CHECK: xvsat.d $xr14, $xr0, 60 +# CHECK: encoding: [0x0e,0xf0,0x25,0x77] +xvsat.d $xr14, $xr0, 60 + +# CHECK: xvsat.bu $xr31, $xr25, 3 +# CHECK: encoding: [0x3f,0x2f,0x28,0x77] +xvsat.bu $xr31, $xr25, 3 + +# CHECK: xvsat.hu $xr17, $xr4, 14 +# CHECK: encoding: [0x91,0x78,0x28,0x77] +xvsat.hu $xr17, $xr4, 14 + +# CHECK: xvsat.wu $xr17, $xr17, 4 +# CHECK: encoding: [0x31,0x92,0x28,0x77] +xvsat.wu $xr17, $xr17, 4 + +# CHECK: xvsat.du $xr11, $xr0, 43 +# CHECK: encoding: [0x0b,0xac,0x29,0x77] +xvsat.du $xr11, $xr0, 43 + +# CHECK: xvslli.b $xr24, $xr24, 2 +# CHECK: encoding: [0x18,0x2b,0x2c,0x77] +xvslli.b $xr24, $xr24, 2 + +# CHECK: xvslli.h $xr23, $xr9, 7 +# CHECK: encoding: [0x37,0x5d,0x2c,0x77] +xvslli.h $xr23, $xr9, 7 + +# CHECK: xvslli.w $xr13, $xr12, 16 +# CHECK: encoding: [0x8d,0xc1,0x2c,0x77] +xvslli.w $xr13, $xr12, 16 + +# CHECK: xvslli.d $xr11, $xr22, 17 +# CHECK: encoding: [0xcb,0x46,0x2d,0x77] +xvslli.d $xr11, $xr22, 17 + +# CHECK: xvsrli.b $xr9, $xr14, 1 +# CHECK: encoding: [0xc9,0x25,0x30,0x77] +xvsrli.b $xr9, $xr14, 1 + +# CHECK: xvsrli.h $xr22, $xr20, 15 +# CHECK: encoding: [0x96,0x7e,0x30,0x77] +xvsrli.h $xr22, $xr20, 15 + +# CHECK: xvsrli.w $xr5, $xr30, 20 +# CHECK: encoding: [0xc5,0xd3,0x30,0x77] +xvsrli.w $xr5, $xr30, 20 + +# CHECK: xvsrli.d $xr1, $xr16, 58 +# CHECK: encoding: [0x01,0xea,0x31,0x77] +xvsrli.d $xr1, $xr16, 58 + +# CHECK: xvsrai.b $xr18, $xr6, 2 +# CHECK: encoding: [0xd2,0x28,0x34,0x77] +xvsrai.b $xr18, $xr6, 2 + +# CHECK: xvsrai.h $xr21, $xr16, 12 +# CHECK: encoding: [0x15,0x72,0x34,0x77] +xvsrai.h $xr21, $xr16, 12 + +# CHECK: xvsrai.w $xr13, $xr17, 17 +# CHECK: encoding: [0x2d,0xc6,0x34,0x77] +xvsrai.w $xr13, $xr17, 17 + +# CHECK: xvsrai.d $xr3, $xr12, 51 +# CHECK: encoding: [0x83,0xcd,0x35,0x77] +xvsrai.d $xr3, $xr12, 51 + +# CHECK: xvsrlni.b.h $xr1, $xr7, 4 +# CHECK: encoding: [0xe1,0x50,0x40,0x77] +xvsrlni.b.h $xr1, $xr7, 4 + +# CHECK: xvsrlni.h.w $xr16, $xr21, 25 +# CHECK: encoding: [0xb0,0xe6,0x40,0x77] +xvsrlni.h.w $xr16, $xr21, 25 + +# CHECK: xvsrlni.w.d $xr13, $xr10, 48 +# CHECK: encoding: [0x4d,0xc1,0x41,0x77] +xvsrlni.w.d $xr13, $xr10, 48 + +# CHECK: xvsrlni.d.q $xr17, $xr12, 126 +# CHECK: encoding: [0x91,0xf9,0x43,0x77] +xvsrlni.d.q $xr17, $xr12, 126 + +# CHECK: xvsrlrni.b.h $xr17, $xr19, 15 +# CHECK: encoding: [0x71,0x7e,0x44,0x77] +xvsrlrni.b.h $xr17, $xr19, 15 + +# CHECK: xvsrlrni.h.w $xr21, $xr24, 14 +# CHECK: encoding: [0x15,0xbb,0x44,0x77] +xvsrlrni.h.w $xr21, $xr24, 14 + +# CHECK: xvsrlrni.w.d $xr20, $xr31, 3 +# CHECK: encoding: [0xf4,0x0f,0x45,0x77] +xvsrlrni.w.d $xr20, $xr31, 3 + +# CHECK: xvsrlrni.d.q $xr28, $xr24, 76 +# CHECK: encoding: [0x1c,0x33,0x47,0x77] +xvsrlrni.d.q $xr28, $xr24, 76 + +# CHECK: xvssrlni.b.h $xr26, $xr7, 7 +# CHECK: encoding: [0xfa,0x5c,0x48,0x77] +xvssrlni.b.h $xr26, $xr7, 7 + +# CHECK: 
xvssrlni.h.w $xr27, $xr28, 25 +# CHECK: encoding: [0x9b,0xe7,0x48,0x77] +xvssrlni.h.w $xr27, $xr28, 25 + +# CHECK: xvssrlni.w.d $xr4, $xr8, 16 +# CHECK: encoding: [0x04,0x41,0x49,0x77] +xvssrlni.w.d $xr4, $xr8, 16 + +# CHECK: xvssrlni.d.q $xr14, $xr17, 84 +# CHECK: encoding: [0x2e,0x52,0x4b,0x77] +xvssrlni.d.q $xr14, $xr17, 84 + +# CHECK: xvssrlni.bu.h $xr17, $xr6, 2 +# CHECK: encoding: [0xd1,0x48,0x4c,0x77] +xvssrlni.bu.h $xr17, $xr6, 2 + +# CHECK: xvssrlni.hu.w $xr6, $xr26, 3 +# CHECK: encoding: [0x46,0x8f,0x4c,0x77] +xvssrlni.hu.w $xr6, $xr26, 3 + +# CHECK: xvssrlni.wu.d $xr10, $xr18, 54 +# CHECK: encoding: [0x4a,0xda,0x4d,0x77] +xvssrlni.wu.d $xr10, $xr18, 54 + +# CHECK: xvssrlni.du.q $xr29, $xr26, 70 +# CHECK: encoding: [0x5d,0x1b,0x4f,0x77] +xvssrlni.du.q $xr29, $xr26, 70 + +# CHECK: xvssrlrni.b.h $xr6, $xr9, 6 +# CHECK: encoding: [0x26,0x59,0x50,0x77] +xvssrlrni.b.h $xr6, $xr9, 6 + +# CHECK: xvssrlrni.h.w $xr22, $xr8, 1 +# CHECK: encoding: [0x16,0x85,0x50,0x77] +xvssrlrni.h.w $xr22, $xr8, 1 + +# CHECK: xvssrlrni.w.d $xr28, $xr9, 28 +# CHECK: encoding: [0x3c,0x71,0x51,0x77] +xvssrlrni.w.d $xr28, $xr9, 28 + +# CHECK: xvssrlrni.d.q $xr20, $xr27, 104 +# CHECK: encoding: [0x74,0xa3,0x53,0x77] +xvssrlrni.d.q $xr20, $xr27, 104 + +# CHECK: xvssrlrni.bu.h $xr25, $xr4, 12 +# CHECK: encoding: [0x99,0x70,0x54,0x77] +xvssrlrni.bu.h $xr25, $xr4, 12 + +# CHECK: xvssrlrni.hu.w $xr21, $xr29, 5 +# CHECK: encoding: [0xb5,0x97,0x54,0x77] +xvssrlrni.hu.w $xr21, $xr29, 5 + +# CHECK: xvssrlrni.wu.d $xr1, $xr16, 54 +# CHECK: encoding: [0x01,0xda,0x55,0x77] +xvssrlrni.wu.d $xr1, $xr16, 54 + +# CHECK: xvssrlrni.du.q $xr29, $xr7, 25 +# CHECK: encoding: [0xfd,0x64,0x56,0x77] +xvssrlrni.du.q $xr29, $xr7, 25 + +# CHECK: xvsrani.b.h $xr16, $xr25, 4 +# CHECK: encoding: [0x30,0x53,0x58,0x77] +xvsrani.b.h $xr16, $xr25, 4 + +# CHECK: xvsrani.h.w $xr13, $xr10, 6 +# CHECK: encoding: [0x4d,0x99,0x58,0x77] +xvsrani.h.w $xr13, $xr10, 6 + +# CHECK: xvsrani.w.d $xr7, $xr21, 53 +# CHECK: encoding: [0xa7,0xd6,0x59,0x77] +xvsrani.w.d $xr7, $xr21, 53 + +# CHECK: xvsrani.d.q $xr26, $xr18, 55 +# CHECK: encoding: [0x5a,0xde,0x5a,0x77] +xvsrani.d.q $xr26, $xr18, 55 + +# CHECK: xvsrarni.b.h $xr17, $xr21, 11 +# CHECK: encoding: [0xb1,0x6e,0x5c,0x77] +xvsrarni.b.h $xr17, $xr21, 11 + +# CHECK: xvsrarni.h.w $xr15, $xr30, 2 +# CHECK: encoding: [0xcf,0x8b,0x5c,0x77] +xvsrarni.h.w $xr15, $xr30, 2 + +# CHECK: xvsrarni.w.d $xr23, $xr11, 31 +# CHECK: encoding: [0x77,0x7d,0x5d,0x77] +xvsrarni.w.d $xr23, $xr11, 31 + +# CHECK: xvsrarni.d.q $xr22, $xr25, 16 +# CHECK: encoding: [0x36,0x43,0x5e,0x77] +xvsrarni.d.q $xr22, $xr25, 16 + +# CHECK: xvssrani.b.h $xr19, $xr20, 10 +# CHECK: encoding: [0x93,0x6a,0x60,0x77] +xvssrani.b.h $xr19, $xr20, 10 + +# CHECK: xvssrani.h.w $xr25, $xr9, 22 +# CHECK: encoding: [0x39,0xd9,0x60,0x77] +xvssrani.h.w $xr25, $xr9, 22 + +# CHECK: xvssrani.w.d $xr23, $xr2, 7 +# CHECK: encoding: [0x57,0x1c,0x61,0x77] +xvssrani.w.d $xr23, $xr2, 7 + +# CHECK: xvssrani.d.q $xr6, $xr8, 127 +# CHECK: encoding: [0x06,0xfd,0x63,0x77] +xvssrani.d.q $xr6, $xr8, 127 + +# CHECK: xvssrani.bu.h $xr27, $xr14, 5 +# CHECK: encoding: [0xdb,0x55,0x64,0x77] +xvssrani.bu.h $xr27, $xr14, 5 + +# CHECK: xvssrani.hu.w $xr14, $xr1, 20 +# CHECK: encoding: [0x2e,0xd0,0x64,0x77] +xvssrani.hu.w $xr14, $xr1, 20 + +# CHECK: xvssrani.wu.d $xr10, $xr4, 59 +# CHECK: encoding: [0x8a,0xec,0x65,0x77] +xvssrani.wu.d $xr10, $xr4, 59 + +# CHECK: xvssrani.du.q $xr17, $xr1, 82 +# CHECK: encoding: [0x31,0x48,0x67,0x77] +xvssrani.du.q $xr17, $xr1, 82 + +# CHECK: 
xvssrarni.b.h $xr27, $xr18, 15 +# CHECK: encoding: [0x5b,0x7e,0x68,0x77] +xvssrarni.b.h $xr27, $xr18, 15 + +# CHECK: xvssrarni.h.w $xr16, $xr3, 15 +# CHECK: encoding: [0x70,0xbc,0x68,0x77] +xvssrarni.h.w $xr16, $xr3, 15 + +# CHECK: xvssrarni.w.d $xr26, $xr25, 18 +# CHECK: encoding: [0x3a,0x4b,0x69,0x77] +xvssrarni.w.d $xr26, $xr25, 18 + +# CHECK: xvssrarni.d.q $xr28, $xr25, 0 +# CHECK: encoding: [0x3c,0x03,0x6a,0x77] +xvssrarni.d.q $xr28, $xr25, 0 + +# CHECK: xvssrarni.bu.h $xr1, $xr12, 8 +# CHECK: encoding: [0x81,0x61,0x6c,0x77] +xvssrarni.bu.h $xr1, $xr12, 8 + +# CHECK: xvssrarni.hu.w $xr3, $xr27, 31 +# CHECK: encoding: [0x63,0xff,0x6c,0x77] +xvssrarni.hu.w $xr3, $xr27, 31 + +# CHECK: xvssrarni.wu.d $xr24, $xr27, 52 +# CHECK: encoding: [0x78,0xd3,0x6d,0x77] +xvssrarni.wu.d $xr24, $xr27, 52 + +# CHECK: xvssrarni.du.q $xr5, $xr3, 112 +# CHECK: encoding: [0x65,0xc0,0x6f,0x77] +xvssrarni.du.q $xr5, $xr3, 112 + +# CHECK: xvextrins.d $xr21, $xr25, 163 +# CHECK: encoding: [0x35,0x8f,0x82,0x77] +xvextrins.d $xr21, $xr25, 163 + +# CHECK: xvextrins.w $xr19, $xr17, 28 +# CHECK: encoding: [0x33,0x72,0x84,0x77] +xvextrins.w $xr19, $xr17, 28 + +# CHECK: xvextrins.h $xr30, $xr7, 79 +# CHECK: encoding: [0xfe,0x3c,0x89,0x77] +xvextrins.h $xr30, $xr7, 79 + +# CHECK: xvextrins.b $xr1, $xr31, 210 +# CHECK: encoding: [0xe1,0x4b,0x8f,0x77] +xvextrins.b $xr1, $xr31, 210 + +# CHECK: xvshuf4i.b $xr3, $xr22, 148 +# CHECK: encoding: [0xc3,0x52,0x92,0x77] +xvshuf4i.b $xr3, $xr22, 148 + +# CHECK: xvshuf4i.h $xr2, $xr22, 34 +# CHECK: encoding: [0xc2,0x8a,0x94,0x77] +xvshuf4i.h $xr2, $xr22, 34 + +# CHECK: xvshuf4i.w $xr31, $xr19, 165 +# CHECK: encoding: [0x7f,0x96,0x9a,0x77] +xvshuf4i.w $xr31, $xr19, 165 + +# CHECK: xvshuf4i.d $xr31, $xr17, 14 +# CHECK: encoding: [0x3f,0x3a,0x9c,0x77] +xvshuf4i.d $xr31, $xr17, 14 + +# CHECK: xvbitseli.b $xr27, $xr0, 80 +# CHECK: encoding: [0x1b,0x40,0xc5,0x77] +xvbitseli.b $xr27, $xr0, 80 + +# CHECK: xvandi.b $xr23, $xr2, 153 +# CHECK: encoding: [0x57,0x64,0xd2,0x77] +xvandi.b $xr23, $xr2, 153 + +# CHECK: xvori.b $xr27, $xr28, 188 +# CHECK: encoding: [0x9b,0xf3,0xd6,0x77] +xvori.b $xr27, $xr28, 188 + +# CHECK: xvxori.b $xr28, $xr1, 254 +# CHECK: encoding: [0x3c,0xf8,0xdb,0x77] +xvxori.b $xr28, $xr1, 254 + +# CHECK: xvnori.b $xr4, $xr2, 36 +# CHECK: encoding: [0x44,0x90,0xdc,0x77] +xvnori.b $xr4, $xr2, 36 + +# CHECK: xvldi $xr26, -2544 +# CHECK: encoding: [0x1a,0xc2,0xe2,0x77] +xvldi $xr26, -2544 + +# CHECK: xvpermi.w $xr22, $xr24, 168 +# CHECK: encoding: [0x16,0xa3,0xe6,0x77] +xvpermi.w $xr22, $xr24, 168 + +# CHECK: xvpermi.d $xr14, $xr31, 136 +# CHECK: encoding: [0xee,0x23,0xea,0x77] +xvpermi.d $xr14, $xr31, 136 + +# CHECK: xvpermi.q $xr28, $xr14, 211 +# CHECK: encoding: [0xdc,0x4d,0xef,0x77] +xvpermi.q $xr28, $xr14, 211 + +# CHECK: vaddwev.h.b $vr0, $vr31, $vr31 +# CHECK: encoding: [0xe0,0x7f,0x1e,0x70] +vaddwev.h.b $vr0, $vr31, $vr31 + +# CHECK: vaddwev.w.h $vr3, $vr4, $vr23 +# CHECK: encoding: [0x83,0xdc,0x1e,0x70] +vaddwev.w.h $vr3, $vr4, $vr23 + +# CHECK: vaddwev.d.w $vr30, $vr26, $vr11 +# CHECK: encoding: [0x5e,0x2f,0x1f,0x70] +vaddwev.d.w $vr30, $vr26, $vr11 + +# CHECK: vaddwev.q.d $vr25, $vr29, $vr13 +# CHECK: encoding: [0xb9,0xb7,0x1f,0x70] +vaddwev.q.d $vr25, $vr29, $vr13 + +# CHECK: vsubwev.h.b $vr11, $vr28, $vr1 +# CHECK: encoding: [0x8b,0x07,0x20,0x70] +vsubwev.h.b $vr11, $vr28, $vr1 + +# CHECK: vsubwev.w.h $vr9, $vr15, $vr5 +# CHECK: encoding: [0xe9,0x95,0x20,0x70] +vsubwev.w.h $vr9, $vr15, $vr5 + +# CHECK: vsubwev.d.w $vr17, $vr9, $vr10 +# CHECK: encoding: 
[0x31,0x29,0x21,0x70] +vsubwev.d.w $vr17, $vr9, $vr10 + +# CHECK: vsubwev.q.d $vr26, $vr18, $vr11 +# CHECK: encoding: [0x5a,0xae,0x21,0x70] +vsubwev.q.d $vr26, $vr18, $vr11 + +# CHECK: vaddwod.h.b $vr7, $vr11, $vr18 +# CHECK: encoding: [0x67,0x49,0x22,0x70] +vaddwod.h.b $vr7, $vr11, $vr18 + +# CHECK: vaddwod.w.h $vr0, $vr7, $vr12 +# CHECK: encoding: [0xe0,0xb0,0x22,0x70] +vaddwod.w.h $vr0, $vr7, $vr12 + +# CHECK: vaddwod.d.w $vr30, $vr27, $vr16 +# CHECK: encoding: [0x7e,0x43,0x23,0x70] +vaddwod.d.w $vr30, $vr27, $vr16 + +# CHECK: vaddwod.q.d $vr2, $vr20, $vr29 +# CHECK: encoding: [0x82,0xf6,0x23,0x70] +vaddwod.q.d $vr2, $vr20, $vr29 + +# CHECK: vsubwod.h.b $vr26, $vr7, $vr19 +# CHECK: encoding: [0xfa,0x4c,0x24,0x70] +vsubwod.h.b $vr26, $vr7, $vr19 + +# CHECK: vsubwod.w.h $vr19, $vr3, $vr11 +# CHECK: encoding: [0x73,0xac,0x24,0x70] +vsubwod.w.h $vr19, $vr3, $vr11 + +# CHECK: vsubwod.d.w $vr31, $vr28, $vr12 +# CHECK: encoding: [0x9f,0x33,0x25,0x70] +vsubwod.d.w $vr31, $vr28, $vr12 + +# CHECK: vsubwod.q.d $vr1, $vr24, $vr16 +# CHECK: encoding: [0x01,0xc3,0x25,0x70] +vsubwod.q.d $vr1, $vr24, $vr16 + +# CHECK: vaddwev.h.bu $vr3, $vr29, $vr29 +# CHECK: encoding: [0xa3,0x77,0x2e,0x70] +vaddwev.h.bu $vr3, $vr29, $vr29 + +# CHECK: vaddwev.w.hu $vr10, $vr15, $vr10 +# CHECK: encoding: [0xea,0xa9,0x2e,0x70] +vaddwev.w.hu $vr10, $vr15, $vr10 + +# CHECK: vaddwev.d.wu $vr24, $vr29, $vr4 +# CHECK: encoding: [0xb8,0x13,0x2f,0x70] +vaddwev.d.wu $vr24, $vr29, $vr4 + +# CHECK: vaddwev.q.du $vr17, $vr23, $vr0 +# CHECK: encoding: [0xf1,0x82,0x2f,0x70] +vaddwev.q.du $vr17, $vr23, $vr0 + +# CHECK: vsubwev.h.bu $vr25, $vr11, $vr20 +# CHECK: encoding: [0x79,0x51,0x30,0x70] +vsubwev.h.bu $vr25, $vr11, $vr20 + +# CHECK: vsubwev.w.hu $vr17, $vr15, $vr20 +# CHECK: encoding: [0xf1,0xd1,0x30,0x70] +vsubwev.w.hu $vr17, $vr15, $vr20 + +# CHECK: vsubwev.d.wu $vr10, $vr25, $vr5 +# CHECK: encoding: [0x2a,0x17,0x31,0x70] +vsubwev.d.wu $vr10, $vr25, $vr5 + +# CHECK: vsubwev.q.du $vr29, $vr3, $vr8 +# CHECK: encoding: [0x7d,0xa0,0x31,0x70] +vsubwev.q.du $vr29, $vr3, $vr8 + +# CHECK: vaddwod.h.bu $vr10, $vr0, $vr25 +# CHECK: encoding: [0x0a,0x64,0x32,0x70] +vaddwod.h.bu $vr10, $vr0, $vr25 + +# CHECK: vaddwod.w.hu $vr2, $vr27, $vr23 +# CHECK: encoding: [0x62,0xdf,0x32,0x70] +vaddwod.w.hu $vr2, $vr27, $vr23 + +# CHECK: vaddwod.d.wu $vr2, $vr0, $vr22 +# CHECK: encoding: [0x02,0x58,0x33,0x70] +vaddwod.d.wu $vr2, $vr0, $vr22 + +# CHECK: vaddwod.q.du $vr0, $vr2, $vr3 +# CHECK: encoding: [0x40,0x8c,0x33,0x70] +vaddwod.q.du $vr0, $vr2, $vr3 + +# CHECK: vsubwod.h.bu $vr14, $vr31, $vr3 +# CHECK: encoding: [0xee,0x0f,0x34,0x70] +vsubwod.h.bu $vr14, $vr31, $vr3 + +# CHECK: vsubwod.w.hu $vr21, $vr2, $vr7 +# CHECK: encoding: [0x55,0x9c,0x34,0x70] +vsubwod.w.hu $vr21, $vr2, $vr7 + +# CHECK: vsubwod.d.wu $vr11, $vr8, $vr18 +# CHECK: encoding: [0x0b,0x49,0x35,0x70] +vsubwod.d.wu $vr11, $vr8, $vr18 + +# CHECK: vsubwod.q.du $vr30, $vr20, $vr0 +# CHECK: encoding: [0x9e,0x82,0x35,0x70] +vsubwod.q.du $vr30, $vr20, $vr0 + +# CHECK: vaddwev.h.bu.b $vr19, $vr28, $vr17 +# CHECK: encoding: [0x93,0x47,0x3e,0x70] +vaddwev.h.bu.b $vr19, $vr28, $vr17 + +# CHECK: vaddwev.w.hu.h $vr14, $vr15, $vr30 +# CHECK: encoding: [0xee,0xf9,0x3e,0x70] +vaddwev.w.hu.h $vr14, $vr15, $vr30 + +# CHECK: vaddwev.d.wu.w $vr15, $vr7, $vr10 +# CHECK: encoding: [0xef,0x28,0x3f,0x70] +vaddwev.d.wu.w $vr15, $vr7, $vr10 + +# CHECK: vaddwev.q.du.d $vr19, $vr14, $vr30 +# CHECK: encoding: [0xd3,0xf9,0x3f,0x70] +vaddwev.q.du.d $vr19, $vr14, $vr30 + +# CHECK: vaddwod.h.bu.b $vr15, $vr18, $vr8 
+# CHECK: encoding: [0x4f,0x22,0x40,0x70] +vaddwod.h.bu.b $vr15, $vr18, $vr8 + +# CHECK: vaddwod.w.hu.h $vr19, $vr27, $vr6 +# CHECK: encoding: [0x73,0x9b,0x40,0x70] +vaddwod.w.hu.h $vr19, $vr27, $vr6 + +# CHECK: vaddwod.d.wu.w $vr7, $vr11, $vr15 +# CHECK: encoding: [0x67,0x3d,0x41,0x70] +vaddwod.d.wu.w $vr7, $vr11, $vr15 + +# CHECK: vaddwod.q.du.d $vr0, $vr0, $vr26 +# CHECK: encoding: [0x00,0xe8,0x41,0x70] +vaddwod.q.du.d $vr0, $vr0, $vr26 + +# CHECK: vmulwev.h.b $vr24, $vr19, $vr21 +# CHECK: encoding: [0x78,0x56,0x90,0x70] +vmulwev.h.b $vr24, $vr19, $vr21 + +# CHECK: vmulwev.w.h $vr13, $vr22, $vr18 +# CHECK: encoding: [0xcd,0xca,0x90,0x70] +vmulwev.w.h $vr13, $vr22, $vr18 + +# CHECK: vmulwev.d.w $vr24, $vr22, $vr13 +# CHECK: encoding: [0xd8,0x36,0x91,0x70] +vmulwev.d.w $vr24, $vr22, $vr13 + +# CHECK: vmulwev.q.d $vr4, $vr22, $vr30 +# CHECK: encoding: [0xc4,0xfa,0x91,0x70] +vmulwev.q.d $vr4, $vr22, $vr30 + +# CHECK: vmulwod.h.b $vr22, $vr26, $vr24 +# CHECK: encoding: [0x56,0x63,0x92,0x70] +vmulwod.h.b $vr22, $vr26, $vr24 + +# CHECK: vmulwod.w.h $vr17, $vr12, $vr4 +# CHECK: encoding: [0x91,0x91,0x92,0x70] +vmulwod.w.h $vr17, $vr12, $vr4 + +# CHECK: vmulwod.d.w $vr16, $vr15, $vr26 +# CHECK: encoding: [0xf0,0x69,0x93,0x70] +vmulwod.d.w $vr16, $vr15, $vr26 + +# CHECK: vmulwod.q.d $vr3, $vr16, $vr5 +# CHECK: encoding: [0x03,0x96,0x93,0x70] +vmulwod.q.d $vr3, $vr16, $vr5 + +# CHECK: vmulwev.h.bu $vr31, $vr19, $vr19 +# CHECK: encoding: [0x7f,0x4e,0x98,0x70] +vmulwev.h.bu $vr31, $vr19, $vr19 + +# CHECK: vmulwev.w.hu $vr22, $vr31, $vr5 +# CHECK: encoding: [0xf6,0x97,0x98,0x70] +vmulwev.w.hu $vr22, $vr31, $vr5 + +# CHECK: vmulwev.d.wu $vr0, $vr4, $vr30 +# CHECK: encoding: [0x80,0x78,0x99,0x70] +vmulwev.d.wu $vr0, $vr4, $vr30 + +# CHECK: vmulwev.q.du $vr31, $vr3, $vr20 +# CHECK: encoding: [0x7f,0xd0,0x99,0x70] +vmulwev.q.du $vr31, $vr3, $vr20 + +# CHECK: vmulwod.h.bu $vr25, $vr7, $vr13 +# CHECK: encoding: [0xf9,0x34,0x9a,0x70] +vmulwod.h.bu $vr25, $vr7, $vr13 + +# CHECK: vmulwod.w.hu $vr1, $vr12, $vr12 +# CHECK: encoding: [0x81,0xb1,0x9a,0x70] +vmulwod.w.hu $vr1, $vr12, $vr12 + +# CHECK: vmulwod.d.wu $vr15, $vr15, $vr30 +# CHECK: encoding: [0xef,0x79,0x9b,0x70] +vmulwod.d.wu $vr15, $vr15, $vr30 + +# CHECK: vmulwod.q.du $vr13, $vr28, $vr6 +# CHECK: encoding: [0x8d,0x9b,0x9b,0x70] +vmulwod.q.du $vr13, $vr28, $vr6 + +# CHECK: vmulwev.h.bu.b $vr8, $vr26, $vr3 +# CHECK: encoding: [0x48,0x0f,0xa0,0x70] +vmulwev.h.bu.b $vr8, $vr26, $vr3 + +# CHECK: vmulwev.w.hu.h $vr10, $vr25, $vr1 +# CHECK: encoding: [0x2a,0x87,0xa0,0x70] +vmulwev.w.hu.h $vr10, $vr25, $vr1 + +# CHECK: vmulwev.d.wu.w $vr9, $vr0, $vr19 +# CHECK: encoding: [0x09,0x4c,0xa1,0x70] +vmulwev.d.wu.w $vr9, $vr0, $vr19 + +# CHECK: vmulwev.q.du.d $vr13, $vr24, $vr23 +# CHECK: encoding: [0x0d,0xdf,0xa1,0x70] +vmulwev.q.du.d $vr13, $vr24, $vr23 + +# CHECK: vmulwod.h.bu.b $vr20, $vr0, $vr14 +# CHECK: encoding: [0x14,0x38,0xa2,0x70] +vmulwod.h.bu.b $vr20, $vr0, $vr14 + +# CHECK: vmulwod.w.hu.h $vr16, $vr20, $vr3 +# CHECK: encoding: [0x90,0x8e,0xa2,0x70] +vmulwod.w.hu.h $vr16, $vr20, $vr3 + +# CHECK: vmulwod.d.wu.w $vr5, $vr23, $vr27 +# CHECK: encoding: [0xe5,0x6e,0xa3,0x70] +vmulwod.d.wu.w $vr5, $vr23, $vr27 + +# CHECK: vmulwod.q.du.d $vr30, $vr30, $vr29 +# CHECK: encoding: [0xde,0xf7,0xa3,0x70] +vmulwod.q.du.d $vr30, $vr30, $vr29 + +# CHECK: vmaddwev.h.b $vr18, $vr0, $vr8 +# CHECK: encoding: [0x12,0x20,0xac,0x70] +vmaddwev.h.b $vr18, $vr0, $vr8 + +# CHECK: vmaddwev.w.h $vr29, $vr22, $vr7 +# CHECK: encoding: [0xdd,0x9e,0xac,0x70] +vmaddwev.w.h $vr29, 
$vr22, $vr7 + +# CHECK: vmaddwev.d.w $vr28, $vr13, $vr31 +# CHECK: encoding: [0xbc,0x7d,0xad,0x70] +vmaddwev.d.w $vr28, $vr13, $vr31 + +# CHECK: vmaddwev.q.d $vr5, $vr3, $vr13 +# CHECK: encoding: [0x65,0xb4,0xad,0x70] +vmaddwev.q.d $vr5, $vr3, $vr13 + +# CHECK: vmaddwod.h.b $vr4, $vr1, $vr9 +# CHECK: encoding: [0x24,0x24,0xae,0x70] +vmaddwod.h.b $vr4, $vr1, $vr9 + +# CHECK: vmaddwod.w.h $vr26, $vr9, $vr24 +# CHECK: encoding: [0x3a,0xe1,0xae,0x70] +vmaddwod.w.h $vr26, $vr9, $vr24 + +# CHECK: vmaddwod.d.w $vr30, $vr3, $vr13 +# CHECK: encoding: [0x7e,0x34,0xaf,0x70] +vmaddwod.d.w $vr30, $vr3, $vr13 + +# CHECK: vmaddwod.q.d $vr15, $vr13, $vr29 +# CHECK: encoding: [0xaf,0xf5,0xaf,0x70] +vmaddwod.q.d $vr15, $vr13, $vr29 + +# CHECK: vmaddwev.h.bu $vr24, $vr20, $vr5 +# CHECK: encoding: [0x98,0x16,0xb4,0x70] +vmaddwev.h.bu $vr24, $vr20, $vr5 + +# CHECK: vmaddwev.w.hu $vr3, $vr4, $vr8 +# CHECK: encoding: [0x83,0xa0,0xb4,0x70] +vmaddwev.w.hu $vr3, $vr4, $vr8 + +# CHECK: vmaddwev.d.wu $vr27, $vr19, $vr4 +# CHECK: encoding: [0x7b,0x12,0xb5,0x70] +vmaddwev.d.wu $vr27, $vr19, $vr4 + +# CHECK: vmaddwev.q.du $vr28, $vr27, $vr29 +# CHECK: encoding: [0x7c,0xf7,0xb5,0x70] +vmaddwev.q.du $vr28, $vr27, $vr29 + +# CHECK: vmaddwod.h.bu $vr5, $vr20, $vr26 +# CHECK: encoding: [0x85,0x6a,0xb6,0x70] +vmaddwod.h.bu $vr5, $vr20, $vr26 + +# CHECK: vmaddwod.w.hu $vr21, $vr30, $vr10 +# CHECK: encoding: [0xd5,0xab,0xb6,0x70] +vmaddwod.w.hu $vr21, $vr30, $vr10 + +# CHECK: vmaddwod.d.wu $vr7, $vr11, $vr20 +# CHECK: encoding: [0x67,0x51,0xb7,0x70] +vmaddwod.d.wu $vr7, $vr11, $vr20 + +# CHECK: vmaddwod.q.du $vr30, $vr18, $vr24 +# CHECK: encoding: [0x5e,0xe2,0xb7,0x70] +vmaddwod.q.du $vr30, $vr18, $vr24 + +# CHECK: vmaddwev.h.bu.b $vr4, $vr1, $vr4 +# CHECK: encoding: [0x24,0x10,0xbc,0x70] +vmaddwev.h.bu.b $vr4, $vr1, $vr4 + +# CHECK: vmaddwev.w.hu.h $vr25, $vr11, $vr15 +# CHECK: encoding: [0x79,0xbd,0xbc,0x70] +vmaddwev.w.hu.h $vr25, $vr11, $vr15 + +# CHECK: vmaddwev.d.wu.w $vr10, $vr16, $vr20 +# CHECK: encoding: [0x0a,0x52,0xbd,0x70] +vmaddwev.d.wu.w $vr10, $vr16, $vr20 + +# CHECK: vmaddwev.q.du.d $vr22, $vr20, $vr23 +# CHECK: encoding: [0x96,0xde,0xbd,0x70] +vmaddwev.q.du.d $vr22, $vr20, $vr23 + +# CHECK: vmaddwod.h.bu.b $vr31, $vr25, $vr27 +# CHECK: encoding: [0x3f,0x6f,0xbe,0x70] +vmaddwod.h.bu.b $vr31, $vr25, $vr27 + +# CHECK: vmaddwod.w.hu.h $vr8, $vr18, $vr24 +# CHECK: encoding: [0x48,0xe2,0xbe,0x70] +vmaddwod.w.hu.h $vr8, $vr18, $vr24 + +# CHECK: vmaddwod.d.wu.w $vr18, $vr13, $vr10 +# CHECK: encoding: [0xb2,0x29,0xbf,0x70] +vmaddwod.d.wu.w $vr18, $vr13, $vr10 + +# CHECK: vmaddwod.q.du.d $vr10, $vr5, $vr15 +# CHECK: encoding: [0xaa,0xbc,0xbf,0x70] +vmaddwod.q.du.d $vr10, $vr5, $vr15 + diff --git a/llvm/test/Object/LoongArch/elf-flags.yaml b/llvm/test/Object/LoongArch/elf-flags.yaml new file mode 100644 index 000000000000..b313d3b2b6c7 --- /dev/null +++ b/llvm/test/Object/LoongArch/elf-flags.yaml @@ -0,0 +1,22 @@ +# RUN: yaml2obj %s > %t +# RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=OBJ %s +# RUN: obj2yaml %t | FileCheck --check-prefix=YAML %s + +# OBJ: Flags [ (0x3) +# OBJ-NEXT: EF_LARCH_ABI_LP64 (0x3) +# OBJ-NEXT: ] + +# YAML: FileHeader: +# YAML-NEXT: Class: ELFCLASS64 +# YAML-NEXT: Data: ELFDATA2LSB +# YAML-NEXT: Type: ET_EXEC +# YAML-NEXT: Machine: EM_LOONGARCH +# YAML-NEXT: Flags: [ EF_LARCH_ABI_LP64 ] + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_LOONGARCH + Flags: [ EF_LARCH_ABI_LP64 ] diff --git 
a/llvm/test/Object/LoongArch/elf-loongarch64-rel.yaml b/llvm/test/Object/LoongArch/elf-loongarch64-rel.yaml new file mode 100644 index 000000000000..0bf2bedc5252 --- /dev/null +++ b/llvm/test/Object/LoongArch/elf-loongarch64-rel.yaml @@ -0,0 +1,193 @@ +# RUN: yaml2obj %s > %t +# RUN: llvm-readobj -r %t | FileCheck --check-prefix=OBJ %s +# RUN: obj2yaml %t | FileCheck --check-prefix=YAML %s + +# OBJ: Relocations [ +# OBJ-NEXT: Section (2) .rela.text { +# OBJ-NEXT: 0x40 R_LARCH_SOP_PUSH_PLT_PCREL foo 0x0 +# OBJ-NEXT: 0x40 R_LARCH_SOP_POP_32_S_0_10_10_16_S2 - 0x0 +# OBJ-NEXT: 0x44 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x800 +# OBJ-NEXT: 0x44 R_LARCH_SOP_PUSH_GPREL shared 0x0 +# OBJ-NEXT: 0x44 R_LARCH_SOP_ADD - 0x0 +# OBJ-NEXT: 0x44 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# OBJ-NEXT: 0x44 R_LARCH_SOP_SR - 0x0 +# OBJ-NEXT: 0x44 R_LARCH_SOP_POP_32_S_5_20 - 0x0 +# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x4 +# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_GPREL shared 0x0 +# OBJ-NEXT: 0x48 R_LARCH_SOP_ADD - 0x0 +# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_PCREL _GLOBAL_OFFSET_TABLE_ 0x804 +# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_GPREL shared 0x0 +# OBJ-NEXT: 0x48 R_LARCH_SOP_ADD - 0x0 +# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# OBJ-NEXT: 0x48 R_LARCH_SOP_SR - 0x0 +# OBJ-NEXT: 0x48 R_LARCH_SOP_PUSH_ABSOLUTE - 0xC +# OBJ-NEXT: 0x48 R_LARCH_SOP_SL - 0x0 +# OBJ-NEXT: 0x48 R_LARCH_SOP_SUB - 0x0 +# OBJ-NEXT: 0x48 R_LARCH_SOP_POP_32_S_10_12 - 0x0 +# OBJ-NEXT: 0x50 R_LARCH_SOP_PUSH_PLT_PCREL swap 0x0 +# OBJ-NEXT: 0x50 R_LARCH_SOP_POP_32_S_0_10_10_16_S2 - 0x0 +# OBJ-NEXT: } +# OBJ-NEXT: ] + +# YAML: Relocations: +# YAML-NEXT: - Offset: 0x40 +# YAML-NEXT: Symbol: foo +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PLT_PCREL +# YAML-NEXT: - Offset: 0x40 +# YAML-NEXT: Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 +# YAML-NEXT: - Offset: 0x44 +# YAML-NEXT: Symbol: _GLOBAL_OFFSET_TABLE_ +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PCREL +# YAML-NEXT: Addend: 2048 +# YAML-NEXT: - Offset: 0x44 +# YAML-NEXT: Symbol: shared +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_GPREL +# YAML-NEXT: - Offset: 0x44 +# YAML-NEXT: Type: R_LARCH_SOP_ADD +# YAML-NEXT: - Offset: 0x44 +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_ABSOLUTE +# YAML-NEXT: Addend: 12 +# YAML-NEXT: - Offset: 0x44 +# YAML-NEXT: Type: R_LARCH_SOP_SR +# YAML-NEXT: - Offset: 0x44 +# YAML-NEXT: Type: R_LARCH_SOP_POP_32_S_5_20 +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Symbol: _GLOBAL_OFFSET_TABLE_ +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PCREL +# YAML-NEXT: Addend: 4 +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Symbol: shared +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_GPREL +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Type: R_LARCH_SOP_ADD +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Symbol: _GLOBAL_OFFSET_TABLE_ +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PCREL +# YAML-NEXT: Addend: 2052 +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Symbol: shared +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_GPREL +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Type: R_LARCH_SOP_ADD +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_ABSOLUTE +# YAML-NEXT: Addend: 12 +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Type: R_LARCH_SOP_SR +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_ABSOLUTE +# YAML-NEXT: Addend: 12 +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Type: R_LARCH_SOP_SL +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Type: R_LARCH_SOP_SUB +# YAML-NEXT: - Offset: 0x48 +# YAML-NEXT: Type: R_LARCH_SOP_POP_32_S_10_12 +# YAML-NEXT: - Offset: 0x50 +# YAML-NEXT: Symbol: swap +# YAML-NEXT: Type: R_LARCH_SOP_PUSH_PLT_PCREL +# 
YAML-NEXT: - Offset: 0x50 +# YAML-NEXT: Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_LOONGARCH + Flags: [ EF_LARCH_ABI_LP64 ] +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x10 + - Name: .rela.text + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + AddressAlign: 0x8 + Info: .text + Relocations: + - Offset: 0x40 + Symbol: foo + Type: R_LARCH_SOP_PUSH_PLT_PCREL + - Offset: 0x40 + Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 + - Offset: 0x44 + Symbol: _GLOBAL_OFFSET_TABLE_ + Type: R_LARCH_SOP_PUSH_PCREL + Addend: 2048 + - Offset: 0x44 + Symbol: shared + Type: R_LARCH_SOP_PUSH_GPREL + - Offset: 0x44 + Type: R_LARCH_SOP_ADD + - Offset: 0x44 + Type: R_LARCH_SOP_PUSH_ABSOLUTE + Addend: 12 + - Offset: 0x44 + Type: R_LARCH_SOP_SR + - Offset: 0x44 + Type: R_LARCH_SOP_POP_32_S_5_20 + - Offset: 0x48 + Symbol: _GLOBAL_OFFSET_TABLE_ + Type: R_LARCH_SOP_PUSH_PCREL + Addend: 4 + - Offset: 0x48 + Symbol: shared + Type: R_LARCH_SOP_PUSH_GPREL + - Offset: 0x48 + Type: R_LARCH_SOP_ADD + - Offset: 0x48 + Symbol: _GLOBAL_OFFSET_TABLE_ + Type: R_LARCH_SOP_PUSH_PCREL + Addend: 2052 + - Offset: 0x48 + Symbol: shared + Type: R_LARCH_SOP_PUSH_GPREL + - Offset: 0x48 + Type: R_LARCH_SOP_ADD + - Offset: 0x48 + Type: R_LARCH_SOP_PUSH_ABSOLUTE + Addend: 12 + - Offset: 0x48 + Type: R_LARCH_SOP_SR + - Offset: 0x48 + Type: R_LARCH_SOP_PUSH_ABSOLUTE + Addend: 12 + - Offset: 0x48 + Type: R_LARCH_SOP_SL + - Offset: 0x48 + Type: R_LARCH_SOP_SUB + - Offset: 0x48 + Type: R_LARCH_SOP_POP_32_S_10_12 + - Offset: 0x50 + Symbol: swap + Type: R_LARCH_SOP_PUSH_PLT_PCREL + - Offset: 0x50 + Type: R_LARCH_SOP_POP_32_S_0_10_10_16_S2 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + AddressAlign: 0x10 + Content: '' + - Name: .bss + Type: SHT_NOBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + AddressAlign: 0x10 + +Symbols: + - Name: a.c + Type: STT_FILE + - Name: _GLOBAL_OFFSET_TABLE_ + - Name: foo + Type: STT_FUNC + Section: .text + Size: 0x24 + - Name: main + Type: STT_FUNC + Section: .text + Value: 0x28 + Size: 0x4C + - Name: shared + - Name: swap +... 
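The relocation chains above use LoongArch's original stack-based (SOP) relocation model: at a single offset, PUSH relocations place operands on an evaluation stack, the ADD/SUB/SL/SR operators combine the top entries, and a final POP writes the result into a bit field of the instruction word. The C++ sketch below shows how such a chain could be evaluated. It is illustrative only: the SopVM type and its helper names are invented for this example, the semantics are simplified from the LoongArch ELF ABI document, and the real evaluation (with full overflow and signedness rules) lives in the linker.

#include <cstdint>
#include <stdexcept>
#include <vector>

// Illustrative evaluator for a chain of LoongArch stack-op (SOP)
// relocations applied at one offset. Names and structure are invented
// for this sketch; they do not mirror an actual LLVM or linker API.
struct SopVM {
  std::vector<int64_t> Stack;

  void push(int64_t V) { Stack.push_back(V); }

  int64_t pop() {
    if (Stack.empty())
      throw std::runtime_error("SOP stack underflow");
    int64_t V = Stack.back();
    Stack.pop_back();
    return V;
  }

  // R_LARCH_SOP_PUSH_PCREL: push S + A - P (symbol, addend, place).
  void pushPCRel(int64_t S, int64_t A, int64_t P) { push(S + A - P); }

  // R_LARCH_SOP_PUSH_ABSOLUTE: push S + A (just A when no symbol is
  // given, as in the "Addend: 12" entries above).
  void pushAbsolute(int64_t S, int64_t A) { push(S + A); }

  // R_LARCH_SOP_ADD / SUB / SL / SR: pop the two top entries and push
  // the combined value (second-from-top is the left operand).
  void add() { int64_t B = pop(), A = pop(); push(A + B); }
  void sub() { int64_t B = pop(), A = pop(); push(A - B); }
  void shl() { int64_t B = pop(), A = pop(); push(A << B); }
  void shr() { int64_t B = pop(), A = pop(); push(A >> B); } // arithmetic shift

  // R_LARCH_SOP_POP_32_S_5_20: pop a value, range-check it as a signed
  // 20-bit quantity, and patch it into bits [24:5] of the instruction
  // word (a si20 field such as the one in lu12i.w).
  uint32_t pop32S5_20(uint32_t Insn) {
    int64_t V = pop();
    if (V < -(int64_t(1) << 19) || V >= (int64_t(1) << 19))
      throw std::runtime_error("SOP pop out of signed 20-bit range");
    return (Insn & ~(0xFFFFFu << 5)) | ((uint32_t(V) & 0xFFFFF) << 5);
  }
};

Read this way, the chain at offset 0x44 above appears to compute ((GOT + 0x800 - PC) + the GOT-relative offset of `shared`) >> 12 and pop the result into the signed 20-bit field at bits [24:5]; the 0x800 addend pre-rounds the value so that the low 12 bits, materialized by a separate instruction, carry the remainder.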
diff --git a/llvm/test/Object/LoongArch/lit.local.cfg b/llvm/test/Object/LoongArch/lit.local.cfg new file mode 100644 index 000000000000..2b5a4893e686 --- /dev/null +++ b/llvm/test/Object/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/lit.local.cfg b/llvm/test/Transforms/AtomicExpand/LoongArch/lit.local.cfg deleted file mode 100644 index 31902e060f32..000000000000 --- a/llvm/test/Transforms/AtomicExpand/LoongArch/lit.local.cfg +++ /dev/null @@ -1,5 +0,0 @@ -config.suffixes = ['.ll'] - -targets = set(config.root.targets_to_build.split()) -if not 'LoongArch' in targets: - config.unsupported = True diff --git a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll b/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll deleted file mode 100644 index 4acf9761421a..000000000000 --- a/llvm/test/Transforms/AtomicExpand/LoongArch/load-store-atomic.ll +++ /dev/null @@ -1,121 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S --mtriple=loongarch32 --atomic-expand %s | FileCheck %s --check-prefix=LA32 -; RUN: opt -S --mtriple=loongarch64 --atomic-expand %s | FileCheck %s --check-prefix=LA64 - -define i8 @load_acquire_i8(ptr %ptr) { -; LA32-LABEL: @load_acquire_i8( -; LA32-NEXT: [[VAL:%.*]] = load atomic i8, ptr [[PTR:%.*]] monotonic, align 1 -; LA32-NEXT: fence acquire -; LA32-NEXT: ret i8 [[VAL]] -; -; LA64-LABEL: @load_acquire_i8( -; LA64-NEXT: [[VAL:%.*]] = load atomic i8, ptr [[PTR:%.*]] monotonic, align 1 -; LA64-NEXT: fence acquire -; LA64-NEXT: ret i8 [[VAL]] -; - %val = load atomic i8, ptr %ptr acquire, align 1 - ret i8 %val -} - -define i16 @load_acquire_i16(ptr %ptr) { -; LA32-LABEL: @load_acquire_i16( -; LA32-NEXT: [[VAL:%.*]] = load atomic i16, ptr [[PTR:%.*]] monotonic, align 2 -; LA32-NEXT: fence acquire -; LA32-NEXT: ret i16 [[VAL]] -; -; LA64-LABEL: @load_acquire_i16( -; LA64-NEXT: [[VAL:%.*]] = load atomic i16, ptr [[PTR:%.*]] monotonic, align 2 -; LA64-NEXT: fence acquire -; LA64-NEXT: ret i16 [[VAL]] -; - %val = load atomic i16, ptr %ptr acquire, align 2 - ret i16 %val -} - -define i32 @load_acquire_i32(ptr %ptr) { -; LA32-LABEL: @load_acquire_i32( -; LA32-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[PTR:%.*]] monotonic, align 4 -; LA32-NEXT: fence acquire -; LA32-NEXT: ret i32 [[VAL]] -; -; LA64-LABEL: @load_acquire_i32( -; LA64-NEXT: [[VAL:%.*]] = load atomic i32, ptr [[PTR:%.*]] monotonic, align 4 -; LA64-NEXT: fence acquire -; LA64-NEXT: ret i32 [[VAL]] -; - %val = load atomic i32, ptr %ptr acquire, align 4 - ret i32 %val -} - -define i64 @load_acquire_i64(ptr %ptr) { -; LA32-LABEL: @load_acquire_i64( -; LA32-NEXT: [[TMP1:%.*]] = call i64 @__atomic_load_8(ptr [[PTR:%.*]], i32 2) -; LA32-NEXT: ret i64 [[TMP1]] -; -; LA64-LABEL: @load_acquire_i64( -; LA64-NEXT: [[VAL:%.*]] = load atomic i64, ptr [[PTR:%.*]] monotonic, align 8 -; LA64-NEXT: fence acquire -; LA64-NEXT: ret i64 [[VAL]] -; - %val = load atomic i64, ptr %ptr acquire, align 8 - ret i64 %val -} - -define void @store_release_i8(ptr %ptr, i8 signext %v) { -; LA32-LABEL: @store_release_i8( -; LA32-NEXT: fence release -; LA32-NEXT: store atomic i8 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 1 -; LA32-NEXT: ret void -; -; LA64-LABEL: @store_release_i8( -; LA64-NEXT: fence release -; LA64-NEXT: store atomic i8 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 1 -; LA64-NEXT: ret void -; - store atomic i8 %v, ptr %ptr release, align 1 - ret void 
-} - -define void @store_release_i16(ptr %ptr, i16 signext %v) { -; LA32-LABEL: @store_release_i16( -; LA32-NEXT: fence release -; LA32-NEXT: store atomic i16 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 2 -; LA32-NEXT: ret void -; -; LA64-LABEL: @store_release_i16( -; LA64-NEXT: fence release -; LA64-NEXT: store atomic i16 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 2 -; LA64-NEXT: ret void -; - store atomic i16 %v, ptr %ptr release, align 2 - ret void -} - -define void @store_release_i32(ptr %ptr, i32 signext %v) { -; LA32-LABEL: @store_release_i32( -; LA32-NEXT: fence release -; LA32-NEXT: store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 4 -; LA32-NEXT: ret void -; -; LA64-LABEL: @store_release_i32( -; LA64-NEXT: fence release -; LA64-NEXT: store atomic i32 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 4 -; LA64-NEXT: ret void -; - store atomic i32 %v, ptr %ptr release, align 4 - ret void -} - -define void @store_release_i64(ptr %ptr, i64 %v) { -; LA32-LABEL: @store_release_i64( -; LA32-NEXT: call void @__atomic_store_8(ptr [[PTR:%.*]], i64 [[V:%.*]], i32 3) -; LA32-NEXT: ret void -; -; LA64-LABEL: @store_release_i64( -; LA64-NEXT: fence release -; LA64-NEXT: store atomic i64 [[V:%.*]], ptr [[PTR:%.*]] monotonic, align 8 -; LA64-NEXT: ret void -; - store atomic i64 %v, ptr %ptr release, align 8 - ret void -} diff --git a/llvm/test/Transforms/SROA/pr57796.ll b/llvm/test/Transforms/SROA/pr57796.ll new file mode 100644 index 000000000000..7dac24af8699 --- /dev/null +++ b/llvm/test/Transforms/SROA/pr57796.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=sroa -S | FileCheck %s + +%struct.Value = type { %union.anon } +%union.anon = type { <32 x i8> } + +@A = dso_local global i64 0, align 8 + +; Make sure that sroa does not crash when dealing with an invalid vector +; element type. 
+define void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca [[STRUCT_VALUE:%.*]], align 32 +; CHECK-NEXT: call void @value_create(ptr sret([[STRUCT_VALUE]]) align 32 [[REF_TMP_I]]) +; CHECK-NEXT: [[CALL_I:%.*]] = call align 32 ptr @value_set_type(ptr align 32 [[REF_TMP_I]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[CALL_I]], align 32 +; CHECK-NEXT: [[REF_TMP_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <32 x i8> [[TMP0]], <32 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[REF_TMP_SROA_0_0_VEC_EXTRACT]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx [[TMP1]], i8 0) +; CHECK-NEXT: store x86_mmx [[TMP2]], ptr @A, align 8 +; CHECK-NEXT: ret void +; +entry: + %ref.tmp.i = alloca %struct.Value, align 32 + %ref.tmp = alloca %struct.Value, align 32 + call void @value_create(ptr sret(%struct.Value) align 32 %ref.tmp.i) + %call.i = call align 32 ptr @value_set_type(ptr align 32 %ref.tmp.i) + %0 = load <32 x i8>, ptr %call.i, align 32 + store <32 x i8> %0, ptr %ref.tmp, align 32 + %1 = load x86_mmx, ptr %ref.tmp, align 32 + %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0) + store x86_mmx %2, ptr @A, align 8 + ret void +} + +declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8 immarg) + +declare dso_local void @value_create(ptr sret(%struct.Value) align 32) + +declare dso_local align 32 ptr @value_set_type(ptr align 32) diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll index aed0d50e0fcc..6912e6a01b17 100644 --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ b/llvm/test/Transforms/SROA/vector-promotion.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,NOTMIPS64 +; RUN: opt --mtriple=mips64 < %s -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,MIPS64 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" %S1 = type { i64, [42 x float] } @@ -532,13 +533,20 @@ define <2 x float> @test11(<4 x i16> %x, i32 %y) { ; If there are multiple different element types for different vector types, ; pick the integer types. This isn't really important, but seems like the best ; heuristic for making a deterministic decision. 
-; CHECK-LABEL: @test11( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[Y:%.*]] to <2 x i16> -; CHECK-NEXT: [[A_SROA_0_4_VEC_EXPAND:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <4 x i32> -; CHECK-NEXT: [[A_SROA_0_4_VECBLEND:%.*]] = select <4 x i1> , <4 x i16> [[A_SROA_0_4_VEC_EXPAND]], <4 x i16> [[X:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[A_SROA_0_4_VECBLEND]] to <2 x float> -; CHECK-NEXT: ret <2 x float> [[TMP1]] +; NOTMIPS64-LABEL: @test11( +; NOTMIPS64-NEXT: entry: +; NOTMIPS64-NEXT: [[TMP0:%.*]] = bitcast i32 [[Y:%.*]] to <2 x i16> +; NOTMIPS64-NEXT: [[A_SROA_0_4_VEC_EXPAND:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <4 x i32> +; NOTMIPS64-NEXT: [[A_SROA_0_4_VECBLEND:%.*]] = select <4 x i1> , <4 x i16> [[A_SROA_0_4_VEC_EXPAND]], <4 x i16> [[X:%.*]] +; NOTMIPS64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[A_SROA_0_4_VECBLEND]] to <2 x float> +; NOTMIPS64-NEXT: ret <2 x float> [[TMP1]] +; +; MIPS64-LABEL: @test11( +; MIPS64-NEXT: entry: +; MIPS64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[X:%.*]] to <2 x i32> +; MIPS64-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Y:%.*]], i32 1 +; MIPS64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[A_SROA_0_4_VEC_INSERT]] to <2 x float> +; MIPS64-NEXT: ret <2 x float> [[TMP1]] ; entry: %a = alloca i64 @@ -565,3 +573,173 @@ define <4 x float> @test12(<4 x i32> %val) { ret <4 x float> %vec } + +define <2 x i64> @test13(i32 %a, i32 %b, i32 %c, i32 %d) { +; Ensure that we can promote an alloca that needs to be +; cast to a different vector type +; CHECK-LABEL: @test13( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca <2 x i64>, align 16 +; CHECK-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 16 +; CHECK-NEXT: [[X_SROA_0_4_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4 +; CHECK-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_SROA_IDX1]], align 4 +; CHECK-NEXT: [[X_SROA_0_8_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 8 +; CHECK-NEXT: store i32 [[C:%.*]], ptr [[X_SROA_0_8_SROA_IDX2]], align 8 +; CHECK-NEXT: [[X_SROA_0_12_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 12 +; CHECK-NEXT: store i32 [[D:%.*]], ptr [[X_SROA_0_12_SROA_IDX3]], align 4 +; CHECK-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <2 x i64>, ptr [[X_SROA_0]], align 16 +; CHECK-NEXT: ret <2 x i64> [[X_SROA_0_0_X_SROA_0_0_RESULT]] +; +entry: + %x = alloca [4 x i32] + store i32 %a, ptr %x + %x.tmp2 = getelementptr inbounds i32, ptr %x, i64 1 + store i32 %b, ptr %x.tmp2 + %x.tmp3 = getelementptr inbounds i32, ptr %x, i64 2 + store i32 %c, ptr %x.tmp3 + %x.tmp4 = getelementptr inbounds i32, ptr %x, i64 3 + store i32 %d, ptr %x.tmp4 + %result = load <2 x i64>, ptr %x + ret <2 x i64> %result +} + +define i32 @test14(<2 x i64> %x) { +; Ensure that we can promote an alloca that needs to be +; cast to a different vector type +; CHECK-LABEL: @test14( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x i64>, align 16 +; CHECK-NEXT: store <2 x i64> [[X:%.*]], ptr [[X_ADDR]], align 16 +; CHECK-NEXT: [[X_ADDR_0_A:%.*]] = load i32, ptr [[X_ADDR]], align 16 +; CHECK-NEXT: [[X_ADDR_4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR]], i64 4 +; CHECK-NEXT: [[X_ADDR_4_B:%.*]] = load i32, ptr [[X_ADDR_4_SROA_IDX]], align 4 +; CHECK-NEXT: [[X_ADDR_8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR]], i64 8 +; CHECK-NEXT: [[X_ADDR_8_C:%.*]] = load i32, ptr [[X_ADDR_8_SROA_IDX]], align 8 +; CHECK-NEXT: 
[[X_ADDR_12_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR]], i64 12 +; CHECK-NEXT: [[X_ADDR_12_D:%.*]] = load i32, ptr [[X_ADDR_12_SROA_IDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X_ADDR_0_A]], [[X_ADDR_4_B]] +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X_ADDR_8_C]], [[X_ADDR_12_D]] +; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], [[ADD1]] +; CHECK-NEXT: ret i32 [[ADD2]] +; +entry: + %x.addr = alloca <2 x i64>, align 16 + store <2 x i64> %x, <2 x i64>* %x.addr, align 16 + %x.cast = bitcast <2 x i64>* %x.addr to i32* + %a = load i32, ptr %x.cast + %x.tmp2 = getelementptr inbounds i32, ptr %x.cast, i64 1 + %b = load i32, ptr %x.tmp2 + %x.tmp3 = getelementptr inbounds i32, ptr %x.cast, i64 2 + %c = load i32, ptr %x.tmp3 + %x.tmp4 = getelementptr inbounds i32, ptr %x.cast, i64 3 + %d = load i32, ptr %x.tmp4 + %add = add i32 %a, %b + %add1 = add i32 %c, %d + %add2 = add i32 %add, %add1 + ret i32 %add2 +} + +define <4 x ptr> @test15(i32 %a, i32 %b, i32 %c, i32 %d) { +; NOTMIPS64-LABEL: @test15( +; NOTMIPS64-NEXT: entry: +; NOTMIPS64-NEXT: [[X_SROA_0:%.*]] = alloca <4 x ptr>, align 32 +; NOTMIPS64-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 32 +; NOTMIPS64-NEXT: [[X_SROA_0_4_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4 +; NOTMIPS64-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_SROA_IDX1]], align 4 +; NOTMIPS64-NEXT: [[X_SROA_0_8_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 8 +; NOTMIPS64-NEXT: store i32 [[C:%.*]], ptr [[X_SROA_0_8_SROA_IDX2]], align 8 +; NOTMIPS64-NEXT: [[X_SROA_0_12_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 12 +; NOTMIPS64-NEXT: store i32 [[D:%.*]], ptr [[X_SROA_0_12_SROA_IDX3]], align 4 +; NOTMIPS64-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <4 x ptr>, ptr [[X_SROA_0]], align 32 +; NOTMIPS64-NEXT: ret <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]] +; +; MIPS64-LABEL: @test15( +; MIPS64-NEXT: entry: +; MIPS64-NEXT: [[X_SROA_0_0_VEC_INSERT:%.*]] = insertelement <8 x i32> undef, i32 [[A:%.*]], i32 0 +; MIPS64-NEXT: [[X_SROA_0_4_VEC_INSERT:%.*]] = insertelement <8 x i32> [[X_SROA_0_0_VEC_INSERT]], i32 [[B:%.*]], i32 1 +; MIPS64-NEXT: [[X_SROA_0_8_VEC_INSERT:%.*]] = insertelement <8 x i32> [[X_SROA_0_4_VEC_INSERT]], i32 [[C:%.*]], i32 2 +; MIPS64-NEXT: [[X_SROA_0_12_VEC_INSERT:%.*]] = insertelement <8 x i32> [[X_SROA_0_8_VEC_INSERT]], i32 [[D:%.*]], i32 3 +; MIPS64-NEXT: [[TMP0:%.*]] = bitcast <8 x i32> [[X_SROA_0_12_VEC_INSERT]] to <4 x i64> +; MIPS64-NEXT: [[TMP1:%.*]] = inttoptr <4 x i64> [[TMP0]] to <4 x ptr> +; MIPS64-NEXT: ret <4 x ptr> [[TMP1]] +; +entry: + %x = alloca [4 x ptr] + store i32 %a, ptr %x + %x.tmp2 = getelementptr inbounds i32, ptr %x, i64 1 + store i32 %b, ptr %x.tmp2 + %x.tmp3 = getelementptr inbounds i32, ptr %x, i64 2 + store i32 %c, ptr %x.tmp3 + %x.tmp4 = getelementptr inbounds i32, ptr %x, i64 3 + store i32 %d, ptr %x.tmp4 + %result = load <4 x ptr>, ptr %x + ret <4 x ptr> %result +} + +define <4 x ptr> @test16(i64 %a, i64 %b, i64 %c, i64 %d) { +; CHECK-LABEL: @test16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[A:%.*]] to ptr +; CHECK-NEXT: [[X_SROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x ptr> undef, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[B:%.*]] to ptr +; CHECK-NEXT: [[X_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x ptr> [[X_SROA_0_0_VEC_INSERT]], ptr [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[C:%.*]] to ptr +; CHECK-NEXT: [[X_SROA_0_16_VEC_INSERT:%.*]] = insertelement <4 x ptr> 
[[X_SROA_0_8_VEC_INSERT]], ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[D:%.*]] to ptr +; CHECK-NEXT: [[X_SROA_0_24_VEC_INSERT:%.*]] = insertelement <4 x ptr> [[X_SROA_0_16_VEC_INSERT]], ptr [[TMP3]], i32 3 +; CHECK-NEXT: ret <4 x ptr> [[X_SROA_0_24_VEC_INSERT]] +; +entry: + %x = alloca [4 x ptr] + store i64 %a, ptr %x + %x.tmp2 = getelementptr inbounds i64, ptr %x, i64 1 + store i64 %b, ptr %x.tmp2 + %x.tmp3 = getelementptr inbounds i64, ptr %x, i64 2 + store i64 %c, ptr %x.tmp3 + %x.tmp4 = getelementptr inbounds i64, ptr %x, i64 3 + store i64 %d, ptr %x.tmp4 + %result = load <4 x ptr>, ptr %x + ret <4 x ptr> %result +} + +define <4 x ptr> @test17(i32 %a, i32 %b, i64 %c, i64 %d) { +; CHECK-LABEL: @test17( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca <4 x ptr>, align 32 +; CHECK-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 32 +; CHECK-NEXT: [[X_SROA_0_4_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4 +; CHECK-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_SROA_IDX1]], align 4 +; CHECK-NEXT: [[X_SROA_0_16_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 16 +; CHECK-NEXT: store i64 [[C:%.*]], ptr [[X_SROA_0_16_SROA_IDX2]], align 16 +; CHECK-NEXT: [[X_SROA_0_24_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 24 +; CHECK-NEXT: store i64 [[D:%.*]], ptr [[X_SROA_0_24_SROA_IDX3]], align 8 +; CHECK-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <4 x ptr>, ptr [[X_SROA_0]], align 32 +; CHECK-NEXT: ret <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]] +; +entry: + %x = alloca [4 x ptr] + store i32 %a, ptr %x + %x.tmp2 = getelementptr inbounds i32, ptr %x, i64 1 + store i32 %b, ptr %x.tmp2 + %x.tmp3 = getelementptr inbounds i64, ptr %x, i64 2 + store i64 %c, ptr %x.tmp3 + %x.tmp4 = getelementptr inbounds i64, ptr %x, i64 3 + store i64 %d, ptr %x.tmp4 + %result = load <4 x ptr>, ptr %x + ret <4 x ptr> %result +} + +; This used to hit an assert after commit de3445e0ef15c4. +; Added as regression test to verify that we handle this without crashing. +define i1 @test18() { +; CHECK-LABEL: @test18( +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca <2 x i64>, align 32 +; CHECK-NEXT: store <2 x i64> , ptr [[A_SROA_0]], align 32 +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i1, ptr [[A_SROA_0]], align 32 +; CHECK-NEXT: ret i1 [[A_SROA_0_0_A_SROA_0_0_L]] +; + %a = alloca <8 x i32> + store <2 x i64> , ptr %a + %l = load i1, ptr %a, align 1 + ret i1 %l +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll index 058245269f62..d9b2fc09d6d6 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll @@ -1,6 +1,6 @@ ; Check that we accept functions with '$' in the name. 
-; RUN: llc -mtriple=loongarch32-unknown-linux < %s | FileCheck %s +; RUN: llc -mtriple=loongarch64-unknown-linux < %s | FileCheck %s define hidden i32 @"_Z54bar$ompvariant$bar"() { entry: diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected index 692941b506b8..306b22ebb95d 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_function_name.ll.expected @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Check that we accept functions with '$' in the name. -; RUN: llc -mtriple=loongarch32-unknown-linux < %s | FileCheck %s +; RUN: llc -mtriple=loongarch64-unknown-linux < %s | FileCheck %s define hidden i32 @"_Z54bar$ompvariant$bar"() { ; CHECK-LABEL: _Z54bar$ompvariant$bar: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ori $a0, $zero, 2 -; CHECK-NEXT: jirl $zero, $ra, 0 +; CHECK-NEXT: ori $r4, $zero, 2 +; CHECK-NEXT: jr $ra entry: ret i32 2 } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll deleted file mode 100644 index 5de94d73d8d5..000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll +++ /dev/null @@ -1,63 +0,0 @@ -; RUN: llc --enable-machine-outliner --mtriple=loongarch32-unknown-linux < %s | FileCheck %s -@x = dso_local global i32 0, align 4 - -define dso_local i32 @check_boundaries() #0 { - %1 = alloca i32, align 4 - %2 = alloca i32, align 4 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i32, align 4 - store i32 0, ptr %1, align 4 - store i32 0, ptr %2, align 4 - %6 = load i32, ptr %2, align 4 - %7 = icmp ne i32 %6, 0 - br i1 %7, label %9, label %8 - - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - br label %10 - - store i32 1, ptr %4, align 4 - br label %10 - - %11 = load i32, ptr %2, align 4 - %12 = icmp ne i32 %11, 0 - br i1 %12, label %14, label %13 - - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - br label %15 - - store i32 1, ptr %4, align 4 - br label %15 - - ret i32 0 -} - -define dso_local i32 @main() #0 { - %1 = alloca i32, align 4 - %2 = alloca i32, align 4 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i32, align 4 - - store i32 0, ptr %1, align 4 - store i32 0, ptr @x, align 4 - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - store i32 1, ptr @x, align 4 - call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - ret i32 0 -} - -attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected deleted file mode 100644 index 5d091d7352b7..000000000000 --- 
a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.generated.expected +++ /dev/null @@ -1,148 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs -; RUN: llc --enable-machine-outliner --mtriple=loongarch32-unknown-linux < %s | FileCheck %s -@x = dso_local global i32 0, align 4 - -define dso_local i32 @check_boundaries() #0 { - %1 = alloca i32, align 4 - %2 = alloca i32, align 4 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i32, align 4 - store i32 0, ptr %1, align 4 - store i32 0, ptr %2, align 4 - %6 = load i32, ptr %2, align 4 - %7 = icmp ne i32 %6, 0 - br i1 %7, label %9, label %8 - - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - br label %10 - - store i32 1, ptr %4, align 4 - br label %10 - - %11 = load i32, ptr %2, align 4 - %12 = icmp ne i32 %11, 0 - br i1 %12, label %14, label %13 - - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - br label %15 - - store i32 1, ptr %4, align 4 - br label %15 - - ret i32 0 -} - -define dso_local i32 @main() #0 { - %1 = alloca i32, align 4 - %2 = alloca i32, align 4 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i32, align 4 - - store i32 0, ptr %1, align 4 - store i32 0, ptr @x, align 4 - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - store i32 1, ptr @x, align 4 - call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - ret i32 0 -} - -attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } -; CHECK-LABEL: check_boundaries: -; CHECK: # %bb.0: -; CHECK-NEXT: addi.w $sp, $sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; CHECK-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -4 -; CHECK-NEXT: .cfi_offset 22, -8 -; CHECK-NEXT: addi.w $fp, $sp, 32 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: st.w $zero, $fp, -16 -; CHECK-NEXT: st.w $zero, $fp, -12 -; CHECK-NEXT: bnez $zero, .LBB0_2 -; CHECK-NEXT: b .LBB0_1 -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: ld.w $a0, $fp, -16 -; CHECK-NEXT: bne $a0, $zero, .LBB0_5 -; CHECK-NEXT: b .LBB0_4 -; CHECK-NEXT: .LBB0_5: -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: .LBB0_6: -; CHECK-NEXT: move $a0, $zero -; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload -; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; CHECK-NEXT: addi.w $sp, $sp, 32 -; CHECK-NEXT: jirl $zero, $ra, 0 -; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: ori $a0, $zero, 2 -; CHECK-NEXT: st.w $a0, $fp, -20 -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: st.w $a0, $fp, -16 -; CHECK-NEXT: ori $a0, $zero, 3 -; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: ori $a0, $zero, 4 -; CHECK-NEXT: st.w $a0, $fp, -28 -; CHECK-NEXT: b .LBB0_3 -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: ori $a0, $zero, 2 -; CHECK-NEXT: st.w $a0, $fp, -20 -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: st.w $a0, $fp, -16 -; CHECK-NEXT: ori $a0, $zero, 3 -; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: ori $a0, $zero, 4 -; CHECK-NEXT: st.w $a0, $fp, -28 -; 
CHECK-NEXT: b .LBB0_6 -; -; CHECK-LABEL: main: -; CHECK: # %bb.0: -; CHECK-NEXT: addi.w $sp, $sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; CHECK-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -4 -; CHECK-NEXT: .cfi_offset 22, -8 -; CHECK-NEXT: addi.w $fp, $sp, 32 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: pcalau12i $a0, x -; CHECK-NEXT: addi.w $a0, $a0, x -; CHECK-NEXT: ori $a1, $zero, 1 -; CHECK-NEXT: st.w $a1, $a0, 0 -; CHECK-NEXT: st.w $zero, $fp, -12 -; CHECK-NEXT: st.w $a1, $fp, -16 -; CHECK-NEXT: ori $a0, $zero, 2 -; CHECK-NEXT: st.w $a0, $fp, -20 -; CHECK-NEXT: ori $a2, $zero, 3 -; CHECK-NEXT: st.w $a2, $fp, -24 -; CHECK-NEXT: ori $a3, $zero, 4 -; CHECK-NEXT: st.w $a3, $fp, -28 -; CHECK-NEXT: #APP -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: st.w $a0, $fp, -20 -; CHECK-NEXT: st.w $a1, $fp, -16 -; CHECK-NEXT: st.w $a2, $fp, -24 -; CHECK-NEXT: st.w $a3, $fp, -28 -; CHECK-NEXT: move $a0, $zero -; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload -; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; CHECK-NEXT: addi.w $sp, $sp, 32 -; CHECK-NEXT: jirl $zero, $ra, 0 diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected deleted file mode 100644 index d4edfe5e0854..000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/loongarch_generated_funcs.ll.nogenerated.expected +++ /dev/null @@ -1,147 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --enable-machine-outliner --mtriple=loongarch32-unknown-linux < %s | FileCheck %s -@x = dso_local global i32 0, align 4 - -define dso_local i32 @check_boundaries() #0 { -; CHECK-LABEL: check_boundaries: -; CHECK: # %bb.0: -; CHECK-NEXT: addi.w $sp, $sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; CHECK-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -4 -; CHECK-NEXT: .cfi_offset 22, -8 -; CHECK-NEXT: addi.w $fp, $sp, 32 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: st.w $zero, $fp, -16 -; CHECK-NEXT: st.w $zero, $fp, -12 -; CHECK-NEXT: bnez $zero, .LBB0_2 -; CHECK-NEXT: b .LBB0_1 -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: ld.w $a0, $fp, -16 -; CHECK-NEXT: bne $a0, $zero, .LBB0_5 -; CHECK-NEXT: b .LBB0_4 -; CHECK-NEXT: .LBB0_5: -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: .LBB0_6: -; CHECK-NEXT: move $a0, $zero -; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload -; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; CHECK-NEXT: addi.w $sp, $sp, 32 -; CHECK-NEXT: jirl $zero, $ra, 0 -; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: ori $a0, $zero, 2 -; CHECK-NEXT: st.w $a0, $fp, -20 -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: st.w $a0, $fp, -16 -; CHECK-NEXT: ori $a0, $zero, 3 -; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: ori $a0, $zero, 4 -; CHECK-NEXT: st.w $a0, $fp, -28 -; CHECK-NEXT: b .LBB0_3 -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: ori $a0, $zero, 2 -; CHECK-NEXT: st.w $a0, $fp, -20 -; CHECK-NEXT: ori $a0, $zero, 1 -; CHECK-NEXT: st.w $a0, $fp, -16 -; CHECK-NEXT: ori $a0, $zero, 3 -; CHECK-NEXT: st.w $a0, $fp, -24 -; CHECK-NEXT: ori $a0, $zero, 4 -; CHECK-NEXT: st.w $a0, $fp, -28 -; 
CHECK-NEXT: b .LBB0_6 - %1 = alloca i32, align 4 - %2 = alloca i32, align 4 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i32, align 4 - store i32 0, ptr %1, align 4 - store i32 0, ptr %2, align 4 - %6 = load i32, ptr %2, align 4 - %7 = icmp ne i32 %6, 0 - br i1 %7, label %9, label %8 - - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - br label %10 - - store i32 1, ptr %4, align 4 - br label %10 - - %11 = load i32, ptr %2, align 4 - %12 = icmp ne i32 %11, 0 - br i1 %12, label %14, label %13 - - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - br label %15 - - store i32 1, ptr %4, align 4 - br label %15 - - ret i32 0 -} - -define dso_local i32 @main() #0 { -; CHECK-LABEL: main: -; CHECK: # %bb.0: -; CHECK-NEXT: addi.w $sp, $sp, -32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; CHECK-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill -; CHECK-NEXT: .cfi_offset 1, -4 -; CHECK-NEXT: .cfi_offset 22, -8 -; CHECK-NEXT: addi.w $fp, $sp, 32 -; CHECK-NEXT: .cfi_def_cfa 22, 0 -; CHECK-NEXT: pcalau12i $a0, x -; CHECK-NEXT: addi.w $a0, $a0, x -; CHECK-NEXT: ori $a1, $zero, 1 -; CHECK-NEXT: st.w $a1, $a0, 0 -; CHECK-NEXT: st.w $zero, $fp, -12 -; CHECK-NEXT: st.w $a1, $fp, -16 -; CHECK-NEXT: ori $a0, $zero, 2 -; CHECK-NEXT: st.w $a0, $fp, -20 -; CHECK-NEXT: ori $a2, $zero, 3 -; CHECK-NEXT: st.w $a2, $fp, -24 -; CHECK-NEXT: ori $a3, $zero, 4 -; CHECK-NEXT: st.w $a3, $fp, -28 -; CHECK-NEXT: #APP -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: st.w $a0, $fp, -20 -; CHECK-NEXT: st.w $a1, $fp, -16 -; CHECK-NEXT: st.w $a2, $fp, -24 -; CHECK-NEXT: st.w $a3, $fp, -28 -; CHECK-NEXT: move $a0, $zero -; CHECK-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload -; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; CHECK-NEXT: addi.w $sp, $sp, 32 -; CHECK-NEXT: jirl $zero, $ra, 0 - %1 = alloca i32, align 4 - %2 = alloca i32, align 4 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i32, align 4 - - store i32 0, ptr %1, align 4 - store i32 0, ptr @x, align 4 - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - store i32 1, ptr @x, align 4 - call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() - store i32 1, ptr %2, align 4 - store i32 2, ptr %3, align 4 - store i32 3, ptr %4, align 4 - store i32 4, ptr %5, align 4 - ret i32 0 -} - -attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/loongarch_generated_funcs.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/loongarch_generated_funcs.test deleted file mode 100644 index 2209d3036864..000000000000 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/loongarch_generated_funcs.test +++ /dev/null @@ -1,17 +0,0 @@ -# REQUIRES: loongarch-registered-target - -## Check that generated functions are included. 
-# RUN: cp -f %S/Inputs/loongarch_generated_funcs.ll %t.ll && %update_llc_test_checks --include-generated-funcs %t.ll -# RUN: diff -u %t.ll %S/Inputs/loongarch_generated_funcs.ll.generated.expected - -## Check that running the script again does not change the result: -# RUN: %update_llc_test_checks --include-generated-funcs %t.ll -# RUN: diff -u %t.ll %S/Inputs/loongarch_generated_funcs.ll.generated.expected - -## Check that generated functions are not included. -# RUN: cp -f %S/Inputs/loongarch_generated_funcs.ll %t.ll && %update_llc_test_checks %t.ll -# RUN: diff -u %t.ll %S/Inputs/loongarch_generated_funcs.ll.nogenerated.expected - -## Check that running the script again does not change the result: -# RUN: %update_llc_test_checks %t.ll -# RUN: diff -u %t.ll %S/Inputs/loongarch_generated_funcs.ll.nogenerated.expected diff --git a/llvm/test/tools/llvm-profgen/lit.local.cfg b/llvm/test/tools/llvm-profgen/lit.local.cfg index 197150e220e8..0ca12783a2eb 100644 --- a/llvm/test/tools/llvm-profgen/lit.local.cfg +++ b/llvm/test/tools/llvm-profgen/lit.local.cfg @@ -3,5 +3,5 @@ import lit.util config.suffixes = ['.test', '.ll', '.s', '.yaml'] -if not 'X86' in config.root.targets: +if not ('X86' in config.root.targets and 'LoongArch' in config.root.targets): config.unsupported = True diff --git a/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test b/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test deleted file mode 100644 index b6627364054b..000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/loongarch-eflags.test +++ /dev/null @@ -1,64 +0,0 @@ -## Check llvm-readobj is able to decode all possible LoongArch e_flags field values. - -# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DFLAG=LP64S -# RUN: llvm-readobj -h %t-lp64s | FileCheck --check-prefix=READOBJ-LP64S %s -# RUN: llvm-readelf -h %t-lp64s | FileCheck --check-prefix=READELF-LP64S --match-full-lines %s - -# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DFLAG=LP64F -# RUN: llvm-readobj -h %t-lp64f | FileCheck --check-prefix=READOBJ-LP64F %s -# RUN: llvm-readelf -h %t-lp64f | FileCheck --check-prefix=READELF-LP64F --match-full-lines %s - -# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DFLAG=LP64D -# RUN: llvm-readobj -h %t-lp64d | FileCheck --check-prefix=READOBJ-LP64D %s -# RUN: llvm-readelf -h %t-lp64d | FileCheck --check-prefix=READELF-LP64D --match-full-lines %s - -# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DFLAG=ILP32S -# RUN: llvm-readobj -h %t-ilp32s | FileCheck --check-prefix=READOBJ-ILP32S %s -# RUN: llvm-readelf -h %t-ilp32s | FileCheck --check-prefix=READELF-ILP32S --match-full-lines %s - -# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DFLAG=ILP32F -# RUN: llvm-readobj -h %t-ilp32f | FileCheck --check-prefix=READOBJ-ILP32F %s -# RUN: llvm-readelf -h %t-ilp32f | FileCheck --check-prefix=READELF-ILP32F --match-full-lines %s - -# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DFLAG=ILP32D -# RUN: llvm-readobj -h %t-ilp32d | FileCheck --check-prefix=READOBJ-ILP32D %s -# RUN: llvm-readelf -h %t-ilp32d | FileCheck --check-prefix=READELF-ILP32D --match-full-lines %s - -# READOBJ-LP64S: Flags [ (0x1) -# READOBJ-LP64S-NEXT: EF_LOONGARCH_BASE_ABI_LP64S (0x1) -# READOBJ-LP64S-NEXT: ] - -# READOBJ-LP64F: Flags [ (0x2) -# READOBJ-LP64F-NEXT: EF_LOONGARCH_BASE_ABI_LP64F (0x2) -# READOBJ-LP64F-NEXT: ] - -# READOBJ-LP64D: Flags [ (0x3) -# READOBJ-LP64D-NEXT: EF_LOONGARCH_BASE_ABI_LP64D (0x3) -# READOBJ-LP64D-NEXT: ] - -# READOBJ-ILP32S: Flags [ (0x5) -# READOBJ-ILP32S-NEXT: EF_LOONGARCH_BASE_ABI_ILP32S (0x5) -# READOBJ-ILP32S-NEXT: ] - -# READOBJ-ILP32F: Flags 
[ (0x6) -# READOBJ-ILP32F-NEXT: EF_LOONGARCH_BASE_ABI_ILP32F (0x6) -# READOBJ-ILP32F-NEXT: ] - -# READOBJ-ILP32D: Flags [ (0x7) -# READOBJ-ILP32D-NEXT: EF_LOONGARCH_BASE_ABI_ILP32D (0x7) -# READOBJ-ILP32D-NEXT: ] - -# READELF-LP64S: Flags: 0x1, LP64, SOFT-FLOAT -# READELF-LP64F: Flags: 0x2, LP64, SINGLE-FLOAT -# READELF-LP64D: Flags: 0x3, LP64, DOUBLE-FLOAT -# READELF-ILP32S: Flags: 0x5, ILP32, SOFT-FLOAT -# READELF-ILP32F: Flags: 0x6, ILP32, SINGLE-FLOAT -# READELF-ILP32D: Flags: 0x7, ILP32, DOUBLE-FLOAT - ---- !ELF -FileHeader: - Class: ELFCLASS[[CLASS]] - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_LOONGARCH - Flags: [ EF_LOONGARCH_BASE_ABI_[[FLAG]] ] diff --git a/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml b/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml deleted file mode 100644 index e4c4c292e0da..000000000000 --- a/llvm/test/tools/obj2yaml/ELF/loongarch-eflags.yaml +++ /dev/null @@ -1,29 +0,0 @@ -## Check obj2yaml is able to decode all possible LoongArch e_flags field values. - -# RUN: yaml2obj %s -o %t-lp64s -DCLASS=64 -DFLAG=LP64S -# RUN: obj2yaml %t-lp64s | FileCheck -DFLAG=LP64S %s - -# RUN: yaml2obj %s -o %t-lp64f -DCLASS=64 -DFLAG=LP64F -# RUN: obj2yaml %t-lp64f | FileCheck -DFLAG=LP64F %s - -# RUN: yaml2obj %s -o %t-lp64d -DCLASS=64 -DFLAG=LP64D -# RUN: obj2yaml %t-lp64d | FileCheck -DFLAG=LP64D %s - -# RUN: yaml2obj %s -o %t-ilp32s -DCLASS=32 -DFLAG=ILP32S -# RUN: obj2yaml %t-ilp32s | FileCheck -DFLAG=ILP32S %s - -# RUN: yaml2obj %s -o %t-ilp32f -DCLASS=32 -DFLAG=ILP32F -# RUN: obj2yaml %t-ilp32f | FileCheck -DFLAG=ILP32F %s - -# RUN: yaml2obj %s -o %t-ilp32d -DCLASS=32 -DFLAG=ILP32D -# RUN: obj2yaml %t-ilp32d | FileCheck -DFLAG=ILP32D %s - -# CHECK: Flags: [ EF_LOONGARCH_BASE_ABI_[[FLAG]] ] - ---- !ELF -FileHeader: - Class: ELFCLASS[[CLASS]] - Data: ELFDATA2LSB - Type: ET_EXEC - Machine: EM_LOONGARCH - Flags: [ EF_LOONGARCH_BASE_ABI_[[FLAG]] ] diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index eef5b8eb8a0f..ceac763078e1 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -196,9 +196,6 @@ void ProfiledBinary::load() { exitWithError("not a valid Elf image", Path); TheTriple = Obj->makeTriple(); - // Current only support X86 - if (!TheTriple.isX86()) - exitWithError("unsupported target", TheTriple.getTriple()); LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); // Find the preferred load address for text sections. diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index c9a239f785d2..10d01027f18f 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -1650,16 +1650,13 @@ const EnumEntry<unsigned> ElfHeaderAVRFlags[] = { ENUM_ENT(EF_AVR_LINKRELAX_PREPARED, "relaxable"), }; -const EnumEntry<unsigned> ElfHeaderLoongArchFlags[] = { - ENUM_ENT(EF_LOONGARCH_BASE_ABI_ILP32S, "ILP32, SOFT-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_ILP32F, "ILP32, SINGLE-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_ILP32D, "ILP32, DOUBLE-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_LP64S, "LP64, SOFT-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_LP64F, "LP64, SINGLE-FLOAT"), - ENUM_ENT(EF_LOONGARCH_BASE_ABI_LP64D, "LP64, DOUBLE-FLOAT"), +static const EnumEntry<unsigned> ElfHeaderLoongArchFlags[] = { + ENUM_ENT(EF_LARCH_ABI_LP64, "LP64") + // FIXME: Change these and add more flags in the future, once all ABI definitions are finalized.
+ // See current definitions: + // https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html#_e_flags_identifies_abi_type_and_version }; - const EnumEntry<unsigned> ElfSymOtherFlags[] = { LLVM_READOBJ_ENUM_ENT(ELF, STV_INTERNAL), LLVM_READOBJ_ENUM_ENT(ELF, STV_HIDDEN), @@ -3369,8 +3366,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() { ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderAVRFlags), unsigned(ELF::EF_AVR_ARCH_MASK)); else if (e.e_machine == EM_LOONGARCH) - ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderLoongArchFlags), - unsigned(ELF::EF_LOONGARCH_BASE_ABI_MASK)); + ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderLoongArchFlags)); Str = "0x" + utohexstr(e.e_flags); if (!ElfFlags.empty()) Str = Str + ", " + ElfFlags; @@ -6522,8 +6518,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() { W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAVRFlags), unsigned(ELF::EF_AVR_ARCH_MASK)); else if (E.e_machine == EM_LOONGARCH) - W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderLoongArchFlags), - unsigned(ELF::EF_LOONGARCH_BASE_ABI_MASK)); + W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderLoongArchFlags)); else W.printFlags("Flags", E.e_flags); W.printNumber("HeaderSize", E.e_ehsize); diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn new file mode 100644 index 000000000000..cc3bb49a6ac7 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/AsmParser/BUILD.gn @@ -0,0 +1,24 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenAsmMatcher") { + visibility = [ ":AsmParser" ] + args = [ "-gen-asm-matcher" ] + td_file = "../LoongArch.td" +} + +static_library("AsmParser") { + output_name = "LLVMLoongArchAsmParser" + deps = [ + ":LoongArchGenAsmMatcher", + "//llvm/lib/MC", + "//llvm/lib/MC/MCParser", + "//llvm/lib/Support", + "//llvm/lib/Target/LoongArch/MCTargetDesc", + "//llvm/lib/Target/LoongArch/TargetInfo", + ] + include_dirs = [ ".." ] + sources = [ + # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py.
+ "LoongArchAsmParser.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn new file mode 100644 index 000000000000..e89db5200e89 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/BUILD.gn @@ -0,0 +1,102 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenCallingConv") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-callingconv" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenDAGISel") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-dag-isel" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenFastISel") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-fast-isel" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenGlobalISel") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-global-isel" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenMCPseudoLowering") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-pseudo-lowering" ] + td_file = "LoongArch.td" +} + +tablegen("LoongArchGenRegisterBank") { + visibility = [ ":LLVMLoongArchCodeGen" ] + args = [ "-gen-register-bank" ] + td_file = "LoongArch.td" +} + +static_library("LLVMLoongArchCodeGen") { + deps = [ + ":LoongArchGenCallingConv", + ":LoongArchGenDAGISel", + ":LoongArchGenFastISel", + ":LoongArchGenGlobalISel", + ":LoongArchGenMCPseudoLowering", + ":LoongArchGenRegisterBank", + "MCTargetDesc", + "TargetInfo", + "//llvm/include/llvm/Config:llvm-config", + "//llvm/lib/Analysis", + "//llvm/lib/CodeGen", + "//llvm/lib/CodeGen/AsmPrinter", + "//llvm/lib/CodeGen/GlobalISel", + "//llvm/lib/CodeGen/SelectionDAG", + "//llvm/lib/IR", + "//llvm/lib/MC", + "//llvm/lib/Support", + "//llvm/lib/Target", + ] + include_dirs = [ "." ] + sources = [ + "LoongArchAnalyzeImmediate.cpp", + "LoongArchAsmPrinter.cpp", + "LoongArchCCState.cpp", + "LoongArchCallLowering.cpp", + "LoongArchConstantIslandPass.cpp", + "LoongArchDelaySlotFiller.cpp", + "LoongArchExpandPseudo.cpp", + "LoongArchFrameLowering.cpp", + "LoongArchISelDAGToDAG.cpp", + "LoongArchISelLowering.cpp", + "LoongArchInstrInfo.cpp", + "LoongArchInstructionSelector.cpp", + "LoongArchLegalizerInfo.cpp", + "LoongArchMCInstLower.cpp", + "LoongArchMachineFunction.cpp", + "LoongArchModuleISelDAGToDAG.cpp", + "LoongArchOptimizePICCall.cpp", + "LoongArchPreLegalizerCombiner.cpp", + "LoongArchRegisterBankInfo.cpp", + "LoongArchRegisterInfo.cpp", + "LoongArchSubtarget.cpp", + "LoongArchTargetMachine.cpp", + "LoongArchTargetObjectFile.cpp", + ] +} + +# This is a bit different from most build files: Due to this group +# having the directory's name, "//llvm/lib/Target/LoongArch" will refer to this +# target, which pulls in the code in this directory *and all subdirectories*. +# For most other directories, "//llvm/lib/Foo" only pulls in the code directly +# in "llvm/lib/Foo". The forwarding targets in //llvm/lib/Target expect this +# different behavior. 
+group("LoongArch") { + deps = [ + ":LLVMLoongArchCodeGen", + "AsmParser", + "Disassembler", + "MCTargetDesc", + "TargetInfo", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn new file mode 100644 index 000000000000..0a9b4cf59441 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/Disassembler/BUILD.gn @@ -0,0 +1,23 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenDisassemblerTables") { + visibility = [ ":Disassembler" ] + args = [ "-gen-disassembler" ] + td_file = "../LoongArch.td" +} + +static_library("Disassembler") { + output_name = "LLVMLoongArchDisassembler" + deps = [ + ":LoongArchGenDisassemblerTables", + "//llvm/lib/MC/MCDisassembler", + "//llvm/lib/Support", + "//llvm/lib/Target/LoongArch/MCTargetDesc", + "//llvm/lib/Target/LoongArch/TargetInfo", + ] + include_dirs = [ ".." ] + sources = [ + # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. + "LoongArchDisassembler.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn new file mode 100644 index 000000000000..f0b96c965fe4 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/MCTargetDesc/BUILD.gn @@ -0,0 +1,74 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("LoongArchGenAsmWriter") { + visibility = [ ":MCTargetDesc" ] + args = [ "-gen-asm-writer" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenInstrInfo") { + visibility = [ ":tablegen" ] + args = [ "-gen-instr-info" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenMCCodeEmitter") { + visibility = [ ":MCTargetDesc" ] + args = [ "-gen-emitter" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenRegisterInfo") { + visibility = [ ":tablegen" ] + args = [ "-gen-register-info" ] + td_file = "../LoongArch.td" +} + +tablegen("LoongArchGenSubtargetInfo") { + visibility = [ ":tablegen" ] + args = [ "-gen-subtarget" ] + td_file = "../LoongArch.td" +} + +# This should contain tablegen targets generating .inc files included +# by other targets. .inc files only used by .cpp files in this directory +# should be in deps on the static_library instead. +group("tablegen") { + visibility = [ + ":MCTargetDesc", + "../TargetInfo", + ] + public_deps = [ + ":LoongArchGenInstrInfo", + ":LoongArchGenRegisterInfo", + ":LoongArchGenSubtargetInfo", + ] +} + +static_library("MCTargetDesc") { + output_name = "LLVMLoongArchDesc" + public_deps = [ ":tablegen" ] + deps = [ + ":LoongArchGenAsmWriter", + ":LoongArchGenMCCodeEmitter", + "//llvm/lib/MC", + "//llvm/lib/Support", + "//llvm/lib/Target/LoongArch/TargetInfo", + ] + include_dirs = [ ".." 
] + sources = [ + "LoongArchABIFlagsSection.cpp", + "LoongArchABIInfo.cpp", + "LoongArchAsmBackend.cpp", + "LoongArchELFObjectWriter.cpp", + "LoongArchELFStreamer.cpp", + "LoongArchInstPrinter.cpp", + "LoongArchMCAsmInfo.cpp", + "LoongArchMCCodeEmitter.cpp", + "LoongArchMCExpr.cpp", + "LoongArchMCTargetDesc.cpp", + "LoongArchNaClELFStreamer.cpp", + "LoongArchOptionRecord.cpp", + "LoongArchTargetStreamer.cpp", + ] +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn new file mode 100644 index 000000000000..a476bdd5fd16 --- /dev/null +++ b/llvm/utils/gn/secondary/llvm/lib/Target/LoongArch/TargetInfo/BUILD.gn @@ -0,0 +1,9 @@ +static_library("TargetInfo") { + output_name = "LLVMLoongArchInfo" + deps = [ "//llvm/lib/Support" ] + include_dirs = [ ".." ] + sources = [ + # Make `gn format` not collapse this, for sync_source_lists_from_cmake.py. + "LoongArchTargetInfo.cpp", + ] +} diff --git a/openmp/README.rst b/openmp/README.rst index ffa49e4d2a49..a12c62897914 100644 --- a/openmp/README.rst +++ b/openmp/README.rst @@ -137,7 +137,7 @@ Options for all Libraries Options for ``libomp`` ---------------------- -**LIBOMP_ARCH** = ``aarch64|arm|i386|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64`` +**LIBOMP_ARCH** = ``aarch64|arm|i386|mic|mips|mips64|ppc64|ppc64le|x86_64|riscv64|loongarch64`` The default value for this option is chosen based on probing the compiler for architecture macros (e.g., is ``__x86_64__`` predefined by compiler?). @@ -194,7 +194,7 @@ Optional Features **LIBOMP_OMPT_SUPPORT** = ``ON|OFF`` Include support for the OpenMP Tools Interface (OMPT). This option is supported and ``ON`` by default for x86, x86_64, AArch64, - PPC64 and RISCV64 on Linux* and macOS*. + PPC64, RISCV64 and loongarch64 on Linux* and macOS*. This option is ``OFF`` if this feature is not supported for the platform. **LIBOMP_OMPT_OPTIONAL** = ``ON|OFF`` diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt index c9dbcb2ab22c..31995363a689 100644 --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -30,7 +30,7 @@ if(${OPENMP_STANDALONE_BUILD}) # If adding a new architecture, take a look at cmake/LibompGetArchitecture.cmake libomp_get_architecture(LIBOMP_DETECTED_ARCH) set(LIBOMP_ARCH ${LIBOMP_DETECTED_ARCH} CACHE STRING - "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64).") + "The architecture to build for (x86_64/i386/arm/ppc64/ppc64le/aarch64/mic/mips/mips64/riscv64/loongarch64).") # Should assertions be enabled? They are on by default. 
set(LIBOMP_ENABLE_ASSERTIONS TRUE CACHE BOOL "enable assertions?") @@ -61,6 +61,8 @@ else() # Part of LLVM build set(LIBOMP_ARCH arm) elseif(LIBOMP_NATIVE_ARCH MATCHES "riscv64") set(LIBOMP_ARCH riscv64) + elseif(LIBOMP_NATIVE_ARCH MATCHES "loongarch64") + set(LIBOMP_ARCH loongarch64) else() # last ditch effort libomp_get_architecture(LIBOMP_ARCH) @@ -81,7 +83,7 @@ if(LIBOMP_ARCH STREQUAL "aarch64") endif() endif() -libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64) +libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64 loongarch64) set(LIBOMP_LIB_TYPE normal CACHE STRING "Performance,Profiling,Stubs library (normal/profile/stubs)") @@ -159,6 +161,7 @@ set(MIC FALSE) set(MIPS64 FALSE) set(MIPS FALSE) set(RISCV64 FALSE) +set(LoongArch64 FALSE) if("${LIBOMP_ARCH}" STREQUAL "i386" OR "${LIBOMP_ARCH}" STREQUAL "32") # IA-32 architecture set(IA32 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "x86_64" OR "${LIBOMP_ARCH}" STREQUAL "32e") # Intel(R) 64 architecture @@ -183,6 +186,8 @@ elseif("${LIBOMP_ARCH}" STREQUAL "mips64") # MIPS64 architecture set(MIPS64 TRUE) elseif("${LIBOMP_ARCH}" STREQUAL "riscv64") # RISCV64 architecture set(RISCV64 TRUE) +elseif("${LIBOMP_ARCH}" STREQUAL "loongarch64") # LoongArch64 architecture + set(LoongArch64 TRUE) endif() # Set some flags based on build_type diff --git a/openmp/runtime/README.txt b/openmp/runtime/README.txt index 874a5634e93e..ddd8b0e4282d 100644 --- a/openmp/runtime/README.txt +++ b/openmp/runtime/README.txt @@ -54,6 +54,7 @@ Architectures Supported * IBM(R) Power architecture (little endian) * MIPS and MIPS64 architecture * RISCV64 architecture +* LoongArch64 architecture Supported RTL Build Configurations ================================== diff --git a/openmp/runtime/cmake/LibompGetArchitecture.cmake b/openmp/runtime/cmake/LibompGetArchitecture.cmake index dd60a2d347b1..72cbf64d4bbf 100644 --- a/openmp/runtime/cmake/LibompGetArchitecture.cmake +++ b/openmp/runtime/cmake/LibompGetArchitecture.cmake @@ -47,6 +47,8 @@ function(libomp_get_architecture return_arch) #error ARCHITECTURE=mips #elif defined(__riscv) && __riscv_xlen == 64 #error ARCHITECTURE=riscv64 + #elif defined(__loongarch__) && defined(__loongarch64) + #error ARCHITECTURE=loongarch64 #else #error ARCHITECTURE=UnknownArchitecture #endif diff --git a/openmp/runtime/cmake/LibompMicroTests.cmake b/openmp/runtime/cmake/LibompMicroTests.cmake index 1ca3412edc8e..d34405673b44 100644 --- a/openmp/runtime/cmake/LibompMicroTests.cmake +++ b/openmp/runtime/cmake/LibompMicroTests.cmake @@ -214,6 +214,9 @@ else() elseif(${RISCV64}) libomp_append(libomp_expected_library_deps libc.so.6) libomp_append(libomp_expected_library_deps ld.so.1) + elseif(${LoongArch64}) + libomp_append(libomp_expected_library_deps libc.so.6) + libomp_append(libomp_expected_library_deps ld.so.1) endif() libomp_append(libomp_expected_library_deps libpthread.so.0 IF_FALSE STUBS_LIBRARY) libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC) diff --git a/openmp/runtime/cmake/LibompUtils.cmake b/openmp/runtime/cmake/LibompUtils.cmake index b1de242372b8..8d6b6efea323 100644 --- a/openmp/runtime/cmake/LibompUtils.cmake +++ b/openmp/runtime/cmake/LibompUtils.cmake @@ -109,6 +109,8 @@ function(libomp_get_legal_arch return_arch_string) set(${return_arch_string} "MIPS64" PARENT_SCOPE) elseif(${RISCV64}) set(${return_arch_string} "RISCV64" PARENT_SCOPE) + elseif(${LoongArch64}) + 
set(${return_arch_string} "LoongArch64" PARENT_SCOPE) else() set(${return_arch_string} "${LIBOMP_ARCH}" PARENT_SCOPE) libomp_warning_say("libomp_get_legal_arch(): Warning: Unknown architecture: Using ${LIBOMP_ARCH}") diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h index ce00362f04ca..06fd323a52a3 100644 --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -254,6 +254,18 @@ public: #elif __NR_sched_getaffinity != 5196 #error Wrong code for getaffinity system call. #endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_LOONGARCH64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 122 +#elif __NR_sched_setaffinity != 122 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 123 +#elif __NR_sched_getaffinity != 123 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#else #error Unknown or unsupported architecture #endif /* KMP_ARCH_* */ #elif KMP_OS_FREEBSD diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index c932d450c84e..ba25d2e169bb 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -700,7 +700,7 @@ void __kmpc_flush(ident_t *loc) { } #endif // KMP_MIC #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ - KMP_ARCH_RISCV64) + KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64) // Nothing to see here move along #elif KMP_ARCH_PPC64 // Nothing needed here (we have a real MB above). diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index 02efaa1b2613..f1d24962acb9 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -178,7 +178,7 @@ typedef unsigned long long kmp_uint64; #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC #else #error "Can't determine size_t printf format specifier." 
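The kmp_affinity.h hunk above pins LoongArch64's affinity syscalls to the asm-generic numbers: sched_setaffinity = 122 and sched_getaffinity = 123. A minimal sketch for sanity-checking those numbers on a loongarch64 Linux host follows; it is illustrative only, not part of the patch, and LA64_NR_sched_getaffinity is a hypothetical local macro. Note the raw syscall, unlike the glibc wrapper, returns the size in bytes of the kernel's CPU mask on success.

  /* Hypothetical check: invoke sched_getaffinity by its raw number. */
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>

  #define LA64_NR_sched_getaffinity 123  /* asm-generic value; loongarch64 only */

  int main(void) {
    unsigned long mask[16];
    memset(mask, 0, sizeof(mask));
    /* pid 0 = current thread; the raw syscall returns bytes copied, or -1. */
    long ret = syscall(LA64_NR_sched_getaffinity, 0, sizeof(mask), mask);
    if (ret < 0) {
      perror("sched_getaffinity");
      return 1;
    }
    printf("cpumask: %ld bytes, word0 = 0x%lx\n", ret, mask[0]);
    return 0;
  }

A wrong number would typically fail at run time with ENOSYS or EINVAL, the same misconfiguration the #error guards above are meant to catch at compile time.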
@@ -1044,7 +1044,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); #endif /* KMP_OS_WINDOWS */ #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 #if KMP_OS_WINDOWS #undef KMP_MB #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index bbbd72dd6951..6392d17b1c66 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -92,6 +92,7 @@ #define KMP_ARCH_MIPS 0 #define KMP_ARCH_MIPS64 0 #define KMP_ARCH_RISCV64 0 +#define KMP_ARCH_LOONGARCH64 0 #if KMP_OS_WINDOWS #if defined(_M_AMD64) || defined(__x86_64) @@ -135,6 +136,9 @@ #elif defined __riscv && __riscv_xlen == 64 #undef KMP_ARCH_RISCV64 #define KMP_ARCH_RISCV64 1 +#elif defined __loongarch__ && defined __loongarch64 +#undef KMP_ARCH_LOONGARCH64 +#define KMP_ARCH_LOONGARCH64 1 #endif #endif @@ -199,7 +203,7 @@ // TODO: Fixme - This is clever, but really fugly #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ - KMP_ARCH_RISCV64) + KMP_ARCH_RISCV64 + KMP_ARCH_LOONGARCH64) #error Unknown or unsupported architecture #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index b8d470528798..3895ece65adc 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -8751,7 +8751,7 @@ __kmp_determine_reduction_method( int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ - KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h index a452b7643bdb..d6d2cb08508e 100644 --- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h +++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h @@ -162,6 +162,10 @@ #define ITT_ARCH_ARM64 6 #endif /* ITT_ARCH_ARM64 */ +#ifndef ITT_ARCH_LOONGARCH64 +#define ITT_ARCH_LOONGARCH64 7 +#endif /* ITT_ARCH_LOONGARCH64 */ + #ifndef ITT_ARCH #if defined _M_IX86 || defined __i386__ #define ITT_ARCH ITT_ARCH_IA32 @@ -175,6 +179,8 @@ #define ITT_ARCH ITT_ARCH_ARM64 #elif defined __powerpc64__ #define ITT_ARCH ITT_ARCH_PPC64 +#elif defined __loongarch__ && defined __loongarch64 +#define ITT_ARCH ITT_ARCH_LOONGARCH64 #endif #endif diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index b4a45c1ac6f5..4f80a810c272 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -1725,6 +1725,157 @@ __kmp_invoke_microtask: #endif /* KMP_ARCH_RISCV64 */ +#if KMP_ARCH_LOONGARCH64 + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)( int *gtid, int *tid, ... ); +// +// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, +// void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// (*pkfn)( & gtid, & tid, argv[0], ... 
); + +// return 1; +// } +// +// parameters: +// a0: pkfn +// a1: gtid +// a2: tid +// a3: argc +// a4: p_argv +// a5: exit_frame_ptr +// +// Temp. registers: +// +// t0: used to calculate the dynamic stack size / used to hold pkfn address +// t1: used as temporary for stack placement calculation +// t2: used as temporary for stack arguments +// t3: used as temporary for number of remaining pkfn parms +// t4: used to traverse p_argv array +// +// return: a0 (always 1/TRUE) +// + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + .globl __kmp_invoke_microtask + .p2align 3 + .type __kmp_invoke_microtask,@function +__kmp_invoke_microtask: + + // First, save ra and fp + addi.d $sp, $sp, -16 + st.d $ra, $sp, 8 + st.d $fp, $sp, 0 + addi.d $fp, $sp, 16 + + // Compute the dynamic stack size: + // + // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by + // reference + // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' + // function by register. Given that we have 8 of such registers (a[0-7]) + // and two + 'argc' arguments (consider &gtid and &tid), we need to + // reserve max(0, argc - 6)*8 extra bytes + // + // The total number of bytes is then max(0, argc - 6)*8 + 8 + + // Compute max(0, argc - 6) using the following bithack: + // max(0, x) = x - (x & (x >> 63)), where x := argc - 6 + // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + addi.d $t0, $a3, -6 + srai.d $t1, $t0, 63 + and $t1, $t0, $t1 + sub.d $t0, $t0, $t1 + + addi.d $t0, $t0, 1 + + slli.d $t0, $t0, 3 // t0: total number of bytes for stack storing arguments. + sub.d $sp, $sp, $t0 + + move $t0, $a0 + move $t3, $a3 + move $t4, $a4 + +#if OMPT_SUPPORT + // Save frame pointer into exit_frame + st.d $fp, $a5, 0 +#endif + + // Prepare arguments for the pkfn function (first 8 using $a0-$a7 registers) + + st.w $a1, $fp, -20 // gtid + st.w $a2, $fp, -24 // tid + + addi.d $a0, $fp, -20 // &gtid + addi.d $a1, $fp, -24 // &tid + + beqz $t3, .L_kmp_3 + ld.d $a2, $t4, 0 // argv[0] + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a3, $t4, 8 // argv[1] + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a4, $t4, 16 // argv[2] + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a5, $t4, 24 // argv[3] + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a6, $t4, 32 // argv[4] + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a7, $t4, 40 // argv[5] + + // Prepare any additional argument passed through the stack + addi.d $t4, $t4, 48 + move $t1, $sp + b .L_kmp_2 +.L_kmp_1: + ld.d $t2, $t4, 0 + st.d $t2, $t1, 0 + addi.d $t4, $t4, 8 + addi.d $t1, $t1, 8 +.L_kmp_2: + addi.d $t3, $t3, -1 + bnez $t3, .L_kmp_1 + +.L_kmp_3: + // Call pkfn function + jirl $ra, $t0, 0 + + // Restore stack and return + + addi.d $a0, $zero, 1 + + addi.d $sp, $fp, -16 + ld.d $fp, $sp, 0 + ld.d $ra, $sp, 8 + addi.d $sp, $sp, 16 + jr $ra +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_LOONGARCH64 */ + #if KMP_ARCH_ARM || KMP_ARCH_MIPS .data .comm .gomp_critical_user_,32,8 @@ -1736,7 +1887,8 @@ __kmp_unnamed_critical_addr: .size __kmp_unnamed_critical_addr,4 #endif /* KMP_ARCH_ARM */ -#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 +#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \ + KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 #ifndef KMP_PREFIX_UNDERSCORE # define KMP_PREFIX_UNDERSCORE(x) x #endif @@ -1751,7 +1903,7 @@ 
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 #endif #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || - KMP_ARCH_RISCV64 */ + KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */ #if KMP_OS_LINUX # if KMP_ARCH_ARM diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index 91edf0254a77..2a0154ee8a66 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -2447,7 +2447,7 @@ finish: // Clean up and exit. #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ - KMP_ARCH_PPC64 || KMP_ARCH_RISCV64) + KMP_ARCH_PPC64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64) // we really only need the case with 1 argument, because CLANG always build // a struct of pointers to shared variables referenced in the outlined function diff --git a/openmp/runtime/tools/lib/Platform.pm b/openmp/runtime/tools/lib/Platform.pm index 38593a154d03..2d262ae69ff7 100644 --- a/openmp/runtime/tools/lib/Platform.pm +++ b/openmp/runtime/tools/lib/Platform.pm @@ -63,6 +63,8 @@ sub canon_arch($) { $arch = "mips"; } elsif ( $arch =~ m{\Ariscv64} ) { $arch = "riscv64"; + } elsif ( $arch =~ m{\Aloongarch64} ) { + $arch = "loongarch64"; } else { $arch = undef; }; # if @@ -97,6 +99,7 @@ sub canon_mic_arch($) { "mips" => "MIPS", "mips64" => "MIPS64", "riscv64" => "RISC-V (64-bit)", + "loongarch64" => "LoongArch64", ); sub legal_arch($) { @@ -119,6 +122,7 @@ sub canon_mic_arch($) { "mic" => "intel64", "mips" => "mips", "mips64" => "MIPS64", + "loongarch64" => "loongarch64", ); sub arch_opt($) { @@ -225,6 +229,8 @@ sub target_options() { $_host_arch = "mips"; } elsif ( $hardware_platform eq "riscv64" ) { $_host_arch = "riscv64"; + } elsif ( $hardware_platform eq "loongarch64" ) { + $_host_arch = "loongarch64"; } else { die "Unsupported host hardware platform: \"$hardware_platform\"; stopped"; }; # if @@ -414,7 +420,7 @@ the script assumes host architecture is target one. Input string is an architecture name to canonize. The function recognizes many variants, for example: C<32e>, C, C, etc. Returned string is a canonized architecture name, -one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C or C is input string is not recognized. +one of: C<32>, C<32e>, C<64>, C, C, C, C, C, C, C, C or C is input string is not recognized. =item B diff --git a/openmp/runtime/tools/lib/Uname.pm b/openmp/runtime/tools/lib/Uname.pm index 99fe1cdbf00c..8a976addcff0 100644 --- a/openmp/runtime/tools/lib/Uname.pm +++ b/openmp/runtime/tools/lib/Uname.pm @@ -158,6 +158,8 @@ if ( 0 ) { $values{ hardware_platform } = "mips"; } elsif ( $values{ machine } =~ m{\Ariscv64\z} ) { $values{ hardware_platform } = "riscv64"; + } elsif ( $values{ machine } =~ m{\Aloongarch64\z} ) { + $values{ hardware_platform } = "loongarch64"; } else { die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped"; }; # if -- Gitee From d5fdcb1f29bbe83f0b69156dff9b47429dab9caa Mon Sep 17 00:00:00 2001 From: ludao518 Date: Mon, 29 Apr 2024 02:56:01 +0000 Subject: [PATCH 2/6] update llvm-build/build.py. 
Signed-off-by: ludao518 --- llvm-build/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm-build/build.py b/llvm-build/build.py index 1cc96b12b344..801010b62135 100755 --- a/llvm-build/build.py +++ b/llvm-build/build.py @@ -74,7 +74,7 @@ class BuildConfig(): self.enable_check_abi = args.enable_check_abi self.discover_paths() - self.TARGETS = 'AArch64;ARM;BPF;Mips;RISCV;X86;LoongArch' + self.TARGETS = 'AArch64;ARM;BPF;Mips;RISCV;X86' self.ORIG_ENV = dict(os.environ) self.VERSION = None # autodetected -- Gitee From b0a18f6596cac88435468f94c9ada2f80ce53a7f Mon Sep 17 00:00:00 2001 From: ludao518 Date: Mon, 29 Apr 2024 03:10:49 +0000 Subject: [PATCH 3/6] update lld/ELF/Relocations.cpp. Signed-off-by: ludao518 --- lld/ELF/Relocations.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 93841d7d9a4c..d83c6a6222d2 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -934,12 +934,12 @@ static bool canDefineSymbolInExecutable(Symbol &sym) { // from llvm-16. if (config->emachine == EM_LOONGARCH) { if ((sym.stOther & 0x3) != STV_PROTECTED) - return true; + return true; } else { // the visibility of the symbol in the output file we are producing. That is // why we use Sym.stOther. if ((sym.stOther & 0x3) == STV_DEFAULT) - return true; + return true; } // If we are allowed to break address equality of functions, defining -- Gitee From 0fed8d880f2dac8e237db268aa9053334064622c Mon Sep 17 00:00:00 2001 From: ludao518 Date: Mon, 29 Apr 2024 03:49:37 +0000 Subject: [PATCH 4/6] update llvm-build/build.py. Signed-off-by: ludao518 --- llvm-build/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm-build/build.py b/llvm-build/build.py index 801010b62135..bd40cce39e45 100755 --- a/llvm-build/build.py +++ b/llvm-build/build.py @@ -1173,7 +1173,7 @@ class SysrootComposer(BuildUtils): sysroot_multi_lib_dir, ld_musl_lib, gn_args) if target_cpu == 'loongarch64': - gn_args += ' is_2k1500=true musl_target_multilib=la264' + gn_args += ' is_ls2k_flag=true musl_target_multilib=la264' multi_lib_dir = os.path.join(ohos_lib_dir, 'la264') sysroot_multi_lib_dir = os.path.join(sysroot_lib_dir, 'la264') ld_musl_lib = os.path.join(sysroot_multi_lib_dir, 'ld-musl-{}.so.1'.format(ld_arch)) -- Gitee From c1f35bfbeab6e22d98d090b54f4c95da3e9266f6 Mon Sep 17 00:00:00 2001 From: ludao518 Date: Mon, 29 Apr 2024 05:51:45 +0000 Subject: [PATCH 5/6] update llvm-build/build.py. Signed-off-by: ludao518 --- llvm-build/build.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm-build/build.py b/llvm-build/build.py index bd40cce39e45..8e51c077f320 100755 --- a/llvm-build/build.py +++ b/llvm-build/build.py @@ -1345,8 +1345,7 @@ class LlvmLibs(BuildUtils): llvm_path = self.merge_out_path('llvm_make') arch_list = [self.liteos_triple('arm'), self.open_ohos_triple('arm'), self.open_ohos_triple('aarch64'), self.open_ohos_triple('riscv64'), - self.open_ohos_triple('mipsel'), self.open_ohos_triple('x86_64'), - self.open_ohos_triple('loongarch64')] + self.open_ohos_triple('mipsel'), self.open_ohos_triple('x86_64')] libcxx_ndk_install = self.merge_out_path('libcxx-ndk') self.check_create_dir(libcxx_ndk_install) -- Gitee From 12d4701b1633669f8c1da43f567f9488d24097a3 Mon Sep 17 00:00:00 2001 From: ludao518 Date: Mon, 29 Apr 2024 07:15:09 +0000 Subject: [PATCH 6/6] update llvm-build/build.py. 
Signed-off-by: ludao518 --- llvm-build/build.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm-build/build.py b/llvm-build/build.py index 8e51c077f320..967d3ba4cf10 100755 --- a/llvm-build/build.py +++ b/llvm-build/build.py @@ -2575,9 +2575,6 @@ def main(): if not build_config.no_build_x86_64: configs.append(('x86_64', build_utils.open_ohos_triple('x86_64'))) - if not build_config.no_build_loongarch64: - configs.append(('loongarch64', build_utils.open_ohos_triple('loongarch64'))) - if build_config.build_ncurses: llvm_libs.build_ncurses(llvm_make, llvm_install) -- Gitee
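A closing note on the __kmp_invoke_microtask stub added to z_Linux_asm.S in PATCH 1/6: the branchless max(0, x) = x - (x & (x >> 63)) trick used there to size the argument area is easy to verify in C. The sketch below mirrors the srai.d/and/sub.d sequence; max0 is an illustrative name, and the code assumes 64-bit arithmetic with an arithmetic right shift on signed values, which is what the targeted compilers do even though C leaves it implementation-defined.

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  /* max(0, x): x >> 63 is all ones when x < 0, so x & (x >> 63)
     equals x for negative x and 0 otherwise. */
  static int64_t max0(int64_t x) {
    int64_t sign = x >> 63;  /* srai.d $t1, $t0, 63 */
    return x - (x & sign);   /* and $t1, $t0, $t1 ; sub.d $t0, $t0, $t1 */
  }

  int main(void) {
    for (int argc = 0; argc <= 12; ++argc) {
      /* Prologue reservation: max(0, argc - 6)*8 spill bytes + 8 for gtid/tid. */
      int64_t bytes = (max0((int64_t)argc - 6) + 1) * 8;
      assert(bytes == (argc > 6 ? (argc - 6 + 1) * 8 : 8));
      printf("argc=%2d -> %2lld bytes\n", argc, (long long)bytes);
    }
    return 0;
  }

For argc <= 6 the stub reserves only the 8 bytes used to pass gtid and tid by reference; each argument past the six forwarded in $a2..$a7 adds one 8-byte stack slot, matching the copy loop at .L_kmp_1.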